Tables
erDiagram
Scores {
student_id int PK
subject varchar PK
score int
exam_date varchar PK
}
Mermaid
복사
Question
Write a solution to find the students who have shown improvement. A student is considered to have shown improvement if they meet both of these conditions:
•
Have taken exams in the same subject on at least two different dates
•
Their latest score in that subject is higher than their first score
Return the result table ordered by student_id, subject in ascending order.
The result format is in the following example.
DataFlow
make table -> first score -> groupby student_id, subject -> having count date >= 2 > -> min date, max datescore INNER JOIN -> 필요한 컬러만 남김case when -> first score < latest score
make table -> min date, min score
make table -> max date, max score
min max join -> where min < max score
이 min, max 테이블을 만드는 과정에서 join을 두번하는 경우가 발생 -> 제일 빠른 날짜를 찾아서, 그날의 점수를 가지고 와야하기 때문
처음에는 데이터를 날짜만을 가지고 추출할 생각을 하다가 포기하고 단계적으로 해결하기로 함
Answer
SELECT
f.student_id,
f.subject,
f.score AS first_score,
l.score AS latest_score
FROM (
SELECT
s1.student_id,
s1.subject,
s1.score,
s1.exam_date
FROM Scores s1
INNER JOIN (
SELECT
student_id,
subject,
MIN(exam_date) AS first_date
FROM Scores
GROUP BY student_id, subject
HAVING COUNT(exam_date) >= 2
) s2
ON s1.student_id = s2.student_id
AND s1.subject = s2.subject
AND s1.exam_date = s2.first_date
) f
INNER JOIN (
SELECT
s1.student_id,
s1.subject,
s1.score,
s1.exam_date
FROM Scores s1
INNER JOIN (
SELECT
student_id,
subject,
MAX(exam_date) AS latest_date
FROM Scores
GROUP BY student_id, subject
HAVING COUNT(exam_date) >= 2
) s2
ON s1.student_id = s2.student_id
AND s1.subject = s2.subject
AND s1.exam_date = s2.latest_date
) l
ON f.student_id = l.student_id
AND f.subject = l.subject
WHERE f.score < l.score
ORDER BY f.student_id ASC, f.subject ASC;
SQL
복사
쿼리의 가독성이 떨어지고 성능도 만족스럽지가 않음.
다른 사람들의 솔루션도 찾아봄.
WITH A as(
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY student_id, subject ORDER BY exam_date) as rn,
COUNT(*) OVER (PARTITION BY student_id, subject) as ct
FROM Scores
),
B as(
SELECT
student_id,
subject,
MAX(CASE WHEN rn=1 THEN score END) as first_score,
MAX(CASE WHEN rn = ct THEN score END) as latest_score,
COUNT(*) as ct
FROM A
GROUP BY student_id, subject
)
SELECT
student_id,
subject,
first_score,
latest_score
FROM B
WHERE ct > 1
AND latest_score > first_score
ORDER BY student_id, subject;
SQL
복사
마지막 순서를 ct로 잡고 최신 점수를 추출하는 방법
WITH Ranked AS (
SELECT
student_id,
subject,
FIRST_VALUE(score) OVER(PARTITION BY student_id,subject ORDER BY exam_date) AS first_score,
FIRST_VALUE(score) OVER(PARTITION BY student_id,subject ORDER BY exam_date DESC) AS latest_score
FROM Scores
)
SELECT
DISTINCT *
FROM Ranked
WHERE first_score<latest_score
ORDER BY student_id,subject;
SQL
복사
winodw function으로 해결
처음값, 마지막 값만 추출 -> 크기 비교
INNER JOIN을 두번 하는 형태로 했을 때 답변이 내가 처음 생각했던 것과 제일 가깝고,
성능도 좋게 나온다.
WITH g AS (
SELECT
student_id,
subject,
MIN(exam_date) AS first_date,
MAX(exam_date) AS latest_date
FROM Scores
GROUP BY student_id, subject
HAVING COUNT(*) >= 2
)
SELECT
f.student_id,
f.subject,
f.score AS first_score,
l.score AS latest_score
FROM g
INNER JOIN Scores f
ON f.student_id = g.student_id
AND f.subject = g.subject
AND f.exam_date = g.first_date
INNER JOIN Scores l
ON l.student_id = g.student_id
AND l.subject = g.subject
AND l.exam_date = g.latest_date
WHERE f.score < l.score
ORDER BY f.student_id, f.subject;
SQL
복사