SQL/문제

SQL 과제

jjangdoll 2025. 1. 2. 12:42

4.

동일한 결제수단(payment_type)에서 이루어진 다른 결제의 평균 금액보다 높은 결제들 중에서, 해당 결제가 해당 결제수단의 총 결제 금액 대비 20% 이상을 차지하는 주문을 조회하세요.
payment_ratio는 결제 금액이 총 결제 금액에서 차지하는 비율을 소수점 둘째 자리까지 계산해주세요.

# 1) 동일 결제수단 별 -> 결제수단끼리 그룹
SELECT
	payment_type,
	sum(payment_value) payment_value,
	avg(payment_value) avg_payment
FROM 
	payments p 
group by
	payment_type;
with a as
	(SELECT
		payment_type,
		sum(payment_value) total_payment_value,
		avg(payment_value) avg_payment
	FROM 
		payments p 
	group by
		payment_type)
SELECT
	p.order_id,
	p.payment_type ,
	p.payment_value,
	round(p.payment_value / a.total_payment_value *100,2) payment__ratio  
                     # 4) 비율 소수점 둘째 자리까지 계산
FROM
	payments p join a on p.payment_type = a.payment_type
where                # 2) 평균 결제 금액보다 높은 결제 -> where
	p.payment_value > a.avg_payment  
                     # 3) 높은 결제가 총 결제 금액 20% 차지하는 주문 조회
	AND p.payment_value / a.total_payment_value >= 0.2 ;

문제 뜯어보고 단계별로 차근차근 

5.

orders 테이블에서 월별(년-월) 주문 건수를 계산하되, 주문이 없는 달도 0건으로 포함하고, 지난달 대비 주문 건수 증감율(growth_rate)을 계산하세요.
결과는 년-월(month) 순서대로 정렬하며, 증감율은 소수점 둘째 자리까지 반올림 해주세요.

WITH RECURSIVE 테이블명 (컬럼명) AS (
    SELECT 초기값 
    UNION ALL
    SELECT 반복할 쿼리 -- 이전값에 대해 쿼리 적용
    WHERE 반복 조건 -- FALSE가 나오면 반복종료!! where, limit 등 사용하시면됩니다
)
SELECT * FROM 테이블명;

DATE_ADD(날짜, INTERVAL) # 시간 더하는 함수
DATE_SUB(날짜, INTERVAL) # 시간 빼는 함수
STR_TO_DATE(날짜, '형식')  # 문자를 날짜 형식으로 변환

# 오류나는 이유 : '%Y-%m'만으로 날짜 형식을 완벽하게 생성할 수 없기 때문에 날짜를 추가해줘 형식을 맞춰줘야함
SELECT date_add(str_to_date(left(order_purchase_timestamp,7),'%Y-%m'), interval 1 month) 
FROM orders o
# 일을 추가해 형식을 맞추고 숫자형식으로 변환
SELECT date_format(date_add(str_to_date(concat(left(order_purchase_timestamp,7),'-01'),'%Y-%m-%d'), interval 1 month),'%Y-%m') months
from orders
# 조금 더 간단?하게 가능
SELECT date_format(date_add(concat(date_format(order_purchase_timestamp,'%Y-%m'),'-01'),interval 1 month),'%Y-%m') months from orders
# 1.년월, 주문건수 확인 (누락년월 확인)
SELECT 
	date_format(order_purchase_timestamp,'%Y-%m') y_m,
	count(order_id) cnt_orders # 월별(년-월) 주문 건수 계산 
FROM 
	orders o 
group by	1
order BY	1;
# 2. 누락년월 생성 
WITH RECURSIVE mmonth AS (
    SELECT date_format(min(order_purchase_timestamp),'%Y-%m') as y_m
    FROM orders
    UNION ALL
    SELECT date_format(date_add(concat(y_m,'-01'),interval 1 month),'%Y-%m')
    FROM mmonth
    WHERE  date_add(concat(y_m,'-01'),interval 1 month) 
    		<= (select date_format(max(order_purchase_timestamp),'%Y-%m') from orders)
)
SELECT *
FROM mmonth ;
# 3. 주문건수 포함(누락년월은 0으로 표시)
WITH RECURSIVE mmonth AS (
    SELECT date_format(min(order_purchase_timestamp),'%Y-%m') as y_m
    FROM orders
    UNION ALL
    SELECT date_format(date_add(concat(y_m,'-01'),interval 1 month),'%Y-%m')
    FROM mmonth
    WHERE  date_add(concat(y_m,'-01'),interval 1 month) 
    		<= (select date_format(max(order_purchase_timestamp),'%Y-%m') from orders)
)
SELECT 
	m.y_m,
	COALESCE(count(o.order_id),0) cur_orders 
    # 주문 없는 달 0건으로 포함 -> coalesce
FROM mmonth m LEFT JOIN orders o ON date_format(order_purchase_timestamp,'%Y-%m')=y_m 
GROUP BY 1
ORDER BY 1;
# 4. 지난달 주문량 계산 -> lag 함수 이용 
WITH RECURSIVE mmonth AS (
    SELECT date_format(min(order_purchase_timestamp),'%Y-%m') as y_m
    FROM orders
    UNION ALL
    SELECT date_format(date_add(concat(y_m,'-01'),interval 1 month),'%Y-%m')
    FROM mmonth
    WHERE  date_add(concat(y_m,'-01'),interval 1 month) 
    		<= (select date_format(max(order_purchase_timestamp),'%Y-%m') from orders)
),
monthly_order as 
	(SELECT 
		m.y_m,
		COALESCE(count(o.order_id),0) cur_orders
	FROM mmonth m LEFT JOIN orders o ON date_format(order_purchase_timestamp,'%Y-%m')=y_m 
	GROUP BY 1
	ORDER BY 1)
	SELECT
		y_m,
		cur_orders,
		lag(cur_orders) over (order by y_m ) as prev_order
	FROM monthly_order;
# 5. 주문 증감율 계산 (현재-과거)/과거*100
WITH RECURSIVE mmonth AS (
    SELECT date_format(min(order_purchase_timestamp),'%Y-%m') as y_m
    FROM orders
    UNION ALL
    SELECT date_format(date_add(concat(y_m,'-01'),interval 1 month),'%Y-%m')
    FROM mmonth
    WHERE  date_add(concat(y_m,'-01'),interval 1 month) 
    		<= (select date_format(max(order_purchase_timestamp),'%Y-%m') from orders)
),
monthly_order as 
	(SELECT 
		m.y_m,
		COALESCE(count(o.order_id),0) cnt_orders
	FROM mmonth m LEFT JOIN orders o ON date_format(order_purchase_timestamp,'%Y-%m')=y_m 
	GROUP BY 1
	ORDER BY 1)
SELECT 
	y_m,
	cnt_orders,
	round(case when prev_order IS NULL then NULL
	 when prev_order = 0 then cnt_orders *100
	 else (cnt_orders - prev_order)/prev_order *100 end ,2) growth_rate
FROM 
	(SELECT
		y_m,
		cnt_orders,
		lag(cnt_orders) over (order by y_m ) as prev_order
	FROM 
		monthly_order 
	) a
order by y_m ;

진짜 너무 어렵다. 재귀적 CTE(WITH RECURSIVE) 부분은 다시 봐야할듯

6.

각 결제 방식(payment_type)별 결제 금액의 평균 ± 3 표준편차(standard deviation)를 기준으로 이상치를 ‘Yes’/’No’로 탐지하세요. 결제 금액이 이 범위를 벗어나면 이상치로 간주합니다.
결제 금액이 큰 순으로 정렬해주세요.

# 1) 결제 방식 별 -> 결제 방식으로 그룹
SELECT 
	payment_type,
	avg(payment_value) avg_payment,
	STD(payment_value) std_payment
FROM 
	payments p 
group by
	payment_type
with a as
	(SELECT 
		payment_type,
		avg(payment_value) avg_payment,
		STD(payment_value) std_payment
	FROM 
		payments p 
	group by
		payment_type)
SELECT
	p.order_id,
	p.payment_type,
	p.payment_value,
    # 2) 결제 금액 평균, 표준편차 +-3 벗어나면 이상치, 이상치면 YES, 아니면 NO -> case when 
	case when a.avg_payment-(3*std_payment) > p.payment_value then 'YES'
		 when a.avg_payment+(3*std_payment) < p.payment_value then 'YES'
		 else 'NO' end is_outlier
FROM 
	payments p join a on p.payment_type = a.payment_type
ORDER BY # 3) 결제 금액 큰 순으로 정렬
	p.payment_value desc ;

표준편차 함수 STD, STDDEV