Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2a2285e

Browse files
Add files via upload
1 parent 1da0998 commit 2a2285e

File tree

1 file changed

+233
-0
lines changed

1 file changed

+233
-0
lines changed

‎COHORT ANALYSIS REPORT.sql‎

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
-- Project Name :- Analyzing Customer Retention and Lifetime Value Through Cohort Analysis
2+
-- DEVIKA AGARWAL
3+
/*
4+
The goal of this project is to perform cohort analysis to understand and
5+
improve customer retention on Gameflix using SQL. Here's how we can approach it:
6+
7+
1. Data Preparation
8+
2. Creating Customer Cohorts & Analyzing Customer Retention
9+
3. Customer Lifetime Value (CLTV)
10+
*/
11+
-- 1. Data Preparation
12+
-- step (1) - Checking for data quality and consistency.
13+
-- Checking for missing values in the ORDER table
14+
-- Counts NULLs per column of the "ORDER" table, one output column per field.
-- Each SUM adds a 0/1 flag per row, so a non-zero value is the number of rows
-- missing that field (the SUMs are NULL, not 0, if the table has no rows).
SELECT
    SUM(CASE WHEN o."ORDER_ID" IS NULL THEN 1 ELSE 0 END) AS missing_order_id,
    SUM(CASE WHEN o."USER_ID" IS NULL THEN 1 ELSE 0 END) AS missing_user_id,
    SUM(CASE WHEN o."PROMO_ID" IS NULL THEN 1 ELSE 0 END) AS missing_promo_id,
    SUM(CASE WHEN o."ORDER_DATE" IS NULL THEN 1 ELSE 0 END) AS missing_order_date,
    SUM(CASE WHEN o."ORDER_SEQ" IS NULL THEN 1 ELSE 0 END) AS missing_order_seq,
    SUM(CASE WHEN o."REDEMPTION_DATE" IS NULL THEN 1 ELSE 0 END) AS missing_redemption_date,
    -- Fixed: this flag previously re-tested "REDEMPTION_DATE", so
    -- "VALIDITY_TILL_DATE" was never actually checked.
    SUM(CASE WHEN o."VALIDITY_TILL_DATE" IS NULL THEN 1 ELSE 0 END) AS missing_validity_till_date,
    -- NOTE(review): "ORDER_STAUS" is kept exactly as written; confirm the
    -- column really is spelled this way in the table definition.
    SUM(CASE WHEN o."ORDER_STAUS" IS NULL THEN 1 ELSE 0 END) AS missing_order_status
FROM "ORDER" AS o;
24+
-- zero missing values in the orders table
25+
26+
-- Checking for missing values in the promotional_plan table
27+
-- NULL audit for promotional_plan: each output column is the number of rows
-- where the named field is NULL. The boolean test is cast to 0/1 and summed.
SELECT
    SUM((plan."PROMO_ID" IS NULL)::int)            AS missing_promo_id,
    SUM((plan."PROMO_PLAN" IS NULL)::int)          AS missing_promo_plan,
    SUM((plan."PROMO_OFFER_TYPE" IS NULL)::int)    AS missing_promo_offer_type,
    SUM((plan."SUBSCRIPTION_TYPE" IS NULL)::int)   AS missing_subscription_type,
    SUM((plan."BASE PRICE" IS NULL)::int)          AS missing_base_price,
    SUM((plan."DISCOUNT_PERCENTAGE" IS NULL)::int) AS missing_discount_percentage,
    SUM((plan."EFFECTIVE_PRICE" IS NULL)::int)     AS missing_effective_price
FROM promotional_plan AS plan;
36+
-- zero missing values in the promotional_plan table
37+
38+
39+
-- Checking for missing values in the user_registration table
40+
-- NULL audit for user_registration: each output column is the number of rows
-- where the named field is NULL. The boolean test is cast to 0/1 and summed.
SELECT
    SUM((reg."User Id" IS NULL)::int)   AS missing_user_id,
    SUM((reg."Full Name" IS NULL)::int) AS missing_full_name,
    SUM((reg."Age" IS NULL)::int)       AS missing_age,
    SUM((reg."Gender" IS NULL)::int)    AS missing_gender,
    SUM((reg."Country" IS NULL)::int)   AS missing_country,
    SUM((reg."City" IS NULL)::int)      AS missing_city
FROM user_registration AS reg;
48+
-- zero missing values in the user_registration table
49+
50+
51+
-- Checking for duplicate order_id in the ORDER table
52+
-- Lists every "ORDER_ID" that occurs on more than one row of "ORDER",
-- together with how many rows carry it. An empty result means no duplicates.
SELECT
    ord."ORDER_ID",
    COUNT(*) AS count
FROM "ORDER" AS ord
GROUP BY
    ord."ORDER_ID"
HAVING
    COUNT(*) >= 2;
58+
-- zero duplicate values in order table
59+
60+
61+
-- Checking for duplicate promo_id in the promotional_plan table
62+
-- Lists every "PROMO_ID" that occurs on more than one row of promotional_plan,
-- together with how many rows carry it. An empty result means no duplicates.
SELECT
    plan."PROMO_ID",
    COUNT(*) AS count
FROM promotional_plan AS plan
GROUP BY
    plan."PROMO_ID"
HAVING
    COUNT(*) >= 2;
68+
--zero duplicate values in promotional_plan table
69+
70+
71+
-- Checking for duplicate user_id in the USER_REGISTRATION table
72+
-- Lists every "User Id" that occurs on more than one row of user_registration,
-- together with how many rows carry it. An empty result means no duplicates.
SELECT
    reg."User Id",
    COUNT(*) AS count
FROM user_registration AS reg
GROUP BY
    reg."User Id"
HAVING
    COUNT(*) >= 2;
78+
-- zero duplicate values
79+
80+
-- Checking for promo_id in the ORDER table that do not exist in the promotional_plan table
81+
-- Orphan check: distinct promo ids referenced by "ORDER" that have no matching
-- row in promotional_plan. An empty result means every order's promo id exists.
-- NOT EXISTS is used instead of NOT IN because NOT IN returns no rows at all
-- if the subquery produces a single NULL, which would hide real orphans.
SELECT DISTINCT o."PROMO_ID"
FROM "ORDER" AS o
WHERE o."PROMO_ID" IS NOT NULL  -- NULL promo ids were never reported by NOT IN either
  AND NOT EXISTS (
      SELECT 1
      FROM promotional_plan AS pp
      WHERE pp."PROMO_ID" = o."PROMO_ID"
  );
84+
-- Every promo id present in the ORDER table exists in the promotional_plan table
85+
86+
-- Checking for user_id in the ORDER table that do not exist in the USER_REGISTRATION table
87+
-- Orphan check: distinct user ids referenced by "ORDER" that have no matching
-- row in user_registration.
-- Fixed: the subquery previously selected the unquoted tokens `User Id`.
-- PostgreSQL reads that as the USER keyword (the current role name) aliased
-- as "Id", not as the "User Id" column, so order user ids were never compared
-- against registered users. The column is now referenced as ur."User Id".
-- NOT EXISTS replaces NOT IN so a NULL in user_registration cannot silently
-- empty the result.
SELECT DISTINCT o."USER_ID"
FROM "ORDER" AS o
WHERE o."USER_ID" IS NOT NULL  -- NULL user ids were never reported by NOT IN either
  AND NOT EXISTS (
      SELECT 1
      FROM user_registration AS ur
      WHERE ur."User Id" = o."USER_ID"
  );
-- Earlier finding: some user ids in the ORDER table are missing from user_registration.
-- NOTE(review): that finding was recorded with the faulty subquery above;
-- re-run this corrected check and confirm before relying on it.
-- If orphans remain, exclude those records to keep the data consistent.
92+
93+
94+
-- step (2) - Extracting Required Fields
95+
96+
-- Adds three month-bucket columns to "ORDER" and fills them for every row.
-- Each UPDATE intentionally has no WHERE clause: the whole table is backfilled.
-- ADD COLUMN IF NOT EXISTS makes the script safe to re-run.

-- active_month: first day of the month in which the order was placed.
-- "ORDER_DATE" is parsed with the explicit 'DD-MM-YYYY' format used by the
-- cohort queries in this file, rather than a bare cast whose result depends
-- on the session's DateStyle setting.
ALTER TABLE "ORDER" ADD COLUMN IF NOT EXISTS active_month DATE;
UPDATE "ORDER" AS o
SET active_month = DATE_TRUNC('month', TO_DATE(o."ORDER_DATE", 'DD-MM-YYYY'))::DATE;

-- promo_activation_month: first day of the month in which the promo was redeemed.
-- TO_DATE replaces TO_TIMESTAMP: only the calendar date matters here, and it
-- avoids a time-zone-dependent timestamp round trip.
ALTER TABLE "ORDER" ADD COLUMN IF NOT EXISTS promo_activation_month DATE;
UPDATE "ORDER" AS o
SET promo_activation_month = DATE_TRUNC('month', TO_DATE(o."REDEMPTION_DATE", 'DD-MM-YYYY'))::DATE;

-- promo_ending_month: first day of the month in which the promo validity ends.
-- NOTE(review): "VALIDITY_TILL_DATE" is still converted with a plain cast because
-- its storage format is not shown in this file; if it is 'DD-MM-YYYY' text like
-- the other date columns, switch to TO_DATE(..., 'DD-MM-YYYY') as above.
ALTER TABLE "ORDER" ADD COLUMN IF NOT EXISTS promo_ending_month DATE;
UPDATE "ORDER" AS o
SET promo_ending_month = DATE_TRUNC('month', o."VALIDITY_TILL_DATE"::DATE)::DATE;
107+
108+
109+
-- 2. Creating Customer Cohorts & Analyzing Customer Retention
110+
111+
-- Creating cohorts based on the month of first subscription
112+
-- Monthly retention by acquisition cohort.
-- Output: one row per (cohort_month, active_month) with the number of distinct
-- cohort users who ordered in that month and that number as a share of the cohort.
WITH first_orders AS (
    -- A user's cohort is the month of their earliest order.
    SELECT
        ord."USER_ID",
        DATE_TRUNC('month', MIN(TO_DATE(ord."ORDER_DATE", 'DD-MM-YYYY'))) AS cohort_month
    FROM "ORDER" AS ord
    GROUP BY ord."USER_ID"
),
monthly_activity AS (
    -- Distinct users from each cohort who placed at least one order in each month.
    SELECT
        fo.cohort_month,
        DATE_TRUNC('month', TO_DATE(act."ORDER_DATE", 'DD-MM-YYYY')) AS active_month,
        COUNT(DISTINCT act."USER_ID") AS retained_users
    FROM first_orders AS fo
    INNER JOIN "ORDER" AS act
        ON fo."USER_ID" = act."USER_ID"
    GROUP BY
        fo.cohort_month,
        DATE_TRUNC('month', TO_DATE(act."ORDER_DATE", 'DD-MM-YYYY'))
),
cohort_sizes AS (
    -- Number of users that belong to each cohort (the retention denominator).
    SELECT
        cohort_month,
        COUNT("USER_ID") AS cohort_count
    FROM first_orders
    GROUP BY cohort_month
)
SELECT
    TO_CHAR(ma.cohort_month, 'YYYY-MM') AS cohort_month,
    TO_CHAR(ma.active_month, 'YYYY-MM') AS active_month,
    ma.retained_users,
    -- Multiplying by 1.0 forces numeric (non-integer) division.
    (ma.retained_users * 1.0 / cs.cohort_count) AS retention_rate
FROM monthly_activity AS ma
INNER JOIN cohort_sizes AS cs
    ON ma.cohort_month = cs.cohort_month
ORDER BY ma.cohort_month, ma.active_month;
145+
146+
147+
-- 3. Customer Lifetime Value (CLTV)
148+
149+
-- step (1) - Calculating Total Revenue Generated by Each Cohort
150+
151+
-- Calculating monthly revenue generated by each cohort over its lifetime
152+
-- Total revenue and average revenue per customer for each acquisition cohort.
-- Revenue for an order is the "EFFECTIVE_PRICE" of the promotional plan it used;
-- orders whose "PROMO_ID" has no match in promotional_plan contribute nothing
-- because of the inner join.
-- "ORDER_DATE" is parsed with the explicit 'DD-MM-YYYY' format, matching the
-- retention query in this file, instead of a bare ::DATE cast whose
-- interpretation depends on the session's DateStyle setting.
WITH user_cohorts AS (
    -- A user's cohort is the month of their earliest order.
    SELECT
        o."USER_ID",
        DATE_TRUNC('month', MIN(TO_DATE(o."ORDER_DATE", 'DD-MM-YYYY'))) AS cohort_month
    FROM "ORDER" AS o
    GROUP BY o."USER_ID"
),
-- Revenue generated by each cohort in each calendar month
cohort_revenue AS (
    SELECT
        uc.cohort_month,
        DATE_TRUNC('month', TO_DATE(o."ORDER_DATE", 'DD-MM-YYYY')) AS revenue_month,
        SUM(pp."EFFECTIVE_PRICE") AS monthly_revenue
    FROM user_cohorts AS uc
    INNER JOIN "ORDER" AS o
        ON uc."USER_ID" = o."USER_ID"
    INNER JOIN promotional_plan AS pp
        ON o."PROMO_ID" = pp."PROMO_ID"
    GROUP BY
        uc.cohort_month,
        DATE_TRUNC('month', TO_DATE(o."ORDER_DATE", 'DD-MM-YYYY'))
)
-- Calculating total cohort revenue and average revenue per customer
SELECT
    cr.cohort_month,
    SUM(cr.monthly_revenue) AS total_cohort_revenue,
    -- Multiplying by 1.0 forces numeric (non-integer) division.
    (SUM(cr.monthly_revenue) * 1.0 / cohort_size.cohort_count) AS avg_revenue_per_customer
FROM cohort_revenue AS cr
INNER JOIN (
    -- Number of users that belong to each cohort.
    SELECT
        cohort_month,
        COUNT("USER_ID") AS cohort_count
    FROM user_cohorts
    GROUP BY cohort_month
) AS cohort_size
    ON cr.cohort_month = cohort_size.cohort_month
GROUP BY cr.cohort_month, cohort_size.cohort_count
ORDER BY cr.cohort_month;
185+
186+
-- step (3) - Calculating Gross Margin
187+
188+
-- Step 1: Defining User Cohorts based on the month of first subscription
189+
-- Cohort revenue with a gross-margin-adjusted value per cohort.
-- "ORDER_DATE" is parsed with the explicit 'DD-MM-YYYY' format, matching the
-- retention query in this file, instead of a bare ::DATE cast whose
-- interpretation depends on the session's DateStyle setting.

-- Step 1: Defining user cohorts based on the month of first subscription
WITH user_cohorts AS (
    SELECT
        o."USER_ID",
        DATE_TRUNC('month', MIN(TO_DATE(o."ORDER_DATE", 'DD-MM-YYYY'))) AS cohort_month
    FROM "ORDER" AS o
    GROUP BY o."USER_ID"
),

-- Step 2: Calculating monthly revenue for each cohort
-- (orders without a matching promotional_plan row are dropped by the inner join)
cohort_revenue AS (
    SELECT
        uc.cohort_month,
        DATE_TRUNC('month', TO_DATE(o."ORDER_DATE", 'DD-MM-YYYY')) AS revenue_month,
        SUM(pp."EFFECTIVE_PRICE") AS monthly_revenue
    FROM user_cohorts AS uc
    INNER JOIN "ORDER" AS o
        ON uc."USER_ID" = o."USER_ID"
    INNER JOIN promotional_plan AS pp
        ON o."PROMO_ID" = pp."PROMO_ID"
    GROUP BY
        uc.cohort_month,
        DATE_TRUNC('month', TO_DATE(o."ORDER_DATE", 'DD-MM-YYYY'))
),

-- Step 3: Aggregating cohort revenue and calculating average revenue per customer
cohort_revenue_analysis AS (
    SELECT
        cr.cohort_month,
        SUM(cr.monthly_revenue) AS total_cohort_revenue,
        -- Multiplying by 1.0 forces numeric (non-integer) division.
        (SUM(cr.monthly_revenue) * 1.0 / cohort_size.cohort_count) AS avg_revenue_per_customer
    FROM cohort_revenue AS cr
    INNER JOIN (
        -- Number of users that belong to each cohort.
        SELECT
            cohort_month,
            COUNT("USER_ID") AS cohort_count
        FROM user_cohorts
        GROUP BY cohort_month
    ) AS cohort_size
        ON cr.cohort_month = cohort_size.cohort_month
    GROUP BY cr.cohort_month, cohort_size.cohort_count
)

-- Step 4: Calculating CLTV with gross margin
SELECT
    TO_CHAR(cohort_month, 'YYYY-MM') AS cohort_month,
    total_cohort_revenue,
    avg_revenue_per_customer,
    -- 0.65 is the multiplier applied to revenue; presumably a 65% gross margin
    -- assumption (TODO confirm the source of this figure).
    -- NOTE(review): this scales the cohort's TOTAL revenue, so the value is
    -- cohort-level; a per-customer CLTV would scale avg_revenue_per_customer.
    (total_cohort_revenue * 0.65) AS cltv_with_gross_margin
FROM cohort_revenue_analysis
ORDER BY cohort_month;

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /