Target is a globally recognized brand and a leading retailer in the United States, known for offering exceptional value, inspiration, innovation, and a unique shopping experience.
This dataset focuses on Target's operations in Brazil, covering 100,000 orders placed between 2016 and 2018. It includes detailed information on order status, pricing, payment and shipping performance, customer locations, product attributes, and customer reviews.
The data is available in 8 csv files:
- customers.csv
- sellers.csv
- order_items.csv
- geolocation.csv
- payments.csv
- orders.csv
- products.csv
Analyzing this dataset offers valuable insights into Target's Brazilian operations, revealing details about order processing, pricing strategies, payment and shipping efficiency, customer demographics, product characteristics, and customer satisfaction. This comprehensive dataset is a valuable resource for understanding various business aspects and enhancing strategic decision-making.
- Calculate the cumulative sales per month for each year.
Select year, month, value, sum(value) over(order by year, month) as cumulative_sales from (SELECT YEAR(orders.order_purchase_timestamp) AS year, MONTH(orders.order_purchase_timestamp) AS month, ROUND(SUM(payments.payment_value), 2) AS value FROM orders join payments on orders.order_id = payments.order_id group by year, month order by year, month) as a;
- Calculate the retention rate of customers, defined as the percentage of customers who make another purchase within 6 months of their first purchase.
With a as( SELECT customers.customer_id, MIN(orders.order_purchase_timestamp) AS first_order FROM customers JOIN orders ON customers.customer_id = orders.customer_id GROUP BY customers.customer_id ), b as(SELECT a.customer_id, COUNT(DISTINCT orders.order_purchase_timestamp) next_order FROM a JOIN orders ON a.customer_id = orders.customer_id AND orders.order_purchase_timestamp > first_order AND orders.order_purchase_timestamp < DATE_ADD(first_order, INTERVAL 6 MONTH) GROUP BY a.customer_id ) SELECT 100 * (COUNT(DISTINCT a.customer_id) / COUNT(DISTINCT b.customer_id)) AS retention_rate FROM a LEFT JOIN b ON a.customer_id = b.customer_id;
- Identify the top 3 customers who spent the most money in each year.
With top_customers as( select *, rank() over(partition by year order by year, indi_purchase DESC) as cust_rank from( SELECT YEAR(orders.order_purchase_timestamp) year, orders.customer_id, SUM(payments.payment_value) AS indi_purchase FROM orders join payments on orders.order_id = payments.order_id group by year, orders.customer_id) as a ) SELECT * FROM top_customers WHERE cust_rank <= 3;
- Find the average number of products per order, grouped by customer city.
With count_order as( SELECT orders.order_id, orders.customer_id, COUNT(order_items.order_id) AS order_count FROM orders JOIN order_items ON orders.order_id = order_items.order_id GROUP BY orders.order_id , orders.customer_id ) SELECT customers.customer_city, ROUND(AVG(count_order.order_count), 2) AS avg_order FROM count_order JOIN customers ON count_order.customer_id = customers.customer_id GROUP BY customers.customer_city ORDER BY avg_order DESC;
- Calculate the total revenue generated by each seller, and rank them by revenue.
With top_seller As ( SELECT order_items.seller_id, ROUND(SUM(payments.payment_value), 2) AS total_revenue FROM order_items JOIN payments ON order_items.order_id = payments.order_id GROUP BY order_items.seller_id -- order by total_revenue DESC ) Select *, Rank() over(order by total_revenue DESC) as seller_rank from top_seller;
- Calculate the percentage of orders that were paid in installments.
SELECT ((SUM(CASE WHEN payment_installments >= 1 THEN 1 ELSE 0 END)) / COUNT(*)) * 100 AS order_percentage FROM payments;
- Calculate the percentage of total revenue contributed by each product category.
SELECT products.product_category, ROUND((SUM(payments.payment_value) / (SELECT SUM(payment_value) FROM payments)) * 100, 2) AS percent_cont FROM products JOIN order_items ON products.product_id = order_items.product_id JOIN payments ON order_items.order_id = payments.order_id GROUP BY products.product_category ORDER BY percent_cont DESC;
- Calculate the year-over-year growth rate of total sales.
WITH sales_rate as( SELECT YEAR(orders.order_purchase_timestamp) AS year, ROUND(SUM(payments.payment_value), 2) AS sales FROM orders JOIN payments ON orders.order_id = payments.order_id GROUP BY year ) select year, sales, ((sales - lag(sales,1) over (order by year)) / lag(sales,1) over(order by year))*100 as year_growth_rate from sales_rate;
- Identify the correlation between product price and the number of times a product has been purchased.
SELECT products.product_category, COUNT(order_items.product_id) order_count, Round(AVG(order_items.price),2) price FROM products JOIN order_items ON products.product_id = order_items.product_id GROUP BY products.product_category;
- Calculate the moving average of order values for each customer over their order history.
Select customer_id, order_purchase_timestamp, payment, avg(payment) over(partition by customer_id order by order_purchase_timestamp rows between 2 preceding and current row) as moving_avg from ( SELECT orders.customer_id, orders.order_purchase_timestamp, payments.payment_value AS payment FROM orders JOIN payments ON orders.order_id = payments.order_id) as a;