Solutions¶

In [1]:
%load_ext sql
In [2]:
%sql sqlite:///data/large_sqlite_db.db
In [3]:
%%sql
-- Exercise 1
SELECT region, strftime('%Y-%m', sale_date) AS sale_month, SUM(sale_amount) AS total_sales
FROM sales_data
GROUP BY region, sale_month;
 * sqlite:///data/large_sqlite_db.db
Done.
Out[3]:
region sale_month total_sales
East 2023-10 20814
North 2023-10 26462
South 2023-10 26639
West 2023-10 30859
In [4]:
%%sql
-- Exercise 2
SELECT region, SUM(sale_amount) AS total_sales
FROM sales_data
GROUP BY region
HAVING SUM(sale_amount) > 3000;
 * sqlite:///data/large_sqlite_db.db
Done.
Out[4]:
region total_sales
East 20814
North 26462
South 26639
West 30859
In [5]:
%%sql
-- Exercise 3
-- Step 1: Calculate the overall average sales across all products
WITH avg_sales AS (
    SELECT AVG(total_sales) AS avg_total_sales
    FROM (
        SELECT product_id, SUM(sale_amount) AS total_sales
        FROM sales_data
        GROUP BY product_id
    )
)

-- Step 2: Find products with sales greater than the average
SELECT product_id, SUM(sale_amount) AS total_sales
FROM sales_data
GROUP BY product_id
HAVING SUM(sale_amount) > (SELECT avg_total_sales FROM avg_sales);
 * sqlite:///data/large_sqlite_db.db
Done.
Out[5]:
product_id total_sales
1 10559
4 11263
5 11189
7 11131
8 13444
In [6]:
%%sql
SELECT product_id, SUM(sale_amount) AS total_sales
FROM sales_data
GROUP BY product_id
 * sqlite:///data/large_sqlite_db.db
Done.
Out[6]:
product_id total_sales
1 10559
2 9092
3 9983
4 11263
5 11189
6 10270
7 11131
8 13444
9 8304
10 9539
In [7]:
%%sql
-- Exercise 4
SELECT customer_id, SUM(order_amount) AS total_spent
FROM customer_orders
GROUP BY customer_id
HAVING SUM(order_amount) > 5000;
 * sqlite:///data/large_sqlite_db.db
Done.
Out[7]:
customer_id total_spent
6 11538
8 5052
9 5169
14 5329
17 6589
18 6265
20 7088
21 5614
23 5422
25 6034
26 5919
27 7795
33 10560
37 9520
41 7189
46 9445
47 5797
48 6273
50 5433
In [8]:
%%sql
-- Exercise 5
SELECT c.customer_id, COUNT(s.product_id) AS total_products_sold
FROM customer_orders c
JOIN sales_data s ON c.customer_id = s.product_id  -- Assume product_id is related
GROUP BY c.customer_id
HAVING SUM(c.order_amount) > 3000;
 * sqlite:///data/large_sqlite_db.db
Done.
Out[8]:
customer_id total_products_sold
1 57
2 38
3 66
4 84
5 60
6 153
7 60
8 115
9 95
10 80
In [ ]: