Crafting a Custom Synthetic Dataset
Building a Robust Training Set
Created and validated 100 extremely complex queries
Added 200 less complex queries for comprehensive coverage
-- Top 10 customers by quarter-over-quarter growth in average order value,
-- with each customer's current average compared to the company-wide average
-- for the current quarter.

-- Per-customer, per-quarter order metrics covering the previous and the
-- current calendar quarter (half-open date range).
WITH CustomerOrderValues AS (
    SELECT
        "f_sales"."BILL_CUSTOMER_SID",
        DATE_TRUNC('quarter', "d_date"."DATE") AS "quarter",
        -- An "order" is a distinct SALES_DOCUMENT_SID (same definition as
        -- "order_count" below), so the average is total value per distinct
        -- document rather than a per-row AVG. If f_sales holds one row per
        -- document the two are equivalent.
        -- The 1.0 factor avoids integer division on engines that would
        -- truncate if EXTENDED_PRICE is an integer type -- TODO confirm type.
        1.0 * SUM("f_sales"."EXTENDED_PRICE")
            / NULLIF(COUNT(DISTINCT "f_sales"."SALES_DOCUMENT_SID"), 0) AS "avg_order_value",
        COUNT(DISTINCT "f_sales"."SALES_DOCUMENT_SID") AS "order_count"
    FROM "f_sales"
    INNER JOIN "d_date"
        ON "f_sales"."ORDER_DATE_SID" = "d_date".date_sid
    WHERE "d_date"."DATE" >= DATE_TRUNC('quarter', CURRENT_DATE) - INTERVAL '3 months'
        AND "d_date"."DATE" < DATE_TRUNC('quarter', CURRENT_DATE) + INTERVAL '3 months'
    GROUP BY
        "f_sales"."BILL_CUSTOMER_SID",
        DATE_TRUNC('quarter', "d_date"."DATE")
),

-- Pairs each customer's current-quarter row with its previous-quarter row.
-- Only customers with at least 5 orders in BOTH quarters are kept.
CustomerGrowth AS (
    SELECT
        "c"."CUSTOMER_SID",
        "c"."CUSTOMER_NAME",
        "cov_current"."avg_order_value" AS "current_avg_order_value",
        "cov_previous"."avg_order_value" AS "previous_avg_order_value",
        -- NULLIF turns a zero previous average into NULL instead of raising
        -- a division-by-zero error; such rows are dropped by the final
        -- "growth_rate" > 0 filter.
        ("cov_current"."avg_order_value" - "cov_previous"."avg_order_value")
            / NULLIF("cov_previous"."avg_order_value", 0) AS "growth_rate"
    FROM "d_customers" AS "c"
    INNER JOIN CustomerOrderValues AS "cov_current"
        ON "c"."CUSTOMER_SID" = "cov_current"."BILL_CUSTOMER_SID"
    INNER JOIN CustomerOrderValues AS "cov_previous"
        ON "c"."CUSTOMER_SID" = "cov_previous"."BILL_CUSTOMER_SID"
    WHERE "cov_current"."quarter" = DATE_TRUNC('quarter', CURRENT_DATE)
        AND "cov_previous"."quarter" = DATE_TRUNC('quarter', CURRENT_DATE) - INTERVAL '3 months'
        AND "cov_current"."order_count" >= 5
        AND "cov_previous"."order_count" >= 5
),

-- Single-row company-wide average order value for the current quarter,
-- computed with the same per-distinct-document definition as above.
CompanyAverage AS (
    SELECT
        1.0 * SUM("f_sales"."EXTENDED_PRICE")
            / NULLIF(COUNT(DISTINCT "f_sales"."SALES_DOCUMENT_SID"), 0) AS "company_avg_order_value"
    FROM "f_sales"
    INNER JOIN "d_date"
        ON "f_sales"."ORDER_DATE_SID" = "d_date".date_sid
    WHERE "d_date"."DATE" >= DATE_TRUNC('quarter', CURRENT_DATE)
        AND "d_date"."DATE" < DATE_TRUNC('quarter', CURRENT_DATE) + INTERVAL '3 months'
)

SELECT
    "cg"."CUSTOMER_NAME",
    ROUND("cg"."current_avg_order_value", 2) AS "current_avg_order_value",
    ROUND("cg"."previous_avg_order_value", 2) AS "previous_avg_order_value",
    ROUND("cg"."growth_rate" * 100, 2) AS "growth_percentage",
    -- Yields NULL (not an error) when the company average is zero or missing.
    ROUND(
        ("cg"."current_avg_order_value" - "ca"."company_avg_order_value")
            / NULLIF("ca"."company_avg_order_value", 0) * 100,
        2
    ) AS "percent_difference_from_company_avg"
FROM CustomerGrowth AS "cg"
-- CompanyAverage is an ungrouped aggregate, so it always has exactly one row.
CROSS JOIN CompanyAverage AS "ca"
WHERE "cg"."growth_rate" > 0
-- Name and SID act as tie-breakers so the top-10 cut is deterministic.
ORDER BY
    "cg"."growth_rate" DESC,
    "cg"."CUSTOMER_NAME",
    "cg"."CUSTOMER_SID"
LIMIT 10;
"Which customers have
shown the highest increase
in average order value from
last quarter to this quarter,
and how does their current
performance compare to
the overall company
average?"