接昨天,补齐数据再转列。
--0. 测试数据
with tmp_rst_ra_skc_org_detail as (
select org_name, in_store_city, tiny_class, color_name, sum(allot_in_org_suit_total_sales_qty) allot_in_org_suit_total_sales_qty
, sum(in_org_fill_rate) in_org_fill_rate,
sum(allot_in_org_all_last_7_14days_sales_qty) allot_in_org_all_last_7_14days_sales_qty --select count(*)
from tenant_2000000001_biz.rst_ra_skc_org_detail rrsod --4523839
where day_date='2021-10-20'--tiny_class in('3/4裤')
group by org_name, in_store_city, tiny_class, color_name
)--5s,46570
--1. 求出小类、颜色的全部组合(distinct) 466ms
,tmp_tinycolor as(
select tiny_class, color_name from tmp_rst_ra_skc_org_detail
group by tiny_class, color_name
)
--2. 求出区域、城市与小类、颜色的全部组合(cross join)
,tmp_cross as(
select *
from (select org_name, in_store_city from tmp_rst_ra_skc_org_detail group by org_name, in_store_city)t
cross join tmp_tinycolor
)
--3. 追加缺失的组合数据,值默认为null
,tmp_plus as(
select * from tmp_rst_ra_skc_org_detail
union all
(select *,null,null,null from tmp_cross except --39
select org_name, in_store_city,tiny_class, color_name,null,null,null from tmp_rst_ra_skc_org_detail
))
--4. grouping sets求出各级的汇总值
,tmp_groupby as(--create table tenant_2000000001_biz.tmp_groupby as
select org_name, in_store_city, tiny_class, color_name, sum(allot_in_org_suit_total_sales_qty) allot_in_org_suit_total_sales_qty
, sum(in_org_fill_rate) in_org_fill_rate,
sum(allot_in_org_all_last_7_14days_sales_qty) allot_in_org_all_last_7_14days_sales_qty --select count(*)
from tmp_plus
group by grouping sets((org_name, in_store_city,tiny_class, color_name)
,(org_name, in_store_city,tiny_class)
,(org_name, in_store_city)
))
--5. 不用row_to_json,用json_build_object做出转列的嵌套json
select org_name, in_store_city
,array_agg(row(allot_in_org_suit_total_sales_qty,in_org_fill_rate,allot_in_org_all_last_7_14days_sales_qty) order by tiny_class, color_name) tboby
,array_agg(row(tiny_class, color_name) order by tiny_class, color_name) theader--select *
from tmp_groupby
group by org_name, in_store_city
;
取了一天(日分区)比较多的200万数据要2.5s,因为用到了笛卡尔积,想再试试有没有更快的方法。
最终表头列要一样,要么转之前补,要么转之后补。
转之后补,只有数值不够行转列,需要kv结构行转列,补充缺的那些key,这需要json结构。但补json的key还是要展开为行再匹配:
SELECT
key,
jsonb_object_agg(t.k, t.v ORDER BY t.k) AS json
FROM (
SELECT
mt.key,
t.k,
t.v
FROM
mytable mt
CROSS JOIN LATERAL jsonb_each(mt.json) AS t(k, v)
) AS t
RIGHT JOIN (
SELECT
unnest(ARRAY['a', 'b', 'c', 'd']) AS k
FROM
generate_series(1, 1)
) AS s(k) ON t.k = s.k
GROUP BY key;
这不多次一举吗?为什么一定要关联匹配找缺失的key呢?直接对key从1开始编号,缺的序号不就是要补的嘛!
create temp table t2(key text,rn int,v numeric);
insert into t2
values('a',1,1.1),('a',3,1.3),('b',2,2.1),('b',3,2.3);
drop table rst;
create temp table rst as
select key,jsonb_object_agg(rn::text, v ORDER BY rn) AS json--select *
from t2 group by key
;
select *,json->'1',json->'2',json->'3' from rst
合并成1个json:
a {"1": 1.1, "3": 1.3}
b {"2": 2.1, "3": 2.3}
假设总共有3个key,则补齐所有key的结果为:
a {"1": 1.1, "2": null, "3": 1.3}
b {"1": null, "2": 2.1, "3": 2.3}
with tmp_rst_ra_skc_org_detail as (
select org_name, in_store_city, tiny_class, color_name, sum(allot_in_org_suit_total_sales_qty) allot_in_org_suit_total_sales_qty
, sum(in_org_fill_rate) in_org_fill_rate,
sum(allot_in_org_all_last_7_14days_sales_qty) allot_in_org_all_last_7_14days_sales_qty --select count(*)
from tenant_2000000001_biz.rst_ra_skc_org_detail rrsod --4523839
where day_date='2021-10-20'--tiny_class in('3/4裤')
group by org_name, in_store_city, tiny_class, color_name
)
--1. 求出小类、颜色的全部组合(distinct) 也是表头
,tmp_header as(
select tiny_class, color_name,row_number() over (order by tiny_class, color_name) rn
from tmp_rst_ra_skc_org_detail
group by grouping sets((tiny_class, color_name),(tiny_class))
order by tiny_class, color_name)
--4. grouping sets求出各级的汇总值
,tmp_groupby as(--create table tenant_2000000001_biz.tmp_groupby as
select org_name, in_store_city, tiny_class, color_name, sum(allot_in_org_suit_total_sales_qty) allot_in_org_suit_total_sales_qty
, sum(in_org_fill_rate) in_org_fill_rate,
sum(allot_in_org_all_last_7_14days_sales_qty) allot_in_org_all_last_7_14days_sales_qty --select count(*)
from tmp_rst_ra_skc_org_detail--tmp_plus
group by grouping sets((org_name, in_store_city,tiny_class, color_name)
,(org_name, in_store_city,tiny_class)
,(org_name, in_store_city)
))
select org_name, in_store_city
--,jsonb_object_agg(t.rn, a.allot_in_org_suit_total_sales_qty ORDER BY t.rn) AS j
--,regexp_split_to_array(string_agg(rn::text||','||allot_in_org_suit_total_sales_qty::text,',' order by rn),',') str
,json_object(array_agg(rn::text order by rn),array_agg(allot_in_org_suit_total_sales_qty::text order by rn)) j
--,array_agg(row(allot_in_org_suit_total_sales_qty,in_org_fill_rate,allot_in_org_all_last_7_14days_sales_qty) order by tiny_class, color_name) tboby
--,array_agg(row(tiny_class, color_name) order by tiny_class, color_name) theader--select *--select count(*)
from tmp_groupby a inner join tmp_header t
on coalesce(a.tiny_class,'')=coalesce(t.tiny_class,'') and coalesce(a.color_name,'')=coalesce(t.color_name,'')
group by org_name, in_store_city
;
上面sql生成了json列,耗时2.2s。
再动态生成列头个数的json解析列:
select count(*) from tmp_header--1405
-- 动态生成列头
with t as(SELECT generate_series(1,1405) as rn
)select 'select org_name, in_store_city,'||string_agg('j->'''||rn||''' ',',')||' from tmp_result'
from t ;
执行动态生成的sql,解析json有点慢啊,就这步花了5s。
还是在补数据上下功夫吧,调整了以下写法,步骤少一点,后续改动态方便些,时间上没有太大变化。
--0. 测试数据
with tmp_rst_ra_skc_org_detail as (
select org_name, in_store_city, tiny_class, color_name, sum(allot_in_org_suit_total_sales_qty) allot_in_org_suit_total_sales_qty
, sum(in_org_fill_rate) in_org_fill_rate,
sum(allot_in_org_all_last_7_14days_sales_qty) allot_in_org_all_last_7_14days_sales_qty --select count(*) --select *
from tenant_2000000001_biz.rst_ra_skc_org_detail rrsod --4523839
where day_date='2021-10-20'--tiny_class in('3/4裤')
group by org_name, in_store_city, tiny_class, color_name
)--5s,46570
--1. 求出小类、颜色的全部组合(distinct) 466ms
,tmp_header as(
select tiny_class, color_name from tmp_rst_ra_skc_org_detail
group by tiny_class, color_name
)
,tmp_plus as(
select s.org_name, s.in_store_city,sub.tiny_class , sub.color_name
,COALESCE(sc.allot_in_org_suit_total_sales_qty, 0) AS allot_in_org_suit_total_sales_qty
,COALESCE(sc.in_org_fill_rate, 0) AS in_org_fill_rate
,COALESCE(sc.allot_in_org_all_last_7_14days_sales_qty, 0) AS allot_in_org_all_last_7_14days_sales_qty
from (select org_name, in_store_city from tmp_rst_ra_skc_org_detail group by org_name, in_store_city) s
cross join tmp_header sub
left join tmp_rst_ra_skc_org_detail sc
on s.org_name=sc.org_name and s.in_store_city=sc.in_store_city and sc.tiny_class=sub.tiny_class and sub.color_name=sc.color_name
)
--4. grouping sets求出各级的汇总值
,tmp_groupby as(--create table tenant_2000000001_biz.tmp_groupby as
select org_name, in_store_city, tiny_class, color_name, sum(allot_in_org_suit_total_sales_qty) allot_in_org_suit_total_sales_qty
, sum(in_org_fill_rate) in_org_fill_rate,
sum(allot_in_org_all_last_7_14days_sales_qty) allot_in_org_all_last_7_14days_sales_qty --select count(*)
from tmp_plus
group by grouping sets((org_name, in_store_city,tiny_class, color_name)
,(org_name, in_store_city,tiny_class)
,(org_name, in_store_city)
))
--5. 不用row_to_json,用json_build_object做出转列的嵌套json
select org_name, in_store_city
,array_agg(row(allot_in_org_suit_total_sales_qty,in_org_fill_rate,allot_in_org_all_last_7_14days_sales_qty) order by tiny_class, color_name) tboby
,array_agg(row(tiny_class, color_name) order by tiny_class, color_name) theader--select *
from tmp_groupby
group by org_name, in_store_city
;
限时特惠:本站每日持续更新海量展厅资源,一年会员只需29.9元,全站资源免费下载
站长微信:zhanting688
声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。