需求描述

构建商品维度表的拉链存储,商品名称与商品描述的更新频率较低,属缓慢变化维。

涉及的表

  • ods_t_product_di

每天从应用库增量拉取新增/更新的数据

  • dim_prd_product_zip_d

商品维度表(拉链存储表),通过start_dt与end_dt描述每条记录的生命开始与结束,end_dt=99990101为当前生效记录

SQL编写

-- 建表语句
CREATE TABLE ods_t_product_di (
	id int COMMENT '产品ID',
	prod_name VARCHAR(64) COMMENT '产品名称', 
	description VARCHAR(256) COMMENT '产品描述',
	created_time DATETIME COMMENT '记录创建时间',
	updated_time DATETIME COMMENT '记录更新时间',
	dt CHAR(8) COMMENT '分区字段,业务日期',
	PRIMARY KEY (id, dt)
);

CREATE TABLE dim_prd_product_zip_d(
 	id int COMMENT '产品ID',
	prod_name VARCHAR(64) COMMENT '产品名称', 
	description VARCHAR(256) COMMENT '产品描述',
	created_time DATETIME COMMENT '记录创建时间',
	updated_time DATETIME COMMENT '记录更新时间',
	start_dt char(8) COMMENT '记录生效开始时间',
	end_dt char(8) COMMENT '记录生效截止时间',
	dt CHAR(8) COMMENT '分区字段,业务日期',
	PRIMARY KEY (id, end_dt, dt)
);

-- 20241017分区为初始化全量数据
INSERT INTO ods_t_product_di
(id, prod_name, description, created_time, updated_time, dt)
VALUES 
(1,'茄子', '普通茄子', '2024-10-16 10:00:00', '2024-10-16 10:00:00', '20241017')
, (2,'西红柿', '普通西红柿', '2024-10-17 10:00:00', '2024-10-17 10:00:00', '20241017')
, (3,'土豆', '普通土豆', '2024-10-17 10:00:00', '2024-10-17 10:00:00', '20241017');

-- 20241018分区为增量新增/更新数据
INSERT INTO ods_t_product_di
(id, prod_name, description, created_time, updated_time, dt)
VALUES 
(1,'圆茄子', '圆圆的茄子', '2024-10-16 10:00:00', '2024-10-16 10:00:00', '20241018'), 
(3,'水果土豆', '水果土豆可以生吃', '2024-10-17 10:00:00', '2024-10-18 10:00:00', '20241018'), 
(4,'豆角', '国产豆角', '2024-10-18 10:00:00', '2024-10-18 10:00:00', '20241018');

-- dim_prd_product_zip_d表生成任务
INSERT INTO dim_prd_product_zip_d (
	id
	, prod_name
	, description
	, created_time
	, updated_time
	, start_dt
	, end_dt
    , dt
)
SELECT IF(b.id IS NULL, a.id, b.id) AS id
	, IF(b.id IS NULL, a.prod_name, b.prod_name) AS prod_name
	, IF(b.id IS NULL, a.description, b.description) AS description
	, IF(b.id IS NULL, a.created_time, b.created_time) AS created_time
	, IF(b.id IS NULL, a.updated_time, b.updated_time) AS updated_time
	, IF(b.id IS NULL, a.dt, b.dt) AS start_dt
	, '99990101' AS end_dt
    , '${bizDate}' AS dt
FROM (
	SELECT *
	FROM dim_prd_product_zip_d
	WHERE dt = '${bizDate - 1}' -- 20241016
	  AND end_time = '99990101'
) a
FULL JOIN (
	SELECT *
	FROM ods_t_product_di 
	WHERE dt = '${bizDate}' -- 20241017
) b ON a.id = b.id
UNION ALL
SELECT a.ID
	, a.prod_name
	, a.description
	, a.created_time
	, a.updated_time
	, a.start_dt
	, '${bizDate - 1}' AS end_dt
	, '${bizDate}' AS dt
FROM (
	SELECT *
	FROM dim_prd_product_zip_d
	WHERE dt = '${bizDate - 1}' -- 20241016
	  AND end_dt = '99990101'
) a
INNER JOIN (
	SELECT *
	FROM ods_t_product_di 
	WHERE dt = '${bizDate}' -- 20241017
) b ON a.id = b.id;

-- 解链操作
-- 拉链的逆操作,此处省略

点赞(0) 打赏

评论列表 共有 0 条评论

暂无评论

微信公众账号

微信扫一扫加关注

发表
评论
返回
顶部