Question :
How do I find the minimum value greater than the previous row?
This code needs to be optimized to run over million rows and business keys, we may have 3-10 timevalue columns at most. I am thinking its some type of recursive CTE.
background: conducting slowly changing dimensions for many tables which are being denormalized. And trying to find proper begin dates.
Data Sample:
create table dbo.timetest
(
TimeTestIdentityId int primary key identity(1,1),
businesskey int,
timevalue1 int,
timevalue2 int,
timevalue3 int
)
insert into timetest
values
(5,131,134,137),
(5,131,138,135),
(5,131,140,135),
(5,143,141,145),
(5,149,141,148),
(5,150,141,148),
(6,134,137,140),
(6,134,141,138),
(6,134,143,138),
(6,146,144,148),
(6,152,144,151),
(6,153,144,151)
Final Values:
,etc
Row are ordered by TimeValue1.
Answer :
My solution using a cursor and a temporary table, I can imagine another solution, but it involves an expensive recursive query.
CREATE TABLE #tv ( businesskey int NOT NULL, id int NOT NULL, timevalue int, ); GO
DECLARE @bk int, @tv1 int, @tv2 int, @tv3 int; DECLARE @last_min int = 0; DECLARE @last_bk int = 0; DECLARE @id int; DECLARE curtv CURSOR FORWARD_ONLY READ_ONLY FOR SELECT businesskey, timevalue1, timevalue2, timevalue3 FROM timetest ORDER BY businesskey, TimeTestIdentityId; OPEN curtv; FETCH NEXT FROM curtv INTO @bk, @tv1, @tv2, @tv3; WHILE @@fetch_status = 0 BEGIN -- First row of a businesskey, basically set counters to 0 IF @last_bk @bk BEGIN SET @last_bk = @bk; SET @last_min = 0; SET @id = 0; end SET @id = @id + 1; -- select minimum positive value -- it could be replaced by the below inline UDF but I think it doesn't worth. SELECT @last_min = min(v) + @last_min FROM (VALUES (@tv1-@last_min),(@tv2-@last_min),(@tv3-@last_min)) as t(v) WHERE v > 0; INSERT INTO #tv VALUES (@bk, @id, @last_min); FETCH NEXT FROM curtv INTO @bk, @tv1, @tv2, @tv3; END CLOSE curtv; DEALLOCATE curtv; GO
-- one temp table has been generated you could add an index. ALTER TABLE #tv ADD CONSTRAINT [PK_tv] PRIMARY KEY (businesskey, id); GO SELECT * FROM #tv ORDER BY businesskey, id; GO IF OBJECT_ID('tempdb..#tv') IS NOT NULL DROP TABLE #tv; GO
businesskey | timevalue ----------: | --------: 5 | 131 5 | 135 5 | 140 5 | 141 5 | 148 5 | 150 6 | 134 6 | 138 6 | 143 6 | 144 6 | 151 6 | 153
db<>fiddle here
Using a recursive CTE:
IF OBJECT_ID('min_positive') IS NOT NULL DROP FUNCTION min_positive; GO -- inline UDF you should add as many parameters as you need. CREATE FUNCTION min_positive(@v1 int, @v2 int, @v3 int) RETURNS TABLE AS RETURN SELECT MIN(v) AS minpos FROM (VALUES (@v1),(@v2),(@v3)) AS t(v) WHERE v > 0; GO
;WITH ct1 AS ( -- set a rank for each businesskey -- I need it for the anchor in the recursive part SELECT businesskey, timevalue1, timevalue2, timevalue3, RANK() OVER (PARTITION BY businesskey ORDER BY TimeTestIdentityId) rk FROM timetest ) , ct2 AS ( -- first row of each businesskey SELECT businesskey, timevalue1, timevalue2, timevalue3, rk, (SELECT minpos FROM min_positive(timevalue1, timevalue2, timevalue3)) AS lm FROM ct1 WHERE rk = 1 UNION ALL -- next row of the businesskey untill the end of the rank SELECT ct1.businesskey, ct1.timevalue1, ct1.timevalue2, ct1.timevalue3, ct1.rk, (SELECT minpos + ct2.lm FROM min_positive(ct1.timevalue1-ct2.lm, ct1.timevalue2-ct2.lm, ct1.timevalue3-ct2.lm)) lm FROM ct1 JOIN ct2 ON ct2.businesskey = ct1.businesskey AND ct1.rk = ct2.rk + 1 ) SELECT businesskey, lm FROM ct2 ORDER BY businesskey, rk; GO
db<>fiddle here
with
cte1 (businesskey,timevalue1,timevalue2,timevalue3,rn) as (
select businesskey,timevalue1,timevalue2,timevalue3,
row_number() over (partition by businesskey order by timevalue1,timevalue2,timevalue3) rn
from timetest
),
cte2 (businesskey,timevalue1,timevalue2,timevalue3,rn,timevalue0) as (
select businesskey,timevalue1,timevalue2,timevalue3,rn,timevalue1
from cte1
where rn=1
union all
select t2.businesskey,t2.timevalue1,t2.timevalue2,t2.timevalue3,t2.rn,
case when t2.timevalue1>t1.timevalue0 then t2.timevalue1 else 2147483647 end
from cte2 t1
inner join cte1 t2 on t1.businesskey=t2.businesskey
and t1.rn+1=t2.rn
union all
select t2.businesskey,t2.timevalue1,t2.timevalue2,t2.timevalue3,t2.rn,
case when t2.timevalue2>t1.timevalue0 then t2.timevalue2 else 2147483647 end
from cte2 t1
inner join cte1 t2 on t1.businesskey=t2.businesskey
and t1.rn+1=t2.rn
union all
select t2.businesskey,t2.timevalue1,t2.timevalue2,t2.timevalue3,t2.rn,
case when t2.timevalue3>t1.timevalue0 then t2.timevalue3 else 2147483647 end
from cte2 t1
inner join cte1 t2 on t1.businesskey=t2.businesskey
and t1.rn+1=t2.rn
)
select businesskey,timevalue1,timevalue2,timevalue3,rn,min(timevalue0) timevalue0
from cte2
group by businesskey,timevalue1,timevalue2,timevalue3,rn
order by 1,5;
Output:
businesskey timevalue1 timevalue2 timevalue3 rn timevalue0
5 131 134 137 1 131
5 131 138 135 2 135
5 131 140 135 3 140
5 143 141 145 4 141
5 149 141 148 5 148
5 150 141 148 6 150
6 134 137 140 1 134
6 134 141 138 2 138
6 134 143 138 3 143
6 146 144 148 4 144
6 152 144 151 5 151
6 153 144 151 6 153