What is the best way to split a char separated string into rows and columns?
Here is my input string:
!1;100;10;200;0;500;2;1000;30!2;100;3;500;1;2000;5
And here is my desired output table:
Id Value Count
1 100 10
1 200 0
1 500 2
1 1000 30
2 100 3
2 500 1
2 2000 5
Best Regards,
-
this has been answered before, check stackoverflow.com/questions/10914576/tsql-split-stringSpörri– Spörri2015年06月09日 14:55:45 +00:00Commented Jun 9, 2015 at 14:55
-
That's a fairly basic split. This one requires two levels of splitting and breaking the output up. First split by ! then split the result set up using the first value as Id then the rest get paired up to make the Value and Count columns.Kenneth Fisher– Kenneth Fisher2015年06月09日 14:58:43 +00:00Commented Jun 9, 2015 at 14:58
3 Answers 3
It is not neccesary to modify Jeff Moden's function. I have devised a little bit sophisticated way of achieving the result just using the original function.
First Here it is the original Moden's function: (Source: http://www.sqlservercentral.com/articles/Tally+Table/72993/)
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
And the query for obtaining the required result (I have created a CTE just for clarity)
DECLARE @string VARCHAR(8000)
SET @string = '!1;100;10;200;0;500;2;1000;30!2;100;3;500;1;2000;5'
;WITH CTE AS
(
SELECT
SUBSTRING(A.Item,1,CHARINDEX(';',A.Item)-1) As Id,
B.Item
FROM dbo.DelimitedSplit8K(@string, '!') AS A
CROSS APPLY dbo.DelimitedSplit8K(A.Item, ';') AS B
WHERE A.Item <> '' --to avoid blank first row because the first !
)
SELECT Id, [Value], [Count]
FROM
(
SELECT Id, Item As [Value],
ROW_NUMBER() OVER(PARTITION BY Id ORDER BY Id) As RowNumber,
LEAD(Item,1,0) OVER(PARTITION BY Id ORDER BY Id) As [Count]
FROM CTE
) As T
WHERE RowNumber % 2 = 0
EDITED: I realized that my solution assummed that id value was ordered or that the order in the original string was not neccesary to be preserved in the result. Below the general solution (note that I changed the first number in the string for testing the situation)
DECLARE @string VARCHAR(8000)
SET @string = '!3;100;10;200;0;500;2;1000;30!2;100;3;500;1;2000;5'
;WITH CTE AS
(
SELECT
ROW_NUMBER() OVER(ORDER BY (SELECT 0)) As RN,
CAST(SUBSTRING(A.Item,1,CHARINDEX(';',A.Item)-1) AS Varchar(400)) As Id,
B.Item
FROM dbo.DelimitedSplit8K(@string, '!') AS A
CROSS APPLY dbo.DelimitedSplit8K(A.Item, ';') AS B
WHERE A.Item <> '' --to avoid blank first row because the first !
)
SELECT Id, [Value], [Count]
FROM
(
SELECT Id, Item As [Value],
ROW_NUMBER() OVER(PARTITION BY Id ORDER BY Id) As RowNumber,
LEAD(Item,1,0) OVER(PARTITION BY Id ORDER BY Id) As [Count]
,FIRST_VALUE(RN) OVER(PARTITION BY Id ORDER BY Id) AS FV
FROM CTE
) As T
WHERE RowNumber % 2 = 0
ORDER BY FV
Jeff Moden created a splitter function over on www.SQLServerCentral.com called DelimitedSplit8K. It takes two parameters @pString (the string to split) & @pDelimiter (the delimiter, in your case ;).
However in your case you aren't doing a straight up split, so it will have to be modified a little bit. In fact we have to do two different types of splits. The first a normal one to split on ! and the second a modified one to get the different rows using the first value as the first column and the rest split in pairs, one pair per row.
Test code first
DECLARE @splitstring varchar(8000)
SET @splitstring = '!1;100;10;200;0;500;2;1000;30!2;100;3;500;1;2000;5'
-- First use the original function to split out the groups.
-- The STUFF function removes the first ! since this is a split function
-- and if it's left we get a blank line.
SELECT FinalDelim.*
FROM DelimitedSplit8K(STUFF(@splitstring,1,1,''),'!') AS FirstDelim
-- Here we CROSS APPLY the modified version over each of the original splits
CROSS APPLY ModifiedDelimitedSplit8K(FirstDelim.Item,';') AS FinalDelim
GO
And here is the ModifiedDelimitedSplit8K code.
-- Based Jeff Modem's original script
CREATE FUNCTION [dbo].[ModifiedDelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
),
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
-- Putting this in it's own CTE since we need ItemNumber 1 for all of the rows
cteSplitString(ItemNumber, Item) AS (
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l)
SELECT cteItem1.Item AS Id,
cteItem2.Value,
cteItem2.Count
FROM (SELECT Item FROM cteSplitString WHERE ItemNumber = 1) AS cteItem1
CROSS JOIN (SELECT MAX(CASE WHEN ItemNumber%2 = 0 THEN Item ELSE NULL END) AS Value,
MAX(CASE WHEN ItemNumber%2 = 1 THEN Item ELSE NULL END) AS Count
FROM cteSplitString
WHERE ItemNumber > 1
GROUP BY ItemNumber/2) AS cteItem2
This is probably not the best solutuion. But since your proposal was a bit particular I decided to create an ad-hoc solution. It was fun. Can be tested here: http://sqlfiddle.com/#!3/9eecb7db59d16c80417c72d1/623
declare @string varchar(max)
set @string = '!1;100;10;200;0;500;2;1000;30!2;100;3;500;1;2000;5'
declare @t1 table (row varchar(max))
declare @end int
set @end = 1
set @string = substring ( @string ,2, len(@string))
while charindex ( '!' ,@string,1) > 0
begin
set @end = charindex ( '!' ,@string,1)
insert into @t1 (row) values (substring ( @string ,1, @end - 1))
set @string = substring ( @string ,@end+1, len(@string))
end
set @end = len(@string)
insert into @t1 (row) values (substring ( @string ,1, @end))
declare cur cursor for select * from @t1
open cur
declare @row varchar(max)
fetch next from cur into @row
declare @result table
(id varchar(max), [value] varchar(max), [count] varchar(max))
declare @id varchar(max), @value varchar(max), @count varchar(max)
while @@fetch_status = 0
begin
set @end = charindex( ';' ,@row)
set @id = substring(@row,1,@end-1)
set @row = substring(@row,@end +1,len(@row))
while len(@row) > 0
begin
set @end = charindex( ';' ,@row)
set @value = substring(@row,1,@end-1)
set @row = substring(@row,@end +1,len(@row))
set @end = charindex( ';' ,@row)
if @end > 0
begin
set @count = substring(@row,1,@end-1)
set @row = substring(@row,@end +1,len(@row))
set @end = charindex( ';' ,@row)
end
else
begin
set @count = substring(@row,1,len(@row))
set @row = ''
end
insert into @result (id, [value], [count]) values (@id, @value, @count)
end
fetch next from cur into @row
end
close cur
deallocate cur
select * from @result