I've just started using OPENJSON
with SQL Server 2016 SP1.
I have this sentence:
select c.Serial as Parent,
(Select co.Serial, agc.Position
from AggregationChildren agc, Aggregation ag, Code co
where agc.AggregationId = a.AggregationId
and co.CodeId = agc.AggregationChildrenId for json path) as children
from Aggregation a, Code c
where c.CodeId = a.AggregationId for json path
To generate this JSON:
{"Parent":"4244287599479491","Children":
[{"Serial":"8915753733724633","Position":"1"},
{"Serial":"1247782815710855","Position":"2"},
...]}
But it is very very slow.
My problem is with the Children
array because I don't know how to get it.
Is there a way to do it faster?
These are the tables:
CREATE TABLE [dbo].[Code] (
[CodeId] INT IDENTITY (1, 1) NOT NULL,
[Serial] NVARCHAR (20) NOT NULL,
[ ... ],
CONSTRAINT [PK_CODE] PRIMARY KEY CLUSTERED ([CodeId] ASC),
[ ... ]
)
CREATE TABLE [dbo].[Aggregation] (
[AggregationId] INT NOT NULL,
[ ... ],
CONSTRAINT [PK_AGGREGATIONS] PRIMARY KEY CLUSTERED ([AggregationId] ASC),
CONSTRAINT [FK_Aggregation_Code]
FOREIGN KEY ([AggregationId])
REFERENCES [dbo].[Code] ([CodeId])
)
CREATE TABLE [dbo].[AggregationChildren] (
[AggregationChildrenId] INT NOT NULL,
[AggregationId] INT NOT NULL,
[Position] INT NOT NULL,
CONSTRAINT [PK_AGGREGATION_CHILDS] PRIMARY KEY CLUSTERED ([AggregationChildrenId] ASC),
CONSTRAINT [FK_AggregationChildren_Code]
FOREIGN KEY ([AggregationChildrenId])
REFERENCES [dbo].[Code] ([CodeId]),
CONSTRAINT [FK_AggregationChildren_Aggregation]
FOREIGN KEY ([AggregationId])
REFERENCES [dbo].[Aggregation] ([AggregationId]) ON DELETE CASCADE
)
The Serial
column is an nvarchar(20)
since the values may be any combination of alphanumerics, even though my example only shows numerics.
1 Answer 1
I had a hard time parsing your query, however I believe this returns the same result, and is much faster:
SELECT Parent = c.Serial
, Children = (
SELECT c.Serial
, cac.Position
FROM dbo.Code cc
INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
FOR JSON PATH
)
FROM dbo.Code c
INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;
The plan for the above query looks like:
The plan for your query looks like:
If we add the following index, we can make the first variant even faster:
CREATE NONCLUSTERED INDEX IX_AggregationChildren_IX0
ON dbo.AggregationChildren (AggregationId)
INCLUDE (AggregationChildrenId,Position);
Clearly, you'd need to evaluate this against your workload, however.
I've created a minimally viable complete example setup to use for testing:
USE tempdb;
IF OBJECT_ID(N'dbo.AggregationChildren', N'U') IS NOT NULL
DROP TABLE dbo.AggregationChildren;
IF OBJECT_ID(N'dbo.Aggregation', N'U') IS NOT NULL
DROP TABLE dbo.Aggregation;
IF OBJECT_ID(N'dbo.Code', N'U') IS NOT NULL
DROP TABLE dbo.Code;
GO
CREATE TABLE dbo.Code (
CodeId int NOT NULL
CONSTRAINT PK_CODE
PRIMARY KEY
CLUSTERED
, Serial nvarchar(20) NOT NULL
);
CREATE TABLE dbo.Aggregation (
AggregationId int NOT NULL
CONSTRAINT PK_AGGREGATIONS
PRIMARY KEY
CLUSTERED
CONSTRAINT FK_Aggregation_Code
FOREIGN KEY (AggregationId)
REFERENCES dbo.Code (CodeId)
)
CREATE TABLE dbo.AggregationChildren (
AggregationChildrenId int NOT NULL
CONSTRAINT PK_AGGREGATION_CHILDS
PRIMARY KEY
CLUSTERED
CONSTRAINT FK_AggregationChildren_Code
FOREIGN KEY (AggregationChildrenId)
REFERENCES dbo.Code (CodeId)
, AggregationId int NOT NULL
CONSTRAINT FK_AggregationChildren_Aggregation
FOREIGN KEY (AggregationId)
REFERENCES dbo.Aggregation (AggregationId)
ON DELETE CASCADE
, Position int NOT NULL
)
I re-formatted the constraint clauses to more kind to my brain; essentially, the above code is the same as the DDL in your question.
This populates the three tables with enough data to make meaningful comparisons:
;WITH src AS
(
SELECT n.Val
FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val
+ (s2.Val * 10)
+ (s3.Val * 100)
+ (s4.Val * 1000)
+ (s5.Val * 10000)
, CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
CROSS JOIN src s2
CROSS JOIN src s3
CROSS JOIN src s4
CROSS JOIN src s5
;WITH src AS
(
SELECT n.Val
FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val
+ (s2.Val * 10)
+ (s3.Val * 100)
FROM src s1
CROSS JOIN src s2
CROSS JOIN src s3;
;WITH src AS
(
SELECT n.Val
FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val
+ (s2.Val * 10)
+ (s3.Val * 100)
+ (s4.Val * 1000)
+ (s5.Val * 10000)
, s1.Val
+ (s2.Val * 10)
+ (s3.Val * 100)
, s1.Val
FROM src s1
CROSS JOIN src s2
CROSS JOIN src s3
CROSS JOIN src s4
CROSS JOIN src s5;
These are the row counts for each table:
╔════════╦═════════════╦═════════════════════╗ ║ Code ║ Aggregation ║ AggregationChildren ║ ╠════════╬═════════════╬═════════════════════╣ ║ 100000 ║ 1000 ║ 100000 ║ ╚════════╩═════════════╩═════════════════════╝
My version of the query:
SELECT Parent = c.Serial
, Children = (
SELECT c.Serial
, cac.Position
FROM dbo.Code cc
INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
FOR JSON PATH
)
FROM dbo.Code c
INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;
To compare the output of both queries, I've created two user-defined-functions, as in:
CREATE FUNCTION dbo.fn_json_test_1()
RETURNS nvarchar(max)
AS
BEGIN
RETURN (
SELECT Parent = c.Serial
, Children = (
SELECT c.Serial
, cac.Position
FROM dbo.Code cc
INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
FOR JSON PATH
)
FROM dbo.Code c
INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH
);
END;
GO
GO
CREATE FUNCTION dbo.fn_json_test_2()
RETURNS nvarchar(max)
AS
BEGIN
RETURN (
SELECT c.Serial as Parent,
(Select co.Serial, agc.Position
from AggregationChildren agc, Aggregation ag, Code co
where agc.AggregationId = a.AggregationId
and co.CodeId = agc.AggregationChildrenId for json path) as children
from Aggregation a, Code c
where c.CodeId = a.AggregationId for json path
);
END;
GO
Now, I can compare the output for both queries via:
DECLARE @res1 nvarchar(max) = dbo.fn_json_test_1();
DECLARE @res2 nvarchar(max) = dbo.fn_json_test_2();
SELECT CASE WHEN @res1 <> @res2 THEN 'mismatch' ELSE 'match' END;
The result is:
The results do NOT match. The output of my query contains less child nodes than your query. I'm going back to the drawing board, and will simplify the test bed to see where the disparity lies.
The simplified testbed consists of 10 rows in the Code
table, 2 rows in the Aggregation
(Parent) table, and 8 rows in the AggregationChildren
(child) table:
;WITH src AS
(
SELECT n.Val
FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val
, CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
;WITH src AS
(
SELECT n.Val
FROM (VALUES (0), (1)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val
FROM src s1;
;WITH src AS
(
SELECT n.Val
FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val + 2
, s1.Val % 2
, s1.Val
FROM src s1;
Row counts:
SELECT Code = (SELECT COUNT(1) FROM dbo.Code)
, Aggregation = (SELECT COUNT(1) FROM dbo.Aggregation)
, AggregationChildren = (SELECT COUNT(1) FROM dbo.AggregationChildren)
╔══════╦═════════════╦═════════════════════╗ ║ Code ║ Aggregation ║ AggregationChildren ║ ╠══════╬═════════════╬═════════════════════╣ ║ 10 ║ 2 ║ 8 ║ ╚══════╩═════════════╩═════════════════════╝
The predicted pattern should be two parent json arrays, each with 4 child arrays.
My Results:
[ { "Parent": "-5601362097731340301", "Children": [ { "Serial": "-5601362097731340301", "Position": 0 }, { "Serial": "-5601362097731340301", "Position": 2 }, { "Serial": "-5601362097731340301", "Position": 4 }, { "Serial": "-5601362097731340301", "Position": 6 } ] }, { "Parent": "-8896860091721838065", "Children": [ { "Serial": "-8896860091721838065", "Position": 1 }, { "Serial": "-8896860091721838065", "Position": 3 }, { "Serial": "-8896860091721838065", "Position": 5 }, { "Serial": "-8896860091721838065", "Position": 7 } ] }
Your query:
[ { "Parent": "-5601362097731340301", "Children": [ { "Serial": "5802227619253639548", "Position": 0 }, { "Serial": "5802227619253639548", "Position": 0 }, { "Serial": "4504664379821512162", "Position": 2 }, { "Serial": "4504664379821512162", "Position": 2 }, { "Serial": "6561435639659176802", "Position": 4 }, { "Serial": "6561435639659176802", "Position": 4 }, { "Serial": "-7417083263182709739", "Position": 6 }, { "Serial": "-7417083263182709739", "Position": 6 } ] }, { "Parent": "-8896860091721838065", "Children": [ { "Serial": "-7646118996434234523", "Position": 1 }, { "Serial": "-7646118996434234523", "Position": 1 }, { "Serial": "-6372739442099935942", "Position": 3 }, { "Serial": "-6372739442099935942", "Position": 3 }, { "Serial": "-882384147532911428", "Position": 5 }, { "Serial": "-882384147532911428", "Position": 5 }, { "Serial": "4293317573306886053", "Position": 7 }, { "Serial": "4293317573306886053", "Position": 7 } ] } ]
Your query has too many children; my query returns the predicted number of children, and it returns the correct Position
values, however is returning the incorrect Serial
values.
The "bug" in my query appears in the inner query. The incorrect query is:
SELECT c.Serial
, cac.Position
FROM dbo.Code cc
INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH
The correct version is:
SELECT cc.Serial --changed "c." to "cc."
, cac.Position
FROM dbo.Code cc
INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY cc.CodeId --not a big deal, but different order for children in output
FOR JSON PATH
The corrected query now looks like:
SELECT Parent = c.Serial
, Children = (
SELECT cc.Serial
, cac.Position
FROM dbo.Code cc
INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY cc.CodeId
FOR JSON PATH
)
FROM dbo.Code c
INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH;
And returns the following result:
[ { "Parent": "-195930341251513493", "Children": [ { "Serial": "-6126601633786720400", "Position": 1 }, { "Serial": "5216562173012877678", "Position": 3 }, { "Serial": "-1992909345438478098", "Position": 5 }, { "Serial": "8329388691987940194", "Position": 7 } ] }, { "Parent": "8774608126018975726", "Children": [ { "Serial": "-3380643917643646211", "Position": 0 }, { "Serial": "-2042609074595538493", "Position": 2 }, { "Serial": "7345460002653774160", "Position": 4 }, { "Serial": "-2126530822210070443", "Position": 6 } ] } ]