2

I've just started using OPENJSON with SQL Server 2016 SP1.

I have this sentence:

select c.Serial as Parent,
 (Select co.Serial, agc.Position
 from AggregationChildren agc, Aggregation ag, Code co
 where agc.AggregationId = a.AggregationId 
 and co.CodeId = agc.AggregationChildrenId for json path) as children
 from Aggregation a, Code c
 where c.CodeId = a.AggregationId for json path

To generate this JSON:

{"Parent":"4244287599479491","Children":
[{"Serial":"8915753733724633","Position":"1"},
{"Serial":"1247782815710855","Position":"2"},
...]}

But it is very very slow.

My problem is with the Children array because I don't know how to get it.

Is there a way to do it faster?

These are the tables:

CREATE TABLE [dbo].[Code] (
 [CodeId] INT IDENTITY (1, 1) NOT NULL,
 [Serial] NVARCHAR (20) NOT NULL,
 [ ... ],
 CONSTRAINT [PK_CODE] PRIMARY KEY CLUSTERED ([CodeId] ASC),
 [ ... ]
)
CREATE TABLE [dbo].[Aggregation] (
 [AggregationId] INT NOT NULL,
 [ ... ], 
 CONSTRAINT [PK_AGGREGATIONS] PRIMARY KEY CLUSTERED ([AggregationId] ASC),
 CONSTRAINT [FK_Aggregation_Code]
 FOREIGN KEY ([AggregationId])
 REFERENCES [dbo].[Code] ([CodeId])
)
CREATE TABLE [dbo].[AggregationChildren] (
 [AggregationChildrenId] INT NOT NULL,
 [AggregationId] INT NOT NULL,
 [Position] INT NOT NULL,
 CONSTRAINT [PK_AGGREGATION_CHILDS] PRIMARY KEY CLUSTERED ([AggregationChildrenId] ASC),
 CONSTRAINT [FK_AggregationChildren_Code]
 FOREIGN KEY ([AggregationChildrenId])
 REFERENCES [dbo].[Code] ([CodeId]),
 CONSTRAINT [FK_AggregationChildren_Aggregation]
 FOREIGN KEY ([AggregationId])
 REFERENCES [dbo].[Aggregation] ([AggregationId]) ON DELETE CASCADE
)

The Serial column is an nvarchar(20) since the values may be any combination of alphanumerics, even though my example only shows numerics.

Hannah Vernon
71.1k22 gold badges178 silver badges323 bronze badges
asked Mar 8, 2018 at 13:45
0

1 Answer 1

6

I had a hard time parsing your query, however I believe this returns the same result, and is much faster:

SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

The plan for the above query looks like:

enter image description here

The plan for your query looks like:

enter image description here

If we add the following index, we can make the first variant even faster:

CREATE NONCLUSTERED INDEX IX_AggregationChildren_IX0
ON dbo.AggregationChildren (AggregationId)
INCLUDE (AggregationChildrenId,Position);

Clearly, you'd need to evaluate this against your workload, however.


I've created a minimally viable complete example setup to use for testing:

USE tempdb;
IF OBJECT_ID(N'dbo.AggregationChildren', N'U') IS NOT NULL 
DROP TABLE dbo.AggregationChildren;
IF OBJECT_ID(N'dbo.Aggregation', N'U') IS NOT NULL 
DROP TABLE dbo.Aggregation;
IF OBJECT_ID(N'dbo.Code', N'U') IS NOT NULL 
DROP TABLE dbo.Code;
GO
CREATE TABLE dbo.Code (
 CodeId int NOT NULL
 CONSTRAINT PK_CODE 
 PRIMARY KEY 
 CLUSTERED
 , Serial nvarchar(20) NOT NULL
);
CREATE TABLE dbo.Aggregation (
 AggregationId int NOT NULL
 CONSTRAINT PK_AGGREGATIONS 
 PRIMARY KEY 
 CLUSTERED
 CONSTRAINT FK_Aggregation_Code
 FOREIGN KEY (AggregationId)
 REFERENCES dbo.Code (CodeId)
)
CREATE TABLE dbo.AggregationChildren (
 AggregationChildrenId int NOT NULL
 CONSTRAINT PK_AGGREGATION_CHILDS 
 PRIMARY KEY 
 CLUSTERED
 CONSTRAINT FK_AggregationChildren_Code
 FOREIGN KEY (AggregationChildrenId)
 REFERENCES dbo.Code (CodeId)
 , AggregationId int NOT NULL
 CONSTRAINT FK_AggregationChildren_Aggregation
 FOREIGN KEY (AggregationId)
 REFERENCES dbo.Aggregation (AggregationId) 
 ON DELETE CASCADE
 , Position int NOT NULL
)

I re-formatted the constraint clauses to more kind to my brain; essentially, the above code is the same as the DDL in your question.

This populates the three tables with enough data to make meaningful comparisons:

;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 + (s4.Val * 1000)
 + (s5.Val * 10000)
 , CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3
 CROSS JOIN src s4
 CROSS JOIN src s5
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3;
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 + (s4.Val * 1000)
 + (s5.Val * 10000)
 , s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 , s1.Val 
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3
 CROSS JOIN src s4
 CROSS JOIN src s5;

These are the row counts for each table:

╔════════╦═════════════╦═════════════════════╗
║ Code ║ Aggregation ║ AggregationChildren ║
╠════════╬═════════════╬═════════════════════╣
║ 100000 ║ 1000 ║ 100000 ║
╚════════╩═════════════╩═════════════════════╝

My version of the query:

SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

To compare the output of both queries, I've created two user-defined-functions, as in:

CREATE FUNCTION dbo.fn_json_test_1()
RETURNS nvarchar(max)
AS
BEGIN
 RETURN (
 SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
 FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
 FOR JSON PATH
 );
END;
GO
GO
CREATE FUNCTION dbo.fn_json_test_2()
RETURNS nvarchar(max)
AS
BEGIN
 RETURN (
 SELECT c.Serial as Parent,
 (Select co.Serial, agc.Position
 from AggregationChildren agc, Aggregation ag, Code co
 where agc.AggregationId = a.AggregationId 
 and co.CodeId = agc.AggregationChildrenId for json path) as children
 from Aggregation a, Code c
 where c.CodeId = a.AggregationId for json path
 );
END;
GO

Now, I can compare the output for both queries via:

DECLARE @res1 nvarchar(max) = dbo.fn_json_test_1();
DECLARE @res2 nvarchar(max) = dbo.fn_json_test_2();
SELECT CASE WHEN @res1 <> @res2 THEN 'mismatch' ELSE 'match' END;

The result is:

The results do NOT match. The output of my query contains less child nodes than your query. I'm going back to the drawing board, and will simplify the test bed to see where the disparity lies.

The simplified testbed consists of 10 rows in the Code table, 2 rows in the Aggregation (Parent) table, and 8 rows in the AggregationChildren (child) table:

;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val 
 , CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val 
FROM src s1;
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val + 2
 , s1.Val % 2
 , s1.Val 
FROM src s1;

Row counts:

SELECT Code = (SELECT COUNT(1) FROM dbo.Code)
 , Aggregation = (SELECT COUNT(1) FROM dbo.Aggregation)
 , AggregationChildren = (SELECT COUNT(1) FROM dbo.AggregationChildren)
╔══════╦═════════════╦═════════════════════╗
║ Code ║ Aggregation ║ AggregationChildren ║
╠══════╬═════════════╬═════════════════════╣
║ 10 ║ 2 ║ 8 ║
╚══════╩═════════════╩═════════════════════╝

The predicted pattern should be two parent json arrays, each with 4 child arrays.

My Results:

[
 {
 "Parent": "-5601362097731340301",
 "Children": [
 {
 "Serial": "-5601362097731340301",
 "Position": 0
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 2
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 4
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 6
 }
 ]
 },
 {
 "Parent": "-8896860091721838065",
 "Children": [
 {
 "Serial": "-8896860091721838065",
 "Position": 1
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 3
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 5
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 7
 }
 ]
 }

Your query:

[
 {
 "Parent": "-5601362097731340301",
 "Children": [
 {
 "Serial": "5802227619253639548",
 "Position": 0
 },
 {
 "Serial": "5802227619253639548",
 "Position": 0
 },
 {
 "Serial": "4504664379821512162",
 "Position": 2
 },
 {
 "Serial": "4504664379821512162",
 "Position": 2
 },
 {
 "Serial": "6561435639659176802",
 "Position": 4
 },
 {
 "Serial": "6561435639659176802",
 "Position": 4
 },
 {
 "Serial": "-7417083263182709739",
 "Position": 6
 },
 {
 "Serial": "-7417083263182709739",
 "Position": 6
 }
 ]
 },
 {
 "Parent": "-8896860091721838065",
 "Children": [
 {
 "Serial": "-7646118996434234523",
 "Position": 1
 },
 {
 "Serial": "-7646118996434234523",
 "Position": 1
 },
 {
 "Serial": "-6372739442099935942",
 "Position": 3
 },
 {
 "Serial": "-6372739442099935942",
 "Position": 3
 },
 {
 "Serial": "-882384147532911428",
 "Position": 5
 },
 {
 "Serial": "-882384147532911428",
 "Position": 5
 },
 {
 "Serial": "4293317573306886053",
 "Position": 7
 },
 {
 "Serial": "4293317573306886053",
 "Position": 7
 }
 ]
 }
]

Your query has too many children; my query returns the predicted number of children, and it returns the correct Position values, however is returning the incorrect Serial values.

The "bug" in my query appears in the inner query. The incorrect query is:

SELECT c.Serial 
 , cac.Position
FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH 

The correct version is:

SELECT cc.Serial --changed "c." to "cc."
 , cac.Position
FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY cc.CodeId --not a big deal, but different order for children in output
FOR JSON PATH 

The corrected query now looks like:

SELECT Parent = c.Serial
 , Children = (
 SELECT cc.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 ORDER BY cc.CodeId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH;

And returns the following result:

[
 {
 "Parent": "-195930341251513493",
 "Children": [
 {
 "Serial": "-6126601633786720400",
 "Position": 1
 },
 {
 "Serial": "5216562173012877678",
 "Position": 3
 },
 {
 "Serial": "-1992909345438478098",
 "Position": 5
 },
 {
 "Serial": "8329388691987940194",
 "Position": 7
 }
 ]
 },
 {
 "Parent": "8774608126018975726",
 "Children": [
 {
 "Serial": "-3380643917643646211",
 "Position": 0
 },
 {
 "Serial": "-2042609074595538493",
 "Position": 2
 },
 {
 "Serial": "7345460002653774160",
 "Position": 4
 },
 {
 "Serial": "-2126530822210070443",
 "Position": 6
 }
 ]
 }
]
answered Mar 8, 2018 at 16:24
0

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.