Generate nested array with OPENJSON very slow

Question 1

I've just started using OPENJSON with SQL Server 2016 SP1.

I have this sentence:

select c.Serial as Parent,
 (Select co.Serial, agc.Position
 from AggregationChildren agc, Aggregation ag, Code co
 where agc.AggregationId = a.AggregationId 
 and co.CodeId = agc.AggregationChildrenId for json path) as children
 from Aggregation a, Code c
 where c.CodeId = a.AggregationId for json path

To generate this JSON:

{"Parent":"4244287599479491","Children":
[{"Serial":"8915753733724633","Position":"1"},
{"Serial":"1247782815710855","Position":"2"},
...]}

But it is very very slow.

My problem is with the Children array because I don't know how to get it.

Is there a way to do it faster?

These are the tables:

CREATE TABLE [dbo].[Code] (
 [CodeId] INT IDENTITY (1, 1) NOT NULL,
 [Serial] NVARCHAR (20) NOT NULL,
 [ ... ],
 CONSTRAINT [PK_CODE] PRIMARY KEY CLUSTERED ([CodeId] ASC),
 [ ... ]
)
CREATE TABLE [dbo].[Aggregation] (
 [AggregationId] INT NOT NULL,
 [ ... ], 
 CONSTRAINT [PK_AGGREGATIONS] PRIMARY KEY CLUSTERED ([AggregationId] ASC),
 CONSTRAINT [FK_Aggregation_Code]
 FOREIGN KEY ([AggregationId])
 REFERENCES [dbo].[Code] ([CodeId])
)
CREATE TABLE [dbo].[AggregationChildren] (
 [AggregationChildrenId] INT NOT NULL,
 [AggregationId] INT NOT NULL,
 [Position] INT NOT NULL,
 CONSTRAINT [PK_AGGREGATION_CHILDS] PRIMARY KEY CLUSTERED ([AggregationChildrenId] ASC),
 CONSTRAINT [FK_AggregationChildren_Code]
 FOREIGN KEY ([AggregationChildrenId])
 REFERENCES [dbo].[Code] ([CodeId]),
 CONSTRAINT [FK_AggregationChildren_Aggregation]
 FOREIGN KEY ([AggregationId])
 REFERENCES [dbo].[Aggregation] ([AggregationId]) ON DELETE CASCADE
)

The Serial column is an nvarchar(20) since the values may be any combination of alphanumerics, even though my example only shows numerics.

Question 2

I had a hard time parsing your query, however I believe this returns the same result, and is much faster:

SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

The plan for the above query looks like:

enter image description here

The plan for your query looks like:

enter image description here

If we add the following index, we can make the first variant even faster:

CREATE NONCLUSTERED INDEX IX_AggregationChildren_IX0
ON dbo.AggregationChildren (AggregationId)
INCLUDE (AggregationChildrenId,Position);

Clearly, you'd need to evaluate this against your workload, however.

I've created a minimally viable complete example setup to use for testing:

USE tempdb;
IF OBJECT_ID(N'dbo.AggregationChildren', N'U') IS NOT NULL 
DROP TABLE dbo.AggregationChildren;
IF OBJECT_ID(N'dbo.Aggregation', N'U') IS NOT NULL 
DROP TABLE dbo.Aggregation;
IF OBJECT_ID(N'dbo.Code', N'U') IS NOT NULL 
DROP TABLE dbo.Code;
GO
CREATE TABLE dbo.Code (
 CodeId int NOT NULL
 CONSTRAINT PK_CODE 
 PRIMARY KEY 
 CLUSTERED
 , Serial nvarchar(20) NOT NULL
);
CREATE TABLE dbo.Aggregation (
 AggregationId int NOT NULL
 CONSTRAINT PK_AGGREGATIONS 
 PRIMARY KEY 
 CLUSTERED
 CONSTRAINT FK_Aggregation_Code
 FOREIGN KEY (AggregationId)
 REFERENCES dbo.Code (CodeId)
)
CREATE TABLE dbo.AggregationChildren (
 AggregationChildrenId int NOT NULL
 CONSTRAINT PK_AGGREGATION_CHILDS 
 PRIMARY KEY 
 CLUSTERED
 CONSTRAINT FK_AggregationChildren_Code
 FOREIGN KEY (AggregationChildrenId)
 REFERENCES dbo.Code (CodeId)
 , AggregationId int NOT NULL
 CONSTRAINT FK_AggregationChildren_Aggregation
 FOREIGN KEY (AggregationId)
 REFERENCES dbo.Aggregation (AggregationId) 
 ON DELETE CASCADE
 , Position int NOT NULL
)

I re-formatted the constraint clauses to more kind to my brain; essentially, the above code is the same as the DDL in your question.

This populates the three tables with enough data to make meaningful comparisons:

;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 + (s4.Val * 1000)
 + (s5.Val * 10000)
 , CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3
 CROSS JOIN src s4
 CROSS JOIN src s5
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3;
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 + (s4.Val * 1000)
 + (s5.Val * 10000)
 , s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 , s1.Val 
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3
 CROSS JOIN src s4
 CROSS JOIN src s5;

These are the row counts for each table:

╔════════╦═════════════╦═════════════════════╗
║ Code ║ Aggregation ║ AggregationChildren ║
╠════════╬═════════════╬═════════════════════╣
║ 100000 ║ 1000 ║ 100000 ║
╚════════╩═════════════╩═════════════════════╝

My version of the query:

SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

To compare the output of both queries, I've created two user-defined-functions, as in:

CREATE FUNCTION dbo.fn_json_test_1()
RETURNS nvarchar(max)
AS
BEGIN
 RETURN (
 SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
 FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
 FOR JSON PATH
 );
END;
GO
GO
CREATE FUNCTION dbo.fn_json_test_2()
RETURNS nvarchar(max)
AS
BEGIN
 RETURN (
 SELECT c.Serial as Parent,
 (Select co.Serial, agc.Position
 from AggregationChildren agc, Aggregation ag, Code co
 where agc.AggregationId = a.AggregationId 
 and co.CodeId = agc.AggregationChildrenId for json path) as children
 from Aggregation a, Code c
 where c.CodeId = a.AggregationId for json path
 );
END;
GO

Now, I can compare the output for both queries via:

DECLARE @res1 nvarchar(max) = dbo.fn_json_test_1();
DECLARE @res2 nvarchar(max) = dbo.fn_json_test_2();
SELECT CASE WHEN @res1 <> @res2 THEN 'mismatch' ELSE 'match' END;

The result is:

The results do NOT match. The output of my query contains less child nodes than your query. I'm going back to the drawing board, and will simplify the test bed to see where the disparity lies.

The simplified testbed consists of 10 rows in the Code table, 2 rows in the Aggregation (Parent) table, and 8 rows in the AggregationChildren (child) table:

;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val 
 , CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val 
FROM src s1;
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val + 2
 , s1.Val % 2
 , s1.Val 
FROM src s1;

Row counts:

SELECT Code = (SELECT COUNT(1) FROM dbo.Code)
 , Aggregation = (SELECT COUNT(1) FROM dbo.Aggregation)
 , AggregationChildren = (SELECT COUNT(1) FROM dbo.AggregationChildren)

╔══════╦═════════════╦═════════════════════╗
║ Code ║ Aggregation ║ AggregationChildren ║
╠══════╬═════════════╬═════════════════════╣
║ 10 ║ 2 ║ 8 ║
╚══════╩═════════════╩═════════════════════╝

The predicted pattern should be two parent json arrays, each with 4 child arrays.

My Results:

[
 {
 "Parent": "-5601362097731340301",
 "Children": [
 {
 "Serial": "-5601362097731340301",
 "Position": 0
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 2
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 4
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 6
 }
 ]
 },
 {
 "Parent": "-8896860091721838065",
 "Children": [
 {
 "Serial": "-8896860091721838065",
 "Position": 1
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 3
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 5
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 7
 }
 ]
 }

Your query:

[
 {
 "Parent": "-5601362097731340301",
 "Children": [
 {
 "Serial": "5802227619253639548",
 "Position": 0
 },
 {
 "Serial": "5802227619253639548",
 "Position": 0
 },
 {
 "Serial": "4504664379821512162",
 "Position": 2
 },
 {
 "Serial": "4504664379821512162",
 "Position": 2
 },
 {
 "Serial": "6561435639659176802",
 "Position": 4
 },
 {
 "Serial": "6561435639659176802",
 "Position": 4
 },
 {
 "Serial": "-7417083263182709739",
 "Position": 6
 },
 {
 "Serial": "-7417083263182709739",
 "Position": 6
 }
 ]
 },
 {
 "Parent": "-8896860091721838065",
 "Children": [
 {
 "Serial": "-7646118996434234523",
 "Position": 1
 },
 {
 "Serial": "-7646118996434234523",
 "Position": 1
 },
 {
 "Serial": "-6372739442099935942",
 "Position": 3
 },
 {
 "Serial": "-6372739442099935942",
 "Position": 3
 },
 {
 "Serial": "-882384147532911428",
 "Position": 5
 },
 {
 "Serial": "-882384147532911428",
 "Position": 5
 },
 {
 "Serial": "4293317573306886053",
 "Position": 7
 },
 {
 "Serial": "4293317573306886053",
 "Position": 7
 }
 ]
 }
]

Your query has too many children; my query returns the predicted number of children, and it returns the correct Position values, however is returning the incorrect Serial values.

The "bug" in my query appears in the inner query. The incorrect query is:

SELECT c.Serial 
 , cac.Position
FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH

The correct version is:

SELECT cc.Serial --changed "c." to "cc."
 , cac.Position
FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY cc.CodeId --not a big deal, but different order for children in output
FOR JSON PATH

The corrected query now looks like:

SELECT Parent = c.Serial
 , Children = (
 SELECT cc.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 ORDER BY cc.CodeId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH;

And returns the following result:

[
 {
 "Parent": "-195930341251513493",
 "Children": [
 {
 "Serial": "-6126601633786720400",
 "Position": 1
 },
 {
 "Serial": "5216562173012877678",
 "Position": 3
 },
 {
 "Serial": "-1992909345438478098",
 "Position": 5
 },
 {
 "Serial": "8329388691987940194",
 "Position": 7
 }
 ]
 },
 {
 "Parent": "8774608126018975726",
 "Children": [
 {
 "Serial": "-3380643917643646211",
 "Position": 0
 },
 {
 "Serial": "-2042609074595538493",
 "Position": 2
 },
 {
 "Serial": "7345460002653774160",
 "Position": 4
 },
 {
 "Serial": "-2126530822210070443",
 "Position": 6
 }
 ]
 }
]

score 6 · Accepted Answer · 2018-03-08 16:24:38Z

I had a hard time parsing your query, however I believe this returns the same result, and is much faster:

SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

The plan for the above query looks like:

enter image description here

The plan for your query looks like:

enter image description here

If we add the following index, we can make the first variant even faster:

CREATE NONCLUSTERED INDEX IX_AggregationChildren_IX0
ON dbo.AggregationChildren (AggregationId)
INCLUDE (AggregationChildrenId,Position);

Clearly, you'd need to evaluate this against your workload, however.

I've created a minimally viable complete example setup to use for testing:

USE tempdb;
IF OBJECT_ID(N'dbo.AggregationChildren', N'U') IS NOT NULL 
DROP TABLE dbo.AggregationChildren;
IF OBJECT_ID(N'dbo.Aggregation', N'U') IS NOT NULL 
DROP TABLE dbo.Aggregation;
IF OBJECT_ID(N'dbo.Code', N'U') IS NOT NULL 
DROP TABLE dbo.Code;
GO
CREATE TABLE dbo.Code (
 CodeId int NOT NULL
 CONSTRAINT PK_CODE 
 PRIMARY KEY 
 CLUSTERED
 , Serial nvarchar(20) NOT NULL
);
CREATE TABLE dbo.Aggregation (
 AggregationId int NOT NULL
 CONSTRAINT PK_AGGREGATIONS 
 PRIMARY KEY 
 CLUSTERED
 CONSTRAINT FK_Aggregation_Code
 FOREIGN KEY (AggregationId)
 REFERENCES dbo.Code (CodeId)
)
CREATE TABLE dbo.AggregationChildren (
 AggregationChildrenId int NOT NULL
 CONSTRAINT PK_AGGREGATION_CHILDS 
 PRIMARY KEY 
 CLUSTERED
 CONSTRAINT FK_AggregationChildren_Code
 FOREIGN KEY (AggregationChildrenId)
 REFERENCES dbo.Code (CodeId)
 , AggregationId int NOT NULL
 CONSTRAINT FK_AggregationChildren_Aggregation
 FOREIGN KEY (AggregationId)
 REFERENCES dbo.Aggregation (AggregationId) 
 ON DELETE CASCADE
 , Position int NOT NULL
)

I re-formatted the constraint clauses to more kind to my brain; essentially, the above code is the same as the DDL in your question.

This populates the three tables with enough data to make meaningful comparisons:

;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 + (s4.Val * 1000)
 + (s5.Val * 10000)
 , CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3
 CROSS JOIN src s4
 CROSS JOIN src s5
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3;
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 + (s4.Val * 1000)
 + (s5.Val * 10000)
 , s1.Val 
 + (s2.Val * 10)
 + (s3.Val * 100)
 , s1.Val 
FROM src s1
 CROSS JOIN src s2
 CROSS JOIN src s3
 CROSS JOIN src s4
 CROSS JOIN src s5;

These are the row counts for each table:

╔════════╦═════════════╦═════════════════════╗
║ Code ║ Aggregation ║ AggregationChildren ║
╠════════╬═════════════╬═════════════════════╣
║ 100000 ║ 1000 ║ 100000 ║
╚════════╩═════════════╩═════════════════════╝

My version of the query:

SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

To compare the output of both queries, I've created two user-defined-functions, as in:

CREATE FUNCTION dbo.fn_json_test_1()
RETURNS nvarchar(max)
AS
BEGIN
 RETURN (
 SELECT Parent = c.Serial
 , Children = (
 SELECT c.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 FOR JSON PATH 
 )
 FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
 FOR JSON PATH
 );
END;
GO
GO
CREATE FUNCTION dbo.fn_json_test_2()
RETURNS nvarchar(max)
AS
BEGIN
 RETURN (
 SELECT c.Serial as Parent,
 (Select co.Serial, agc.Position
 from AggregationChildren agc, Aggregation ag, Code co
 where agc.AggregationId = a.AggregationId 
 and co.CodeId = agc.AggregationChildrenId for json path) as children
 from Aggregation a, Code c
 where c.CodeId = a.AggregationId for json path
 );
END;
GO

Now, I can compare the output for both queries via:

DECLARE @res1 nvarchar(max) = dbo.fn_json_test_1();
DECLARE @res2 nvarchar(max) = dbo.fn_json_test_2();
SELECT CASE WHEN @res1 <> @res2 THEN 'mismatch' ELSE 'match' END;

The result is:

The results do NOT match. The output of my query contains less child nodes than your query. I'm going back to the drawing board, and will simplify the test bed to see where the disparity lies.

The simplified testbed consists of 10 rows in the Code table, 2 rows in the Aggregation (Parent) table, and 8 rows in the AggregationChildren (child) table:

;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) n(Val)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT s1.Val 
 , CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM src s1
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1)) n(Val)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT s1.Val 
FROM src s1;
;WITH src AS 
(
 SELECT n.Val
 FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7)) n(Val)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT s1.Val + 2
 , s1.Val % 2
 , s1.Val 
FROM src s1;

Row counts:

SELECT Code = (SELECT COUNT(1) FROM dbo.Code)
 , Aggregation = (SELECT COUNT(1) FROM dbo.Aggregation)
 , AggregationChildren = (SELECT COUNT(1) FROM dbo.AggregationChildren)

╔══════╦═════════════╦═════════════════════╗
║ Code ║ Aggregation ║ AggregationChildren ║
╠══════╬═════════════╬═════════════════════╣
║ 10 ║ 2 ║ 8 ║
╚══════╩═════════════╩═════════════════════╝

The predicted pattern should be two parent json arrays, each with 4 child arrays.

My Results:

[
 {
 "Parent": "-5601362097731340301",
 "Children": [
 {
 "Serial": "-5601362097731340301",
 "Position": 0
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 2
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 4
 },
 {
 "Serial": "-5601362097731340301",
 "Position": 6
 }
 ]
 },
 {
 "Parent": "-8896860091721838065",
 "Children": [
 {
 "Serial": "-8896860091721838065",
 "Position": 1
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 3
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 5
 },
 {
 "Serial": "-8896860091721838065",
 "Position": 7
 }
 ]
 }

Your query:

[
 {
 "Parent": "-5601362097731340301",
 "Children": [
 {
 "Serial": "5802227619253639548",
 "Position": 0
 },
 {
 "Serial": "5802227619253639548",
 "Position": 0
 },
 {
 "Serial": "4504664379821512162",
 "Position": 2
 },
 {
 "Serial": "4504664379821512162",
 "Position": 2
 },
 {
 "Serial": "6561435639659176802",
 "Position": 4
 },
 {
 "Serial": "6561435639659176802",
 "Position": 4
 },
 {
 "Serial": "-7417083263182709739",
 "Position": 6
 },
 {
 "Serial": "-7417083263182709739",
 "Position": 6
 }
 ]
 },
 {
 "Parent": "-8896860091721838065",
 "Children": [
 {
 "Serial": "-7646118996434234523",
 "Position": 1
 },
 {
 "Serial": "-7646118996434234523",
 "Position": 1
 },
 {
 "Serial": "-6372739442099935942",
 "Position": 3
 },
 {
 "Serial": "-6372739442099935942",
 "Position": 3
 },
 {
 "Serial": "-882384147532911428",
 "Position": 5
 },
 {
 "Serial": "-882384147532911428",
 "Position": 5
 },
 {
 "Serial": "4293317573306886053",
 "Position": 7
 },
 {
 "Serial": "4293317573306886053",
 "Position": 7
 }
 ]
 }
]

Your query has too many children; my query returns the predicted number of children, and it returns the correct Position values, however is returning the incorrect Serial values.

The "bug" in my query appears in the inner query. The incorrect query is:

SELECT c.Serial 
 , cac.Position
FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH

The correct version is:

SELECT cc.Serial --changed "c." to "cc."
 , cac.Position
FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY cc.CodeId --not a big deal, but different order for children in output
FOR JSON PATH

The corrected query now looks like:

SELECT Parent = c.Serial
 , Children = (
 SELECT cc.Serial 
 , cac.Position
 FROM dbo.Code cc
 INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
 WHERE cac.AggregationId = a.AggregationId
 ORDER BY cc.CodeId
 FOR JSON PATH 
 )
FROM dbo.Code c
 INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH;

And returns the following result:

[
 {
 "Parent": "-195930341251513493",
 "Children": [
 {
 "Serial": "-6126601633786720400",
 "Position": 1
 },
 {
 "Serial": "5216562173012877678",
 "Position": 3
 },
 {
 "Serial": "-1992909345438478098",
 "Position": 5
 },
 {
 "Serial": "8329388691987940194",
 "Position": 7
 }
 ]
 },
 {
 "Parent": "8774608126018975726",
 "Children": [
 {
 "Serial": "-3380643917643646211",
 "Position": 0
 },
 {
 "Serial": "-2042609074595538493",
 "Position": 2
 },
 {
 "Serial": "7345460002653774160",
 "Position": 4
 },
 {
 "Serial": "-2126530822210070443",
 "Position": 6
 }
 ]
 }
]

Stack Exchange Network

Generate nested array with OPENJSON very slow

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Generate nested array with OPENJSON very slow

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions