I'm receiving XML files as part of an EDI setup and I'm having an issues getting information from the child elements.
Here is the XML file:
<?xml version="1.0" encoding="UTF-8"?>
<cpi_event>
<header>
<source_tenant_uid>My_Coffee_Company_PM_Training</source_tenant_uid>
<event_type>receipt_received</event_type>
<event_uid>b631e1bb-d815-496c-bc50-6253f13b8d40</event_uid>
<event_created_at>2016年07月13日 22:26:13.437613</event_created_at>
<destination_tenant_uid>My_Coffee_Company</destination_tenant_uid>
<document_type>My_Coffee_Company_OB_Receipt_Received_V1</document_type>
<document_uid>975_2239_b631e1bb-d815-496c-bc50-6253f13b8d40_0</document_uid>
</header>
<content>
<receipt>
<received_at>2016年07月13日 15:26:13 -0700</received_at>
<reference_1>ddd</reference_1>
<reference_2>zzz</reference_2>
<customer>
<code>MYX001</code>
<name>My Coffee Company</name>
<uuid>5f17ba52-f408-43f7-9dab-f559e10c2b2f</uuid>
</customer>
<vendor>
<code>MY1200</code>
<name>My Coffee Company</name>
<uuid>a3c41ccf-a346-4cb0-8717-33dce5c4a3a5</uuid>
</vendor>
<receipt_items>
<receipt_item>
<code>017-001691</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a70</uuid>
<unit_quantity>500.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A3</lot_code>
<expiry_date/>
<receive_to>
<code>Q-01-1-C</code>
<name>Q-01-1-C</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-001692</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a71</uuid>
<unit_quantity>12500.5</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A3</lot_code>
<expiry_date/>
<receive_to>
<code>CR15</code>
<name>Cups Racking 15</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-001693</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a72</uuid>
<unit_quantity>25002.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A2</lot_code>
<expiry_date/>
<receive_to>
<code>CR15FG</code>
<name>Cups Racking 15FG</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-0016914</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a73</uuid>
<unit_quantity>25003.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A1</lot_code>
<expiry_date/>
<receive_to>
<code>CR15A</code>
<name>Cups Racking 15A</name>
</receive_to>
</receipt_item>
</receipt_items>
</receipt>
</content>
</cpi_event>
I'm only concerned with the first step of my process which is extracting the <header>
information and the <receipt_item>
<uuid>
for each item and then inserting that information into a header table in SQL.
Using the sample XML above I would end up with four rows being inserted into the SQL header table. Each row would contain the <header>
information and the <receipt_item>
<uuid>
for one item.
What happens with my current code is it will grab the <header>
info and only the last <receipt_item>
<uuid>
. I've changed my code around and have been able to get it to grab the <header>
info and first <receipt_item>
<uuid>
, but not all four <receipt_item>
<uuid>
's.
Here is my current SQL stored proceedure (called from SSIS with filename as input):
USE [PackManger_Sandbox]
GO
/****** Object: StoredProcedure [dbo].[EDI_XML_HEADER_Import] Script Date: 7/15/2016 3:45:10 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER PROCEDURE [dbo].[EDI_XML_HEADER_Import]
(
@XML_FILE NVARCHAR(MAX)
)
AS
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
DECLARE @Event_Type as varchar(50)
-- Message log for logging
DECLARE @msg NVARCHAR(255)
-- Setup XML variable to be used to hold contents of XML file.
DECLARE @xml XML
-- The command line
DECLARE @COMMAND NVARCHAR(MAX)
-- The definition of the parameters used within the command line
DECLARE @PARAM_DEF NVARCHAR(500)
-- The parameter used to pass the file name into the command
DECLARE @FILEVAR NVARCHAR(MAX)
-- The output variable that holds the results of the OPENROWSET()
DECLARE @XML_OUT XML
SET @FILEVAR = @XML_FILE
SET @PARAM_DEF = N'@XML_FILE NVARCHAR(MAX), @XML_OUT XML OUTPUT'
SET @COMMAND = N'SELECT @XML_OUT = BulkColumn FROM OPENROWSET(BULK ''' + @XML_FILE + ''', SINGLE_BLOB) ROW_SET';
EXEC sp_executesql @COMMAND, @PARAM_DEF, @XML_FILE = @FILEVAR,@XML_OUT = @xml OUTPUT;
DECLARE @hdoc int
DECLARE @WatcherOutputEDIEventType int
DECLARE @sku_uuid nvarchar(100)
EXEC sp_xml_preparedocument @hdoc OUTPUT, @xml
BEGIN TRY
-- Save event_type. Used to set return code which gets returned to SSIS to decide how to process XML
SELECT
@Event_Type = XMLhdr.event_type
FROM OPENXML (@hdoc, '/cpi_event/header', 2)
WITH (
event_type nvarchar(50)
) AS XMLhdr
IF @Event_Type = 'bom_updated' SET @WatcherOutputEDIEventType = 100;
IF @Event_Type = 'sku_created' SET @WatcherOutputEDIEventType = 200;
IF @Event_Type = 'sku_updated' SET @WatcherOutputEDIEventType = 250;
IF @Event_Type = 'receipt_received' SET @WatcherOutputEDIEventType = 300;
IF @Event_Type = 'job_reconciled' SET @WatcherOutputEDIEventType = 400;
IF @Event_Type = 'shipment_shipped' SET @WatcherOutputEDIEventType = 500;
IF @Event_Type = 'spare1' SET @WatcherOutputEDIEventType = 600;
IF @Event_Type = 'spare2' SET @WatcherOutputEDIEventType = 700;
IF @WatcherOutputEDIEventType > 0 AND @WatcherOutputEDIEventType < 251
-- Save SKU uuid
SELECT
@sku_uuid = XMLhdr.uuid
FROM OPENXML (@hdoc, '/cpi_event/content/sku', 2)
WITH (
uuid nvarchar(100)
) AS XMLhdr;
ELSE IF @WatcherOutputEDIEventType = 300
-- Save SKU uuid
SELECT
@sku_uuid = XMLhdr.uuid
FROM OPENXML (@hdoc, '/cpi_event/content/receipt/receipt_items/receipt_item', 2)
WITH (
uuid nvarchar(100)
) AS XMLhdr;
-- Update header table
INSERT INTO
PackManger_Sandbox.dbo.header
(
source_tenant_uid,
event_type,
event_uid,
event_created_at,
destination_tenant_uid,
document_type,
document_uid,
sku_uuid)
SELECT
source_tenant_uid = source_tenant_uid,
event_type = event_type,
event_uid = event_uid,
event_created_at = event_created_at,
destination_tenant_uid = destination_tenant_uid,
document_type = document_type,
document_uid = document_uid,
uuid = @sku_uuid
FROM OPENXML (@hdoc, '/cpi_event/header', 2)
WITH (
source_tenant_uid nvarchar(255),
event_type nvarchar(255),
event_uid nvarchar(255),
event_created_at datetime,
destination_tenant_uid nvarchar(255),
document_type nvarchar(255),
document_uid nvarchar(255));
-- Log success
SET @msg = 'Success updating header table.' + CONVERT(varchar(50), @Event_Type);
EXEC utility.Log_ProcedureCall_Sandbox
@ObjectID = @@PROCID,
@AdditionalInfo = @msg;
END TRY
BEGIN CATCH
-- Log error
SET @msg = 'Error updating header table.';
EXEC utility.Log_ProcedureCall_Sandbox
@ObjectID = @@PROCID,
@AdditionalInfo = @msg;
END CATCH
-- Remove the XML file from memory
EXEC sp_xml_removedocument @hdoc
RETURN @WatcherOutputEDIEventType
I'm trying to avoid looping and doing multiple SQL inserts.
Thanks!
1 Answer 1
You have a couple of options: 1) try the XML datatype and its methods (eg .nodes, .query, .value) or use OPENXML
to drill down to the lowest level then then drill back up using the parent axis operator ('..'). There are pros and cons to each method (which you could research) but try both options with your data and see which works best for you. OPENXML
performance is sometimes better with large xml documents and it doesn't have the same problem with the parent axis operator that the xml datatype does. It may however take 1/8th of your buffer pool, up to a maximum of 500MB which is a known 'feature' and why you should always call sp_xml_removedocument
.
Here's a simple demo:
DECLARE @xml XML = '<?xml version="1.0" encoding="UTF-8"?>
<cpi_event>
<header>
<source_tenant_uid>My_Coffee_Company_PM_Training</source_tenant_uid>
<event_type>receipt_received</event_type>
<event_uid>b631e1bb-d815-496c-bc50-6253f13b8d40</event_uid>
<event_created_at>2016年07月13日 22:26:13.437613</event_created_at>
<destination_tenant_uid>My_Coffee_Company</destination_tenant_uid>
<document_type>My_Coffee_Company_OB_Receipt_Received_V1</document_type>
<document_uid>975_2239_b631e1bb-d815-496c-bc50-6253f13b8d40_0</document_uid>
</header>
<content>
<receipt>
<received_at>2016年07月13日 15:26:13 -0700</received_at>
<reference_1>ddd</reference_1>
<reference_2>zzz</reference_2>
<customer>
<code>MYX001</code>
<name>My Coffee Company</name>
<uuid>5f17ba52-f408-43f7-9dab-f559e10c2b2f</uuid>
</customer>
<vendor>
<code>MY1200</code>
<name>My Coffee Company</name>
<uuid>a3c41ccf-a346-4cb0-8717-33dce5c4a3a5</uuid>
</vendor>
<receipt_items>
<receipt_item>
<code>017-001691</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a70</uuid>
<unit_quantity>500.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A3</lot_code>
<expiry_date/>
<receive_to>
<code>Q-01-1-C</code>
<name>Q-01-1-C</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-001692</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a71</uuid>
<unit_quantity>12500.5</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A3</lot_code>
<expiry_date/>
<receive_to>
<code>CR15</code>
<name>Cups Racking 15</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-001693</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a72</uuid>
<unit_quantity>25002.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A2</lot_code>
<expiry_date/>
<receive_to>
<code>CR15FG</code>
<name>Cups Racking 15FG</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-0016914</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a73</uuid>
<unit_quantity>25003.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A1</lot_code>
<expiry_date/>
<receive_to>
<code>CR15A</code>
<name>Cups Racking 15A</name>
</receive_to>
</receipt_item>
</receipt_items>
</receipt>
</content>
</cpi_event>'
DECLARE @hdoc int
EXEC sp_xml_preparedocument @hdoc OUTPUT, @xml
SELECT
h.c.value('(source_tenant_uid/text())[1]', 'nvarchar(255)'),
h.c.value('(event_type/text())[1]', 'nvarchar(255)'),
h.c.value('(event_uid/text())[1]', 'nvarchar(255)'),
h.c.value('(event_created_at/text())[1]', 'nvarchar(255)'),
h.c.value('(destination_tenant_uid/text())[1]', 'nvarchar(255)'),
h.c.value('(document_type/text())[1]', 'nvarchar(255)'),
h.c.value('(document_uid/text())[1]', 'nvarchar(255)'),
ri.c.value('(uuid/text())[1]', 'nvarchar(255)')
FROM @xml.nodes('/cpi_event') ce(c)
CROSS APPLY ce.c.nodes('header') h(c)
CROSS APPLY ce.c.nodes('content/receipt/receipt_items/receipt_item') ri(c)
SELECT
source_tenant_uid = source_tenant_uid,
event_type = event_type,
event_uid = event_uid,
event_created_at = event_created_at,
destination_tenant_uid = destination_tenant_uid,
document_type = document_type,
document_uid = document_uid,
--uuid = @sku_uuid
uuid
FROM OPENXML (@hdoc, '/cpi_event/content/receipt/receipt_items/receipt_item', 2)
WITH (
source_tenant_uid nvarchar(255) '../../../../header/source_tenant_uid',
event_type nvarchar(255) '../../../../header/event_type',
event_uid nvarchar(255) '../../../../header/event_uid',
event_created_at datetime '../../../../header/event_created_at',
destination_tenant_uid nvarchar(255) '../../../../header/destination_tenant_uid',
document_type nvarchar(255) '../../../../header/document_type',
document_uid nvarchar(255) '../../../../header/document_uid',
uuid nvarchar(255) 'uuid'
);
-- Remove the XML file from memory
EXEC sp_xml_removedocument @hdoc
GO
-
This is exactly what I needed! I tried both methods, XML data type and
openxml
, but was a little unclear on the use of what looks like alias arraysce(c)
h(c)
using XML data type. When I triedopenxml
, I think my issue was I'm pointing to the<header>
data in theopenxml
command, which has only one occurrence causing only one of the<receipt_item>
<uuid>
to be processed, vs pointing to the<receipt_item>
and processing the<header>
data for each<receipt_item>
<uuid>
.Dennis– Dennis2016年07月20日 19:42:32 +00:00Commented Jul 20, 2016 at 19:42 -
The
ce(c)
bit is a table name alias and column name alias for the table that is output from theAPPLY
. It can be anything you want really, but I tend to usec
for column, and make the table alias meaningful.wBob– wBob2016年07月20日 22:04:32 +00:00Commented Jul 20, 2016 at 22:04 -
I understand now, thank you for the follow up explanation.Dennis– Dennis2016年07月21日 21:54:49 +00:00Commented Jul 21, 2016 at 21:54