I would like to use the Oracle XML extensions for Hive to parse xml files to parquet tables, with the OXMLSerDe.
Given the following data, stored at /user/admin/hive/db/oudxml_test/oudxml-test.xml...
<acme:Transaction xmlns:acme="http://www.acme.com/ITServices/XMLGroup/Schemas/Enterprise/acmeXML" xmlns:ns2="http://www.acme.com/PubSubRouter/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="http://xyz.services.acme.com/" acme:ID="5819f1b9-a3b4-28f9-e053-3e17320ad2ed.1" acme:Source="REFAPP" acme:SourceKey="150423903429063199" acme:Timestamp="2017-09-01T00:10:34.290-04:00" xsi:schemaLocation="http://acme.com/ITServices/XMLGroup/Schemas/Enterprise/acmeXML acmeXML_Schema_0001_01.xsd">
<acme:MetaData>
<acme:SourceSystem>
<acme:Name>REFAPP</acme:Name>
<acme:SourceBusinessUnit acme:MDMKey="016">
<acme:InternalName>RATP</acme:InternalName>
</acme:SourceBusinessUnit>
<acme:XMLProducts>
<acme:XMLProduct/>
</acme:XMLProducts>
<acme:ProcessDateTime>2017-09-01T00:10:34.290-04:00</acme:ProcessDateTime>
<acme:SubmissionID>9743333</acme:SubmissionID>
</acme:SourceSystem>
<acme:Adaptor>
<acme:AdaptorVersion>1.000.000</acme:AdaptorVersion>
<acme:acmeXMLVersion>1.000.000</acme:acmeXMLVersion>
<acme:ProcessDateTime>2017-09-01T00:10:34.290-04:00</acme:ProcessDateTime>
</acme:Adaptor>
</acme:MetaData>
</acme:Transaction>
and the following ddl ...
add jar hdfs:///apps/hive/auxjars/oxh/hive/lib/oxh-hive.jar;
add jar hdfs:///apps/hive/auxjars/oxh/hive/lib/oxh-mapreduce.jar;
add jar hdfs:///apps/hive/auxjars/oxh/hive/lib/oxquery.jar;
add jar hdfs:///apps/hive/auxjars/oxh/hive/lib/xqjapi.jar;
add jar hdfs:///apps/hive/auxjars/oxh/hive/lib/apache-xmlbeans.jar;
add jar hdfs:///apps/hive/auxjars/oxh/hive/lib/woodstox-core-5.0.2.jar;
add jar hdfs:///apps/hive/auxjars/oxh/hive/lib/stax2-api-3.1.4.jar;
DROP TABLE IF EXISTS admin.acme_transaction;
CREATE external TABLE IF NOT EXISTS admin.acme_transaction
(id string
,`source` string
,source_key bigint
,ts string
,source_system_name string
,source_system_business_unit_key string
,source_system_business_unit_name string
,source_system_process_date_time string
,source_system_submission_id bigint
,adaptor_verion string
,adaptor_xml_version string
,adaptor_process_date_time string )
ROW FORMAT SERDE 'oracle.hadoop.xquery.hive.OXMLSerDe'
STORED AS
INPUTFORMAT 'oracle.hadoop.xquery.hive.OXMLInputFormat'
OUTPUTFORMAT 'oracle.hadoop.xquery.hive.OXMLOutputFormat'
LOCATION '/user/admin/db/oudxml_test/'
TBLPROPERTIES
('oxh-namespace.acme' = 'http://www.acme.com/ITServices/XMLGroup/Schemas/Enterprise/OUDXML'
,'oxh-elements.transaction'='/acme:Transaction'
,'oxh-column.id'='/acme:Transaction/@acme:ID'
,'oxh-column.source'='/acme:Transaction/@acme:Source'
,'oxh-column.source_key'='/acme:Transaction/@acme:SourceKey'
,'oxh-column.ts'='/acme:Transaction/@acme:Timestamp'
,'oxh-column.source_system_name'='/acme:Transaction/acme:MetaData/acme:SourceSystem/acme:Name/text()'
,'oxh-column.source_system_business_unit_key'='/acme:Transaction/acme:MetaData/acme:SourceSystem/acme:SourceBusinessUnit/@acme:MDMKey'
,'oxh-column.source_system_business_unit_name'='/acme:Transaction/acme:MetaData/acme:SourceSystem/acme:SourceBusinessUnit/acme:InternalName/text()'
,'oxh-column.source_system_process_date_time'='/acme:Transaction/acme:MetaData/acme:SourceSystem/acme:ProcessDateTime/text()'
,'oxh-column.source_system_submission_id'='/acme:Transaction/acme:MetaData/acme:SourceSystem/acme:SubmissionID/text()'
,'oxh-column.adaptor_verion'='/acme:Transaction/acme:MetaData/acme:Adaptor/acme:AdaptorVersion/text()'
,'oxh-column.adaptor_xml_version'='/acme:Transaction/acme:MetaData/acme:Adaptor/acme:acmeXMLVersion/text()'
,'oxh-column.adaptor_process_date_time'='/acme:Transaction/acme:MetaData/acme:Adaptor/acme:ProcessDateTime/text()'
);
I'm getting the following error...
java.lang.NoClassDefFoundError: oracle/xml/xquery/tokens/NamespaceBinding
Any assistance is appreciated.
James