Java tutorial
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *******************************************************************************/
package org.eclipse.rdf4j.rio.trix;

import static org.eclipse.rdf4j.rio.trix.TriXConstants.BNODE_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.CONTEXT_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.DATATYPE_ATT;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.LANGUAGE_ATT;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.PLAIN_LITERAL_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.TRIPLE_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.TYPED_LITERAL_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.URI_TAG;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.input.BOMInputStream;
import org.eclipse.rdf4j.common.xml.SimpleSAXAdapter;
import org.eclipse.rdf4j.common.xml.SimpleSAXParser;
import org.eclipse.rdf4j.common.xml.XMLReaderFactory;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RioSetting;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFParser;
import org.eclipse.rdf4j.rio.helpers.TriXParserSettings;
import org.eclipse.rdf4j.rio.helpers.XMLParserSettings;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;

/**
 * A parser that can parse RDF files that are in the
 * <a href="http://www.w3.org/2004/03/trix/">TriX format</a>.
 * <p>
 * The parser registers itself as the SAX {@link ErrorHandler} of the XML reader it uses, and reports
 * warnings and errors together with the line and column number of the SAX locator when one is available.
 *
 * @author Arjohn Kampman
 */
public class TriXParser extends AbstractRDFParser implements ErrorHandler {

    /*-----------*
     * Variables *
     *-----------*/

    /**
     * The SAX parser of the parse run that is in progress or was started last; {@code null} until the first
     * SAX parser has been created. Only used to obtain position information for reporting.
     */
    private SimpleSAXParser saxParser;

    /*--------------*
     * Constructors *
     *--------------*/

    /**
     * Creates a new TriXParser that will use a {@link SimpleValueFactory} to create objects for resources,
     * bNodes, literals and statements.
     */
    public TriXParser() {
        this(SimpleValueFactory.getInstance());
    }

    /**
     * Creates a new TriXParser that will use the supplied ValueFactory to create objects for resources,
     * bNodes, literals and statements.
     *
     * @param valueFactory
     *        A ValueFactory.
     */
    public TriXParser(ValueFactory valueFactory) {
        super(valueFactory);
    }

    /*---------*
     * Methods *
     *---------*/

    /**
     * Returns the format handled by this parser.
     *
     * @return {@link RDFFormat#TRIX}.
     */
    @Override
    public final RDFFormat getRDFFormat() {
        return RDFFormat.TRIX;
    }

    /**
     * Parses the data from the supplied InputStream, using the supplied baseURI to resolve any relative URI
     * references. The stream is wrapped in a {@link BOMInputStream} before it is handed to the XML reader.
     *
     * @param in
     *        The InputStream from which to read the data, must not be {@code null}.
     * @param baseURI
     *        The URI associated with the data in the InputStream, must not be {@code null}.
     * @throws IOException
     *         If an I/O error occurred while data was read from the InputStream.
     * @throws RDFParseException
     *         If the parser has found an unrecoverable parse error.
     * @throws RDFHandlerException
     *         If the configured statement handler encountered an unrecoverable error.
     * @throws IllegalArgumentException
     *         If the supplied input stream or base URI is {@code null}.
     */
    @Override
    public void parse(InputStream in, String baseURI)
            throws IOException, RDFParseException, RDFHandlerException {
        if (in == null) {
            throw new IllegalArgumentException("Input stream cannot be 'null'");
        }
        if (baseURI == null) {
            throw new IllegalArgumentException("Base URI cannot be 'null'");
        }

        InputSource inputSource = new InputSource(new BOMInputStream(in, false));
        inputSource.setSystemId(baseURI);

        parse(inputSource);
    }

    /**
     * Parses the data from the supplied Reader, using the supplied baseURI to resolve any relative URI
     * references.
     *
     * @param reader
     *        The Reader from which to read the data, must not be {@code null}.
     * @param baseURI
     *        The URI associated with the data in the Reader, must not be {@code null}.
     * @throws IOException
     *         If an I/O error occurred while data was read from the Reader.
     * @throws RDFParseException
     *         If the parser has found an unrecoverable parse error.
     * @throws RDFHandlerException
     *         If the configured statement handler has encountered an unrecoverable error.
     * @throws IllegalArgumentException
     *         If the supplied reader or base URI is {@code null}.
     */
    @Override
    public void parse(Reader reader, String baseURI)
            throws IOException, RDFParseException, RDFHandlerException {
        if (reader == null) {
            throw new IllegalArgumentException("Reader cannot be 'null'");
        }
        if (baseURI == null) {
            throw new IllegalArgumentException("Base URI cannot be 'null'");
        }

        InputSource inputSource = new InputSource(reader);
        inputSource.setSystemId(baseURI);

        parse(inputSource);
    }

    /**
     * Shared implementation of both public {@code parse} methods: notifies the RDF handler (if any) of the
     * start of the document, runs a SAX parse with a {@link TriXSAXHandler} as listener, translates SAX
     * exceptions into RDF parse/handler exceptions, and finally notifies the handler of the end of the
     * document.
     *
     * @param inputStreamOrReader
     *        The input source wrapping either the input stream or the reader to parse.
     * @throws IOException
     *         If an I/O error occurred while data was read.
     * @throws RDFParseException
     *         If the parser has found an unrecoverable parse error.
     * @throws RDFHandlerException
     *         If the configured statement handler has encountered an unrecoverable error.
     */
    private void parse(InputSource inputStreamOrReader)
            throws IOException, RDFParseException, RDFHandlerException {
        clear();

        // Forget the SAX parser of a previous run: if this run fails before a new one exists, errors
        // must not be reported against the positions of an unrelated, already finished document.
        saxParser = null;

        try {
            if (rdfHandler != null) {
                rdfHandler.startRDF();
            }

            XMLReader xmlReader;
            if (getParserConfig().isSet(XMLParserSettings.CUSTOM_XML_READER)) {
                xmlReader = getParserConfig().get(XMLParserSettings.CUSTOM_XML_READER);
            } else {
                xmlReader = XMLReaderFactory.createXMLReader();
            }

            xmlReader.setErrorHandler(this);

            saxParser = new SimpleSAXParser(xmlReader);
            saxParser.setPreserveWhitespace(true);
            saxParser.setListener(new TriXSAXHandler());

            saxParser.parse(inputStreamOrReader);
        } catch (SAXParseException e) {
            // Prefer the wrapped cause, but keep the position carried by the SAX exception itself.
            Exception wrappedExc = e.getException();

            if (wrappedExc == null) {
                reportFatalError(e, e.getLineNumber(), e.getColumnNumber());
            } else {
                reportFatalError(wrappedExc, e.getLineNumber(), e.getColumnNumber());
            }
        } catch (SAXException e) {
            Exception wrappedExc = e.getException();

            if (wrappedExc == null) {
                reportFatalError(e);
            } else if (wrappedExc instanceof RDFParseException) {
                // Unwrap the exceptions that the SAX callbacks of this class wrapped in a SAXException.
                throw (RDFParseException) wrappedExc;
            } else if (wrappedExc instanceof RDFHandlerException) {
                throw (RDFHandlerException) wrappedExc;
            } else {
                reportFatalError(wrappedExc);
            }
        } finally {
            clear();
        }

        if (rdfHandler != null) {
            rdfHandler.endRDF();
        }
    }

    /**
     * Returns the locator of the current SAX parser in a null-safe way.
     *
     * @return the locator, or {@code null} if no SAX parser has been created yet for this parse run or the
     *         SAX parser has no locator.
     */
    private Locator currentLocator() {
        return saxParser == null ? null : saxParser.getLocator();
    }

    /**
     * Creates a literal, passing on the line and column number of the SAX locator, or {@code -1} for both
     * when no locator is available.
     */
    @Override
    protected Literal createLiteral(String label, String lang, IRI datatype) throws RDFParseException {
        Locator locator = currentLocator();
        if (locator != null) {
            return createLiteral(label, lang, datatype, locator.getLineNumber(), locator.getColumnNumber());
        } else {
            return createLiteral(label, lang, datatype, -1, -1);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportWarning(String)}, adding line- and column number information
     * to the error.
     */
    @Override
    protected void reportWarning(String msg) {
        Locator locator = currentLocator();
        if (locator != null) {
            reportWarning(msg, locator.getLineNumber(), locator.getColumnNumber());
        } else {
            reportWarning(msg, -1, -1);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportError(String, RioSetting)}, adding line- and column number
     * information to the error.
     */
    @Override
    protected void reportError(String msg, RioSetting<Boolean> setting) throws RDFParseException {
        Locator locator = currentLocator();
        if (locator != null) {
            reportError(msg, locator.getLineNumber(), locator.getColumnNumber(), setting);
        } else {
            reportError(msg, -1, -1, setting);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportFatalError(String)}, adding line- and column number
     * information to the error.
     */
    @Override
    protected void reportFatalError(String msg) throws RDFParseException {
        Locator locator = currentLocator();
        if (locator != null) {
            reportFatalError(msg, locator.getLineNumber(), locator.getColumnNumber());
        } else {
            reportFatalError(msg, -1, -1);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportFatalError(Exception)}, adding line- and column number
     * information to the error.
     */
    @Override
    protected void reportFatalError(Exception e) throws RDFParseException {
        Locator locator = currentLocator();
        if (locator != null) {
            reportFatalError(e, locator.getLineNumber(), locator.getColumnNumber());
        } else {
            reportFatalError(e, -1, -1);
        }
    }

    /*----------------------------*
     * Inner class TriXSAXHandler *
     *----------------------------*/

    /**
     * SAX listener that collects the values found in the document and turns them into statements. Values
     * seen before the first triple of a graph are interpreted as that graph's context identifier; values
     * seen inside a triple are its subject, predicate and object.
     */
    private class TriXSAXHandler extends SimpleSAXAdapter {

        /** Context of the graph being parsed, or {@code null} if none was specified. */
        private Resource currentContext;

        /** {@code true} between the start of a graph and the first triple in it. */
        private boolean parsingContext;

        /** Values collected so far for the context identifier or for the current triple. */
        private List<Value> valueList;

        public TriXSAXHandler() {
            currentContext = null;
            valueList = new ArrayList<Value>(3);
        }

        /**
         * Converts value elements to RDF values and appends them to the value list; on the first triple of a
         * graph, consumes the values collected so far as the graph's context identifier.
         *
         * @throws SAXException
         *         wrapping any {@link RDFParseException} raised while creating or validating values.
         */
        @Override
        public void startTag(String tagName, Map<String, String> atts, String text) throws SAXException {
            try {
                if (tagName.equals(URI_TAG)) {
                    valueList.add(createURI(text));
                } else if (tagName.equals(BNODE_TAG)) {
                    valueList.add(createNode(text));
                } else if (tagName.equals(PLAIN_LITERAL_TAG)) {
                    String lang = atts.get(LANGUAGE_ATT);
                    valueList.add(createLiteral(text, lang, null));
                } else if (tagName.equals(TYPED_LITERAL_TAG)) {
                    String datatype = atts.get(DATATYPE_ATT);

                    if (datatype == null) {
                        reportError(DATATYPE_ATT + " attribute missing for typed literal",
                                TriXParserSettings.FAIL_ON_TRIX_MISSING_DATATYPE);
                        // Only reached when the setting tolerates the error: fall back to a plain literal.
                        valueList.add(createLiteral(text, null, null));
                    } else {
                        IRI dtURI = createURI(datatype);
                        valueList.add(createLiteral(text, null, dtURI));
                    }
                } else if (tagName.equals(TRIPLE_TAG)) {
                    if (parsingContext) {
                        try {
                            // First triple in a context, valueList can contain
                            // context information
                            if (valueList.size() > 1) {
                                reportError("At most 1 resource can be specified for the context",
                                        TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                            } else if (valueList.size() == 1) {
                                try {
                                    currentContext = (Resource) valueList.get(0);
                                } catch (ClassCastException e) {
                                    reportError("Context identifier should be a URI or blank node",
                                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                                }
                            }
                        } finally {
                            // The collected values belonged to the context, not to the triple that starts here.
                            parsingContext = false;
                            valueList.clear();
                        }
                    }
                } else if (tagName.equals(CONTEXT_TAG)) {
                    parsingContext = true;
                }
            } catch (RDFParseException e) {
                throw new SAXException(e);
            }
        }

        /**
         * Reports the collected statement at the end of a triple and resets all per-graph state at the end
         * of a graph.
         *
         * @throws SAXException
         *         wrapping any {@link RDFParseException} or {@link RDFHandlerException} raised while
         *         reporting the statement.
         */
        @Override
        public void endTag(String tagName) throws SAXException {
            try {
                if (tagName.equals(TRIPLE_TAG)) {
                    reportStatement();
                } else if (tagName.equals(CONTEXT_TAG)) {
                    currentContext = null;
                    // A graph without any triple never reaches the TRIPLE_TAG branch of startTag, so its
                    // context values would otherwise still be pending and leak into the next graph.
                    parsingContext = false;
                    valueList.clear();
                }
            } catch (RDFParseException e) {
                throw new SAXException(e);
            } catch (RDFHandlerException e) {
                throw new SAXException(e);
            }
        }

        /**
         * Validates the three collected values, creates a statement in the current context and passes it to
         * the RDF handler (if any). The value list is always emptied afterwards, also when the triple is
         * rejected.
         */
        private void reportStatement() throws RDFParseException, RDFHandlerException {
            try {
                if (valueList.size() != 3) {
                    reportError("exactly 3 values are required for a triple",
                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                    return;
                }

                Resource subj;
                IRI pred;
                Value obj;

                try {
                    subj = (Resource) valueList.get(0);
                } catch (ClassCastException e) {
                    reportError("First value for a triple should be a URI or blank node",
                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                    return;
                }

                try {
                    pred = (IRI) valueList.get(1);
                } catch (ClassCastException e) {
                    reportError("Second value for a triple should be a URI",
                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                    return;
                }

                obj = valueList.get(2);

                Statement st = createStatement(subj, pred, obj, currentContext);
                if (rdfHandler != null) {
                    rdfHandler.handleStatement(st);
                }
            } finally {
                valueList.clear();
            }
        }
    } // end inner class TriXSAXHandler

    /**
     * Implementation of SAX ErrorHandler.warning
     */
    @Override
    public void warning(SAXParseException exception) throws SAXException {
        this.reportWarning(exception.getMessage());
    }

    /**
     * Implementation of SAX ErrorHandler.error
     */
    @Override
    public void error(SAXParseException exception) throws SAXException {
        try {
            this.reportError(exception.getMessage(), XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS);
        } catch (RDFParseException rdfpe) {
            throw new SAXException(rdfpe);
        }
    }

    /**
     * Implementation of SAX ErrorHandler.fatalError
     */
    @Override
    public void fatalError(SAXParseException exception) throws SAXException {
        try {
            this.reportFatalError(exception.getMessage());
        } catch (RDFParseException rdfpe) {
            throw new SAXException(rdfpe);
        }
    }
}