Example usage for javax.xml.stream XMLEventReader hasNext

List of usage examples for javax.xml.stream XMLEventReader hasNext

Introduction

On this page you can find example usages of the hasNext method of javax.xml.stream.XMLEventReader.

Prototype

@Override
public boolean hasNext();

Source Link

Document

Check if there are more events.

Usage

From source file:sapience.injectors.stax.inject.StringBasedStaxStreamInjector.java

/**
 * Injects a reference that is more than a simple attribute by adding a new XML subtree to the stream.
 * The reference target is rendered to a string and parsed by a separate StAX event reader; every parsed
 * event except the document start/end markers is appended to the main XMLEventWriter.
 *
 * @param w   the writer that receives the events of the parsed subtree
 * @param ref the reference whose target is parsed as XML
 * @throws XMLStreamException if the target cannot be parsed or an event cannot be written
 */
private void createEventsForElement(XMLEventWriter w, Reference ref) throws XMLStreamException {
    XMLEventReader r = null;
    try {
        String target = ref.getTarget().toString();

        // Parse the characters directly instead of going through String.getBytes(): the no-argument
        // getBytes() encodes with the platform default charset, which can corrupt non-ASCII content
        // when it differs from the encoding the XML parser assumes for the byte stream.
        this.inFac.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
        r = this.inFac.createXMLEventReader(new java.io.StringReader(target));

        while (r.hasNext()) {
            XMLEvent e = r.nextEvent();
            switch (e.getEventType()) {
            case XMLEvent.START_DOCUMENT:
            case XMLEvent.END_DOCUMENT:
                // the subtree is embedded into the writer's stream, so its document markers are dropped
                break;
            default:
                w.add(e);
                break;
            }
        }
    } finally {
        if (r != null)
            r.close();
    }

}

From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java

/**
 * Parses a Wikipedia XML dump and emits one XHTML document per page to the given handler.
 *
 * The incoming stream is wrapped in a TikaInputStream and read through the file it provides. For every
 * page that is not skipped, title, source URL, modification timestamp and creator are collected in
 * {@code metadata}; when the page's end tag is reached, the cleaned page text is written as a single
 * paragraph. Redirect pages and pages from special namespaces (category, template, help, ...) are skipped.
 *
 * Any exception raised while parsing is logged at SEVERE level and not propagated.
 *
 * @param stream   the dump to parse
 * @param handler  receives the XHTML events of every emitted page
 * @param metadata filled with the metadata of the page currently being processed; cleared at each page start
 * @param context  may carry a WikipediaDumpParserConfig; a default configuration is used when it does not
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    FileInputStream dumpStream = null;
    XMLEventReader xmlEventReader = null;

    try {

        // We iterate over the page entries. Each one contains title, timestamp,
        // <contributor> => <username> and text. The text still has to be cleaned: it is preprocessed
        // with bliki, but the string has to be prepared and post-cleaned by hand as well.

        WikipediaDumpParserConfig wikipediaDumpParserConfig = context.get(WikipediaDumpParserConfig.class);

        if (wikipediaDumpParserConfig == null) {
            Logger.getLogger(WikipediaDumpParser.class.getName())
                    .info("No wikipedia parser config found. Will take the default one.");
            wikipediaDumpParserConfig = new WikipediaDumpParserConfig();
        }

        TikaInputStream tikaStream = TikaInputStream.get(stream);

        File fWikipediaDumpFile4Stream = tikaStream.getFile();

        MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueHashMap<String, String>();
        if (wikipediaDumpParserConfig.determinePageRedirects) {
            // The redirect pass gets its own stream over the dump file; close it once the pass is done.
            FileInputStream redirectStream = new FileInputStream(fWikipediaDumpFile4Stream);
            try {
                hsPageTitle2Redirects = getPageTitle2Redirects(redirectStream);
            } finally {
                redirectStream.close();
            }
        }

        HashSet<String> hsRedirectPageTitles = new HashSet<String>(hsPageTitle2Redirects.values());

        String strCleanedText = "";
        String strBaseURL = null;

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        dumpStream = new FileInputStream(fWikipediaDumpFile4Stream);
        xmlEventReader = xmlInputFactory.createXMLEventReader(dumpStream, "Utf-8");
        while (xmlEventReader.hasNext()) {

            XMLEvent xmlEvent = xmlEventReader.nextEvent();

            if (xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().getLocalPart().equals("page")) {
                // Nothing was collected for this page, so there is nothing to emit.
                if (metadata.size() == 0)
                    continue;

                // the MIME type should be part of the metadata as well
                metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia+xml");

                XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
                xhtml.startDocument();

                xhtml.startElement("p");
                xhtml.characters(strCleanedText.toCharArray(), 0, strCleanedText.length());
                xhtml.endElement("p");

                xhtml.endDocument();

            }

            if (!xmlEvent.isStartElement())
                continue;

            String strElementName = xmlEvent.asStartElement().getName().getLocalPart();

            // ##### the siteinfo

            if (strBaseURL == null && strElementName.equals("base")) {
                // http://de.wikipedia.org/wiki/Wikipedia:Hauptseite =>http://de.wikipedia.org/wiki/
                strBaseURL = readNextCharEventsText(xmlEventReader);
                strBaseURL = strBaseURL.substring(0, strBaseURL.lastIndexOf("/") + 1);
            }

            // ##### the page

            if (strElementName.equals("page")) {
                // a new page starts: drop everything collected for the previous one
                for (String strKey : metadata.names())
                    metadata.remove(strKey);
            }

            // ##### the title

            if (strElementName.equals("title")) {
                // always remember the current title
                String strCurrentTitle = readNextCharEventsText(xmlEventReader);

                // If the title belongs to a redirect page, consume all events of this page up to its end
                // tag, which means the page is ignored. Special Wikipedia pages are ignored as well.
                String strSmallTitle = strCurrentTitle.trim().toLowerCase();
                if (hsRedirectPageTitles.contains(strCurrentTitle)
                        || hsRedirectPageTitles.contains(strSmallTitle)
                        || hsRedirectPageTitles.contains(strCurrentTitle.trim())
                        || strSmallTitle.startsWith("category:") || strSmallTitle.startsWith("kategorie:")
                        || strSmallTitle.startsWith("vorlage:") || strSmallTitle.startsWith("template:")
                        || strSmallTitle.startsWith("hilfe:") || strSmallTitle.startsWith("help:")
                        || strSmallTitle.startsWith("wikipedia:") || strSmallTitle.startsWith("portal:")
                        || strSmallTitle.startsWith("mediawiki:")) {

                    // Guarded by hasNext() so that a dump without the closing page tag ends the skip
                    // instead of running past the last event.
                    while (xmlEventReader.hasNext()) {
                        XMLEvent nextXmlEvent = xmlEventReader.nextEvent();
                        if (nextXmlEvent.isEndElement()
                                && nextXmlEvent.asEndElement().getName().getLocalPart().equals("page"))
                            break;
                    }
                } else {
                    metadata.add(Metadata.TITLE, strCurrentTitle);
                    metadata.add(Metadata.SOURCE, strBaseURL + strCurrentTitle);

                    for (String strRedirect : hsPageTitle2Redirects.get(strCurrentTitle)) {
                        // ignore titles that differ only in upper/lower case
                        if (!StringUtils.containsIgnoreCase(strRedirect, metadata.getValues(Metadata.TITLE)))
                            metadata.add(Metadata.TITLE, strRedirect);
                    }
                }

                continue;
            }

            // ##### the text
            if (strElementName.equals("text")) {
                String strText = readNextCharEventsText(xmlEventReader);

                if (wikipediaDumpParserConfig.parseLinksAndCategories)
                    parseLinksAndCategories(strText, strBaseURL, metadata, handler);
                if (wikipediaDumpParserConfig.parseInfoBoxes)
                    parseInfoBox(strText, metadata, handler);
                if (wikipediaDumpParserConfig.parseGeoCoordinates)
                    parseGeoCoordinates(strText, metadata);

                // because of some shortcomings of the cleaner in use, the text has to be pre- and
                // post-processed here
                strText = strText.replaceAll("==\n", "==\n\n");
                strText = strText.replaceAll("\n==", "\n\n==");

                strCleanedText = m_wikiModel.render(new PlainTextConverter(), strText);

                strCleanedText = strCleanedText.replaceAll("\\{\\{", " ");
                strCleanedText = strCleanedText.replaceAll("\\}\\}", " ");

                strCleanedText = StringEscapeUtils.unescapeHtml4(strCleanedText);

                continue;
            }

            // ##### the timestamp
            if (strElementName.equals("timestamp")) {
                String strTimestamp = readNextCharEventsText(xmlEventReader);

                metadata.add(Metadata.MODIFIED, strTimestamp);

                continue;
            }

            // ##### the username
            if (strElementName.equals("username")) {
                String strUsername = readNextCharEventsText(xmlEventReader);

                metadata.add(Metadata.CREATOR, strUsername);

                continue;
            }

        }

    } catch (Exception e) {
        Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.SEVERE, "Error", e);
    } finally {
        // Closing the event reader does not close the underlying stream, so both are closed explicitly.
        if (xmlEventReader != null) {
            try {
                xmlEventReader.close();
            } catch (javax.xml.stream.XMLStreamException e) {
                Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.WARNING,
                        "Could not close the XML event reader", e);
            }
        }
        if (dumpStream != null) {
            try {
                dumpStream.close();
            } catch (IOException e) {
                Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.WARNING,
                        "Could not close the dump file stream", e);
            }
        }
    }

}

From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java

/**
 * Counts the start elements in the given XML stream whose local name equals {@code elementName}.
 *
 * @param is          the XML input to scan
 * @param elementName the local name to look for
 * @return the number of matching start elements
 * @throws XMLStreamException if the input cannot be read as XML
 */
private int countFeedElements(final InputStream is, final String elementName) throws XMLStreamException {
    final XMLEventReader reader = getEventReader(is);

    int count = 0;

    try {
        while (reader.hasNext()) {
            final XMLEvent event = reader.nextEvent();

            if (event.getEventType() == XMLStreamConstants.START_ELEMENT
                    && elementName.equals(event.asStartElement().getName().getLocalPart())) {
                count++;
            }
        }
    } finally {
        // release the reader even when parsing fails half-way through
        reader.close();
    }

    return count;
}

From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java

/**
 * Builds a Communication from the web-post XML file at the given path.
 *
 * The file is read twice: once in full as UTF-8 to become the communication text, and once through an
 * XMLEventReader to derive sections whose text spans are character offsets into that text. The
 * communication id is the part of the file name before the first '.'.
 *
 * @param path the file to ingest
 * @return the populated Communication
 * @throws IngestException if no file exists at the path, the path is not a regular file, the file cannot
 *                         be read, or XML/offset processing fails
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator g = f.create();
    Communication c = new Communication();
    c.setUuid(g.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        // id = file name up to (not including) the first dot
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    // First pass: the whole file content becomes the communication text.
    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    // Second pass: walk the XML events to compute section spans over the text read above.
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the headline end element.
            Section headline = this.handleBeginning(rdr, content, c);
            headline.setUuid(g.next());
            c.addToSectionList(headline);
            TextSpan sts = headline.getTextSpan();
            LOGGER.debug("headline text: {}", c.getText().substring(sts.getStart(), sts.getEnding()));

            // sectNumber counts posts; subSect counts the character chunks seen inside the current post
            int sectNumber = 1;
            int subSect = 0;

            int currOff = -1;
            // Big amounts of characters.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // region
                // enables ingestion of quotes inside a usenet webpost.
                // by Tongfei Chen
                if (nextEvent.isStartElement()
                        && nextEvent.asStartElement().getName().equals(QName.valueOf("QUOTE"))) {
                    // NOTE(review): getAttributeByName presumably returns null when the attribute is
                    // absent, which would make the next line throw a NullPointerException that the
                    // catch below does not cover - confirm every QUOTE carries PREVIOUSPOST.
                    Attribute attrQuote = nextEvent.asStartElement()
                            .getAttributeByName(QName.valueOf("PREVIOUSPOST"));
                    String quote = StringEscapeUtils.escapeXml(attrQuote.getValue());
                    // the span starts right after the literal opening of the attribute value
                    int location = attrQuote.getLocation().getCharacterOffset()
                            + "<QUOTE PREVIOUSPOST=\"".length();
                    Section quoteSection = new Section(g.next(), "quote")
                            .setTextSpan(new TextSpan(location, location + quote.length()));
                    c.addToSectionList(quoteSection);
                }
                // endregion

                // Character data: every non-whitespace chunk becomes a section of its own.
                if (nextEvent.isCharacters()) {
                    Characters chars = nextEvent.asCharacters();
                    if (!chars.isWhiteSpace()) {
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", currOff);
                        LOGGER.debug("Character based data: {}", fpContent);

                        // pads: key is added to the start offset, value is subtracted from the end offset
                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();

                        // The parsed data is unescaped while the raw text is not, so the length is
                        // measured on a re-escaped copy (", < and > only).
                        final int tse = currOff + fpContent.replace("\"", "&quot;").replace("<", "&lt;")
                                .replace(">", "&gt;").length() - (pads.getValue());
                        // MAINTAIN CORRECT TEXT SPAN
                        // CANNOT USE StringEscapeUtils.escapeXml because it will escape "'", which
                        // is not escaped in the data
                        // @tongfei

                        LOGGER.debug("Section text: {}", content.substring(tsb, tse));
                        TextSpan ts = new TextSpan(tsb, tse);
                        // the first chunk of a post is the poster, the second the post date, the rest the post
                        String sk;
                        if (subSect == 0)
                            sk = "poster";
                        else if (subSect == 1)
                            sk = "postdate";
                        else
                            sk = "post";

                        Section s = new Section();
                        s.setKind(sk);
                        s.setTextSpan(ts);
                        s.setUuid(g.next());
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        LOGGER.debug("Switching to new post.");
                        sectNumber++;
                        subSect = 0;
                    } else if (localName.equalsIgnoreCase(TEXT_LOCAL_NAME)) {
                        // done with document.
                        break;
                    }
                }
            }

            return c;

        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException
                | ClassCastException x) {
            // offset or cast failures while computing spans are reported as ingest failures
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}

From source file:com.evolveum.polygon.connector.hcm.DocumentProcessing.java

/**
 * Streams the XML file configured in {@code conf} and processes the employee records it contains.
 *
 * Inside an employee element the text of each evaluated attribute element is collected into the
 * attribute map (or the multi-value buffer, or the assignment XML builder). When an employee element
 * ends, or an end element does not pass the filter check, the collected data is passed to
 * {@code handleEmployeeData}. Buffered data is handled when the end of the document is reached.
 *
 * @param conf               supplies the file path and the names of the uid and primary id attributes
 * @param handler            passed on to the employee and buffered data handling
 * @param schemaAttributeMap schema attributes; replaced by a modified map when specific attributes are queried
 * @param query              the filter each end element is checked against
 * @return the attribute map field of this instance
 * @throws ConnectorIOException if the file cannot be found or cannot be parsed as XML
 */
public Map<String, Object> parseXMLData(HcmConnectorConfiguration conf, ResultsHandler handler,
        Map<String, Object> schemaAttributeMap, Filter query) {

    XMLInputFactory factory = XMLInputFactory.newInstance();
    // Declared outside the try block so that both can be released in the finally block.
    FileReader fileReader = null;
    XMLEventReader eventReader = null;
    try {

        String uidAttributeName = conf.getUidAttribute();
        String primariId = conf.getPrimaryId();
        String startName = "";
        String value = null;

        StringBuilder assignmentXMLBuilder = null;

        List<String> builderList = new ArrayList<String>();

        Integer nOfIterations = 0;
        Boolean isSubjectToQuery = false;
        Boolean isAssigment = false;
        Boolean evaluateAttr = true;
        Boolean specificAttributeQuery = false;

        fileReader = new FileReader(conf.getFilePath());
        eventReader = factory.createXMLEventReader(fileReader);
        List<String> dictionary = populateDictionary(FIRSTFLAG);

        if (!attrsToGet.isEmpty()) {

            attrsToGet.add(uidAttributeName);
            attrsToGet.add(primariId);
            specificAttributeQuery = true;
            evaluateAttr = false;
            LOGGER.ok("The uid and primary id were added to the queried attribute list");

            schemaAttributeMap = modifySchemaAttributeMap(schemaAttributeMap);
        }

        while (eventReader.hasNext()) {

            XMLEvent event = eventReader.nextEvent();

            Integer code = event.getEventType();

            if (code == XMLStreamConstants.START_ELEMENT) {

                StartElement startElement = event.asStartElement();
                startName = startElement.getName().getLocalPart();

                // with a specific attribute query, only the requested elements are evaluated
                if (!evaluateAttr && attrsToGet.contains(startName)) {

                    evaluateAttr = true;
                }

                if (!elementIsEmployeeData) {

                    if (startName.equals(EMPLOYEES)) {

                        if (dictionary.contains(nOfIterations.toString())) {
                            LOGGER.ok("The defined number of iterations has been hit: {0}",
                                    nOfIterations.toString());
                            break;
                        } else {
                            startName = "";
                            elementIsEmployeeData = true;
                            nOfIterations++;
                        }
                    }
                } else if (evaluateAttr) {

                    if (!isAssigment) {
                        if (ASSIGNMENTTAG.equals(startName)) {
                            assignmentXMLBuilder = new StringBuilder();
                            isAssigment = true;
                        }
                    } else {

                        builderList = processAssignment(startName, null, START, builderList);
                    }

                    if (multiValuedAttributesList.contains(startName)) {

                        elementIsMultiValued = true;
                    }

                }

            } else if (elementIsEmployeeData) {

                if (code == XMLStreamConstants.CHARACTERS && evaluateAttr) {

                    Characters characters = event.asCharacters();

                    if (!characters.isWhiteSpace()) {

                        // text may arrive in several character events; concatenate the pieces
                        String data = characters.getData();
                        value = (value != null) ? value + data : data;
                    }
                } else if (code == XMLStreamConstants.END_ELEMENT) {

                    EndElement endElement = event.asEndElement();
                    String endName = endElement.getName().getLocalPart();

                    isSubjectToQuery = checkFilter(endName, value, query, uidAttributeName);

                    if (!isSubjectToQuery) {
                        attributeMap.clear();
                        elementIsEmployeeData = false;
                        value = null;

                        // treat the rejected record as finished so that the branch below runs
                        endName = EMPLOYEES;
                    }

                    if (endName.equals(EMPLOYEES)) {

                        attributeMap = handleEmployeeData(attributeMap, schemaAttributeMap, handler,
                                uidAttributeName, primariId);

                        elementIsEmployeeData = false;

                    } else if (evaluateAttr) {

                        if (endName.equals(startName)) {
                            if (value != null) {

                                if (!isAssigment) {
                                    if (!elementIsMultiValued) {

                                        attributeMap.put(startName, value);
                                    } else {

                                        multiValuedAttributeBuffer.put(startName, value);
                                    }
                                } else {

                                    value = StringEscapeUtils.escapeXml10(value);
                                    builderList = processAssignment(endName, value, VALUE, builderList);

                                    builderList = processAssignment(endName, null, END, builderList);
                                }
                                value = null;
                            }
                        } else {
                            if (endName.equals(ASSIGNMENTTAG)) {

                                builderList = processAssignment(endName, null, CLOSE, builderList);

                                for (String records : builderList) {
                                    assignmentXMLBuilder.append(records);

                                }
                                attributeMap.put(ASSIGNMENTTAG, assignmentXMLBuilder.toString());

                                builderList = new ArrayList<String>();
                                isAssigment = false;

                            } else if (multiValuedAttributesList.contains(endName)) {
                                processMultiValuedAttributes(multiValuedAttributeBuffer);
                            }
                        }

                    }
                    if (specificAttributeQuery && evaluateAttr) {

                        evaluateAttr = false;
                    }
                }
            } else if (code == XMLStreamConstants.END_DOCUMENT) {
                handleBufferedData(uidAttributeName, primariId, handler);
            }
        }

    } catch (FileNotFoundException e) {
        StringBuilder errorBuilder = new StringBuilder("File not found at the specified path.")
                .append(e.getLocalizedMessage());
        LOGGER.error("File not found at the specified path: {0}", e);
        // keep the original exception as cause so the stack trace is not lost
        throw new ConnectorIOException(errorBuilder.toString(), e);
    } catch (XMLStreamException e) {

        LOGGER.error("Unexpected processing error while parsing the .xml document : {0}", e);

        StringBuilder errorBuilder = new StringBuilder(
                "Unexpected processing error while parsing the .xml document. ")
                        .append(e.getLocalizedMessage());

        throw new ConnectorIOException(errorBuilder.toString(), e);
    } finally {
        // Closing the event reader does not close the underlying reader, so both are closed explicitly.
        if (eventReader != null) {
            try {
                eventReader.close();
            } catch (XMLStreamException e) {
                LOGGER.error("Could not close the XML event reader: {0}", e);
            }
        }
        if (fileReader != null) {
            try {
                fileReader.close();
            } catch (java.io.IOException e) {
                LOGGER.error("Could not close the file reader: {0}", e);
            }
        }
    }
    return attributeMap;

}

From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java

/**
 * Copies the XML events read from {@code content} to {@code writer}, framed by a newline whitespace
 * event before and after. Document start, document end and comment events are not copied. The event
 * reader is closed and the content stream is closed quietly when the method exits.
 *
 * @param content the XML to copy
 * @param writer  the destination of the copied events
 * @throws Exception if reading or writing an event fails
 */
private void addAtomElement(final InputStream content, final XMLEventWriter writer) throws Exception {
    final XMLEventReader source = getEventReader(content);
    final XMLEvent lineBreak = XMLEventFactory.newInstance().createSpace("\n");

    try {
        writer.add(lineBreak);

        while (source.hasNext()) {
            final XMLEvent current = source.nextEvent();

            switch (current.getEventType()) {
            case XMLStreamConstants.START_DOCUMENT:
            case XMLStreamConstants.END_DOCUMENT:
            case XMLStreamConstants.COMMENT:
                // these event kinds are not forwarded to the writer
                break;
            default:
                writer.add(current);
                break;
            }
        }

        writer.add(lineBreak);
    } finally {
        source.close();
        IOUtils.closeQuietly(content);
    }
}

From source file:com.logiware.accounting.domain.EdiInvoice.java

/**
 * Reads an ECU Line invoice XML file event by event and populates this invoice from it.
 *
 * Start elements switch the section flags (header, body, information, details, summary) or set the
 * current element/character type; character events are passed to {@code setValue}; end elements undo
 * the corresponding state. After the whole file has been read, the company is set, the status is looked
 * up through EdiInvoiceDAO, and the presence of all required elements is verified.
 *
 * @param file the invoice file to read
 * @throws AccountingException if a required element is missing from the file
 * @throws Exception           if the file cannot be read or parsed, or the status lookup fails
 */
private void createEcuLineInvoice(File file) throws Exception {
    InputStream inputStream = null;
    XMLEventReader eventReader = null;
    try {
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        inputStream = new FileInputStream(file);
        eventReader = inputFactory.createXMLEventReader(inputStream);
        while (eventReader.hasNext()) {
            XMLEvent event = eventReader.nextEvent();
            if (event.isStartElement()) {
                StartElement startElement = event.asStartElement();
                String startName = startElement.getName().toString();
                if ("Header".equalsIgnoreCase(startName)) {
                    isHeader = true;
                    elements.add("Header");
                } else if ("Body".equalsIgnoreCase(startName)) {
                    isBody = true;
                    elements.add("Body");
                } else if (isBody && "Information".equalsIgnoreCase(startName)) {
                    isInformation = true;
                    elements.add("Information");
                } else if (isBody && !isInformation && "Details".equalsIgnoreCase(startName)) {
                    isDetails = true;
                    elements.add("Details");
                } else if (isBody && !isInformation && !isDetails && "Summary".equalsIgnoreCase(startName)) {
                    isSummary = true;
                    elements.add("Summary");
                } else if (null == elementType) {
                    setElementType(startElement);
                } else if (null == characterType) {
                    // elementType is known to be non-null in this branch
                    setCharacterType(startElement);
                }
            } else if (event.isCharacters()) {
                setValue(event.asCharacters());
            } else if (event.isEndElement()) {
                EndElement endElement = event.asEndElement();
                String endName = endElement.getName().toString();
                if (null != characterType && null != elementType) {
                    removeCharacterType();
                } else if (null != elementType) {
                    removeElementType(endElement);
                } else if (isSummary && "Summary".equalsIgnoreCase(endName)) {
                    isSummary = false;
                } else if (isDetails && "Details".equalsIgnoreCase(endName)) {
                    isDetails = false;
                } else if (isBody && "Information".equalsIgnoreCase(endName)) {
                    isInformation = false;
                } else if ("Body".equalsIgnoreCase(endName)) {
                    isBody = false;
                } else if ("Header".equalsIgnoreCase(endName)) {
                    isHeader = false;
                }
            }
        }
        this.company = Company.ECU_LINE;
        status = new EdiInvoiceDAO().getStatus(vendorNumber, invoiceNumber);

        // Required elements in the order they are checked, each paired with the message reported when
        // it is missing. Only the first missing element is reported.
        final String[][] requiredElements = {
            {"Header", "Bad File. <Header> element missing"},
            {"Body", "Bad File. <Body> missing"},
            {"Information", "Bad File. <Information> element under <Body> missing"},
            {"Details", "Bad File. <Details> element under <Body> missing"},
            {"Summary", "Bad File. <Summary> element under <Body> missing"},
            {"Applicationreference", "Bad File. <Applicationreference> element under <Header> missing"},
            {"Reference", "Bad File. <Reference> element under <Header> missing"},
            {"Sender", "Bad File. <Sender> element under <Header> missing"},
            {"Code", "Bad File. <Code> element under <Sender> of <Header> missing"},
            {"Invoice", "Bad File. <Invoice> element under <Information> element of <Body> missing"},
            {"RelatedReferences",
                    "Bad File. <RelatedReferences> element under <Information> element of <Body> missing"},
            {"BY", "Bad File. <Parties Qualifier=\"BY\"> under <Information> element of <Body> missing"},
            {"SU", "Bad File. <Parties Qualifier=\"SU\"> under <Information> element of <Body> missing"},
            {"PaymentTerms", "Bad File. <PaymentTerms> element under <Information> element of <Body> missing"},
            {"ShipmentInformation",
                    "Bad File. <ShipmentInformation> element under <Information> element of <Body> missing"},
            {"Detail", "Bad File. <Detail> element under <Details> element of <Body> missing"},
            {"TotalMonetaryAmount",
                    "Bad File. <TotalMonetaryAmount> element under <Summary> element of <Body> missing"},
            {"TotalMonetaryAmountGroupByVAT",
                    "Bad File. <TotalMonetaryAmountGroupByVAT> element under <Summary> element of <Body> missing"},
        };
        for (String[] required : requiredElements) {
            if (!elements.contains(required[0])) {
                throw new AccountingException(required[1]);
            }
        }
    } finally {
        // Nested so that the file stream is closed even when closing the event reader fails.
        try {
            if (null != eventReader) {
                eventReader.close();
            }
        } finally {
            if (null != inputStream) {
                inputStream.close();
            }
        }
    }
}

From source file:ca.uhn.fhir.parser.XmlParser.java

/**
 * Pulls every event from the supplied reader and feeds it to the parser state.
 *
 * <p>Start elements are announced to the state (with dedicated handling for {@code extension} and
 * {@code modifierExtension} elements, whose {@code url} attribute is looked up), followed by their
 * attributes. Comments are buffered and handed to the state as "pre" comments on the next start element,
 * or as "post" comments on the next end element / end of document. Every event, whatever its type, is
 * finally passed to {@code parserState.xmlEvent}.
 *
 * <p>The loop runs until the reader reports no more events; the result is fetched from the state once,
 * afterwards.
 *
 * @param streamReader the event reader to drain
 * @param parserState the state object receiving the parse callbacks
 * @return whatever {@code parserState.getObject()} yields after the last event
 * @throws DataFormatException if a callback raises one (it is re-wrapped with the event location), or if
 *         the reader fails with an {@link XMLStreamException}
 */
private <T> T doXmlLoop(XMLEventReader streamReader, ParserState<T> parserState) {
    ourLog.trace("Entering XML parsing loop with state: {}", parserState);

    try {
        List<String> pendingComments = new ArrayList<String>(1);

        while (streamReader.hasNext()) {
            XMLEvent event = streamReader.nextEvent();
            try {

                switch (event.getEventType()) {
                case XMLStreamConstants.START_ELEMENT: {
                    StartElement start = event.asStartElement();
                    String namespaceURI = start.getName().getNamespaceURI();
                    String localName = start.getName().getLocalPart();

                    boolean modifier = "modifierExtension".equals(localName);
                    if (modifier || "extension".equals(localName)) {
                        // Both extension flavours need a non-blank "url" attribute; report it when absent.
                        Attribute urlAttr = start.getAttributeByName(new QName("url"));
                        String url = null;
                        if (urlAttr == null || isBlank(urlAttr.getValue())) {
                            getErrorHandler().missingRequiredElement(new ParseLocation(localName), "url");
                        } else {
                            url = urlAttr.getValue();
                        }
                        parserState.enteringNewElementExtension(start, url, modifier);
                    } else {
                        parserState.enteringNewElement(namespaceURI, localName);
                    }

                    // Comments seen since the previous element belong in front of this one.
                    for (String held : pendingComments) {
                        parserState.commentPre(held);
                    }
                    pendingComments.clear();

                    @SuppressWarnings("unchecked")
                    Iterator<Attribute> attrs = start.getAttributes();
                    while (attrs.hasNext()) {
                        Attribute attr = attrs.next();
                        parserState.attributeValue(attr.getName().getLocalPart(), attr.getValue());
                    }

                    break;
                }
                case XMLStreamConstants.END_DOCUMENT:
                case XMLStreamConstants.END_ELEMENT: {
                    // Comments still buffered here trail the content of the element being closed.
                    for (String held : pendingComments) {
                        parserState.commentPost(held);
                    }
                    pendingComments.clear();
                    parserState.endingElement();
                    break;
                }
                case XMLStreamConstants.CHARACTERS: {
                    parserState.string(event.asCharacters().getData());
                    break;
                }
                case XMLStreamConstants.COMMENT: {
                    pendingComments.add(((Comment) event).getText());
                    break;
                }
                default:
                    // Other event types get no dedicated handling.
                    break;
                }

                parserState.xmlEvent(event);

            } catch (DataFormatException e) {
                // Re-wrap so the message pinpoints where in the document the problem occurred.
                String where = event.getLocation().toString();
                throw new DataFormatException("DataFormatException at [" + where + "]: " + e.getMessage(), e);
            }
        }
        return parserState.getObject();
    } catch (XMLStreamException e) {
        throw new DataFormatException(e);
    }
}

From source file:edu.unc.lib.dl.util.TripleStoreQueryServiceMulgaraImpl.java

/**
 * @param query/*  w w w  .j ava2 s .  c o  m*/
 *            an ITQL command
 * @return the message returned by Mulgara
 * @throws RemoteException
 *             for communication failure
 */
public String storeCommand(String query) {
    String result = null;
    String response = this.sendTQL(query);
    if (response != null) {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        try (StringReader sr = new StringReader(response)) {
            XMLEventReader r = factory.createXMLEventReader(sr);
            boolean inMessage = false;
            StringBuffer message = new StringBuffer();
            while (r.hasNext()) {
                XMLEvent e = r.nextEvent();
                if (e.isStartElement()) {
                    StartElement s = e.asStartElement();
                    if ("message".equals(s.getName().getLocalPart())) {
                        inMessage = true;
                    }
                } else if (e.isEndElement()) {
                    EndElement end = e.asEndElement();
                    if ("message".equals(end.getName().getLocalPart())) {
                        inMessage = false;
                    }
                } else if (inMessage && e.isCharacters()) {
                    message.append(e.asCharacters().getData());
                }
            }
            r.close();
            result = message.toString();
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }
    return result;
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Builds a {@link Communication} from the XML forum file at {@code path}.
 *
 * <p>The file is read twice: once in full as UTF-8 text (stored as the communication text and used for
 * offset-based substring extraction), and once through a StAX event reader whose character offsets drive
 * section creation. The resulting section list contains the headline section, an "author" and a
 * "datetime" section for every start element carrying both attributes, and a "post" section for each
 * non-whitespace character run that is not empty after trimming. Post sections are numbered
 * {@code [sectNumber, subSect]}; {@code sectNumber} advances at the end of each post element.
 *
 * @param path the file to ingest
 * @return the populated communication
 * @throws IngestException if nothing exists at {@code path}, the path is not a plain file, the file cannot
 *         be read, or XML parsing / section construction fails
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator gen = f.create();
    Communication c = new Communication();
    c.setUuid(gen.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        // The communication id is the file name up to its first '.'.
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    // First pass: slurp the whole file so that parser offsets can be mapped back onto the raw text.
    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    // Second pass: walk the same file as XML events.
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the first post element.
            Section headline = handleHeadline(rdr, content);
            headline.setUuid(gen.next());
            c.addToSectionList(headline);
            int start = headline.getTextSpan().getStart();
            int ending = headline.getTextSpan().getEnding();
            if (ending < start)
                ending = start; // @tongfei: handle empty headlines
            String htxt = c.getText().substring(start, ending);
            LOGGER.debug("headline text: {}", htxt);

            // Section indices.
            int sectNumber = 1;
            int subSect = 0;

            // Move iterator to post start element.
            this.iterateToPosts(rdr);

            // Offset pointer.
            int currOff = -1;

            SectionFactory sf = new SectionFactory(gen);

            // First post element.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();
                if (currOff > 0) {
                    int currOffPlus = currOff + 20;
                    // Clamp at 0: an event within the first 20 characters would otherwise make this
                    // purely diagnostic substring throw and abort the whole ingest.
                    int currOffLess = Math.max(0, currOff - 20);
                    LOGGER.debug("Offset: {}", currOff);
                    if (currOffPlus < content.length())
                        LOGGER.debug("Surrounding text: {}", content.substring(currOffLess, currOffPlus));
                }

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // Check if start element.
                if (nextEvent.isStartElement()) {
                    StartElement se = nextEvent.asStartElement();
                    QName name = se.getName();
                    final String localName = name.getLocalPart();
                    LOGGER.debug("Hit start element: {}", localName);

                    //region
                    // Add sections for authors and datetimes for each bolt post
                    // by Tongfei Chen
                    Attribute attrAuthor = se.getAttributeByName(QName.valueOf("author"));
                    Attribute attrDateTime = se.getAttributeByName(QName.valueOf("datetime"));

                    if (attrAuthor != null && attrDateTime != null) {

                        // NOTE(review): the arithmetic below presumes the attribute location marks the
                        // start of a `<post author="..." datetime="...">` tag with the attributes in that
                        // order; the quote characters around the values do not appear to be counted in
                        // the datetime offset - confirm against real data.
                        int loc = attrAuthor.getLocation().getCharacterOffset();

                        int sectAuthorBeginningOffset = loc + "<post author=\"".length();

                        Section sectAuthor = sf.fromTextSpan(new TextSpan(sectAuthorBeginningOffset,
                                sectAuthorBeginningOffset + attrAuthor.getValue().length()), "author");
                        c.addToSectionList(sectAuthor);

                        int sectDateTimeBeginningOffset = sectAuthorBeginningOffset
                                + attrAuthor.getValue().length() + " datetime=".length();

                        Section sectDateTime = sf.fromTextSpan(
                                new TextSpan(sectDateTimeBeginningOffset,
                                        sectDateTimeBeginningOffset + attrDateTime.getValue().length()),
                                "datetime");
                        c.addToSectionList(sectDateTime);
                    }
                    //endregion

                    // Move past quotes, images, and links.
                    if (localName.equals(QUOTE_LOCAL_NAME)) {
                        this.handleQuote(rdr);
                    } else if (localName.equals(IMG_LOCAL_NAME)) {
                        this.handleImg(rdr);
                    } else if (localName.equals(LINK_LOCAL_NAME)) {
                        this.handleLink(rdr);
                    }

                    // not a start element
                } else if (nextEvent.isCharacters()) {
                    Characters chars = nextEvent.asCharacters();
                    int coff = chars.getLocation().getCharacterOffset();
                    if (!chars.isWhiteSpace()) {
                        // content to be captured
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", coff);
                        LOGGER.debug("Character based data: {}", fpContent);

                        // Shrink the span by the leading / trailing padding reported by trimSpacing.
                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();
                        final int tse = currOff + fpContent.length() - pads.getValue();
                        final String subs = content.substring(tsb, tse);
                        if (subs.replaceAll("\\p{Zs}", "").replaceAll("\\n", "").isEmpty()) {
                            LOGGER.info("Found empty section: skipping.");
                            continue;
                        }

                        LOGGER.debug("Section text: {}", subs);
                        TextSpan ts = new TextSpan(tsb, tse);

                        Section s = sf.fromTextSpan(ts, "post");
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    // A closing post element starts a new top-level section number.
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        sectNumber++;
                        subSect = 0;
                    }
                }
            }
            return c;
        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException x) {
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}