Example usage for javax.xml.stream XMLEventReader close

List of usage examples for javax.xml.stream XMLEventReader close

Introduction

On this page you can find example usages of javax.xml.stream.XMLEventReader.close().

Prototype

public void close() throws XMLStreamException;

Source Link

Document

Frees any resources associated with this Reader. This method does not close the underlying input source.

Usage

From source file:edu.unc.lib.dl.util.TripleStoreQueryServiceMulgaraImpl.java

/**
 * @param query/*from   w  w  w  .  j  ava2  s .  co  m*/
 *            an ITQL command
 * @return the message returned by Mulgara
 * @throws RemoteException
 *             for communication failure
 */
public String storeCommand(String query) {
    String result = null;
    String response = this.sendTQL(query);
    if (response != null) {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        try (StringReader sr = new StringReader(response)) {
            XMLEventReader r = factory.createXMLEventReader(sr);
            boolean inMessage = false;
            StringBuffer message = new StringBuffer();
            while (r.hasNext()) {
                XMLEvent e = r.nextEvent();
                if (e.isStartElement()) {
                    StartElement s = e.asStartElement();
                    if ("message".equals(s.getName().getLocalPart())) {
                        inMessage = true;
                    }
                } else if (e.isEndElement()) {
                    EndElement end = e.asEndElement();
                    if ("message".equals(end.getName().getLocalPart())) {
                        inMessage = false;
                    }
                } else if (inMessage && e.isCharacters()) {
                    message.append(e.asCharacters().getData());
                }
            }
            r.close();
            result = message.toString();
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }
    return result;
}

From source file:com.aionemu.gameserver.dataholders.loadingutils.XmlMerger.java

/**
 * This method processes the source file, replacing all of the 'import' tags
 * by the data from the relevant files. Comments and whitespace-only text are
 * dropped, and a "machine-generated" comment is written right after the
 * start of the document.
 *
 * @throws XMLStreamException on event reading or writing error.
 * @throws IOException        if the destination file exists but is a directory
 *                            rather than a regular file, does not exist but cannot be created, or
 *                            cannot be opened for any other reason
 */
private void doUpdate() throws XMLStreamException, IOException {
    final Properties metadata = new Properties();

    // The StAX reader/writer close() methods do not close the underlying
    // streams, so the file handles are owned by try-with-resources.
    try (BufferedWriter destination = new BufferedWriter(new FileWriter(destFile, false));
            FileReader source = new FileReader(sourceFile)) {
        XMLEventWriter writer = null;
        XMLEventReader reader = null;

        try {
            writer = outputFactory.createXMLEventWriter(destination);
            reader = inputFactory.createXMLEventReader(source);

            while (reader.hasNext()) {
                final XMLEvent xmlEvent = reader.nextEvent();

                if (xmlEvent.isStartElement() && isImportQName(xmlEvent.asStartElement().getName())) {
                    processImportElement(xmlEvent.asStartElement(), writer, metadata);
                    continue;
                }

                if (xmlEvent.isEndElement() && isImportQName(xmlEvent.asEndElement().getName())) {
                    continue;
                }

                // skip comments.
                if (xmlEvent instanceof Comment) {
                    continue;
                }

                // skip whitespaces.
                if (xmlEvent.isCharacters() && (xmlEvent.asCharacters().isWhiteSpace()
                        || xmlEvent.asCharacters().isIgnorableWhiteSpace())) {
                    continue;
                }

                writer.add(xmlEvent);

                if (xmlEvent.isStartDocument()) {
                    writer.add(eventFactory.createComment("\nThis file is machine-generated. DO NOT MODIFY IT!\n"));
                }
            }

            // Flush explicitly so a write failure is reported instead of being
            // swallowed by the quiet close below.
            writer.flush();

            storeFileModifications(metadata, metaDataFile);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best-effort cleanup; the successful path has already flushed
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception ignored) {
                    // best-effort cleanup; nothing useful can be done here
                }
            }
        }
    }
}

From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java

/**
 * Rewrites the link element titled {@code linkName} so that it wraps
 * {@code replacement} in an {@code m:inline} element, and returns the
 * resulting document as a new in-memory stream.
 * <p>
 * Only the start and end tags of the located link are written back; its
 * original content is not copied. {@code toBeChanged} is always closed before
 * this method returns.
 *
 * @param toBeChanged stream holding the document to modify.
 * @param linkName value of the {@code title} attribute identifying the link.
 * @param replacement content to place inside the {@code m:inline} element.
 * @return a stream over the modified document.
 * @throws Exception if the document cannot be read or written.
 */
@Override
protected InputStream replaceLink(final InputStream toBeChanged, final String linkName,
        final InputStream replacement) throws Exception {
    final XMLEventReader reader = getEventReader(toBeChanged);

    final ByteArrayOutputStream bos = new ByteArrayOutputStream();
    final XMLOutputFactory xof = XMLOutputFactory.newInstance();
    final XMLEventWriter writer = xof.createXMLEventWriter(bos);

    final XMLEventFactory eventFactory = XMLEventFactory.newInstance();
    XMLEvent newLine = eventFactory.createSpace("\n");

    try {
        // presumably getAtomElement also forwards the events preceding the link to writer - TODO confirm
        final XmlElement linkElement = getAtomElement(reader, writer, LINK,
                Collections.<Map.Entry<String, String>>singletonList(
                        new SimpleEntry<String, String>("title", linkName)));
        writer.add(linkElement.getStart());

        // ------------------------------------------
        // write inline ...
        // ------------------------------------------
        writer.add(newLine);
        writer.add(eventFactory.createStartElement("m", null, "inline"));

        addAtomElement(replacement, writer);

        writer.add(eventFactory.createEndElement("m", null, "inline"));
        writer.add(newLine);
        // ------------------------------------------

        writer.add(linkElement.getEnd());

        // copy whatever follows the link unchanged
        writer.add(reader);
        writer.flush();
        writer.close();
    } finally {
        try {
            reader.close();
        } finally {
            // XMLEventReader.close() does not close the underlying stream, and
            // the stream must be released even when closing the reader fails.
            IOUtils.closeQuietly(toBeChanged);
        }
    }

    return new ByteArrayInputStream(bos.toByteArray());
}

From source file:fr.dutra.confluence2wordpress.core.sync.DefaultAttachmentsSynchronizer.java

/**
 * Scans the XML of {@code page} and resolves every attachment element it
 * contains.
 * <p>
 * While inside an attachment element the file name and, when a page element
 * is present, the owning page's title and space key are remembered. At the
 * closing attachment tag the attachment is looked up (on {@code page} itself
 * when no page title was seen) and added to the result.
 *
 * @param page the content whose body is scanned.
 * @return the attachments referenced by the page.
 * @throws SynchronizationException if the page body cannot be read as XML.
 */
private Set<Attachment> parseForAttachments(ContentEntityObject page) throws SynchronizationException {
    final Set<Attachment> referenced = new HashSet<Attachment>();
    try {
        final XMLEventReader events = StaxUtils.getReader(page);
        // State collected between an attachment's start and end tags.
        String attachmentFileName = null;
        String ownerTitle = null;
        String ownerSpaceKey = null;
        try {
            while (events.hasNext()) {
                final XMLEvent event = events.nextEvent();

                if (event.isStartElement()) {
                    final StartElement opening = event.asStartElement();
                    final QName openingName = opening.getName();
                    if (openingName.equals(ATTACHMENT_QNAME)) {
                        final Attribute fileAttribute = opening.getAttributeByName(FILENAME_QNAME);
                        if (fileAttribute != null) {
                            attachmentFileName = fileAttribute.getValue();
                        }
                    } else if (openingName.equals(PAGE_QNAME)) {
                        final Attribute titleAttribute = opening.getAttributeByName(TITLE_QNAME);
                        if (titleAttribute != null) {
                            ownerTitle = titleAttribute.getValue();
                        }
                        final Attribute spaceAttribute = opening.getAttributeByName(SPACE_QNAME);
                        if (spaceAttribute != null) {
                            ownerSpaceKey = spaceAttribute.getValue();
                        }
                    }
                    continue;
                }

                // Only the closing attachment tag triggers a lookup.
                if (!event.isEndElement() || !event.asEndElement().getName().equals(ATTACHMENT_QNAME)) {
                    continue;
                }

                final ContentEntityObject owner;
                if (ownerTitle != null) {
                    owner = pageManager.getPage(ownerSpaceKey, ownerTitle);
                } else {
                    owner = page;
                }
                referenced.add(attachmentManager.getAttachment(owner, attachmentFileName));

                // Start from a clean slate for the next attachment element.
                attachmentFileName = null;
                ownerTitle = null;
                ownerSpaceKey = null;
            }
        } finally {
            events.close();
        }
    } catch (XMLStreamException e) {
        throw new SynchronizationException("Cannot read page: " + page.getTitle(), e);
    }
    return referenced;
}

From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java

/**
 * {@inheritDoc }
 * <p>
 * Writes one empty link element per entry of {@code links} directly after
 * the start tag of the {@code entry} element, then copies the entry content
 * and the rest of the document. The reader and {@code is} are released even
 * when processing fails.
 */
@Override
protected InputStream addLinks(final String entitySetName, final String entitykey, final InputStream is,
        final Set<String> links) throws Exception {

    final ByteArrayOutputStream bos = new ByteArrayOutputStream();

    try {
        // -----------------------------------------
        // 0. Build reader and writer
        // -----------------------------------------
        final XMLEventReader reader = getEventReader(is);
        try {
            final XMLEventFactory eventFactory = XMLEventFactory.newInstance();

            final XMLOutputFactory xof = XMLOutputFactory.newInstance();
            final XMLEventWriter writer = xof.createXMLEventWriter(bos);
            // -----------------------------------------

            final XmlElement entry = getAtomElement(reader, writer, "entry");
            writer.add(entry.getStart());

            // add for links
            for (String link : links) {
                final Set<Attribute> attributes = new HashSet<Attribute>();
                attributes.add(eventFactory.createAttribute(new QName("title"), link));
                attributes.add(eventFactory.createAttribute(new QName("href"),
                        Commons.getLinksURI(version, entitySetName, entitykey, link)));
                attributes.add(eventFactory.createAttribute(new QName("rel"), Constants.ATOM_LINK_REL + link));
                attributes.add(eventFactory.createAttribute(new QName("type"),
                        Commons.linkInfo.get(version).isFeed(entitySetName, link) ? Constants.ATOM_LINK_FEED
                                : Constants.ATOM_LINK_ENTRY));

                writer.add(eventFactory.createStartElement(new QName(LINK), attributes.iterator(), null));
                writer.add(eventFactory.createEndElement(new QName(LINK), null));
            }

            writer.add(entry.getContentReader());
            writer.add(entry.getEnd());
            // copy whatever follows the entry unchanged
            writer.add(reader);

            writer.flush();
            writer.close();
        } finally {
            reader.close();
        }
    } finally {
        // XMLEventReader.close() does not close the underlying stream.
        IOUtils.closeQuietly(is);
    }

    return new ByteArrayInputStream(bos.toByteArray());
}

From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java

/**
 * {@inheritDoc }/*  w ww  .ja v  a  2 s.c o m*/
 */
@Override
protected InputStream normalizeLinks(final String entitySetName, final String entityKey, final InputStream is,
        final NavigationLinks links) throws Exception {

    // -----------------------------------------
    // 0. Build reader and writer
    // -----------------------------------------
    final ByteArrayOutputStream bos = new ByteArrayOutputStream();
    IOUtils.copy(is, bos);
    is.close();

    final ByteArrayOutputStream tmpBos = new ByteArrayOutputStream();
    final XMLOutputFactory xof = XMLOutputFactory.newInstance();
    final XMLEventWriter writer = xof.createXMLEventWriter(tmpBos);

    final XMLEventReader reader = getEventReader(new ByteArrayInputStream(bos.toByteArray()));
    // -----------------------------------------

    // -----------------------------------------
    // 1. Normalize links
    // -----------------------------------------
    final Set<String> added = new HashSet<String>();

    try {
        final List<Map.Entry<String, String>> filter = new ArrayList<Map.Entry<String, String>>();
        filter.add(new AbstractMap.SimpleEntry<String, String>("type", "application/atom+xml;type=entry"));
        filter.add(new AbstractMap.SimpleEntry<String, String>("type", "application/atom+xml;type=feed"));

        Map.Entry<Integer, XmlElement> linkInfo = null;

        while (true) {
            // a. search for link with type attribute equals to "application/atom+xml;type=entry/feed"
            linkInfo = getAtomElement(reader, writer, LINK, filter, linkInfo == null ? 0 : linkInfo.getKey(), 2,
                    2, true);
            final XmlElement link = linkInfo.getValue();

            final String title = link.getStart().getAttributeByName(new QName("title")).getValue();

            if (!added.contains(title)) {
                added.add(title);

                final String normalizedLink = String.format(
                        "<link href=\"%s(%s)/%s\" rel=\"%s\" title=\"%s\" type=\"%s\"/>", entitySetName,
                        entityKey, title, link.getStart().getAttributeByName(new QName("rel")).getValue(),
                        title, link.getStart().getAttributeByName(new QName("type")).getValue());

                addAtomElement(IOUtils.toInputStream(normalizedLink), writer);
            }
        }
    } catch (Exception ignore) {
        // ignore
    } finally {
        writer.close();
        reader.close();
    }
    // -----------------------------------------

    // -----------------------------------------
    // 2. Add edit link if missing
    // -----------------------------------------
    final InputStream content = addAtomEditLink(new ByteArrayInputStream(tmpBos.toByteArray()), entitySetName,
            Constants.DEFAULT_SERVICE_URL + entitySetName + "(" + entityKey + ")");
    // -----------------------------------------

    // -----------------------------------------
    // 3. Add content element if missing
    // -----------------------------------------
    return addAtomContent(content, entitySetName,
            Constants.DEFAULT_SERVICE_URL + entitySetName + "(" + entityKey + ")");
    // -----------------------------------------

}

From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java

/**
 * Applies property and link changes to the document read from {@code toBeChanged}
 * and returns the rewritten document as a new in-memory stream.
 * <p>
 * Keys of {@code properties} that start with {@code "[LINK]"} are treated as link
 * changes; all other keys are matched against the local names of the elements
 * found inside the {@code PROPERTIES} element.
 * <p>
 * Note that entries matching an existing property are removed from
 * {@code properties}, so the caller's map is modified.
 *
 * @param toBeChanged stream holding the document to modify.
 * @param properties replacement content keyed by property name or {@code "[LINK]" + title}.
 * @return a stream over the modified document.
 * @throws Exception if reading or writing the document fails outside the
 * sections whose exceptions are deliberately ignored below.
 */
@Override
public InputStream setChanges(final InputStream toBeChanged, final Map<String, InputStream> properties)
        throws Exception {
    XMLEventReader reader = getEventReader(toBeChanged);

    final ByteArrayOutputStream bos = new ByteArrayOutputStream();
    final XMLOutputFactory xof = XMLOutputFactory.newInstance();
    XMLEventWriter writer = xof.createXMLEventWriter(bos);

    // ---------------------------------
    // add property changes
    // ---------------------------------
    // presumably also forwards the events preceding the properties element to writer - TODO confirm
    Map.Entry<Integer, XmlElement> propertyElement = getAtomElement(reader, writer, PROPERTIES, null, 0, 2, 3,
            false);

    writer.flush();

    // pbos collects the new content of the properties element as text
    ByteArrayOutputStream pbos = new ByteArrayOutputStream();
    // NOTE(review): no charset is given, so the platform default encoding is used
    OutputStreamWriter pwriter = new OutputStreamWriter(pbos);

    final XMLEventReader propertyReader = propertyElement.getValue().getContentReader();

    try {
        // no explicit exit: the loop ends when an exception is thrown in its body
        while (true) {
            final XmlElement property = getAtomElement(propertyReader, null, null);
            final String name = property.getStart().getName().getLocalPart();

            if (properties.containsKey(name)) {
                // replace
                final InputStream replacement = properties.get(name);
                // consumed entries are removed so only unmatched ones remain afterwards
                properties.remove(property.getStart().getName().getLocalPart());
                pwriter.append(IOUtils.toString(replacement));
                IOUtils.closeQuietly(replacement);
            } else {
                // keep the existing property unchanged
                pwriter.append(IOUtils.toString(property.toStream()));
            }
        }
    } catch (Exception ignore) {
        // end
    }

    // append the remaining non-link entries, i.e. properties not found in the document
    for (Map.Entry<String, InputStream> remains : properties.entrySet()) {
        if (!remains.getKey().startsWith("[LINK]")) {
            pwriter.append(IOUtils.toString(remains.getValue()));
            IOUtils.closeQuietly(remains.getValue());
        }
    }

    pwriter.flush();
    pwriter.close();

    // write the properties element back with its rebuilt content
    writer.add(propertyElement.getValue().getStart());
    writer.add(new XMLEventReaderWrapper(new ByteArrayInputStream(pbos.toByteArray())));
    writer.add(propertyElement.getValue().getEnd());

    IOUtils.closeQuietly(pbos);

    // copy the rest of the document
    writer.add(reader);
    reader.close();
    writer.flush();
    writer.close();
    // ---------------------------------

    // ---------------------------------
    // add navigation changes
    // ---------------------------------

    // remove existent links
    for (Map.Entry<String, InputStream> remains : properties.entrySet()) {

        if (remains.getKey().startsWith("[LINK]")) {
            // re-parse the current output, then overwrite bos with the new pass
            // NOTE(review): this reader is never closed before being reassigned
            reader = getEventReader(new ByteArrayInputStream(bos.toByteArray()));

            bos.reset();
            writer = xof.createXMLEventWriter(bos);

            try {
                // the link title is whatever follows the first ']' of the key
                final String linkName = remains.getKey().substring(remains.getKey().indexOf("]") + 1);

                // the located element is discarded: only the events before and after it reach writer
                // (presumably getAtomElement forwards the preceding events - TODO confirm)
                getAtomElement(reader, writer, LINK, Collections.<Map.Entry<String, String>>singleton(
                        new SimpleEntry<String, String>("title", linkName)), 0, 2, 2, false);

                writer.add(reader);

            } catch (Exception ignore) {
                // ignore
            }

            writer.flush();
            writer.close();
        }
    }

    // final pass: insert the new links immediately before the content element
    reader = getEventReader(new ByteArrayInputStream(bos.toByteArray()));

    bos.reset();
    writer = xof.createXMLEventWriter(bos);

    propertyElement = getAtomElement(reader, writer, CONTENT, null, 0, 2, 2, false);
    writer.flush();

    pbos.reset();
    pwriter = new OutputStreamWriter(pbos);

    for (Map.Entry<String, InputStream> remains : properties.entrySet()) {
        if (remains.getKey().startsWith("[LINK]")) {
            pwriter.append(IOUtils.toString(remains.getValue()));
            IOUtils.closeQuietly(remains.getValue());
        }
    }

    pwriter.flush();
    pwriter.close();

    writer.add(new XMLEventReaderWrapper(new ByteArrayInputStream(pbos.toByteArray())));
    IOUtils.closeQuietly(pbos);

    // then the content element itself, unchanged
    writer.add(propertyElement.getValue().getStart());
    writer.add(propertyElement.getValue().getContentReader());
    writer.add(propertyElement.getValue().getEnd());

    writer.add(reader);
    reader.close();
    writer.flush();
    writer.close();
    // ---------------------------------

    return new ByteArrayInputStream(bos.toByteArray());
}

From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java

/**
 * Copies {@code entity} into a new in-memory stream, keeping only the
 * selected properties and links.
 * <p>
 * A link element is dropped unless its {@code title} is one of
 * {@code propertyNames} or its {@code rel} is {@code "edit"}; a link lacking
 * either attribute is simply treated as not matching. Inside the properties
 * element only children named {@code ATOM_PROPERTY_PREFIX + propertyName}
 * are kept. Requested properties that are absent from the document are not
 * an error (in order to support FC properties as well).
 *
 * @param entity stream holding the entity; always closed by this method.
 * @param propertyNames names of the properties and links to keep.
 * @return a stream over the filtered entity.
 * @throws Exception if the entity cannot be read or written.
 */
@Override
public InputStream selectEntity(final InputStream entity, final String[] propertyNames) throws Exception {
    final ByteArrayOutputStream bos = new ByteArrayOutputStream();
    final List<String> fieldToBeSaved = new ArrayList<String>(Arrays.asList(propertyNames));

    try {
        final XMLEventReader reader = getEventReader(entity);
        try {
            final XMLOutputFactory xof = XMLOutputFactory.newInstance();
            final XMLEventWriter writer = xof.createXMLEventWriter(bos);

            boolean inProperties = false;
            // whether the event currently being examined is copied to the output
            boolean writeCurrent = true;
            // when non-null, becomes writeCurrent after the current event is handled
            Boolean writeNext = null;
            String currentName = null;

            while (reader.hasNext()) {
                final XMLEvent event = reader.nextEvent();
                final boolean isStart = event.isStartElement();
                final boolean isEnd = event.isEndElement();

                String localName = null;
                if (isStart) {
                    localName = event.asStartElement().getName().getLocalPart();
                } else if (isEnd) {
                    localName = event.asEndElement().getName().getLocalPart();
                }

                boolean unselectedLink = false;
                if (isStart && LINK.equals(localName)) {
                    // Attributes are optional: a missing one must not raise an NPE.
                    final Attribute title = event.asStartElement().getAttributeByName(new QName("title"));
                    final Attribute rel = event.asStartElement().getAttributeByName(new QName("rel"));
                    final boolean selected = title != null && fieldToBeSaved.contains(title.getValue());
                    final boolean editLink = rel != null && "edit".equals(rel.getValue());
                    unselectedLink = !selected && !editLink;
                }

                if (unselectedLink) {
                    writeCurrent = false;
                } else if (isEnd && LINK.equals(localName)) {
                    // resume writing after the link's end tag
                    writeNext = true;
                } else if (isStart && (PROPERTIES).equals(localName)) {
                    // keep the properties start tag itself, then stop until a selected property shows up
                    writeCurrent = true;
                    writeNext = false;
                    inProperties = true;
                } else if (isEnd && (PROPERTIES).equals(localName)) {
                    writeCurrent = true;
                } else if (inProperties) {
                    if (isStart) {
                        for (String propertyName : propertyNames) {
                            if ((ATOM_PROPERTY_PREFIX + propertyName.trim()).equals(localName)) {
                                writeCurrent = true;
                                currentName = propertyName;
                            }
                        }
                    } else if (isEnd && StringUtils.isNotBlank(currentName)
                            && (ATOM_PROPERTY_PREFIX + currentName.trim()).equals(localName)) {
                        // stop writing once the selected property's end tag has been copied
                        writeNext = false;
                        currentName = null;
                    }
                }

                if (writeCurrent) {
                    writer.add(event);
                }

                if (writeNext != null) {
                    writeCurrent = writeNext;
                    writeNext = null;
                }
            }

            writer.flush();
            writer.close();
        } finally {
            reader.close();
        }
    } finally {
        // XMLEventReader.close() does not close the underlying stream.
        IOUtils.closeQuietly(entity);
    }

    return new ByteArrayInputStream(bos.toByteArray());
}

From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java

/**
 * Builds a {@link Communication} from the web post file at {@code path}.
 * <p>
 * The file is read once as text, which becomes the communication text, and
 * once through a StAX reader. Each non-whitespace run of character data
 * becomes a section whose text span indexes into that text; within a post the
 * first such run is tagged "poster", the second "postdate" and the rest
 * "post". {@code QUOTE} elements additionally yield a "quote" section.
 *
 * @param path the file to ingest.
 * @return the populated communication.
 * @throws IngestException if the file is missing or not a regular file,
 *         cannot be read, or is not well-formed in the expected way.
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator g = f.create();
    Communication c = new Communication();
    c.setUuid(g.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        // the id is the file name up to its first dot
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the headline end element.
            Section headline = this.handleBeginning(rdr, content, c);
            headline.setUuid(g.next());
            c.addToSectionList(headline);
            TextSpan sts = headline.getTextSpan();
            LOGGER.debug("headline text: {}", c.getText().substring(sts.getStart(), sts.getEnding()));

            int sectNumber = 1;
            int subSect = 0;

            int currOff = -1;
            // Big amounts of characters.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // region
                // enables ingestion of quotes inside a usenet webpost.
                // by Tongfei Chen
                if (nextEvent.isStartElement()
                        && nextEvent.asStartElement().getName().equals(QName.valueOf("QUOTE"))) {
                    Attribute attrQuote = nextEvent.asStartElement()
                            .getAttributeByName(QName.valueOf("PREVIOUSPOST"));
                    if (attrQuote == null)
                        // Report malformed input through the declared exception
                        // type rather than letting a NullPointerException escape.
                        throw new IngestException("QUOTE element without a PREVIOUSPOST attribute in: "
                                + path.toString());
                    String quote = StringEscapeUtils.escapeXml(attrQuote.getValue());
                    int location = attrQuote.getLocation().getCharacterOffset()
                            + "<QUOTE PREVIOUSPOST=\"".length();
                    Section quoteSection = new Section(g.next(), "quote")
                            .setTextSpan(new TextSpan(location, location + quote.length()));
                    c.addToSectionList(quoteSection);
                }
                // endregion

                // Non-whitespace character data becomes a section.
                if (nextEvent.isCharacters()) {
                    Characters chars = nextEvent.asCharacters();
                    if (!chars.isWhiteSpace()) {
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", currOff);
                        LOGGER.debug("Character based data: {}", fpContent);

                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();

                        final int tse = currOff + fpContent.replace("\"", "&quot;").replace("<", "&lt;")
                                .replace(">", "&gt;").length() - (pads.getValue());
                        // MAINTAIN CORRECT TEXT SPAN
                        // CANNOT USE StringEscapeUtils.escapeXml because it will escape "'", which
                        // is not escaped in the data
                        // @tongfei

                        LOGGER.debug("Section text: {}", content.substring(tsb, tse));
                        TextSpan ts = new TextSpan(tsb, tse);
                        String sk;
                        if (subSect == 0)
                            sk = "poster";
                        else if (subSect == 1)
                            sk = "postdate";
                        else
                            sk = "post";

                        Section s = new Section();
                        s.setKind(sk);
                        s.setTextSpan(ts);
                        s.setUuid(g.next());
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        LOGGER.debug("Switching to new post.");
                        sectNumber++;
                        subSect = 0;
                    } else if (localName.equalsIgnoreCase(TEXT_LOCAL_NAME)) {
                        // done with document.
                        break;
                    }
                }
            }

            return c;

        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException
                | ClassCastException x) {
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Reads the file at {@code path} and converts it into a {@link Communication}.
 *
 * <p>The communication receives a fresh UUID, this ingester's kind and metadata, an id
 * taken from the file name up to its first {@code '.'}, and the whole file content
 * (decoded as UTF-8) as its text. The file is then streamed as XML events and sections
 * are added for the headline, for the {@code author}/{@code datetime} attributes of
 * start elements that carry both, and for every non-whitespace run of character data
 * (kind {@code "post"}, numbered by post index and sub-section index).
 *
 * @param path location of the file to ingest
 * @return the populated communication
 * @throws IngestException if no file exists at {@code path}, if it is not a regular
 *             file, or if reading or parsing fails (I/O error, XML stream error,
 *             {@code ConcreteException} or an out-of-range text offset)
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator gen = f.create();
    Communication c = new Communication();
    c.setUuid(gen.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        // Communication id = file name up to (not including) the first dot.
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    // First pass: slurp the whole file so that section offsets can be resolved
    // against the exact text stored on the communication.
    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    // Second pass: stream the same file as XML events.
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        // XMLEventReader is not AutoCloseable, hence the explicit finally below.
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the first post element.
            Section headline = handleHeadline(rdr, content);
            headline.setUuid(gen.next());
            c.addToSectionList(headline);
            int start = headline.getTextSpan().getStart();
            int ending = headline.getTextSpan().getEnding();
            if (ending < start)
                ending = start; // @tongfei: handle empty headlines
            String htxt = c.getText().substring(start, ending);
            LOGGER.debug("headline text: {}", htxt);

            // Section indices.
            int sectNumber = 1;
            int subSect = 0;

            // Move iterator to post start element.
            this.iterateToPosts(rdr);

            // Offset pointer.
            int currOff = -1;

            SectionFactory sf = new SectionFactory(gen);

            // Attribute names looked up on every start element; resolved once.
            final QName authorAttrName = QName.valueOf("author");
            final QName dateTimeAttrName = QName.valueOf("datetime");

            // First post element.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();
                if (currOff > 0) {
                    // Diagnostic window of up to 20 characters either side of the
                    // current offset. The lower bound is clamped to 0: the substring
                    // is evaluated even when debug logging is disabled, so a negative
                    // start index would otherwise abort the ingest with a
                    // StringIndexOutOfBoundsException for offsets below 20.
                    int currOffPlus = currOff + 20;
                    int currOffLess = Math.max(0, currOff - 20);
                    LOGGER.debug("Offset: {}", currOff);
                    if (currOffPlus < content.length())
                        LOGGER.debug("Surrounding text: {}", content.substring(currOffLess, currOffPlus));
                }

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // Check if start element.
                if (nextEvent.isStartElement()) {
                    StartElement se = nextEvent.asStartElement();
                    QName name = se.getName();
                    final String localName = name.getLocalPart();
                    LOGGER.debug("Hit start element: {}", localName);

                    //region
                    // Add sections for authors and datetimes for each bolt post
                    // by Tongfei Chen
                    Attribute attrAuthor = se.getAttributeByName(authorAttrName);
                    Attribute attrDateTime = se.getAttributeByName(dateTimeAttrName);

                    if (attrAuthor != null && attrDateTime != null) {
                        // NOTE(review): the offsets below are derived from the lengths of
                        // the literal strings `<post author="` and ` datetime=`, so they
                        // presumably rely on a fixed tag layout and on what offset the
                        // attribute's location reports - confirm against real input.
                        int loc = attrAuthor.getLocation().getCharacterOffset();

                        int sectAuthorBeginningOffset = loc + "<post author=\"".length();

                        Section sectAuthor = sf.fromTextSpan(new TextSpan(sectAuthorBeginningOffset,
                                sectAuthorBeginningOffset + attrAuthor.getValue().length()), "author");
                        c.addToSectionList(sectAuthor);

                        int sectDateTimeBeginningOffset = sectAuthorBeginningOffset
                                + attrAuthor.getValue().length() + " datetime=".length();

                        Section sectDateTime = sf.fromTextSpan(
                                new TextSpan(sectDateTimeBeginningOffset,
                                        sectDateTimeBeginningOffset + attrDateTime.getValue().length()),
                                "datetime");
                        c.addToSectionList(sectDateTime);
                    }
                    //endregion

                    // Move past quotes, images, and links.
                    if (localName.equals(QUOTE_LOCAL_NAME)) {
                        this.handleQuote(rdr);
                    } else if (localName.equals(IMG_LOCAL_NAME)) {
                        this.handleImg(rdr);
                    } else if (localName.equals(LINK_LOCAL_NAME)) {
                        this.handleLink(rdr);
                    }
                } else if (nextEvent.isCharacters()) {
                    // Not a start element: character data.
                    Characters chars = nextEvent.asCharacters();
                    int coff = chars.getLocation().getCharacterOffset();
                    if (!chars.isWhiteSpace()) {
                        // content to be captured
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", coff);
                        LOGGER.debug("Character based data: {}", fpContent);

                        // Shrink the span by the leading (key) and trailing (value)
                        // padding reported by trimSpacing.
                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();
                        final int tse = currOff + fpContent.length() - pads.getValue();
                        final String subs = content.substring(tsb, tse);
                        // Nothing left once space separators and newlines are removed:
                        // skip without consuming a sub-section index.
                        if (subs.replaceAll("\\p{Zs}", "").replaceAll("\\n", "").isEmpty()) {
                            LOGGER.info("Found empty section: skipping.");
                            continue;
                        }

                        LOGGER.debug("Section text: {}", subs);
                        TextSpan ts = new TextSpan(tsb, tse);

                        Section s = sf.fromTextSpan(ts, "post");
                        // Number list is [post index, sub-section index within the post].
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        // End of a post: advance the post index, restart sub-sections.
                        sectNumber++;
                        subSect = 0;
                    }
                }
            }
            return c;
        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException x) {
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}