Example usage for javax.xml.stream XMLStreamReader close

List of usage examples for javax.xml.stream XMLStreamReader close

Introduction

In this page you can find the example usage for javax.xml.stream XMLStreamReader close.

Prototype

public void close() throws XMLStreamException;

Source Link

Document

Frees any resources associated with this Reader.

Usage

From source file:ddf.catalog.source.opensearch.OpenSearchSource.java

private InputTransformer getInputTransformer(InputStream inputStream) {
    XMLStreamReader xmlStreamReader = null;
    try {/* w w w .j av  a  2  s. c  o m*/
        xmlStreamReader = xmlInputFactory.createXMLStreamReader(inputStream);
        while (xmlStreamReader.hasNext()) {
            int next = xmlStreamReader.next();
            if (next == XMLStreamConstants.START_ELEMENT) {
                String namespaceUri = xmlStreamReader.getNamespaceURI();
                InputTransformer transformerReference = lookupTransformerReference(namespaceUri);
                if (transformerReference != null) {
                    return transformerReference;
                }
            }
        }
    } catch (XMLStreamException | InvalidSyntaxException e) {
        LOGGER.error("Failed to parse transformer namespace", e);
    } finally {
        try {
            if (xmlStreamReader != null) {
                xmlStreamReader.close();
            }
        } catch (XMLStreamException e) {
            LOGGER.error("failed to close namespace reader", e);
        }
    }
    return null;
}

From source file:ddf.catalog.source.opensearch.impl.OpenSearchSource.java

private InputTransformer getInputTransformer(InputStream inputStream) {
    XMLStreamReader xmlStreamReader = null;
    try {/*w w  w.j ava  2 s.c o m*/
        xmlStreamReader = xmlInputFactory.createXMLStreamReader(inputStream);
        while (xmlStreamReader.hasNext()) {
            int next = xmlStreamReader.next();
            if (next == XMLStreamConstants.START_ELEMENT) {
                String namespaceUri = xmlStreamReader.getNamespaceURI();
                InputTransformer transformerReference = lookupTransformerReference(namespaceUri);
                if (transformerReference != null) {
                    return transformerReference;
                }
            }
        }
    } catch (XMLStreamException | InvalidSyntaxException e) {
        LOGGER.debug("Failed to parse transformer namespace", e);
    } finally {
        try {
            if (xmlStreamReader != null) {
                xmlStreamReader.close();
            }
        } catch (XMLStreamException e) {
            LOGGER.debug("failed to close namespace reader", e);
        }
    }
    return null;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.ddi.DDIFileReader.java

private void processDDI(BufferedInputStream ddiStream, SDIOMetadata smd) throws IOException {
    XMLStreamReader xmlr = null;
    try {//from  www .  ja v  a2  s .c o m
        xmlr = xmlInputFactory.createXMLStreamReader(ddiStream);
        //processDDI( xmlr, smd );
        xmlr.nextTag();
        xmlr.require(XMLStreamConstants.START_ELEMENT, null, "codeBook");
        processCodeBook(xmlr, smd);
        dbgLog.info("processed DDI.");

    } catch (XMLStreamException ex) {
        Logger.getLogger("global").log(Level.SEVERE, null, ex);
        throw new IOException(ex.getMessage());
    } finally {
        try {
            if (xmlr != null) {
                xmlr.close();
            }
        } catch (XMLStreamException ex) {
            // The message in the exception should contain diagnostics
            // information -- what was wrong with the DDI, etc.
            throw new IOException(ex.getMessage());
        }
        if (ddiStream != null) {
            ddiStream.close();
        }
    }

    // Having processed the entire ddi, we should have obtained all the metadata
    // describing the data set.
    // Configure the SMD metadata object:

    if (getVarQnty() > 0) {
        smd.getFileInformation().put("varQnty", getVarQnty());
        dbgLog.info("var quantity: " + getVarQnty());
        // TODO:
        // Validate the value against the actual number of variable sections
        // found in the DDI.
    } else {
        throw new IOException("Failed to obtain the variable quantity from the DDI supplied.");
    }

    if (getCaseQnty() > 0) {
        smd.getFileInformation().put("caseQnty", getCaseQnty());
    }
    // It's ok if caseQnty was not defined in the DDI, we'll try to read
    // the tab file supplied and assume that the number of lines is the
    // number of observations.

    smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()]));

    // "minimal" variable types: SPSS type binary definition:
    // 0 means numeric, >0 means string.

    smd.setVariableTypeMinimal(
            ArrayUtils.toPrimitive(variableTypeList.toArray(new Integer[variableTypeList.size()])));

    // This is how the "discrete" and "continuous" numeric values are
    // distinguished in the data set metadata:

    smd.setDecimalVariables(decimalVariableSet);

    //TODO: smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName);

    smd.setVariableFormat(printFormatList);
    smd.setVariableFormatName(printFormatNameTable);
    smd.setVariableFormatCategory(formatCategoryTable); //TODO: verify

    // Store the variable labels, if supplied:

    if (!variableLabelMap.isEmpty()) {
        smd.setVariableLabel(variableLabelMap);
    }

    // Value labels, if supplied:

    if (!valueLabelTable.isEmpty()) {
        smd.setValueLabelTable(valueLabelTable);
        smd.setValueLabelMappingTable(valueVariableMappingTable);
    }

    // And missing values:

    if (!missingValueTable.isEmpty()) {
        smd.setMissingValueTable(missingValueTable);
    }

}

From source file:jp.co.atware.solr.geta.GETAssocComponent.java

/**
 * GETAssoc?????<code>NamedList</code>???????
 * /*from   w  w  w  .  j a  va2s. c o  m*/
 * @param inputStream GETAssoc??
 * @return <code>NamedList</code>?
 * @throws FactoryConfigurationError
 * @throws IOException
 */
protected NamedList<Object> convertResult(InputStream inputStream)
        throws FactoryConfigurationError, IOException {
    NamedList<Object> result = new NamedList<Object>();
    LinkedList<NamedList<Object>> stack = new LinkedList<NamedList<Object>>();
    stack.push(result);
    try {
        XMLStreamReader xml = XMLInputFactory.newInstance().createXMLStreamReader(inputStream);
        while (xml.hasNext()) {
            switch (xml.getEventType()) {
            case XMLStreamConstants.START_ELEMENT:
                NamedList<Object> element = new NamedList<Object>();
                stack.peek().add(xml.getName().toString(), element);
                stack.push(element);
                for (int i = 0; i < xml.getAttributeCount(); i++) {
                    String name = xml.getAttributeName(i).toString();
                    String value = xml.getAttributeValue(i);
                    ValueOf valueOf = valueTransMap.get(name);
                    if (valueOf != null) {
                        try {
                            element.add(name, valueOf.toValue(value));
                        } catch (NumberFormatException e) {
                            element.add(name, value);
                        }
                    } else {
                        element.add(name, value);
                    }
                }
                break;
            case XMLStreamConstants.END_ELEMENT:
                stack.pop();
                break;
            default:
                break;
            }
            xml.next();

        }
        xml.close();
    } catch (XMLStreamException e) {
        throw new IOException(e);
    }

    LOG.debug(result.toString());
    return result;
}

From source file:com.liferay.portal.util.LocalizationImpl.java

private String _getRootAttribute(String xml, String name, String defaultValue) {

    String value = null;//from w w  w .  j  a v a2 s . co m

    XMLStreamReader xmlStreamReader = null;

    ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader();

    Thread currentThread = Thread.currentThread();

    ClassLoader contextClassLoader = currentThread.getContextClassLoader();

    try {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(portalClassLoader);
        }

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();

        xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml));

        if (xmlStreamReader.hasNext()) {
            xmlStreamReader.nextTag();

            value = xmlStreamReader.getAttributeValue(null, name);
        }
    } catch (Exception e) {
        if (_log.isWarnEnabled()) {
            _log.warn(e, e);
        }
    } finally {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(contextClassLoader);
        }

        if (xmlStreamReader != null) {
            try {
                xmlStreamReader.close();
            } catch (Exception e) {
            }
        }
    }

    if (Validator.isNull(value)) {
        value = defaultValue;
    }

    return value;
}

From source file:ar.com.zauber.commons.spring.test.impl.TamperdataHttpServletRequestFactory.java

/** hace el trabajo sucio 
 * @throws UnsupportedEncodingException */
private HttpServletRequest parse(final XMLStreamReader reader)
        throws XMLStreamException, UnsupportedEncodingException {
    final MockHttpServletRequest ret = new MockHttpServletRequest();
    ret.setMethod("POST");
    String header = null;/*from w w w.ja va 2s. c  om*/
    String postHeader = null;
    int event;
    while ((event = reader.next()) != XMLStreamConstants.END_DOCUMENT) {
        if (event == XMLStreamConstants.START_ELEMENT) {
            final String name = reader.getLocalName();
            if (name.equals("tdRequestHeader") || name.equals("tdPostHeader")) {
                header = reader.getAttributeValue(0);
            } else if (name.equals("tdPostElements")) {
                ret.setMethod("POST");
            } else if (name.equals("tdPostElement")) {
                postHeader = reader.getAttributeValue(0);
            }
        } else if (event == XMLStreamConstants.CHARACTERS) {
            String text = reader.getText();
            if (text.length() > 1 && Character.isWhitespace(text.charAt(0))) {
                text = text.substring(1);
            }
            if (text.length() > 1 && Character.isWhitespace(text.charAt(text.length() - 1))) {
                text = text.substring(0, text.length() - 1);
            }

            final String value = URLDecoder.decode(URLDecoder.decode(text, encoding), encoding);
            if (header != null) {
                ret.addHeader(header, value);
            } else if (postHeader != null) {
                ret.addParameter(postHeader, value);
            }
            header = null;
            postHeader = null;
        } else {
            header = null;
            postHeader = null;
        }
    }
    reader.close();
    return ret;
}

From source file:com.microsoft.alm.plugin.context.soap.CatalogServiceImpl.java

private void readResponse(final HttpResponse httpResponse, final ElementDeserializable readFromElement) {

    InputStream responseStream = null;
    try {/*w  w w. j  ava 2s .com*/

        final Header encoding = httpResponse.getFirstHeader("Content-Encoding"); //$NON-NLS-1$
        if (encoding != null && encoding.getValue().equalsIgnoreCase("gzip")) //$NON-NLS-1$
        {
            responseStream = new GZIPInputStream(httpResponse.getEntity().getContent());
        } else {
            responseStream = httpResponse.getEntity().getContent();
        }
        XMLStreamReader reader = null;
        try {

            reader = XML_INPUT_FACTORY.createXMLStreamReader(responseStream);

            final QName envelopeQName = new QName(SOAP, "Envelope", "soap"); //$NON-NLS-1$ //$NON-NLS-2$
            final QName headerQName = new QName(SOAP, "Header", "soap"); //$NON-NLS-1$ //$NON-NLS-2$
            final QName bodyQName = new QName(SOAP, "Body", "soap"); //$NON-NLS-1$ //$NON-NLS-2$

            // Read the envelope.
            if (reader.nextTag() == XMLStreamConstants.START_ELEMENT
                    && reader.getName().equals(envelopeQName)) {
                while (reader.nextTag() == XMLStreamConstants.START_ELEMENT) {
                    if (reader.getName().equals(headerQName)) {
                        // Ignore headers for now.
                        readUntilElementEnd(reader);
                    } else if (reader.getName().equals(bodyQName)) {
                        if (reader.nextTag() == XMLStreamConstants.START_ELEMENT
                                && reader.getName().getLocalPart().equals(QUERY_NODE_RESPONSE)) {
                            readFromElement.readFromElement(reader);
                            return;
                        }
                    }
                }
            }
        } catch (final XMLStreamException e) {
            logger.warn("readResponse", e);
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (final XMLStreamException e) {
                    // Ignore and continue
                }
            }
        }
    } catch (IOException e) {
        logger.warn("readResponse", e);
        throw new RuntimeException(e);
    } finally {
        if (responseStream != null) {
            try {
                responseStream.close();
            } catch (IOException e) {
                // Ignore and continue
            }
        }
    }
}

From source file:com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester.java

private void parse(InfiniteFile f, SourcePojo source) throws MalformedURLException, URISyntaxException {

    //NOTE: we only ever break out of here because of max docs in standalone mode
    // (because we don't know how to continue reading)

    DocumentPojo doc = null;//ww  w .j  a v a 2s .  c  o  m
    //Determine File Extension
    String fileName = f.getName().toString();

    int mid = fileName.lastIndexOf(".");
    String extension = fileName.substring(mid + 1, fileName.length());

    //Checked to save processing time
    long fileTimestamp = (f.getDate() / 1000) * 1000;
    // (ensure truncated to seconds, since some operation somewhere hear does this...)

    Date modDate = new Date(fileTimestamp);
    //XML Data gets placed into MetaData

    boolean bIsXml = false;
    boolean bIsJson = false;
    boolean bIsLineOriented = false;
    if ((null != source.getFileConfig()) && (null != source.getFileConfig().type)) {
        extension = source.getFileConfig().type;
    }
    bIsXml = extension.equalsIgnoreCase("xml");
    bIsJson = extension.equalsIgnoreCase("json");
    bIsLineOriented = extension.endsWith("sv");

    if (bIsXml || bIsJson || bIsLineOriented) {
        int debugMaxDocs = Integer.MAX_VALUE; // by default don't set this, it's only for debug mode
        if (_context.isStandalone()) { // debug mode
            debugMaxDocs = maxDocsPerCycle;
        }
        //fast check to see if the file has changed before processing (or if it never existed)
        if (needsUpdated_SourceUrl(modDate, f.getUrlString(), source)) {
            if (0 != modDate.getTime()) { // if it ==0 then sourceUrl doesn't exist at all, no need to delete
                // This file already exists - in normal/managed mode will re-create
                // In streaming mode, simple skip over
                if (_streaming) {
                    return;
                } //TESTED

                DocumentPojo docRepresentingSrcUrl = new DocumentPojo();
                docRepresentingSrcUrl.setSourceUrl(f.getUrlString());
                docRepresentingSrcUrl.setSourceKey(source.getKey());
                docRepresentingSrcUrl.setCommunityId(source.getCommunityIds().iterator().next());
                sourceUrlsGettingUpdated.add(docRepresentingSrcUrl.getSourceUrl());
                this.docsToRemove.add(docRepresentingSrcUrl);
                // (can add documents with just source URL, are treated differently in the core libraries)               
            }

            SourceFileConfigPojo fileSystem = source.getFileConfig();
            if ((null == fileSystem) && (bIsXml || bIsJson)) {
                fileSystem = new SourceFileConfigPojo();
            }
            XmlToMetadataParser xmlParser = null;
            JsonToMetadataParser jsonParser = null;
            String urlType = extension;
            if (bIsXml) {
                xmlParser = new XmlToMetadataParser(fileSystem.XmlRootLevelValues, fileSystem.XmlIgnoreValues,
                        fileSystem.XmlSourceName, fileSystem.XmlPrimaryKey, fileSystem.XmlAttributePrefix,
                        fileSystem.XmlPreserveCase, debugMaxDocs);
            } //TESTED
            else if (bIsJson) {
                jsonParser = new JsonToMetadataParser(fileSystem.XmlSourceName, fileSystem.XmlRootLevelValues,
                        fileSystem.XmlPrimaryKey, fileSystem.XmlIgnoreValues, debugMaxDocs);
            } //TESTED

            List<DocumentPojo> partials = null;
            try {
                if (bIsXml) {
                    XMLStreamReader xmlStreamReader = null;
                    XMLInputFactory factory = XMLInputFactory.newInstance();
                    factory.setProperty(XMLInputFactory.IS_COALESCING, true);
                    factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                    try {
                        xmlStreamReader = factory.createXMLStreamReader(f.getInputStream());
                        partials = xmlParser.parseDocument(xmlStreamReader);
                        long memUsage = xmlParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != xmlStreamReader)
                            xmlStreamReader.close();
                    }
                } //TESTED
                else if (bIsJson) {
                    JsonReader jsonReader = null;
                    try {
                        jsonReader = new JsonReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        jsonReader.setLenient(true);
                        partials = jsonParser.parseDocument(jsonReader);
                        long memUsage = jsonParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != jsonReader)
                            jsonReader.close();
                    }
                } //TESTED
                else if (bIsLineOriented) { // Just generate a document for every line

                    BufferedReader lineReader = null;
                    try {
                        lineReader = new BufferedReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        CsvToMetadataParser lineParser = new CsvToMetadataParser(debugMaxDocs);
                        partials = lineParser.parseDocument(lineReader, source);
                        long memUsage = lineParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != lineReader)
                            lineReader.close();
                    }
                } //TESTED

                MessageDigest md5 = null; // (generates unique urls if the user doesn't below)
                try {
                    md5 = MessageDigest.getInstance("MD5");
                } catch (NoSuchAlgorithmException e) {
                    // Do nothing, unlikely to happen...
                }
                int nIndex = 0;
                int numPartials = partials.size();
                for (DocumentPojo doctoAdd : partials) {
                    nIndex++;
                    doctoAdd.setSource(source.getTitle());
                    doctoAdd.setSourceKey(source.getKey());
                    doctoAdd.setMediaType(source.getMediaType());
                    doctoAdd.setModified(new Date(fileTimestamp));
                    doctoAdd.setCreated(new Date());

                    if (null == doctoAdd.getUrl()) { // Can be set in the parser or here
                        doctoAdd.setHasDefaultUrl(true); // (ie cannot occur in a different src URL)

                        if (1 == numPartials) {
                            String urlString = f.getUrlString();
                            if (urlString.endsWith(urlType)) {
                                doctoAdd.setUrl(urlString);
                            } else {
                                doctoAdd.setUrl(
                                        new StringBuffer(urlString).append('.').append(urlType).toString());
                            }
                            // (we always set sourceUrl as the true url of the file, so want to differentiate the URL with
                            //  some useful information)
                        } else if (null == doctoAdd.getMetadata()) { // Line oriented case
                            doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/").append(nIndex)
                                    .append('.').append(urlType).toString());
                        } else {
                            if (null == md5) { // Will never happen, MD5 always exists
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(doctoAdd.getMetadata().hashCode()).append('.').append(urlType)
                                        .toString());
                            } else { // This is the standard call if the XML parser has not been configured to build the URL
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(DigestUtils.md5Hex(doctoAdd.getMetadata().toString()))
                                        .append('.').append(urlType).toString());
                            }
                        } //TESTED
                    }
                    doctoAdd.setTitle(f.getName().toString());
                    doctoAdd.setPublishedDate(new Date(fileTimestamp));
                    doctoAdd.setSourceUrl(f.getUrlString());

                    // Always add to files because I'm deleting the source URL
                    files.add(doctoAdd);
                } //TESTED 

            } catch (XMLStreamException e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (FactoryConfigurationError e1) {
                errors++;
                _context.getHarvestStatus().logMessage(e1.getMessage(), true);

            } catch (IOException e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (Exception e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            }
        } //(end if needs updated)
    } else //Tika supports Excel,Word,Powerpoint,Visio, & Outlook Documents
    {
        // (This dedup tells me if it's an add/update vs ignore - qr.isDuplicate higher up tells me if I need to add or update)
        if (needsUpdated_Url(modDate, f.getUrlString(), source)) {

            Metadata metadata = null;
            InputStream in = null;
            try {

                doc = new DocumentPojo();

                // Create a tika object (first time only)
                if (null == _tika) {
                    this.initializeTika(_context, source);
                }

                // BUGGERY
                // NEED TO LIKELY SET LIMIT TO BE 30MB or 50MB and BYPASS ANYTHING OVER THAT BELOW IS THE CODE TO DO THAT
                // tika.setMaxStringLength(30*1024*1024);
                // Disable the string length limit
                _tika.setMaxStringLength(-1);
                //input = new FileInputStream(new File(resourceLocation));
                // Create a metadata object to contain the metadata

                metadata = new Metadata();
                // Parse the file and get the text of the file
                doc.setSource(source.getTitle());
                doc.setSourceKey(source.getKey());
                doc.setMediaType(source.getMediaType());
                String fullText = "";

                in = f.getInputStream();
                try {
                    if (null == _tikaOutputFormat) { // text only
                        fullText = _tika.parseToString(in, metadata);
                    } //TESTED
                    else { // XML/HMTL
                        _tika.getParser().parse(in, _tikaOutputFormat, metadata, _tikaOutputParseContext);
                        fullText = _tikaXmlFormatWriter.toString();
                        _tikaXmlFormatWriter.getBuffer().setLength(0);
                    } //TESTED
                } finally {
                    if (null != in)
                        in.close();
                }
                int descCap = 500;
                doc.setFullText(fullText);
                if (descCap > fullText.length()) {
                    descCap = fullText.length();
                }
                doc.setDescription(fullText.substring(0, descCap));
                doc.setModified(new Date(fileTimestamp));
                doc.setCreated(new Date());
                doc.setUrl(f.getUrlString());
                doc.setTitle(f.getName().toString());
                doc.setPublishedDate(new Date(fileTimestamp));

                long memUsage = (250L * (doc.getFullText().length() + doc.getDescription().length())) / 100L; // 25% overhead, 2x for string->byte
                _memUsage += memUsage;
                _totalMemUsage.addAndGet(memUsage);

                // If the metadata contains a more plausible date then use that
                try {
                    String title = metadata.get(Metadata.TITLE);
                    if (null != title) {
                        doc.setTitle(title);
                    }
                } catch (Exception e) { // Fine just carry on                  
                }
                try {
                    Date date = metadata.getDate(Metadata.CREATION_DATE); // MS Word
                    if (null != date) {
                        doc.setPublishedDate(date);
                    } else {
                        date = metadata.getDate(Metadata.DATE); // Dublin
                        if (null != date) {
                            doc.setPublishedDate(date);
                        } else {
                            date = metadata.getDate(Metadata.ORIGINAL_DATE);
                            if (null != date) {
                                doc.setPublishedDate(date);
                            }
                        }
                    }
                } catch (Exception e) { // Fine just carry on                  
                }
                //TESTED

                // If the metadata contains a geotag then apply that:
                try {
                    String lat = metadata.get(Metadata.LATITUDE);
                    String lon = metadata.get(Metadata.LONGITUDE);
                    if ((null != lat) && (null != lon)) {
                        GeoPojo gt = new GeoPojo();
                        gt.lat = Double.parseDouble(lat);
                        gt.lon = Double.parseDouble(lon);
                        doc.setDocGeo(gt);
                    }
                } catch (Exception e) { // Fine just carry on                  
                }

                // Save the entire metadata:
                doc.addToMetadata("_FILE_METADATA_", metadata);

                for (ObjectId communityId : source.getCommunityIds()) {
                    doc.setCommunityId(communityId);
                }
                files.add(doc);

                // Close the input stream
                in.close();
                in = null;

                //TESTED

            } catch (SmbException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (MalformedURLException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (UnknownHostException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (IOException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (TikaException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (Exception e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } finally { // Close the input stream if an error occurs
                if (null != in) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        // All good, do nothing
                    }
                }
            } // end exception handling
        } // end dedup check
    } // end XML vs "office" app

    //DEBUG
    //System.out.println("FILE=" + files.size() + " / MEM=" + _memUsage + " VS " + Runtime.getRuntime().totalMemory());
}

From source file:net.cloudkit.enterprises.ws.SuperPassQueryTest.java

public static String parsingReceiptData(String responseData) throws XMLStreamException {
    StringBuilder dataBuilder = new StringBuilder("");
    XMLStreamReader reader = factory.createXMLStreamReader(new StringReader(responseData));
    try {/*from   ww w . j a  va  2  s. c  o  m*/
        int event = reader.getEventType();
        while (true) {
            switch (event) {
            case XMLStreamConstants.START_ELEMENT:

                if (reader.getName().toString().equals("TradeName")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                // System.out.println(reader.getName());
                if (reader.getName().toString().equals("SeqNo")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                // BillNo
                if (reader.getName().toString().equals("EntryId")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                if (reader.getName().toString().equals("IEFlag")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                // ??
                if (reader.getName().toString().equals("IEDate")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                if (reader.getName().toString().equals("TradeMode")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                if (reader.getName().toString().equals("Status")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                if (reader.getName().toString().equals("RetExplain")) {
                    // System.out.println(reader.getElementText());
                    dataBuilder.append(reader.getElementText());
                    dataBuilder.append("\t");
                }
                // NoticeDate
                // CustomsCode
                break;
            case XMLStreamConstants.END_ELEMENT:
                // System.out.println("End Element:" + r.getName());
                break;
            }
            if (!reader.hasNext())
                break;
            event = reader.next();
        }
    } finally {
        reader.close();
    }
    dataBuilder.append("\n");
    return dataBuilder.toString();
}

From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java

/**
 * processMeta - handle an individual field
 *//*  w  ww .  j a v  a  2  s .c  o  m*/
private void processMeta(DocumentPojo f, metaField m, String text, SourcePojo source,
        UnstructuredAnalysisConfigPojo uap) {

    boolean bAllowDuplicates = false;
    if ((null != m.flags) && m.flags.contains("U")) {
        bAllowDuplicates = true;
    }
    if ((null == m.scriptlang) || m.scriptlang.equalsIgnoreCase("regex")) {

        Pattern metaPattern = createRegex(m.script, m.flags);

        int timesToRun = 1;
        Object[] currField = null;
        if ((null != m.flags) && m.flags.contains("c")) {
            currField = f.getMetadata().get(m.fieldName);
        }
        if (null != currField) { // chained metadata
            timesToRun = currField.length;
            text = (String) currField[0];
        } //TESTED

        Matcher matcher = metaPattern.matcher(text);
        LinkedList<String> Llist = null;

        for (int ii = 0; ii < timesToRun; ++ii) {
            if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above)
                text = (String) currField[ii];
                matcher = metaPattern.matcher(text);
            } //TESTED

            StringBuffer prefix = new StringBuffer(m.fieldName).append(':');
            int nFieldNameLen = m.fieldName.length() + 1;

            try {
                while (matcher.find()) {
                    if (null == Llist) {
                        Llist = new LinkedList<String>();
                    }
                    if (null == m.groupNum) {
                        m.groupNum = 0;
                    }
                    String toAdd = matcher.group(m.groupNum);
                    if (null != m.replace) {
                        toAdd = metaPattern.matcher(toAdd).replaceFirst(m.replace);
                    }
                    if ((null != m.flags) && m.flags.contains("H")) {
                        toAdd = StringEscapeUtils.unescapeHtml(toAdd);
                    }
                    prefix.setLength(nFieldNameLen);
                    prefix.append(toAdd);
                    String dupCheck = prefix.toString();

                    if (!regexDuplicates.contains(dupCheck)) {
                        Llist.add(toAdd);
                        if (!bAllowDuplicates) {
                            regexDuplicates.add(dupCheck);
                        }
                    }
                }
            } catch (Exception e) {
                this._context.getHarvestStatus().logMessage("processMeta1: " + e.getMessage(), true);
            }
        } //(end metadata chaining handling)
        if (null != Llist) {
            if (null != currField) { // (overwrite)
                f.getMetadata().put(m.fieldName, Llist.toArray());
            } else {
                f.addToMetadata(m.fieldName, Llist.toArray());
            }
        } //TESTED
    } else if (m.scriptlang.equalsIgnoreCase("javascript")) {
        if (null == f.getMetadata()) {
            f.setMetadata(new LinkedHashMap<String, Object[]>());
        }
        //set the script engine up if necessary
        if ((null != source) && (null != uap)) {
            //(these are null if called from new processing pipeline vs legacy code)
            intializeScriptEngine(source, uap);
        }

        try {
            //TODO (INF-2488): in new format, this should only happen in between contentMeta blocks/docs
            // (also should be able to use SAH _document object I think?)

            // Javascript: the user passes in 
            Object[] currField = f.getMetadata().get(m.fieldName);
            if ((null == m.flags) || m.flags.isEmpty()) {
                if (null == currField) {
                    engine.put("text", text);
                    engine.put("_iterator", null);
                }
                //(otherwise will just pass the current fields in there)
            } else { // flags specified
                if (m.flags.contains("t")) { // text
                    engine.put("text", text);
                }
                if (m.flags.contains("d")) { // entire document (minus ents and assocs)
                    GsonBuilder gb = new GsonBuilder();
                    Gson g = gb.create();
                    List<EntityPojo> ents = f.getEntities();
                    List<AssociationPojo> assocs = f.getAssociations();
                    try {
                        f.setEntities(null);
                        f.setAssociations(null);
                        engine.put("document", g.toJson(f));
                        securityManager.eval(engine, JavaScriptUtils.initScript);
                    } finally {
                        f.setEntities(ents);
                        f.setAssociations(assocs);
                    }
                }
                if (m.flags.contains("m")) { // metadata
                    GsonBuilder gb = new GsonBuilder();
                    Gson g = gb.create();
                    engine.put("_metadata", g.toJson(f.getMetadata()));
                    securityManager.eval(engine, JavaScriptUtils.iteratorMetaScript);
                }
            } //(end flags processing)

            if (null != currField) {
                f.getMetadata().remove(m.fieldName);

                GsonBuilder gb = new GsonBuilder();
                Gson g = gb.create();
                engine.put("_iterator", g.toJson(currField));
                securityManager.eval(engine, JavaScriptUtils.iteratorDocScript);
            }
            //TESTED (handling of flags, and replacing of existing fields, including when field is null but specified)

            Object returnVal = securityManager.eval(engine, m.script);

            if (null != returnVal) {
                if (returnVal instanceof String) { // The only easy case
                    Object[] array = new Object[1];
                    if ((null != m.flags) && m.flags.contains("H")) {
                        returnVal = StringEscapeUtils.unescapeHtml((String) returnVal);
                    }
                    array[0] = returnVal;
                    f.addToMetadata(m.fieldName, array);
                } else { // complex object or array - in either case the engine turns these into
                         // internal.NativeArray or internal.NativeObject

                    BasicDBList outList = JavaScriptUtils.parseNativeJsObject(returnVal, engine);
                    f.addToMetadata(m.fieldName, outList.toArray());
                }
            }
        } catch (ScriptException e) {

            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(),
                    true);

            // Just do nothing and log
            // e.printStackTrace();
            //DEBUG (don't output log messages per doc)
            //logger.error(e.getMessage());
        } catch (Exception e) {

            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(),
                    true);

            // Just do nothing and log
            // e.printStackTrace();
            //DEBUG (don't output log messages per doc)
            //logger.error(e.getMessage());
        }
    } else if (m.scriptlang.equalsIgnoreCase("xpath")) {

        String xpath = m.script;

        try {
            createHtmlCleanerIfNeeded();

            int timesToRun = 1;
            Object[] currField = null;
            if ((null != m.flags) && m.flags.contains("c")) {
                currField = f.getMetadata().get(m.fieldName);
            }
            if (null != currField) { // chained metadata
                f.getMetadata().remove(m.fieldName); // (so will add to the end)
                timesToRun = currField.length;
                text = (String) currField[0];
            } //TESTED

            for (int ii = 0; ii < timesToRun; ++ii) {
                if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above)
                    text = (String) currField[ii];
                } //TESTED

                TagNode node = cleaner.clean(new ByteArrayInputStream(text.getBytes()));

                //NewCode : Only use html cleaner for cleansing
                //use JAXP for full Xpath lib
                Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);

                String extraRegex = extractRegexFromXpath(xpath);

                if (extraRegex != null)
                    xpath = xpath.replace(extraRegex, "");

                XPath xpa = XPathFactory.newInstance().newXPath();
                NodeList res = (NodeList) xpa.evaluate(xpath, doc, XPathConstants.NODESET);

                if (res.getLength() > 0) {
                    if ((null != m.flags) && (m.flags.contains("o"))) { // "o" for object
                        m.groupNum = -1; // (see bConvertToObject below)
                    }
                    StringBuffer prefix = new StringBuffer(m.fieldName).append(':');
                    int nFieldNameLen = m.fieldName.length() + 1;
                    ArrayList<Object> Llist = new ArrayList<Object>(res.getLength());
                    boolean bConvertToObject = ((m.groupNum != null) && (m.groupNum == -1));
                    boolean convertToXml = ((null != m.flags) && (m.flags.contains("x")));
                    for (int i = 0; i < res.getLength(); i++) {
                        Node info_node = res.item(i);
                        if ((null != m.flags) && (m.flags.contains("g"))) {
                            Llist.add(parseHtmlTable(info_node, m.replace));
                        } else if (bConvertToObject || convertToXml) {
                            // Try to create a JSON object out of this
                            StringWriter writer = new StringWriter();
                            try {
                                Transformer transformer = TransformerFactory.newInstance().newTransformer();
                                transformer.transform(new DOMSource(info_node), new StreamResult(writer));
                            } catch (TransformerException e1) {
                                continue;
                            }

                            if (bConvertToObject) {
                                try {
                                    JSONObject subObj = XML.toJSONObject(writer.toString());
                                    if (xpath.endsWith("*")) { // (can have any number of different names here)
                                        Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj));
                                    } //TESTED
                                    else {
                                        String[] rootNames = JSONObject.getNames(subObj);
                                        if (1 == rootNames.length) {
                                            // (don't think it can't be any other number in fact)
                                            subObj = subObj.getJSONObject(rootNames[0]);
                                        }
                                        boolean bUnescapeHtml = ((null != m.flags) && m.flags.contains("H"));
                                        Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj,
                                                bUnescapeHtml));
                                    } //TESTED
                                } catch (JSONException e) { // Just carry on
                                    continue;
                                }
                                //TESTED
                            } else { // leave in XML form
                                Llist.add(writer.toString().substring(38)); // +38: (step over <?xml version="1.0" encoding="UTF-8"?>)
                            } //TESTED (xpath_test.json)
                        } else { // Treat this as string, either directly or via regex
                            String info = info_node.getTextContent().trim();
                            if (extraRegex == null || extraRegex.isEmpty()) {
                                prefix.setLength(nFieldNameLen);
                                prefix.append(info);
                                String dupCheck = prefix.toString();

                                if (!regexDuplicates.contains(dupCheck)) {
                                    if ((null != m.flags) && m.flags.contains("H")) {
                                        info = StringEscapeUtils.unescapeHtml(info);
                                    }
                                    Llist.add(info);
                                    if (!bAllowDuplicates) {
                                        regexDuplicates.add(dupCheck);
                                    }
                                }
                            } else { // Apply regex to the string
                                Pattern dataRegex = createRegex(extraRegex, m.flags);
                                Matcher dataMatcher = dataRegex.matcher(info);
                                boolean result = dataMatcher.find();
                                while (result) {
                                    String toAdd;
                                    if (m.groupNum != null)
                                        toAdd = dataMatcher.group(m.groupNum);
                                    else
                                        toAdd = dataMatcher.group();
                                    prefix.setLength(nFieldNameLen);
                                    prefix.append(toAdd);
                                    String dupCheck = prefix.toString();

                                    if (!regexDuplicates.contains(dupCheck)) {
                                        if ((null != m.flags) && m.flags.contains("H")) {
                                            toAdd = StringEscapeUtils.unescapeHtml(toAdd);
                                        }
                                        Llist.add(toAdd);
                                        if (!bAllowDuplicates) {
                                            regexDuplicates.add(dupCheck);
                                        }
                                    }

                                    result = dataMatcher.find();
                                }
                            } //(regex vs no regex)
                        } //(end string vs object)
                    }
                    if (Llist.size() > 0) {
                        f.addToMetadata(m.fieldName, Llist.toArray());
                    }
                }
            } //(end loop over metadata objects if applicable)

        } catch (IOException ioe) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(ioe).toString(),
                    true);

            // Just do nothing and log
            //DEBUG (don't output log messages per doc)
            //logger.error(ioe.getMessage());
        } catch (ParserConfigurationException e1) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(),
                    true);
            // Just do nothing and log
            //DEBUG (don't output log messages per doc)
            //logger.error(e1.getMessage());
        } catch (XPathExpressionException e1) {
            _context.getHarvestStatus().logMessage("Error evaluating xpath expression: " + xpath, true);
        }
    } else if (m.scriptlang.equalsIgnoreCase("stream")) { // XML or JSON streaming interface
        // which one?
        try {
            boolean json = false;
            boolean xml = false;
            for (int i = 0; i < 128; ++i) {
                if ('<' == text.charAt(i)) {
                    xml = true;
                    break;
                }
                if ('{' == text.charAt(i) || '[' == text.charAt(i)) {
                    json = true;
                    break;
                }
                if (!Character.isSpaceChar(text.charAt(i))) {
                    break;
                }
            } //TESTED (too many spaces: meta_stream_test, test4; incorrect chars: test3, xml: test1, json: test2)

            boolean textNotObject = m.flags == null || !m.flags.contains("o");

            List<DocumentPojo> docs = new LinkedList<DocumentPojo>();
            List<String> levelOneFields = null;
            if (null != m.script) {
                levelOneFields = Arrays.asList(m.script.split("\\s*,\\s*"));
                if ((1 == levelOneFields.size()) && levelOneFields.get(0).isEmpty()) {
                    // convert [""] to null
                    levelOneFields = null;
                }
            } //TESTED (json and xml)

            if (xml) {
                XmlToMetadataParser parser = new XmlToMetadataParser(levelOneFields, null, null, null, null,
                        null, Integer.MAX_VALUE);
                XMLInputFactory factory = XMLInputFactory.newInstance();
                factory.setProperty(XMLInputFactory.IS_COALESCING, true);
                factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                XMLStreamReader reader = null;
                try {
                    reader = factory.createXMLStreamReader(new ByteArrayInputStream(text.getBytes()));
                    docs = parser.parseDocument(reader, textNotObject);
                } finally {
                    if (null != reader)
                        reader.close();
                }
            } //TESTED (meta_stream_test, test1)
            if (json) {
                JsonReader jsonReader = null;
                try {
                    JsonToMetadataParser parser = new JsonToMetadataParser(null, levelOneFields, null, null,
                            Integer.MAX_VALUE);
                    jsonReader = new JsonReader(
                            new InputStreamReader(new ByteArrayInputStream(text.getBytes()), "UTF-8"));
                    jsonReader.setLenient(true);
                    docs = parser.parseDocument(jsonReader, textNotObject);
                } finally {
                    if (null != jsonReader)
                        jsonReader.close();
                }
            } //TESTED (meta_stream_test test2)

            if (!docs.isEmpty()) {
                ArrayList<String> Llist = null;
                ArrayList<Object> LlistObj = null;
                if (textNotObject) {
                    Llist = new ArrayList<String>(docs.size());
                } else {
                    LlistObj = new ArrayList<Object>(docs.size());
                }
                for (DocumentPojo doc : docs) {
                    if ((null != doc.getFullText()) || (null != doc.getMetadata())) {
                        if (textNotObject) {
                            Llist.add(doc.getFullText());
                        } //TESTED
                        else if (xml) {
                            LlistObj.add(doc.getMetadata());
                        } //TESTED
                        else if (json) {
                            Object o = doc.getMetadata();
                            if (null != o) {
                                o = doc.getMetadata().get("json");
                                if (o instanceof Object[]) {
                                    LlistObj.addAll(Arrays.asList((Object[]) o));
                                } else if (null != o) {
                                    LlistObj.add(o);
                                } //TESTED
                            }
                        } //TESTED
                    }
                } //TESTED
                if ((null != Llist) && !Llist.isEmpty()) {
                    f.addToMetadata(m.fieldName, Llist.toArray());
                } //TESTED
                if ((null != LlistObj) && !LlistObj.isEmpty()) {
                    f.addToMetadata(m.fieldName, LlistObj.toArray());
                } //TESTED

            } //TESTED (meta_stream_test test1,test2)
        } //(end try)
        catch (Exception e) { // various parsing errors
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(),
                    true);
        }
    } //TESTED (meta_stream_test)

    // (don't currently support other script types)
}