Example usage for javax.xml.stream XMLInputFactory createXMLStreamReader

List of usage examples for javax.xml.stream XMLInputFactory createXMLStreamReader

Introduction

In this page you can find the example usage for javax.xml.stream XMLInputFactory createXMLStreamReader.

Prototype

public abstract XMLStreamReader createXMLStreamReader(java.io.InputStream stream) throws XMLStreamException;

Source Link

Document

Create a new XMLStreamReader from a java.io.InputStream

Usage

From source file:com.liferay.portal.util.LocalizationImpl.java

public String removeLocalization(String xml, String key, String requestedLanguageId, boolean cdata,
        boolean localized) {

    if (Validator.isNull(xml)) {
        return StringPool.BLANK;
    }/*from   w  w  w. ja  v a  2  s.  c  o m*/

    xml = _sanitizeXML(xml);

    String systemDefaultLanguageId = LocaleUtil.toLanguageId(LocaleUtil.getDefault());

    XMLStreamReader xmlStreamReader = null;
    XMLStreamWriter xmlStreamWriter = null;

    ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader();

    Thread currentThread = Thread.currentThread();

    ClassLoader contextClassLoader = currentThread.getContextClassLoader();

    try {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(portalClassLoader);
        }

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();

        xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml));

        String availableLocales = StringPool.BLANK;
        String defaultLanguageId = StringPool.BLANK;

        // Read root node

        if (xmlStreamReader.hasNext()) {
            xmlStreamReader.nextTag();

            availableLocales = xmlStreamReader.getAttributeValue(null, _AVAILABLE_LOCALES);
            defaultLanguageId = xmlStreamReader.getAttributeValue(null, _DEFAULT_LOCALE);

            if (Validator.isNull(defaultLanguageId)) {
                defaultLanguageId = systemDefaultLanguageId;
            }
        }

        if ((availableLocales != null) && (availableLocales.indexOf(requestedLanguageId) != -1)) {

            availableLocales = StringUtil.remove(availableLocales, requestedLanguageId, StringPool.COMMA);

            UnsyncStringWriter unsyncStringWriter = new UnsyncStringWriter();

            XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();

            xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(unsyncStringWriter);

            xmlStreamWriter.writeStartDocument();
            xmlStreamWriter.writeStartElement(_ROOT);

            if (localized) {
                xmlStreamWriter.writeAttribute(_AVAILABLE_LOCALES, availableLocales);
                xmlStreamWriter.writeAttribute(_DEFAULT_LOCALE, defaultLanguageId);
            }

            _copyNonExempt(xmlStreamReader, xmlStreamWriter, requestedLanguageId, defaultLanguageId, cdata);

            xmlStreamWriter.writeEndElement();
            xmlStreamWriter.writeEndDocument();

            xmlStreamWriter.close();
            xmlStreamWriter = null;

            xml = unsyncStringWriter.toString();
        }
    } catch (Exception e) {
        if (_log.isWarnEnabled()) {
            _log.warn(e, e);
        }
    } finally {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(contextClassLoader);
        }

        if (xmlStreamReader != null) {
            try {
                xmlStreamReader.close();
            } catch (Exception e) {
            }
        }

        if (xmlStreamWriter != null) {
            try {
                xmlStreamWriter.close();
            } catch (Exception e) {
            }
        }
    }

    return xml;
}

From source file:com.liferay.portal.util.LocalizationImpl.java

public String updateLocalization(String xml, String key, String value, String requestedLanguageId,
        String defaultLanguageId, boolean cdata, boolean localized) {

    xml = _sanitizeXML(xml);//from w  ww  . java 2 s  .c  o  m

    XMLStreamReader xmlStreamReader = null;
    XMLStreamWriter xmlStreamWriter = null;

    ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader();

    Thread currentThread = Thread.currentThread();

    ClassLoader contextClassLoader = currentThread.getContextClassLoader();

    try {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(portalClassLoader);
        }

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();

        xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml));

        String availableLocales = StringPool.BLANK;

        // Read root node

        if (xmlStreamReader.hasNext()) {
            xmlStreamReader.nextTag();

            availableLocales = xmlStreamReader.getAttributeValue(null, _AVAILABLE_LOCALES);

            if (Validator.isNull(availableLocales)) {
                availableLocales = defaultLanguageId;
            }

            if (availableLocales.indexOf(requestedLanguageId) == -1) {
                availableLocales = StringUtil.add(availableLocales, requestedLanguageId, StringPool.COMMA);
            }
        }

        UnsyncStringWriter unsyncStringWriter = new UnsyncStringWriter();

        XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();

        xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(unsyncStringWriter);

        xmlStreamWriter.writeStartDocument();
        xmlStreamWriter.writeStartElement(_ROOT);

        if (localized) {
            xmlStreamWriter.writeAttribute(_AVAILABLE_LOCALES, availableLocales);
            xmlStreamWriter.writeAttribute(_DEFAULT_LOCALE, defaultLanguageId);
        }

        _copyNonExempt(xmlStreamReader, xmlStreamWriter, requestedLanguageId, defaultLanguageId, cdata);

        xmlStreamWriter.writeStartElement(key);

        if (localized) {
            xmlStreamWriter.writeAttribute(_LANGUAGE_ID, requestedLanguageId);
        }

        if (cdata) {
            xmlStreamWriter.writeCData(value);
        } else {
            xmlStreamWriter.writeCharacters(value);
        }

        xmlStreamWriter.writeEndElement();
        xmlStreamWriter.writeEndElement();
        xmlStreamWriter.writeEndDocument();

        xmlStreamWriter.close();
        xmlStreamWriter = null;

        xml = unsyncStringWriter.toString();
    } catch (Exception e) {
        if (_log.isWarnEnabled()) {
            _log.warn(e, e);
        }
    } finally {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(contextClassLoader);
        }

        if (xmlStreamReader != null) {
            try {
                xmlStreamReader.close();
            } catch (Exception e) {
            }
        }

        if (xmlStreamWriter != null) {
            try {
                xmlStreamWriter.close();
            } catch (Exception e) {
            }
        }
    }

    return xml;
}

From source file:com.liferay.portal.util.LocalizationImpl.java

public String getLocalization(String xml, String requestedLanguageId, boolean useDefault) {

    String value = _getCachedValue(xml, requestedLanguageId, useDefault);

    if (value != null) {
        return value;
    } else {//from ww w  . j av a2s .  c  om
        value = StringPool.BLANK;
    }

    String systemDefaultLanguageId = LocaleUtil.toLanguageId(LocaleUtil.getDefault());

    String priorityLanguageId = null;

    Locale requestedLocale = LocaleUtil.fromLanguageId(requestedLanguageId);

    if (useDefault && LanguageUtil.isDuplicateLanguageCode(requestedLocale.getLanguage())) {

        Locale priorityLocale = LanguageUtil.getLocale(requestedLocale.getLanguage());

        if (!requestedLanguageId.equals(priorityLanguageId)) {
            priorityLanguageId = LocaleUtil.toLanguageId(priorityLocale);
        }
    }

    if (!Validator.isXml(xml)) {
        if (useDefault || requestedLanguageId.equals(systemDefaultLanguageId)) {

            value = xml;
        }

        _setCachedValue(xml, requestedLanguageId, useDefault, value);

        return value;
    }

    XMLStreamReader xmlStreamReader = null;

    ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader();

    Thread currentThread = Thread.currentThread();

    ClassLoader contextClassLoader = currentThread.getContextClassLoader();

    try {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(portalClassLoader);
        }

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();

        xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml));

        String defaultLanguageId = StringPool.BLANK;

        // Skip root node

        if (xmlStreamReader.hasNext()) {
            xmlStreamReader.nextTag();

            defaultLanguageId = xmlStreamReader.getAttributeValue(null, _DEFAULT_LOCALE);

            if (Validator.isNull(defaultLanguageId)) {
                defaultLanguageId = systemDefaultLanguageId;
            }
        }

        // Find specified language and/or default language

        String defaultValue = StringPool.BLANK;
        String priorityValue = StringPool.BLANK;

        while (xmlStreamReader.hasNext()) {
            int event = xmlStreamReader.next();

            if (event == XMLStreamConstants.START_ELEMENT) {
                String languageId = xmlStreamReader.getAttributeValue(null, _LANGUAGE_ID);

                if (Validator.isNull(languageId)) {
                    languageId = defaultLanguageId;
                }

                if (languageId.equals(defaultLanguageId) || languageId.equals(priorityLanguageId)
                        || languageId.equals(requestedLanguageId)) {

                    String text = xmlStreamReader.getElementText();

                    if (languageId.equals(defaultLanguageId)) {
                        defaultValue = text;
                    }

                    if (languageId.equals(priorityLanguageId)) {
                        priorityValue = text;
                    }

                    if (languageId.equals(requestedLanguageId)) {
                        value = text;
                    }

                    if (Validator.isNotNull(value)) {
                        break;
                    }
                }
            } else if (event == XMLStreamConstants.END_DOCUMENT) {
                break;
            }
        }

        if (useDefault && Validator.isNotNull(priorityLanguageId) && Validator.isNull(value)
                && Validator.isNotNull(priorityValue)) {

            value = priorityValue;
        }

        if (useDefault && Validator.isNull(value)) {
            value = defaultValue;
        }
    } catch (Exception e) {
        if (_log.isWarnEnabled()) {
            _log.warn(e, e);
        }
    } finally {
        if (contextClassLoader != portalClassLoader) {
            currentThread.setContextClassLoader(contextClassLoader);
        }

        if (xmlStreamReader != null) {
            try {
                xmlStreamReader.close();
            } catch (Exception e) {
            }
        }
    }

    _setCachedValue(xml, requestedLanguageId, useDefault, value);

    return value;
}

From source file:co.turnus.trace.io.XmlTraceReader.java

public XmlTraceReader(File file, TraceFactory factory) {
    try {//from   w w w.  j  av  a2  s  .  c om
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        String extension = TurnusUtils.getExtension(file);
        if (!extension.equals(TurnusExtension.TRACE) && !extension.equals(TurnusExtension.TRACE_COMPRESSED)) {
            throw new TurnusRuntimeException("Trace file reader: unsupported extension");
        }

        InputStream stream = new BufferedInputStream(new FileInputStream(file));
        if (extension.equals(TurnusExtension.TRACE_COMPRESSED)) {
            stream = new CompressorStreamFactory().createCompressorInputStream(CompressorStreamFactory.GZIP,
                    stream);
        }
        reader = inputFactory.createXMLStreamReader(stream);

    } catch (Exception e) {
        throw new TurnusRuntimeException("Error initializing the trace reader", e.getCause());
    }

    this.factory = factory;
    tempDep = new TempDependency();
    tempStep = new TempStep();
}

From source file:com.cedarsoft.serialization.test.performance.XmlParserPerformance.java

private void benchParse(XMLInputFactory inputFactory, @Nonnull String contentSample) throws XMLStreamException {
    for (int i = 0; i < BIG; i++) {
        XMLStreamReader parser = inputFactory.createXMLStreamReader(new StringReader(contentSample));

        assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag());
        assertEquals("fileType", parser.getLocalName());
        assertEquals("fileType", parser.getName().getLocalPart());

        boolean dependent = Boolean.parseBoolean(parser.getAttributeValue(null, "dependent"));

        assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag());
        assertEquals("id", parser.getName().getLocalPart());
        assertEquals(XMLStreamReader.CHARACTERS, parser.next());

        String id = parser.getText();

        assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag());

        assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag());
        assertEquals("extension", parser.getName().getLocalPart());

        boolean isDefault = Boolean.parseBoolean(parser.getAttributeValue(null, "default"));
        String delimiter = parser.getAttributeValue(null, "delimiter");

        assertEquals(XMLStreamReader.CHARACTERS, parser.next());

        String extension = parser.getText();
        assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag());
        assertEquals("extension", parser.getName().getLocalPart());

        assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag());
        assertEquals("fileType", parser.getName().getLocalPart());
        assertEquals(XMLStreamReader.END_DOCUMENT, parser.next());

        parser.close();//from   w  w  w . j a  va 2  s  .c  o  m

        FileType type = new FileType(id, new Extension(delimiter, extension, isDefault), dependent);
        assertNotNull(type);
    }
}

From source file:de.uzk.hki.da.model.ObjectPremisXmlWriter.java

/**
 * Integrate jhove data./*from   w w  w .j a  va  2  s. co m*/
 *
 * @param jhoveFilePath the jhove file path
 * @param tab the tab
 * @throws XMLStreamException the xML stream exception
 * @author Thomas Kleinke
 * @throws FileNotFoundException 
 */
private void integrateJhoveData(String jhoveFilePath, int tab)
        throws XMLStreamException, FileNotFoundException {
    File jhoveFile = new File(jhoveFilePath);
    if (!jhoveFile.exists())
        throw new FileNotFoundException("file does not exist. " + jhoveFile);

    FileInputStream inputStream = null;

    inputStream = new FileInputStream(jhoveFile);

    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    XMLStreamReader streamReader = inputFactory.createXMLStreamReader(inputStream);

    boolean textElement = false;

    while (streamReader.hasNext()) {
        int event = streamReader.next();

        switch (event) {

        case XMLStreamConstants.START_ELEMENT:
            writer.writeDTD("\n");
            indent(tab);
            tab++;

            String prefix = streamReader.getPrefix();

            if (prefix != null && !prefix.equals("")) {
                writer.setPrefix(prefix, streamReader.getNamespaceURI());
                writer.writeStartElement(streamReader.getNamespaceURI(), streamReader.getLocalName());
            } else
                writer.writeStartElement(streamReader.getLocalName());

            for (int i = 0; i < streamReader.getNamespaceCount(); i++)
                writer.writeNamespace(streamReader.getNamespacePrefix(i), streamReader.getNamespaceURI(i));

            for (int i = 0; i < streamReader.getAttributeCount(); i++) {
                QName qname = streamReader.getAttributeName(i);
                String attributeName = qname.getLocalPart();
                String attributePrefix = qname.getPrefix();
                if (attributePrefix != null && !attributePrefix.equals(""))
                    attributeName = attributePrefix + ":" + attributeName;

                writer.writeAttribute(attributeName, streamReader.getAttributeValue(i));
            }

            break;

        case XMLStreamConstants.CHARACTERS:
            if (!streamReader.isWhiteSpace()) {
                writer.writeCharacters(streamReader.getText());
                textElement = true;
            }
            break;

        case XMLStreamConstants.END_ELEMENT:
            tab--;

            if (!textElement) {
                writer.writeDTD("\n");
                indent(tab);
            }

            writer.writeEndElement();
            textElement = false;
            break;

        default:
            break;
        }
    }

    streamReader.close();
    try {
        inputStream.close();
    } catch (IOException e) {
        throw new RuntimeException("Failed to close input stream", e);
    }
}

From source file:com.cedarsoft.serialization.test.performance.XmlParserPerformance.java

private void benchParse(javolution.xml.stream.XMLInputFactory inputFactory)
        throws XMLStreamException, javolution.xml.stream.XMLStreamException {
    for (int i = 0; i < BIG; i++) {
        javolution.xml.stream.XMLStreamReader parser = inputFactory
                .createXMLStreamReader(new StringReader(CONTENT_SAMPLE));

        assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag());
        assertEquals("fileType", parser.getLocalName().toString());

        boolean dependent = Boolean.parseBoolean(parser.getAttributeValue(null, "dependent").toString());

        assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag());
        assertEquals("id", parser.getLocalName().toString());
        assertEquals(XMLStreamReader.CHARACTERS, parser.next());

        String id = parser.getText().toString();

        assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag());
        assertEquals("id", parser.getLocalName().toString());

        assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag());
        assertEquals("extension", parser.getLocalName().toString());

        boolean isDefault = Boolean.parseBoolean(parser.getAttributeValue(null, "default").toString());
        String delimiter = parser.getAttributeValue(null, "delimiter").toString();

        assertEquals(XMLStreamReader.CHARACTERS, parser.next());

        String extension = parser.getText().toString();

        assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag());
        assertEquals("extension", parser.getLocalName().toString());

        assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag());
        assertEquals("fileType", parser.getLocalName().toString());
        assertEquals(XMLStreamReader.END_DOCUMENT, parser.next());

        parser.close();/*from www  .  j  a  v a2 s. c  om*/

        FileType type = new FileType(id, new Extension(delimiter, extension, isDefault), dependent);
        assertNotNull(type);
    }
}

From source file:com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester.java

private void parse(InfiniteFile f, SourcePojo source) throws MalformedURLException, URISyntaxException {

    //NOTE: we only ever break out of here because of max docs in standalone mode
    // (because we don't know how to continue reading)

    DocumentPojo doc = null;/*from   ww  w  . j av a 2s . co  m*/
    //Determine File Extension
    String fileName = f.getName().toString();

    int mid = fileName.lastIndexOf(".");
    String extension = fileName.substring(mid + 1, fileName.length());

    //Checked to save processing time
    long fileTimestamp = (f.getDate() / 1000) * 1000;
    // (ensure truncated to seconds, since some operation somewhere hear does this...)

    Date modDate = new Date(fileTimestamp);
    //XML Data gets placed into MetaData

    boolean bIsXml = false;
    boolean bIsJson = false;
    boolean bIsLineOriented = false;
    if ((null != source.getFileConfig()) && (null != source.getFileConfig().type)) {
        extension = source.getFileConfig().type;
    }
    bIsXml = extension.equalsIgnoreCase("xml");
    bIsJson = extension.equalsIgnoreCase("json");
    bIsLineOriented = extension.endsWith("sv");

    if (bIsXml || bIsJson || bIsLineOriented) {
        int debugMaxDocs = Integer.MAX_VALUE; // by default don't set this, it's only for debug mode
        if (_context.isStandalone()) { // debug mode
            debugMaxDocs = maxDocsPerCycle;
        }
        //fast check to see if the file has changed before processing (or if it never existed)
        if (needsUpdated_SourceUrl(modDate, f.getUrlString(), source)) {
            if (0 != modDate.getTime()) { // if it ==0 then sourceUrl doesn't exist at all, no need to delete
                // This file already exists - in normal/managed mode will re-create
                // In streaming mode, simple skip over
                if (_streaming) {
                    return;
                } //TESTED

                DocumentPojo docRepresentingSrcUrl = new DocumentPojo();
                docRepresentingSrcUrl.setSourceUrl(f.getUrlString());
                docRepresentingSrcUrl.setSourceKey(source.getKey());
                docRepresentingSrcUrl.setCommunityId(source.getCommunityIds().iterator().next());
                sourceUrlsGettingUpdated.add(docRepresentingSrcUrl.getSourceUrl());
                this.docsToRemove.add(docRepresentingSrcUrl);
                // (can add documents with just source URL, are treated differently in the core libraries)               
            }

            SourceFileConfigPojo fileSystem = source.getFileConfig();
            if ((null == fileSystem) && (bIsXml || bIsJson)) {
                fileSystem = new SourceFileConfigPojo();
            }
            XmlToMetadataParser xmlParser = null;
            JsonToMetadataParser jsonParser = null;
            String urlType = extension;
            if (bIsXml) {
                xmlParser = new XmlToMetadataParser(fileSystem.XmlRootLevelValues, fileSystem.XmlIgnoreValues,
                        fileSystem.XmlSourceName, fileSystem.XmlPrimaryKey, fileSystem.XmlAttributePrefix,
                        fileSystem.XmlPreserveCase, debugMaxDocs);
            } //TESTED
            else if (bIsJson) {
                jsonParser = new JsonToMetadataParser(fileSystem.XmlSourceName, fileSystem.XmlRootLevelValues,
                        fileSystem.XmlPrimaryKey, fileSystem.XmlIgnoreValues, debugMaxDocs);
            } //TESTED

            List<DocumentPojo> partials = null;
            try {
                if (bIsXml) {
                    XMLStreamReader xmlStreamReader = null;
                    XMLInputFactory factory = XMLInputFactory.newInstance();
                    factory.setProperty(XMLInputFactory.IS_COALESCING, true);
                    factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                    try {
                        xmlStreamReader = factory.createXMLStreamReader(f.getInputStream());
                        partials = xmlParser.parseDocument(xmlStreamReader);
                        long memUsage = xmlParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != xmlStreamReader)
                            xmlStreamReader.close();
                    }
                } //TESTED
                else if (bIsJson) {
                    JsonReader jsonReader = null;
                    try {
                        jsonReader = new JsonReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        jsonReader.setLenient(true);
                        partials = jsonParser.parseDocument(jsonReader);
                        long memUsage = jsonParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != jsonReader)
                            jsonReader.close();
                    }
                } //TESTED
                else if (bIsLineOriented) { // Just generate a document for every line

                    BufferedReader lineReader = null;
                    try {
                        lineReader = new BufferedReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        CsvToMetadataParser lineParser = new CsvToMetadataParser(debugMaxDocs);
                        partials = lineParser.parseDocument(lineReader, source);
                        long memUsage = lineParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != lineReader)
                            lineReader.close();
                    }
                } //TESTED

                MessageDigest md5 = null; // (generates unique urls if the user doesn't below)
                try {
                    md5 = MessageDigest.getInstance("MD5");
                } catch (NoSuchAlgorithmException e) {
                    // Do nothing, unlikely to happen...
                }
                int nIndex = 0;
                int numPartials = partials.size();
                for (DocumentPojo doctoAdd : partials) {
                    nIndex++;
                    doctoAdd.setSource(source.getTitle());
                    doctoAdd.setSourceKey(source.getKey());
                    doctoAdd.setMediaType(source.getMediaType());
                    doctoAdd.setModified(new Date(fileTimestamp));
                    doctoAdd.setCreated(new Date());

                    if (null == doctoAdd.getUrl()) { // Can be set in the parser or here
                        doctoAdd.setHasDefaultUrl(true); // (ie cannot occur in a different src URL)

                        if (1 == numPartials) {
                            String urlString = f.getUrlString();
                            if (urlString.endsWith(urlType)) {
                                doctoAdd.setUrl(urlString);
                            } else {
                                doctoAdd.setUrl(
                                        new StringBuffer(urlString).append('.').append(urlType).toString());
                            }
                            // (we always set sourceUrl as the true url of the file, so want to differentiate the URL with
                            //  some useful information)
                        } else if (null == doctoAdd.getMetadata()) { // Line oriented case
                            doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/").append(nIndex)
                                    .append('.').append(urlType).toString());
                        } else {
                            if (null == md5) { // Will never happen, MD5 always exists
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(doctoAdd.getMetadata().hashCode()).append('.').append(urlType)
                                        .toString());
                            } else { // This is the standard call if the XML parser has not been configured to build the URL
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(DigestUtils.md5Hex(doctoAdd.getMetadata().toString()))
                                        .append('.').append(urlType).toString());
                            }
                        } //TESTED
                    }
                    doctoAdd.setTitle(f.getName().toString());
                    doctoAdd.setPublishedDate(new Date(fileTimestamp));
                    doctoAdd.setSourceUrl(f.getUrlString());

                    // Always add to files because I'm deleting the source URL
                    files.add(doctoAdd);
                } //TESTED 

            } catch (XMLStreamException e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (FactoryConfigurationError e1) {
                errors++;
                _context.getHarvestStatus().logMessage(e1.getMessage(), true);

            } catch (IOException e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (Exception e1) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            }
        } //(end if needs updated)
    } else //Tika supports Excel,Word,Powerpoint,Visio, & Outlook Documents
    {
        // (This dedup tells me if it's an add/update vs ignore - qr.isDuplicate higher up tells me if I need to add or update)
        if (needsUpdated_Url(modDate, f.getUrlString(), source)) {

            Metadata metadata = null;
            InputStream in = null;
            try {

                doc = new DocumentPojo();

                // Create a tika object (first time only)
                if (null == _tika) {
                    this.initializeTika(_context, source);
                }

                // BUGGERY
                // NEED TO LIKELY SET LIMIT TO BE 30MB or 50MB and BYPASS ANYTHING OVER THAT BELOW IS THE CODE TO DO THAT
                // tika.setMaxStringLength(30*1024*1024);
                // Disable the string length limit
                _tika.setMaxStringLength(-1);
                //input = new FileInputStream(new File(resourceLocation));
                // Create a metadata object to contain the metadata

                metadata = new Metadata();
                // Parse the file and get the text of the file
                doc.setSource(source.getTitle());
                doc.setSourceKey(source.getKey());
                doc.setMediaType(source.getMediaType());
                String fullText = "";

                in = f.getInputStream();
                try {
                    if (null == _tikaOutputFormat) { // text only
                        fullText = _tika.parseToString(in, metadata);
                    } //TESTED
                    else { // XML/HMTL
                        _tika.getParser().parse(in, _tikaOutputFormat, metadata, _tikaOutputParseContext);
                        fullText = _tikaXmlFormatWriter.toString();
                        _tikaXmlFormatWriter.getBuffer().setLength(0);
                    } //TESTED
                } finally {
                    if (null != in)
                        in.close();
                }
                int descCap = 500;
                doc.setFullText(fullText);
                if (descCap > fullText.length()) {
                    descCap = fullText.length();
                }
                doc.setDescription(fullText.substring(0, descCap));
                doc.setModified(new Date(fileTimestamp));
                doc.setCreated(new Date());
                doc.setUrl(f.getUrlString());
                doc.setTitle(f.getName().toString());
                doc.setPublishedDate(new Date(fileTimestamp));

                long memUsage = (250L * (doc.getFullText().length() + doc.getDescription().length())) / 100L; // 25% overhead, 2x for string->byte
                _memUsage += memUsage;
                _totalMemUsage.addAndGet(memUsage);

                // If the metadata contains a more plausible date then use that
                try {
                    String title = metadata.get(Metadata.TITLE);
                    if (null != title) {
                        doc.setTitle(title);
                    }
                } catch (Exception e) { // Fine just carry on                  
                }
                try {
                    Date date = metadata.getDate(Metadata.CREATION_DATE); // MS Word
                    if (null != date) {
                        doc.setPublishedDate(date);
                    } else {
                        date = metadata.getDate(Metadata.DATE); // Dublin
                        if (null != date) {
                            doc.setPublishedDate(date);
                        } else {
                            date = metadata.getDate(Metadata.ORIGINAL_DATE);
                            if (null != date) {
                                doc.setPublishedDate(date);
                            }
                        }
                    }
                } catch (Exception e) { // Fine just carry on                  
                }
                //TESTED

                // If the metadata contains a geotag then apply that:
                try {
                    String lat = metadata.get(Metadata.LATITUDE);
                    String lon = metadata.get(Metadata.LONGITUDE);
                    if ((null != lat) && (null != lon)) {
                        GeoPojo gt = new GeoPojo();
                        gt.lat = Double.parseDouble(lat);
                        gt.lon = Double.parseDouble(lon);
                        doc.setDocGeo(gt);
                    }
                } catch (Exception e) { // Fine just carry on                  
                }

                // Save the entire metadata:
                doc.addToMetadata("_FILE_METADATA_", metadata);

                for (ObjectId communityId : source.getCommunityIds()) {
                    doc.setCommunityId(communityId);
                }
                files.add(doc);

                // Close the input stream
                in.close();
                in = null;

                //TESTED

            } catch (SmbException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (MalformedURLException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (UnknownHostException e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (IOException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (TikaException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (Exception e) {
                errors++;
                _context.getHarvestStatus()
                        .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } finally { // Close the input stream if an error occurs
                if (null != in) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        // All good, do nothing
                    }
                }
            } // end exception handling
        } // end dedup check
    } // end XML vs "office" app

    //DEBUG
    //System.out.println("FILE=" + files.size() + " / MEM=" + _memUsage + " VS " + Runtime.getRuntime().totalMemory());
}

From source file:com.clustercontrol.agent.winevent.WinEventMonitor.java

/**
 * XMLStAX???EventLogRecord????/*from w w w.  jav a 2 s . c  o m*/
 * @param eventXmlStream
 * @return EventLogRecord?
 */
private ArrayList<EventLogRecord> parseEventXML(InputStream eventXmlStream) {
    ArrayList<EventLogRecord> eventlogs = new ArrayList<EventLogRecord>();

    try {
        XMLInputFactory xmlif = XMLInputFactory.newInstance();
        /**
         * OpenJDK7/OracleJDK7??"]"?2?????????????????????????????
         * ?XML?????????OpenJDK7/OracleJDK7???????/??????????
         * URL???????????????
         * 
         * URL
         * http://docs.oracle.com/javase/jp/6/api/javax/xml/stream/XMLStreamReader.html#next()
         */
        String xmlCoalescingKey = "javax.xml.stream.isCoalescing";// TODO JRE???????????????????
        if (m_log.isDebugEnabled()) {
            m_log.debug(xmlCoalescingKey + " = true");
        }
        xmlif.setProperty(xmlCoalescingKey, true);
        XMLStreamReader xmlr = xmlif.createXMLStreamReader(eventXmlStream);

        while (xmlr.hasNext()) {
            switch (xmlr.getEventType()) {
            case XMLStreamConstants.START_ELEMENT:
                m_log.trace("EventType : XMLStreamConstants.START_ELEMENT");

                String localName = xmlr.getLocalName();
                m_log.trace("local name : " + localName);

                if ("Event".equals(localName)) {
                    EventLogRecord eventlog = new EventLogRecord();
                    eventlogs.add(eventlog);
                    m_log.debug("create new EventLogRecord");
                } else {
                    String attrLocalName = null;
                    String attrValue = null;

                    if (xmlr.getAttributeCount() != 0) {
                        attrLocalName = xmlr.getAttributeLocalName(0);
                        attrValue = xmlr.getAttributeValue(0);
                        m_log.trace("attribute local name : " + attrLocalName);
                        m_log.trace("attribute local value : " + attrValue);
                    }

                    if ("Provider".equals(localName)) {
                        if ("Name".equals(attrLocalName)) {
                            m_log.trace("target value : " + attrValue);

                            EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1);
                            eventlog.setProviderName(attrValue);
                            m_log.debug("set ProviderName : " + eventlog.getProviderName());
                        }
                    }
                    // Get-WinEvent/wevtutil.exe
                    else if ("TimeCreated".equals(localName) && "SystemTime".equals(attrLocalName)) {
                        m_log.trace("target value : " + attrValue);

                        // "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'"???S????????????
                        String formatedDateString = attrValue.replaceAll("\\..*Z", "");
                        m_log.trace("formatted target value : " + formatedDateString);
                        DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
                        sdf.setTimeZone(TimeZone.getTimeZone("UTC"));

                        EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1);
                        ;
                        try {
                            eventlog.setTimeCreated(sdf.parse(formatedDateString));
                        } catch (ParseException e) {
                            // do nothing
                            m_log.error("set TimeCreated Error", e);
                        }
                        m_log.debug("set TimeCreated : " + eventlog.getTimeCreated());
                    }
                    // Get-EventLog
                    if ("TimeGenerated".equals(localName) && "SystemTime".equals(attrLocalName)) {
                        m_log.trace("target value : " + attrValue);
                        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss'Z'");
                        sdf.setTimeZone(HinemosTime.getTimeZone());

                        EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1);
                        ;
                        try {
                            eventlog.setTimeCreated(sdf.parse(attrValue));
                        } catch (ParseException e) {
                            // do nothing
                            m_log.error("set TimeCreated Error", e);
                        }
                        m_log.debug("set TimeCreated : " + eventlog.getTimeCreated());
                    } else {
                        targetProperty = localName;
                        m_log.trace("target property : " + targetProperty);
                    }
                }

                break;
            case XMLStreamConstants.SPACE:
            case XMLStreamConstants.CHARACTERS:
                m_log.trace("EventType : XMLStreamConstants.CHARACTERS, length=" + xmlr.getTextLength());
                if (targetProperty != null) {
                    try {
                        EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1);
                        ;
                        if ("EventID".equals(targetProperty)) {
                            eventlog.setId(Integer.parseInt(new String(xmlr.getTextCharacters(),
                                    xmlr.getTextStart(), xmlr.getTextLength())));
                            m_log.debug("set EventID : " + eventlog.getId());
                        }
                        // Get-WinEvent/wevtutil.exe
                        else if ("Level".equals(targetProperty)) {
                            if (eventlog.getLevel() == WinEventConstant.UNDEFINED) {
                                eventlog.setLevel(Integer.parseInt(new String(xmlr.getTextCharacters(),
                                        xmlr.getTextStart(), xmlr.getTextLength())));
                                m_log.debug("set Level : " + eventlog.getLevel());
                            }
                        } else if ("Task".equals(targetProperty)) {
                            if (eventlog.getTask() == WinEventConstant.UNDEFINED) {
                                eventlog.setTask(Integer.parseInt(new String(xmlr.getTextCharacters(),
                                        xmlr.getTextStart(), xmlr.getTextLength())));
                                m_log.debug("set Task : " + eventlog.getTask());
                            }
                        } else if ("Keywords".equals(targetProperty)) {
                            // TODO ????????0x8080000000000000
                            //eventlog.setKeywords(Long.decode(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength())));
                            //m_log.debug("set Keywords : " + eventlog.getKeywords());
                        } else if ("EventRecordId".equals(targetProperty)) {
                            eventlog.setRecordId(Long.parseLong(new String(xmlr.getTextCharacters(),
                                    xmlr.getTextStart(), xmlr.getTextLength())));
                            m_log.debug("set RecordId : " + eventlog.getRecordId());
                        } else if ("Channel".equals(targetProperty)) {
                            eventlog.setLogName(new String(xmlr.getTextCharacters(), xmlr.getTextStart(),
                                    xmlr.getTextLength()));
                            m_log.debug("set LogName : " + eventlog.getLogName());
                        } else if ("Computer".equals(targetProperty)) {
                            eventlog.setMachineName(new String(xmlr.getTextCharacters(), xmlr.getTextStart(),
                                    xmlr.getTextLength()));
                            m_log.debug("set MachineName : " + eventlog.getMachineName());
                        } else if ("Message".equals(targetProperty)) {
                            String message = new String(xmlr.getTextCharacters(), xmlr.getTextStart(),
                                    xmlr.getTextLength());
                            message = message.replaceAll(tmpReturnCode, "\r\n");
                            message = message.replaceAll(tmpLtCode, "<");
                            message = message.replaceAll(tmpGtCode, ">");
                            eventlog.setMessage(message);
                            m_log.debug("set Message : " + eventlog.getMessage());
                        } else if ("Data".equals(targetProperty)) {
                            String data = new String(xmlr.getTextCharacters(), xmlr.getTextStart(),
                                    xmlr.getTextLength());
                            eventlog.getData().add(data);
                            m_log.debug("set Data : " + data);
                        } else {
                            m_log.debug("unknown target property : " + targetProperty);
                        }
                    } catch (NumberFormatException e) {
                        m_log.debug("number parse error", e);
                    }
                }
                targetProperty = null;
                break;
            default: // 
                break;
            }
            xmlr.next();
        }
        xmlr.close();
    } catch (XMLStreamException e) {
        m_log.warn("parseEvent() xmlstream error", e);
    }

    return eventlogs;

}

From source file:gima.neo4j.testsuite.osmcheck.OSMImporter.java

public void importFile(OSMWriter<?> osmWriter, String dataset, boolean allPoints, Charset charset)
        throws IOException, XMLStreamException {
    System.out.println("Importing with osm-writer: " + osmWriter);
    osmWriter.getOrCreateOSMDataset(layerName);
    osm_dataset = osmWriter.getDatasetId();

    long startTime = System.currentTimeMillis();
    long[] times = new long[] { 0L, 0L, 0L, 0L };
    javax.xml.stream.XMLInputFactory factory = javax.xml.stream.XMLInputFactory.newInstance();
    CountedFileReader reader = new CountedFileReader(dataset, charset);
    javax.xml.stream.XMLStreamReader parser = factory.createXMLStreamReader(reader);
    int countXMLTags = 0;
    beginProgressMonitor(100);//from w  w  w .  j  av a  2s.  c o m
    setLogContext(dataset);
    boolean startedWays = false;
    boolean startedRelations = false;
    try {
        ArrayList<String> currentXMLTags = new ArrayList<String>();
        int depth = 0;
        Map<String, Object> wayProperties = null;
        ArrayList<Long> wayNodes = new ArrayList<Long>();
        Map<String, Object> relationProperties = null;
        ArrayList<Map<String, Object>> relationMembers = new ArrayList<Map<String, Object>>();
        LinkedHashMap<String, Object> currentNodeTags = new LinkedHashMap<String, Object>();
        while (true) {
            updateProgressMonitor(reader.getPercentRead());
            incrLogContext();
            int event = parser.next();
            if (event == javax.xml.stream.XMLStreamConstants.END_DOCUMENT) {
                break;
            }
            switch (event) {
            case javax.xml.stream.XMLStreamConstants.START_ELEMENT:
                currentXMLTags.add(depth, parser.getLocalName());
                String tagPath = currentXMLTags.toString();
                if (tagPath.equals("[osm]")) {
                    osmWriter.setDatasetProperties(extractProperties(parser));
                } else if (tagPath.equals("[osm, bounds]")) {
                    osmWriter.addOSMBBox(extractProperties("bbox", parser));
                } else if (tagPath.equals("[osm, node]")) {
                    // <node id="269682538" lat="56.0420950" lon="12.9693483" user="sanna" uid="31450" visible="true" version="1" changeset="133823" timestamp="2008-06-11T12:36:28Z"/>
                    osmWriter.createOSMNode(extractProperties("node", parser));
                } else if (tagPath.equals("[osm, way]")) {
                    // <way id="27359054" user="spull" uid="61533" visible="true" version="8" changeset="4707351" timestamp="2010-05-15T15:39:57Z">
                    if (!startedWays) {
                        startedWays = true;
                        times[0] = System.currentTimeMillis();
                        osmWriter.optimize();
                        times[1] = System.currentTimeMillis();
                    }
                    wayProperties = extractProperties("way", parser);
                    wayNodes.clear();
                } else if (tagPath.equals("[osm, way, nd]")) {
                    Map<String, Object> properties = extractProperties(parser);
                    wayNodes.add(Long.parseLong(properties.get("ref").toString()));
                } else if (tagPath.endsWith("tag]")) {
                    Map<String, Object> properties = extractProperties(parser);
                    currentNodeTags.put(properties.get("k").toString(), properties.get("v").toString());
                } else if (tagPath.equals("[osm, relation]")) {
                    // <relation id="77965" user="Grillo" uid="13957" visible="true" version="24" changeset="5465617" timestamp="2010-08-11T19:25:46Z">
                    if (!startedRelations) {
                        startedRelations = true;
                        times[2] = System.currentTimeMillis();
                        osmWriter.optimize();
                        times[3] = System.currentTimeMillis();
                    }
                    relationProperties = extractProperties("relation", parser);
                    relationMembers.clear();
                } else if (tagPath.equals("[osm, relation, member]")) {
                    relationMembers.add(extractProperties(parser));
                }
                if (startedRelations) {
                    if (countXMLTags < 10) {
                        log("Starting tag at depth " + depth + ": " + currentXMLTags.get(depth) + " - "
                                + currentXMLTags.toString());
                        for (int i = 0; i < parser.getAttributeCount(); i++) {
                            log("\t" + currentXMLTags.toString() + ": " + parser.getAttributeLocalName(i) + "["
                                    + parser.getAttributeNamespace(i) + "," + parser.getAttributePrefix(i) + ","
                                    + parser.getAttributeType(i) + "," + "] = " + parser.getAttributeValue(i));
                        }
                    }
                    countXMLTags++;
                }
                depth++;
                break;
            case javax.xml.stream.XMLStreamConstants.END_ELEMENT:
                if (currentXMLTags.toString().equals("[osm, node]")) {
                    osmWriter.addOSMNodeTags(allPoints, currentNodeTags);
                } else if (currentXMLTags.toString().equals("[osm, way]")) {
                    osmWriter.createOSMWay(wayProperties, wayNodes, currentNodeTags);
                } else if (currentXMLTags.toString().equals("[osm, relation]")) {
                    osmWriter.createOSMRelation(relationProperties, relationMembers, currentNodeTags);
                }
                depth--;
                currentXMLTags.remove(depth);
                // log("Ending tag at depth "+depth+": "+currentTags.get(depth));
                break;
            default:
                break;
            }
        }
    } finally {
        endProgressMonitor();
        parser.close();
        osmWriter.finish();
        this.osm_dataset = osmWriter.getDatasetId();
    }
    describeTimes(startTime, times);
    osmWriter.describeMissing();
    osmWriter.describeLoaded();

    long stopTime = System.currentTimeMillis();
    log("info | Elapsed time in seconds: " + (1.0 * (stopTime - startTime) / 1000.0));
    stats.dumpGeomStats();
    stats.printTagStats();
}