List of usage examples for javax.xml.stream.XMLInputFactory.createXMLStreamReader
/**
 * Creates an {@code XMLStreamReader} for the given input stream.
 *
 * @param stream the input stream the returned reader is created from
 * @return the stream reader created for {@code stream}
 * @throws XMLStreamException declared by this factory method
 */
public abstract XMLStreamReader createXMLStreamReader(java.io.InputStream stream) throws XMLStreamException;
From source file:com.liferay.portal.util.LocalizationImpl.java
public String removeLocalization(String xml, String key, String requestedLanguageId, boolean cdata, boolean localized) { if (Validator.isNull(xml)) { return StringPool.BLANK; }/*from w w w. ja v a 2 s. c o m*/ xml = _sanitizeXML(xml); String systemDefaultLanguageId = LocaleUtil.toLanguageId(LocaleUtil.getDefault()); XMLStreamReader xmlStreamReader = null; XMLStreamWriter xmlStreamWriter = null; ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader(); Thread currentThread = Thread.currentThread(); ClassLoader contextClassLoader = currentThread.getContextClassLoader(); try { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(portalClassLoader); } XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml)); String availableLocales = StringPool.BLANK; String defaultLanguageId = StringPool.BLANK; // Read root node if (xmlStreamReader.hasNext()) { xmlStreamReader.nextTag(); availableLocales = xmlStreamReader.getAttributeValue(null, _AVAILABLE_LOCALES); defaultLanguageId = xmlStreamReader.getAttributeValue(null, _DEFAULT_LOCALE); if (Validator.isNull(defaultLanguageId)) { defaultLanguageId = systemDefaultLanguageId; } } if ((availableLocales != null) && (availableLocales.indexOf(requestedLanguageId) != -1)) { availableLocales = StringUtil.remove(availableLocales, requestedLanguageId, StringPool.COMMA); UnsyncStringWriter unsyncStringWriter = new UnsyncStringWriter(); XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance(); xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(unsyncStringWriter); xmlStreamWriter.writeStartDocument(); xmlStreamWriter.writeStartElement(_ROOT); if (localized) { xmlStreamWriter.writeAttribute(_AVAILABLE_LOCALES, availableLocales); xmlStreamWriter.writeAttribute(_DEFAULT_LOCALE, defaultLanguageId); } _copyNonExempt(xmlStreamReader, xmlStreamWriter, requestedLanguageId, defaultLanguageId, cdata); 
xmlStreamWriter.writeEndElement(); xmlStreamWriter.writeEndDocument(); xmlStreamWriter.close(); xmlStreamWriter = null; xml = unsyncStringWriter.toString(); } } catch (Exception e) { if (_log.isWarnEnabled()) { _log.warn(e, e); } } finally { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(contextClassLoader); } if (xmlStreamReader != null) { try { xmlStreamReader.close(); } catch (Exception e) { } } if (xmlStreamWriter != null) { try { xmlStreamWriter.close(); } catch (Exception e) { } } } return xml; }
From source file:com.liferay.portal.util.LocalizationImpl.java
public String updateLocalization(String xml, String key, String value, String requestedLanguageId, String defaultLanguageId, boolean cdata, boolean localized) { xml = _sanitizeXML(xml);//from w ww . java 2 s .c o m XMLStreamReader xmlStreamReader = null; XMLStreamWriter xmlStreamWriter = null; ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader(); Thread currentThread = Thread.currentThread(); ClassLoader contextClassLoader = currentThread.getContextClassLoader(); try { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(portalClassLoader); } XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml)); String availableLocales = StringPool.BLANK; // Read root node if (xmlStreamReader.hasNext()) { xmlStreamReader.nextTag(); availableLocales = xmlStreamReader.getAttributeValue(null, _AVAILABLE_LOCALES); if (Validator.isNull(availableLocales)) { availableLocales = defaultLanguageId; } if (availableLocales.indexOf(requestedLanguageId) == -1) { availableLocales = StringUtil.add(availableLocales, requestedLanguageId, StringPool.COMMA); } } UnsyncStringWriter unsyncStringWriter = new UnsyncStringWriter(); XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance(); xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(unsyncStringWriter); xmlStreamWriter.writeStartDocument(); xmlStreamWriter.writeStartElement(_ROOT); if (localized) { xmlStreamWriter.writeAttribute(_AVAILABLE_LOCALES, availableLocales); xmlStreamWriter.writeAttribute(_DEFAULT_LOCALE, defaultLanguageId); } _copyNonExempt(xmlStreamReader, xmlStreamWriter, requestedLanguageId, defaultLanguageId, cdata); xmlStreamWriter.writeStartElement(key); if (localized) { xmlStreamWriter.writeAttribute(_LANGUAGE_ID, requestedLanguageId); } if (cdata) { xmlStreamWriter.writeCData(value); } else { xmlStreamWriter.writeCharacters(value); } xmlStreamWriter.writeEndElement(); 
xmlStreamWriter.writeEndElement(); xmlStreamWriter.writeEndDocument(); xmlStreamWriter.close(); xmlStreamWriter = null; xml = unsyncStringWriter.toString(); } catch (Exception e) { if (_log.isWarnEnabled()) { _log.warn(e, e); } } finally { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(contextClassLoader); } if (xmlStreamReader != null) { try { xmlStreamReader.close(); } catch (Exception e) { } } if (xmlStreamWriter != null) { try { xmlStreamWriter.close(); } catch (Exception e) { } } } return xml; }
From source file:com.liferay.portal.util.LocalizationImpl.java
public String getLocalization(String xml, String requestedLanguageId, boolean useDefault) { String value = _getCachedValue(xml, requestedLanguageId, useDefault); if (value != null) { return value; } else {//from ww w . j av a2s . c om value = StringPool.BLANK; } String systemDefaultLanguageId = LocaleUtil.toLanguageId(LocaleUtil.getDefault()); String priorityLanguageId = null; Locale requestedLocale = LocaleUtil.fromLanguageId(requestedLanguageId); if (useDefault && LanguageUtil.isDuplicateLanguageCode(requestedLocale.getLanguage())) { Locale priorityLocale = LanguageUtil.getLocale(requestedLocale.getLanguage()); if (!requestedLanguageId.equals(priorityLanguageId)) { priorityLanguageId = LocaleUtil.toLanguageId(priorityLocale); } } if (!Validator.isXml(xml)) { if (useDefault || requestedLanguageId.equals(systemDefaultLanguageId)) { value = xml; } _setCachedValue(xml, requestedLanguageId, useDefault, value); return value; } XMLStreamReader xmlStreamReader = null; ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader(); Thread currentThread = Thread.currentThread(); ClassLoader contextClassLoader = currentThread.getContextClassLoader(); try { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(portalClassLoader); } XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml)); String defaultLanguageId = StringPool.BLANK; // Skip root node if (xmlStreamReader.hasNext()) { xmlStreamReader.nextTag(); defaultLanguageId = xmlStreamReader.getAttributeValue(null, _DEFAULT_LOCALE); if (Validator.isNull(defaultLanguageId)) { defaultLanguageId = systemDefaultLanguageId; } } // Find specified language and/or default language String defaultValue = StringPool.BLANK; String priorityValue = StringPool.BLANK; while (xmlStreamReader.hasNext()) { int event = xmlStreamReader.next(); if (event == XMLStreamConstants.START_ELEMENT) { String languageId = 
xmlStreamReader.getAttributeValue(null, _LANGUAGE_ID); if (Validator.isNull(languageId)) { languageId = defaultLanguageId; } if (languageId.equals(defaultLanguageId) || languageId.equals(priorityLanguageId) || languageId.equals(requestedLanguageId)) { String text = xmlStreamReader.getElementText(); if (languageId.equals(defaultLanguageId)) { defaultValue = text; } if (languageId.equals(priorityLanguageId)) { priorityValue = text; } if (languageId.equals(requestedLanguageId)) { value = text; } if (Validator.isNotNull(value)) { break; } } } else if (event == XMLStreamConstants.END_DOCUMENT) { break; } } if (useDefault && Validator.isNotNull(priorityLanguageId) && Validator.isNull(value) && Validator.isNotNull(priorityValue)) { value = priorityValue; } if (useDefault && Validator.isNull(value)) { value = defaultValue; } } catch (Exception e) { if (_log.isWarnEnabled()) { _log.warn(e, e); } } finally { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(contextClassLoader); } if (xmlStreamReader != null) { try { xmlStreamReader.close(); } catch (Exception e) { } } } _setCachedValue(xml, requestedLanguageId, useDefault, value); return value; }
From source file:co.turnus.trace.io.XmlTraceReader.java
public XmlTraceReader(File file, TraceFactory factory) { try {//from w w w. j av a2 s . c om XMLInputFactory inputFactory = XMLInputFactory.newInstance(); String extension = TurnusUtils.getExtension(file); if (!extension.equals(TurnusExtension.TRACE) && !extension.equals(TurnusExtension.TRACE_COMPRESSED)) { throw new TurnusRuntimeException("Trace file reader: unsupported extension"); } InputStream stream = new BufferedInputStream(new FileInputStream(file)); if (extension.equals(TurnusExtension.TRACE_COMPRESSED)) { stream = new CompressorStreamFactory().createCompressorInputStream(CompressorStreamFactory.GZIP, stream); } reader = inputFactory.createXMLStreamReader(stream); } catch (Exception e) { throw new TurnusRuntimeException("Error initializing the trace reader", e.getCause()); } this.factory = factory; tempDep = new TempDependency(); tempStep = new TempStep(); }
From source file:com.cedarsoft.serialization.test.performance.XmlParserPerformance.java
private void benchParse(XMLInputFactory inputFactory, @Nonnull String contentSample) throws XMLStreamException { for (int i = 0; i < BIG; i++) { XMLStreamReader parser = inputFactory.createXMLStreamReader(new StringReader(contentSample)); assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag()); assertEquals("fileType", parser.getLocalName()); assertEquals("fileType", parser.getName().getLocalPart()); boolean dependent = Boolean.parseBoolean(parser.getAttributeValue(null, "dependent")); assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag()); assertEquals("id", parser.getName().getLocalPart()); assertEquals(XMLStreamReader.CHARACTERS, parser.next()); String id = parser.getText(); assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag()); assertEquals(XMLStreamReader.START_ELEMENT, parser.nextTag()); assertEquals("extension", parser.getName().getLocalPart()); boolean isDefault = Boolean.parseBoolean(parser.getAttributeValue(null, "default")); String delimiter = parser.getAttributeValue(null, "delimiter"); assertEquals(XMLStreamReader.CHARACTERS, parser.next()); String extension = parser.getText(); assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag()); assertEquals("extension", parser.getName().getLocalPart()); assertEquals(XMLStreamReader.END_ELEMENT, parser.nextTag()); assertEquals("fileType", parser.getName().getLocalPart()); assertEquals(XMLStreamReader.END_DOCUMENT, parser.next()); parser.close();//from w w w . j a va 2 s .c o m FileType type = new FileType(id, new Extension(delimiter, extension, isDefault), dependent); assertNotNull(type); } }
From source file:de.uzk.hki.da.model.ObjectPremisXmlWriter.java
/** * Integrate jhove data./*from w w w .j a va 2 s. co m*/ * * @param jhoveFilePath the jhove file path * @param tab the tab * @throws XMLStreamException the xML stream exception * @author Thomas Kleinke * @throws FileNotFoundException */ private void integrateJhoveData(String jhoveFilePath, int tab) throws XMLStreamException, FileNotFoundException { File jhoveFile = new File(jhoveFilePath); if (!jhoveFile.exists()) throw new FileNotFoundException("file does not exist. " + jhoveFile); FileInputStream inputStream = null; inputStream = new FileInputStream(jhoveFile); XMLInputFactory inputFactory = XMLInputFactory.newInstance(); XMLStreamReader streamReader = inputFactory.createXMLStreamReader(inputStream); boolean textElement = false; while (streamReader.hasNext()) { int event = streamReader.next(); switch (event) { case XMLStreamConstants.START_ELEMENT: writer.writeDTD("\n"); indent(tab); tab++; String prefix = streamReader.getPrefix(); if (prefix != null && !prefix.equals("")) { writer.setPrefix(prefix, streamReader.getNamespaceURI()); writer.writeStartElement(streamReader.getNamespaceURI(), streamReader.getLocalName()); } else writer.writeStartElement(streamReader.getLocalName()); for (int i = 0; i < streamReader.getNamespaceCount(); i++) writer.writeNamespace(streamReader.getNamespacePrefix(i), streamReader.getNamespaceURI(i)); for (int i = 0; i < streamReader.getAttributeCount(); i++) { QName qname = streamReader.getAttributeName(i); String attributeName = qname.getLocalPart(); String attributePrefix = qname.getPrefix(); if (attributePrefix != null && !attributePrefix.equals("")) attributeName = attributePrefix + ":" + attributeName; writer.writeAttribute(attributeName, streamReader.getAttributeValue(i)); } break; case XMLStreamConstants.CHARACTERS: if (!streamReader.isWhiteSpace()) { writer.writeCharacters(streamReader.getText()); textElement = true; } break; case XMLStreamConstants.END_ELEMENT: tab--; if (!textElement) { writer.writeDTD("\n"); indent(tab); } 
writer.writeEndElement(); textElement = false; break; default: break; } } streamReader.close(); try { inputStream.close(); } catch (IOException e) { throw new RuntimeException("Failed to close input stream", e); } }
From source file:com.cedarsoft.serialization.test.performance.XmlParserPerformance.java
/**
 * Javolution variant of the parse benchmark: parses {@code CONTENT_SAMPLE} {@code BIG} times,
 * asserting the expected event sequence and building a {@code FileType} from each pass.
 *
 * <p>The assertions expect a {@code fileType} root element (attribute {@code dependent})
 * containing an {@code id} element with text, followed by an {@code extension} element
 * (attributes {@code default} and {@code delimiter}) with text. Event codes are compared
 * against the {@code XMLStreamReader} constants already imported by the file.</p>
 *
 * @param inputFactory the Javolution StAX factory under test
 * @throws XMLStreamException declared for signature compatibility
 * @throws javolution.xml.stream.XMLStreamException if the Javolution parser reports an error
 */
private void benchParse(javolution.xml.stream.XMLInputFactory inputFactory)
        throws XMLStreamException, javolution.xml.stream.XMLStreamException {

    int iteration = 0;

    while (iteration < BIG) {
        javolution.xml.stream.XMLStreamReader reader = inputFactory
                .createXMLStreamReader(new StringReader(CONTENT_SAMPLE));

        // <fileType dependent="...">
        assertEquals(XMLStreamReader.START_ELEMENT, reader.nextTag());
        assertEquals("fileType", reader.getLocalName().toString());
        String dependentAttribute = reader.getAttributeValue(null, "dependent").toString();
        boolean dependent = Boolean.parseBoolean(dependentAttribute);

        // <id>text</id>
        assertEquals(XMLStreamReader.START_ELEMENT, reader.nextTag());
        assertEquals("id", reader.getLocalName().toString());
        assertEquals(XMLStreamReader.CHARACTERS, reader.next());
        String id = reader.getText().toString();
        assertEquals(XMLStreamReader.END_ELEMENT, reader.nextTag());
        assertEquals("id", reader.getLocalName().toString());

        // <extension default="..." delimiter="...">text</extension>
        assertEquals(XMLStreamReader.START_ELEMENT, reader.nextTag());
        assertEquals("extension", reader.getLocalName().toString());
        String defaultAttribute = reader.getAttributeValue(null, "default").toString();
        boolean isDefault = Boolean.parseBoolean(defaultAttribute);
        String delimiter = reader.getAttributeValue(null, "delimiter").toString();
        assertEquals(XMLStreamReader.CHARACTERS, reader.next());
        String extensionText = reader.getText().toString();
        assertEquals(XMLStreamReader.END_ELEMENT, reader.nextTag());
        assertEquals("extension", reader.getLocalName().toString());

        // </fileType> and end of document
        assertEquals(XMLStreamReader.END_ELEMENT, reader.nextTag());
        assertEquals("fileType", reader.getLocalName().toString());
        assertEquals(XMLStreamReader.END_DOCUMENT, reader.next());

        reader.close();

        Extension extension = new Extension(delimiter, extensionText, isDefault);
        FileType type = new FileType(id, extension, dependent);
        assertNotNull(type);

        iteration++;
    }
}
From source file:com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester.java
private void parse(InfiniteFile f, SourcePojo source) throws MalformedURLException, URISyntaxException { //NOTE: we only ever break out of here because of max docs in standalone mode // (because we don't know how to continue reading) DocumentPojo doc = null;/*from ww w . j av a 2s . co m*/ //Determine File Extension String fileName = f.getName().toString(); int mid = fileName.lastIndexOf("."); String extension = fileName.substring(mid + 1, fileName.length()); //Checked to save processing time long fileTimestamp = (f.getDate() / 1000) * 1000; // (ensure truncated to seconds, since some operation somewhere hear does this...) Date modDate = new Date(fileTimestamp); //XML Data gets placed into MetaData boolean bIsXml = false; boolean bIsJson = false; boolean bIsLineOriented = false; if ((null != source.getFileConfig()) && (null != source.getFileConfig().type)) { extension = source.getFileConfig().type; } bIsXml = extension.equalsIgnoreCase("xml"); bIsJson = extension.equalsIgnoreCase("json"); bIsLineOriented = extension.endsWith("sv"); if (bIsXml || bIsJson || bIsLineOriented) { int debugMaxDocs = Integer.MAX_VALUE; // by default don't set this, it's only for debug mode if (_context.isStandalone()) { // debug mode debugMaxDocs = maxDocsPerCycle; } //fast check to see if the file has changed before processing (or if it never existed) if (needsUpdated_SourceUrl(modDate, f.getUrlString(), source)) { if (0 != modDate.getTime()) { // if it ==0 then sourceUrl doesn't exist at all, no need to delete // This file already exists - in normal/managed mode will re-create // In streaming mode, simple skip over if (_streaming) { return; } //TESTED DocumentPojo docRepresentingSrcUrl = new DocumentPojo(); docRepresentingSrcUrl.setSourceUrl(f.getUrlString()); docRepresentingSrcUrl.setSourceKey(source.getKey()); docRepresentingSrcUrl.setCommunityId(source.getCommunityIds().iterator().next()); sourceUrlsGettingUpdated.add(docRepresentingSrcUrl.getSourceUrl()); 
this.docsToRemove.add(docRepresentingSrcUrl); // (can add documents with just source URL, are treated differently in the core libraries) } SourceFileConfigPojo fileSystem = source.getFileConfig(); if ((null == fileSystem) && (bIsXml || bIsJson)) { fileSystem = new SourceFileConfigPojo(); } XmlToMetadataParser xmlParser = null; JsonToMetadataParser jsonParser = null; String urlType = extension; if (bIsXml) { xmlParser = new XmlToMetadataParser(fileSystem.XmlRootLevelValues, fileSystem.XmlIgnoreValues, fileSystem.XmlSourceName, fileSystem.XmlPrimaryKey, fileSystem.XmlAttributePrefix, fileSystem.XmlPreserveCase, debugMaxDocs); } //TESTED else if (bIsJson) { jsonParser = new JsonToMetadataParser(fileSystem.XmlSourceName, fileSystem.XmlRootLevelValues, fileSystem.XmlPrimaryKey, fileSystem.XmlIgnoreValues, debugMaxDocs); } //TESTED List<DocumentPojo> partials = null; try { if (bIsXml) { XMLStreamReader xmlStreamReader = null; XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty(XMLInputFactory.IS_COALESCING, true); factory.setProperty(XMLInputFactory.SUPPORT_DTD, false); try { xmlStreamReader = factory.createXMLStreamReader(f.getInputStream()); partials = xmlParser.parseDocument(xmlStreamReader); long memUsage = xmlParser.getMemUsage(); _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); } finally { if (null != xmlStreamReader) xmlStreamReader.close(); } } //TESTED else if (bIsJson) { JsonReader jsonReader = null; try { jsonReader = new JsonReader(new InputStreamReader(f.getInputStream(), "UTF-8")); jsonReader.setLenient(true); partials = jsonParser.parseDocument(jsonReader); long memUsage = jsonParser.getMemUsage(); _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); } finally { if (null != jsonReader) jsonReader.close(); } } //TESTED else if (bIsLineOriented) { // Just generate a document for every line BufferedReader lineReader = null; try { lineReader = new BufferedReader(new InputStreamReader(f.getInputStream(), "UTF-8")); 
CsvToMetadataParser lineParser = new CsvToMetadataParser(debugMaxDocs); partials = lineParser.parseDocument(lineReader, source); long memUsage = lineParser.getMemUsage(); _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); } finally { if (null != lineReader) lineReader.close(); } } //TESTED MessageDigest md5 = null; // (generates unique urls if the user doesn't below) try { md5 = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { // Do nothing, unlikely to happen... } int nIndex = 0; int numPartials = partials.size(); for (DocumentPojo doctoAdd : partials) { nIndex++; doctoAdd.setSource(source.getTitle()); doctoAdd.setSourceKey(source.getKey()); doctoAdd.setMediaType(source.getMediaType()); doctoAdd.setModified(new Date(fileTimestamp)); doctoAdd.setCreated(new Date()); if (null == doctoAdd.getUrl()) { // Can be set in the parser or here doctoAdd.setHasDefaultUrl(true); // (ie cannot occur in a different src URL) if (1 == numPartials) { String urlString = f.getUrlString(); if (urlString.endsWith(urlType)) { doctoAdd.setUrl(urlString); } else { doctoAdd.setUrl( new StringBuffer(urlString).append('.').append(urlType).toString()); } // (we always set sourceUrl as the true url of the file, so want to differentiate the URL with // some useful information) } else if (null == doctoAdd.getMetadata()) { // Line oriented case doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/").append(nIndex) .append('.').append(urlType).toString()); } else { if (null == md5) { // Will never happen, MD5 always exists doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/") .append(doctoAdd.getMetadata().hashCode()).append('.').append(urlType) .toString()); } else { // This is the standard call if the XML parser has not been configured to build the URL doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/") .append(DigestUtils.md5Hex(doctoAdd.getMetadata().toString())) .append('.').append(urlType).toString()); } } //TESTED } 
doctoAdd.setTitle(f.getName().toString()); doctoAdd.setPublishedDate(new Date(fileTimestamp)); doctoAdd.setSourceUrl(f.getUrlString()); // Always add to files because I'm deleting the source URL files.add(doctoAdd); } //TESTED } catch (XMLStreamException e1) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); } catch (FactoryConfigurationError e1) { errors++; _context.getHarvestStatus().logMessage(e1.getMessage(), true); } catch (IOException e1) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); } catch (Exception e1) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); } } //(end if needs updated) } else //Tika supports Excel,Word,Powerpoint,Visio, & Outlook Documents { // (This dedup tells me if it's an add/update vs ignore - qr.isDuplicate higher up tells me if I need to add or update) if (needsUpdated_Url(modDate, f.getUrlString(), source)) { Metadata metadata = null; InputStream in = null; try { doc = new DocumentPojo(); // Create a tika object (first time only) if (null == _tika) { this.initializeTika(_context, source); } // BUGGERY // NEED TO LIKELY SET LIMIT TO BE 30MB or 50MB and BYPASS ANYTHING OVER THAT BELOW IS THE CODE TO DO THAT // tika.setMaxStringLength(30*1024*1024); // Disable the string length limit _tika.setMaxStringLength(-1); //input = new FileInputStream(new File(resourceLocation)); // Create a metadata object to contain the metadata metadata = new Metadata(); // Parse the file and get the text of the file doc.setSource(source.getTitle()); doc.setSourceKey(source.getKey()); doc.setMediaType(source.getMediaType()); String fullText = ""; in = f.getInputStream(); try { if (null == _tikaOutputFormat) { // text only fullText = _tika.parseToString(in, metadata); } //TESTED else { // XML/HMTL _tika.getParser().parse(in, _tikaOutputFormat, metadata, 
_tikaOutputParseContext); fullText = _tikaXmlFormatWriter.toString(); _tikaXmlFormatWriter.getBuffer().setLength(0); } //TESTED } finally { if (null != in) in.close(); } int descCap = 500; doc.setFullText(fullText); if (descCap > fullText.length()) { descCap = fullText.length(); } doc.setDescription(fullText.substring(0, descCap)); doc.setModified(new Date(fileTimestamp)); doc.setCreated(new Date()); doc.setUrl(f.getUrlString()); doc.setTitle(f.getName().toString()); doc.setPublishedDate(new Date(fileTimestamp)); long memUsage = (250L * (doc.getFullText().length() + doc.getDescription().length())) / 100L; // 25% overhead, 2x for string->byte _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); // If the metadata contains a more plausible date then use that try { String title = metadata.get(Metadata.TITLE); if (null != title) { doc.setTitle(title); } } catch (Exception e) { // Fine just carry on } try { Date date = metadata.getDate(Metadata.CREATION_DATE); // MS Word if (null != date) { doc.setPublishedDate(date); } else { date = metadata.getDate(Metadata.DATE); // Dublin if (null != date) { doc.setPublishedDate(date); } else { date = metadata.getDate(Metadata.ORIGINAL_DATE); if (null != date) { doc.setPublishedDate(date); } } } } catch (Exception e) { // Fine just carry on } //TESTED // If the metadata contains a geotag then apply that: try { String lat = metadata.get(Metadata.LATITUDE); String lon = metadata.get(Metadata.LONGITUDE); if ((null != lat) && (null != lon)) { GeoPojo gt = new GeoPojo(); gt.lat = Double.parseDouble(lat); gt.lon = Double.parseDouble(lon); doc.setDocGeo(gt); } } catch (Exception e) { // Fine just carry on } // Save the entire metadata: doc.addToMetadata("_FILE_METADATA_", metadata); for (ObjectId communityId : source.getCommunityIds()) { doc.setCommunityId(communityId); } files.add(doc); // Close the input stream in.close(); in = null; //TESTED } catch (SmbException e) { errors++; _context.getHarvestStatus() 
.logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } catch (MalformedURLException e) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } catch (UnknownHostException e) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } catch (IOException e) { errors++; _context.getHarvestStatus().logMessage(e.getMessage(), true); } catch (TikaException e) { errors++; _context.getHarvestStatus().logMessage(e.getMessage(), true); } catch (Exception e) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } finally { // Close the input stream if an error occurs if (null != in) { try { in.close(); } catch (IOException e) { // All good, do nothing } } } // end exception handling } // end dedup check } // end XML vs "office" app //DEBUG //System.out.println("FILE=" + files.size() + " / MEM=" + _memUsage + " VS " + Runtime.getRuntime().totalMemory()); }
From source file:com.clustercontrol.agent.winevent.WinEventMonitor.java
/** * XMLStAX???EventLogRecord????/*from w w w. jav a 2 s . c o m*/ * @param eventXmlStream * @return EventLogRecord? */ private ArrayList<EventLogRecord> parseEventXML(InputStream eventXmlStream) { ArrayList<EventLogRecord> eventlogs = new ArrayList<EventLogRecord>(); try { XMLInputFactory xmlif = XMLInputFactory.newInstance(); /** * OpenJDK7/OracleJDK7??"]"?2????????????????????????????? * ?XML?????????OpenJDK7/OracleJDK7???????/?????????? * URL??????????????? * * URL * http://docs.oracle.com/javase/jp/6/api/javax/xml/stream/XMLStreamReader.html#next() */ String xmlCoalescingKey = "javax.xml.stream.isCoalescing";// TODO JRE??????????????????? if (m_log.isDebugEnabled()) { m_log.debug(xmlCoalescingKey + " = true"); } xmlif.setProperty(xmlCoalescingKey, true); XMLStreamReader xmlr = xmlif.createXMLStreamReader(eventXmlStream); while (xmlr.hasNext()) { switch (xmlr.getEventType()) { case XMLStreamConstants.START_ELEMENT: m_log.trace("EventType : XMLStreamConstants.START_ELEMENT"); String localName = xmlr.getLocalName(); m_log.trace("local name : " + localName); if ("Event".equals(localName)) { EventLogRecord eventlog = new EventLogRecord(); eventlogs.add(eventlog); m_log.debug("create new EventLogRecord"); } else { String attrLocalName = null; String attrValue = null; if (xmlr.getAttributeCount() != 0) { attrLocalName = xmlr.getAttributeLocalName(0); attrValue = xmlr.getAttributeValue(0); m_log.trace("attribute local name : " + attrLocalName); m_log.trace("attribute local value : " + attrValue); } if ("Provider".equals(localName)) { if ("Name".equals(attrLocalName)) { m_log.trace("target value : " + attrValue); EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); eventlog.setProviderName(attrValue); m_log.debug("set ProviderName : " + eventlog.getProviderName()); } } // Get-WinEvent/wevtutil.exe else if ("TimeCreated".equals(localName) && "SystemTime".equals(attrLocalName)) { m_log.trace("target value : " + attrValue); // 
"yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'"???S???????????? String formatedDateString = attrValue.replaceAll("\\..*Z", ""); m_log.trace("formatted target value : " + formatedDateString); DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); sdf.setTimeZone(TimeZone.getTimeZone("UTC")); EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); ; try { eventlog.setTimeCreated(sdf.parse(formatedDateString)); } catch (ParseException e) { // do nothing m_log.error("set TimeCreated Error", e); } m_log.debug("set TimeCreated : " + eventlog.getTimeCreated()); } // Get-EventLog if ("TimeGenerated".equals(localName) && "SystemTime".equals(attrLocalName)) { m_log.trace("target value : " + attrValue); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss'Z'"); sdf.setTimeZone(HinemosTime.getTimeZone()); EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); ; try { eventlog.setTimeCreated(sdf.parse(attrValue)); } catch (ParseException e) { // do nothing m_log.error("set TimeCreated Error", e); } m_log.debug("set TimeCreated : " + eventlog.getTimeCreated()); } else { targetProperty = localName; m_log.trace("target property : " + targetProperty); } } break; case XMLStreamConstants.SPACE: case XMLStreamConstants.CHARACTERS: m_log.trace("EventType : XMLStreamConstants.CHARACTERS, length=" + xmlr.getTextLength()); if (targetProperty != null) { try { EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); ; if ("EventID".equals(targetProperty)) { eventlog.setId(Integer.parseInt(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set EventID : " + eventlog.getId()); } // Get-WinEvent/wevtutil.exe else if ("Level".equals(targetProperty)) { if (eventlog.getLevel() == WinEventConstant.UNDEFINED) { eventlog.setLevel(Integer.parseInt(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set Level : " + eventlog.getLevel()); } } else if ("Task".equals(targetProperty)) { 
if (eventlog.getTask() == WinEventConstant.UNDEFINED) { eventlog.setTask(Integer.parseInt(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set Task : " + eventlog.getTask()); } } else if ("Keywords".equals(targetProperty)) { // TODO ????????0x8080000000000000 //eventlog.setKeywords(Long.decode(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); //m_log.debug("set Keywords : " + eventlog.getKeywords()); } else if ("EventRecordId".equals(targetProperty)) { eventlog.setRecordId(Long.parseLong(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set RecordId : " + eventlog.getRecordId()); } else if ("Channel".equals(targetProperty)) { eventlog.setLogName(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength())); m_log.debug("set LogName : " + eventlog.getLogName()); } else if ("Computer".equals(targetProperty)) { eventlog.setMachineName(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength())); m_log.debug("set MachineName : " + eventlog.getMachineName()); } else if ("Message".equals(targetProperty)) { String message = new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()); message = message.replaceAll(tmpReturnCode, "\r\n"); message = message.replaceAll(tmpLtCode, "<"); message = message.replaceAll(tmpGtCode, ">"); eventlog.setMessage(message); m_log.debug("set Message : " + eventlog.getMessage()); } else if ("Data".equals(targetProperty)) { String data = new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()); eventlog.getData().add(data); m_log.debug("set Data : " + data); } else { m_log.debug("unknown target property : " + targetProperty); } } catch (NumberFormatException e) { m_log.debug("number parse error", e); } } targetProperty = null; break; default: // break; } xmlr.next(); } xmlr.close(); } catch (XMLStreamException e) { m_log.warn("parseEvent() 
xmlstream error", e); } return eventlogs; }
From source file:gima.neo4j.testsuite.osmcheck.OSMImporter.java
public void importFile(OSMWriter<?> osmWriter, String dataset, boolean allPoints, Charset charset) throws IOException, XMLStreamException { System.out.println("Importing with osm-writer: " + osmWriter); osmWriter.getOrCreateOSMDataset(layerName); osm_dataset = osmWriter.getDatasetId(); long startTime = System.currentTimeMillis(); long[] times = new long[] { 0L, 0L, 0L, 0L }; javax.xml.stream.XMLInputFactory factory = javax.xml.stream.XMLInputFactory.newInstance(); CountedFileReader reader = new CountedFileReader(dataset, charset); javax.xml.stream.XMLStreamReader parser = factory.createXMLStreamReader(reader); int countXMLTags = 0; beginProgressMonitor(100);//from w w w . j av a 2s. c o m setLogContext(dataset); boolean startedWays = false; boolean startedRelations = false; try { ArrayList<String> currentXMLTags = new ArrayList<String>(); int depth = 0; Map<String, Object> wayProperties = null; ArrayList<Long> wayNodes = new ArrayList<Long>(); Map<String, Object> relationProperties = null; ArrayList<Map<String, Object>> relationMembers = new ArrayList<Map<String, Object>>(); LinkedHashMap<String, Object> currentNodeTags = new LinkedHashMap<String, Object>(); while (true) { updateProgressMonitor(reader.getPercentRead()); incrLogContext(); int event = parser.next(); if (event == javax.xml.stream.XMLStreamConstants.END_DOCUMENT) { break; } switch (event) { case javax.xml.stream.XMLStreamConstants.START_ELEMENT: currentXMLTags.add(depth, parser.getLocalName()); String tagPath = currentXMLTags.toString(); if (tagPath.equals("[osm]")) { osmWriter.setDatasetProperties(extractProperties(parser)); } else if (tagPath.equals("[osm, bounds]")) { osmWriter.addOSMBBox(extractProperties("bbox", parser)); } else if (tagPath.equals("[osm, node]")) { // <node id="269682538" lat="56.0420950" lon="12.9693483" user="sanna" uid="31450" visible="true" version="1" changeset="133823" timestamp="2008-06-11T12:36:28Z"/> osmWriter.createOSMNode(extractProperties("node", parser)); } else if 
(tagPath.equals("[osm, way]")) { // <way id="27359054" user="spull" uid="61533" visible="true" version="8" changeset="4707351" timestamp="2010-05-15T15:39:57Z"> if (!startedWays) { startedWays = true; times[0] = System.currentTimeMillis(); osmWriter.optimize(); times[1] = System.currentTimeMillis(); } wayProperties = extractProperties("way", parser); wayNodes.clear(); } else if (tagPath.equals("[osm, way, nd]")) { Map<String, Object> properties = extractProperties(parser); wayNodes.add(Long.parseLong(properties.get("ref").toString())); } else if (tagPath.endsWith("tag]")) { Map<String, Object> properties = extractProperties(parser); currentNodeTags.put(properties.get("k").toString(), properties.get("v").toString()); } else if (tagPath.equals("[osm, relation]")) { // <relation id="77965" user="Grillo" uid="13957" visible="true" version="24" changeset="5465617" timestamp="2010-08-11T19:25:46Z"> if (!startedRelations) { startedRelations = true; times[2] = System.currentTimeMillis(); osmWriter.optimize(); times[3] = System.currentTimeMillis(); } relationProperties = extractProperties("relation", parser); relationMembers.clear(); } else if (tagPath.equals("[osm, relation, member]")) { relationMembers.add(extractProperties(parser)); } if (startedRelations) { if (countXMLTags < 10) { log("Starting tag at depth " + depth + ": " + currentXMLTags.get(depth) + " - " + currentXMLTags.toString()); for (int i = 0; i < parser.getAttributeCount(); i++) { log("\t" + currentXMLTags.toString() + ": " + parser.getAttributeLocalName(i) + "[" + parser.getAttributeNamespace(i) + "," + parser.getAttributePrefix(i) + "," + parser.getAttributeType(i) + "," + "] = " + parser.getAttributeValue(i)); } } countXMLTags++; } depth++; break; case javax.xml.stream.XMLStreamConstants.END_ELEMENT: if (currentXMLTags.toString().equals("[osm, node]")) { osmWriter.addOSMNodeTags(allPoints, currentNodeTags); } else if (currentXMLTags.toString().equals("[osm, way]")) { osmWriter.createOSMWay(wayProperties, 
wayNodes, currentNodeTags); } else if (currentXMLTags.toString().equals("[osm, relation]")) { osmWriter.createOSMRelation(relationProperties, relationMembers, currentNodeTags); } depth--; currentXMLTags.remove(depth); // log("Ending tag at depth "+depth+": "+currentTags.get(depth)); break; default: break; } } } finally { endProgressMonitor(); parser.close(); osmWriter.finish(); this.osm_dataset = osmWriter.getDatasetId(); } describeTimes(startTime, times); osmWriter.describeMissing(); osmWriter.describeLoaded(); long stopTime = System.currentTimeMillis(); log("info | Elapsed time in seconds: " + (1.0 * (stopTime - startTime) / 1000.0)); stats.dumpGeomStats(); stats.printTagStats(); }