List of usage examples for javax.xml.stream XMLInputFactory newInstance
public static XMLInputFactory newInstance() throws FactoryConfigurationError
From source file:com.liferay.portal.util.LocalizationImpl.java
public String updateLocalization(String xml, String key, String value, String requestedLanguageId, String defaultLanguageId, boolean cdata, boolean localized) { xml = _sanitizeXML(xml);//w w w. j a va 2 s .co m XMLStreamReader xmlStreamReader = null; XMLStreamWriter xmlStreamWriter = null; ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader(); Thread currentThread = Thread.currentThread(); ClassLoader contextClassLoader = currentThread.getContextClassLoader(); try { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(portalClassLoader); } XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml)); String availableLocales = StringPool.BLANK; // Read root node if (xmlStreamReader.hasNext()) { xmlStreamReader.nextTag(); availableLocales = xmlStreamReader.getAttributeValue(null, _AVAILABLE_LOCALES); if (Validator.isNull(availableLocales)) { availableLocales = defaultLanguageId; } if (availableLocales.indexOf(requestedLanguageId) == -1) { availableLocales = StringUtil.add(availableLocales, requestedLanguageId, StringPool.COMMA); } } UnsyncStringWriter unsyncStringWriter = new UnsyncStringWriter(); XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance(); xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(unsyncStringWriter); xmlStreamWriter.writeStartDocument(); xmlStreamWriter.writeStartElement(_ROOT); if (localized) { xmlStreamWriter.writeAttribute(_AVAILABLE_LOCALES, availableLocales); xmlStreamWriter.writeAttribute(_DEFAULT_LOCALE, defaultLanguageId); } _copyNonExempt(xmlStreamReader, xmlStreamWriter, requestedLanguageId, defaultLanguageId, cdata); xmlStreamWriter.writeStartElement(key); if (localized) { xmlStreamWriter.writeAttribute(_LANGUAGE_ID, requestedLanguageId); } if (cdata) { xmlStreamWriter.writeCData(value); } else { xmlStreamWriter.writeCharacters(value); } xmlStreamWriter.writeEndElement(); 
xmlStreamWriter.writeEndElement(); xmlStreamWriter.writeEndDocument(); xmlStreamWriter.close(); xmlStreamWriter = null; xml = unsyncStringWriter.toString(); } catch (Exception e) { if (_log.isWarnEnabled()) { _log.warn(e, e); } } finally { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(contextClassLoader); } if (xmlStreamReader != null) { try { xmlStreamReader.close(); } catch (Exception e) { } } if (xmlStreamWriter != null) { try { xmlStreamWriter.close(); } catch (Exception e) { } } } return xml; }
From source file:com.clustercontrol.agent.winevent.WinEventMonitor.java
/** * XMLStAX???EventLogRecord????//from w ww . j av a 2 s . c om * @param eventXmlStream * @return EventLogRecord? */ private ArrayList<EventLogRecord> parseEventXML(InputStream eventXmlStream) { ArrayList<EventLogRecord> eventlogs = new ArrayList<EventLogRecord>(); try { XMLInputFactory xmlif = XMLInputFactory.newInstance(); /** * OpenJDK7/OracleJDK7??"]"?2????????????????????????????? * ?XML?????????OpenJDK7/OracleJDK7???????/?????????? * URL??????????????? * * URL * http://docs.oracle.com/javase/jp/6/api/javax/xml/stream/XMLStreamReader.html#next() */ String xmlCoalescingKey = "javax.xml.stream.isCoalescing";// TODO JRE??????????????????? if (m_log.isDebugEnabled()) { m_log.debug(xmlCoalescingKey + " = true"); } xmlif.setProperty(xmlCoalescingKey, true); XMLStreamReader xmlr = xmlif.createXMLStreamReader(eventXmlStream); while (xmlr.hasNext()) { switch (xmlr.getEventType()) { case XMLStreamConstants.START_ELEMENT: m_log.trace("EventType : XMLStreamConstants.START_ELEMENT"); String localName = xmlr.getLocalName(); m_log.trace("local name : " + localName); if ("Event".equals(localName)) { EventLogRecord eventlog = new EventLogRecord(); eventlogs.add(eventlog); m_log.debug("create new EventLogRecord"); } else { String attrLocalName = null; String attrValue = null; if (xmlr.getAttributeCount() != 0) { attrLocalName = xmlr.getAttributeLocalName(0); attrValue = xmlr.getAttributeValue(0); m_log.trace("attribute local name : " + attrLocalName); m_log.trace("attribute local value : " + attrValue); } if ("Provider".equals(localName)) { if ("Name".equals(attrLocalName)) { m_log.trace("target value : " + attrValue); EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); eventlog.setProviderName(attrValue); m_log.debug("set ProviderName : " + eventlog.getProviderName()); } } // Get-WinEvent/wevtutil.exe else if ("TimeCreated".equals(localName) && "SystemTime".equals(attrLocalName)) { m_log.trace("target value : " + attrValue); // 
"yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'"???S???????????? String formatedDateString = attrValue.replaceAll("\\..*Z", ""); m_log.trace("formatted target value : " + formatedDateString); DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); sdf.setTimeZone(TimeZone.getTimeZone("UTC")); EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); ; try { eventlog.setTimeCreated(sdf.parse(formatedDateString)); } catch (ParseException e) { // do nothing m_log.error("set TimeCreated Error", e); } m_log.debug("set TimeCreated : " + eventlog.getTimeCreated()); } // Get-EventLog if ("TimeGenerated".equals(localName) && "SystemTime".equals(attrLocalName)) { m_log.trace("target value : " + attrValue); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss'Z'"); sdf.setTimeZone(HinemosTime.getTimeZone()); EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); ; try { eventlog.setTimeCreated(sdf.parse(attrValue)); } catch (ParseException e) { // do nothing m_log.error("set TimeCreated Error", e); } m_log.debug("set TimeCreated : " + eventlog.getTimeCreated()); } else { targetProperty = localName; m_log.trace("target property : " + targetProperty); } } break; case XMLStreamConstants.SPACE: case XMLStreamConstants.CHARACTERS: m_log.trace("EventType : XMLStreamConstants.CHARACTERS, length=" + xmlr.getTextLength()); if (targetProperty != null) { try { EventLogRecord eventlog = eventlogs.get(eventlogs.size() - 1); ; if ("EventID".equals(targetProperty)) { eventlog.setId(Integer.parseInt(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set EventID : " + eventlog.getId()); } // Get-WinEvent/wevtutil.exe else if ("Level".equals(targetProperty)) { if (eventlog.getLevel() == WinEventConstant.UNDEFINED) { eventlog.setLevel(Integer.parseInt(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set Level : " + eventlog.getLevel()); } } else if ("Task".equals(targetProperty)) { 
if (eventlog.getTask() == WinEventConstant.UNDEFINED) { eventlog.setTask(Integer.parseInt(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set Task : " + eventlog.getTask()); } } else if ("Keywords".equals(targetProperty)) { // TODO ????????0x8080000000000000 //eventlog.setKeywords(Long.decode(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); //m_log.debug("set Keywords : " + eventlog.getKeywords()); } else if ("EventRecordId".equals(targetProperty)) { eventlog.setRecordId(Long.parseLong(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()))); m_log.debug("set RecordId : " + eventlog.getRecordId()); } else if ("Channel".equals(targetProperty)) { eventlog.setLogName(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength())); m_log.debug("set LogName : " + eventlog.getLogName()); } else if ("Computer".equals(targetProperty)) { eventlog.setMachineName(new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength())); m_log.debug("set MachineName : " + eventlog.getMachineName()); } else if ("Message".equals(targetProperty)) { String message = new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()); message = message.replaceAll(tmpReturnCode, "\r\n"); message = message.replaceAll(tmpLtCode, "<"); message = message.replaceAll(tmpGtCode, ">"); eventlog.setMessage(message); m_log.debug("set Message : " + eventlog.getMessage()); } else if ("Data".equals(targetProperty)) { String data = new String(xmlr.getTextCharacters(), xmlr.getTextStart(), xmlr.getTextLength()); eventlog.getData().add(data); m_log.debug("set Data : " + data); } else { m_log.debug("unknown target property : " + targetProperty); } } catch (NumberFormatException e) { m_log.debug("number parse error", e); } } targetProperty = null; break; default: // break; } xmlr.next(); } xmlr.close(); } catch (XMLStreamException e) { m_log.warn("parseEvent() 
xmlstream error", e); } return eventlogs; }
From source file:de.uzk.hki.da.sb.SIPFactoryTest.java
/** * @param premis/*from ww w. jav a 2s. c om*/ * @param publicRights */ private boolean checkPremisFilePubStartDate(File premis, ContractRights rights) { XMLInputFactory inputFactory = XMLInputFactory.newInstance(); XMLStreamReader streamReader; try { streamReader = inputFactory.createXMLStreamReader(new FileInputStream(premis)); while (streamReader.hasNext()) { int event = streamReader.next(); switch (event) { case XMLStreamConstants.START_ELEMENT: if (streamReader.getLocalName().equals("startDate")) { String startDate = streamReader.getElementText().substring(0, 10); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); String publicDate = sdf.format(rights.getPublicRights().getStartDate()); if (startDate.trim().equals(publicDate.trim())) { return true; } } default: break; } } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (XMLStreamException e) { e.printStackTrace(); } return false; }
From source file:org.opennms.netmgt.ackd.readers.HypericAckProcessor.java
/** * <p>parseHypericAlerts</p> * * @param reader a {@link java.io.Reader} object. * @return a {@link java.util.List} object. * @throws javax.xml.bind.JAXBException if any. * @throws javax.xml.stream.XMLStreamException if any. *///w ww .j a va2s. c o m public static List<HypericAlertStatus> parseHypericAlerts(Reader reader) throws JAXBException, XMLStreamException { List<HypericAlertStatus> retval = new ArrayList<HypericAlertStatus>(); // Instantiate a JAXB context to parse the alert status JAXBContext context = JAXBContext .newInstance(new Class[] { HypericAlertStatuses.class, HypericAlertStatus.class }); XMLInputFactory xmlif = XMLInputFactory.newInstance(); XMLEventReader xmler = xmlif.createXMLEventReader(reader); EventFilter filter = new EventFilter() { @Override public boolean accept(XMLEvent event) { return event.isStartElement(); } }; XMLEventReader xmlfer = xmlif.createFilteredReader(xmler, filter); // Read up until the beginning of the root element StartElement startElement = (StartElement) xmlfer.nextEvent(); // Fetch the root element name for {@link HypericAlertStatus} objects String rootElementName = context.createJAXBIntrospector().getElementName(new HypericAlertStatuses()) .getLocalPart(); if (rootElementName.equals(startElement.getName().getLocalPart())) { Unmarshaller unmarshaller = context.createUnmarshaller(); // Use StAX to pull parse the incoming alert statuses while (xmlfer.peek() != null) { Object object = unmarshaller.unmarshal(xmler); if (object instanceof HypericAlertStatus) { HypericAlertStatus alertStatus = (HypericAlertStatus) object; retval.add(alertStatus); } } } else { // Try to pull in the HTTP response to give the user a better idea of what went wrong StringBuffer errorContent = new StringBuffer(); LineNumberReader lineReader = new LineNumberReader(reader); try { String line; while (true) { line = lineReader.readLine(); if (line == null) { break; } else { errorContent.append(line.trim()); } } } catch (IOException e) { 
errorContent.append("Exception while trying to print out message content: " + e.getMessage()); } // Throw an exception and include the erroneous HTTP response in the exception text throw new JAXBException("Found wrong root element in Hyperic XML document, expected: \"" + rootElementName + "\", found \"" + startElement.getName().getLocalPart() + "\"\n" + errorContent.toString()); } return retval; }
From source file:iTests.framework.utils.WebuiTestUtils.java
/** * retrieves the license key from gigaspaces installation license key * @throws javax.xml.stream.FactoryConfigurationError * @throws javax.xml.stream.XMLStreamException * @throws java.io.IOException/*w w w. ja va 2s.co m*/ */ public String getLicenseKey() throws XMLStreamException, FactoryConfigurationError, IOException { String licensekey = LICENSE_PATH.replace("lib/required/../../", ""); InputStream is = new FileInputStream(new File(licensekey)); XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(is); int element; while (true) { element = parser.next(); if (element == XMLStreamReader.START_ELEMENT) { if (parser.getName().toString().equals("licensekey")) { return parser.getElementText(); } } if (element == XMLStreamReader.END_DOCUMENT) { break; } } return null; }
From source file:com.liferay.portal.util.LocalizationImpl.java
private String _getRootAttribute(String xml, String name, String defaultValue) { String value = null;//from ww w. j a v a 2 s. com XMLStreamReader xmlStreamReader = null; ClassLoader portalClassLoader = PortalClassLoaderUtil.getClassLoader(); Thread currentThread = Thread.currentThread(); ClassLoader contextClassLoader = currentThread.getContextClassLoader(); try { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(portalClassLoader); } XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); xmlStreamReader = xmlInputFactory.createXMLStreamReader(new UnsyncStringReader(xml)); if (xmlStreamReader.hasNext()) { xmlStreamReader.nextTag(); value = xmlStreamReader.getAttributeValue(null, name); } } catch (Exception e) { if (_log.isWarnEnabled()) { _log.warn(e, e); } } finally { if (contextClassLoader != portalClassLoader) { currentThread.setContextClassLoader(contextClassLoader); } if (xmlStreamReader != null) { try { xmlStreamReader.close(); } catch (Exception e) { } } } if (Validator.isNull(value)) { value = defaultValue; } return value; }
From source file:ca.phon.session.io.xml.v12.XMLSessionReader_v12.java
@Override public boolean canRead(File file) throws IOException { // open file and make sure the first // element is 'session' with the correct version boolean canRead = false; // use StAX to read only first element // create StAX reader XMLInputFactory factory = XMLInputFactory.newInstance(); XMLEventReader reader = null; try (FileInputStream source = new FileInputStream(file)) { //BufferedReader in = new BufferedReader(new InputStreamReader(source, "UTF-8")); XMLEventReader xmlReader = factory.createXMLEventReader(source, "UTF-8"); reader = factory.createFilteredReader(xmlReader, new XMLWhitespaceFilter()); XMLEvent evt;/*from ww w .ja v a2s . co m*/ while (!(evt = reader.nextEvent()).isStartElement()) ; canRead = evt.asStartElement().getName().getLocalPart().equals("session") && evt.asStartElement().getAttributeByName(new QName("version")).getValue().equals("PB1.2"); } catch (XMLStreamException e) { throw new IOException(e); } return canRead; }
From source file:com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester.java
/**
 * Harvests one file into zero or more {@code DocumentPojo}s, which are appended to
 * {@code files}.
 *
 * <p>The file type is the text after the last '.' of the file name, unless
 * {@code source.getFileConfig().type} is set, in which case that value is used instead.
 *
 * <p><b>Structured files</b> (type equals "xml" or "json" ignoring case, or ends with "sv"):
 * processed only when {@code needsUpdated_SourceUrl} says so. If the modification date passed
 * to that check is non-zero afterwards, the method returns immediately in streaming mode;
 * otherwise a placeholder document carrying just the source URL is queued in
 * {@code docsToRemove} and its URL recorded in {@code sourceUrlsGettingUpdated}. The content is
 * then parsed by {@code XmlToMetadataParser} (via a coalescing StAX reader with DTD support
 * turned off), {@code JsonToMetadataParser} or {@code CsvToMetadataParser}. Each resulting
 * partial document is stamped with the source's title/key/media type and the file's timestamp.
 * If the parser left the URL unset, one is derived: the file URL itself (with the type appended
 * when it does not already end with it) for a single result, {@code <fileUrl>/<index>.<type>}
 * when the document has no metadata, and otherwise {@code <fileUrl>/<md5 of metadata>.<type>}.
 * The {@code MessageDigest} instance obtained here is only used as a null check; the hash
 * itself comes from {@code DigestUtils.md5Hex}.
 *
 * <p><b>All other files</b>: processed only when {@code needsUpdated_Url} says so. Text is
 * extracted with Tika (plain text when {@code _tikaOutputFormat} is null, otherwise through
 * the configured output handler), the description is the first 500 characters of that text,
 * and title, published date and geotag are overridden from the Tika metadata when present.
 * NOTE(review): the loop over {@code source.getCommunityIds()} calls {@code setCommunityId}
 * once per id, so only the last id iterated remains set — confirm this is intended.
 *
 * <p>In debug/standalone mode ({@code _context.isStandalone()}) the parsers are capped at
 * {@code maxDocsPerCycle} documents. Estimated memory use is added to {@code _memUsage} and
 * {@code _totalMemUsage}.
 *
 * <p>Exceptions raised inside the two parsing {@code try} blocks are not propagated: each
 * one increments {@code errors} and is logged through {@code _context.getHarvestStatus()}.
 *
 * @param f      the file to harvest
 * @param source the source configuration the file belongs to
 * @throws MalformedURLException declared by the signature; NOTE(review): presumably raised by
 *         the URL accessors used outside the try blocks — confirm
 * @throws URISyntaxException declared by the signature; NOTE(review): see above — confirm
 */
private void parse(InfiniteFile f, SourcePojo source) throws MalformedURLException, URISyntaxException { //NOTE: we only ever break out of here because of max docs in standalone mode // (because we don't know how to continue reading) DocumentPojo doc = null;// ww w. j av a 2 s .c om //Determine File Extension String fileName = f.getName().toString(); int mid = fileName.lastIndexOf("."); String extension = fileName.substring(mid + 1, fileName.length()); //Checked to save processing time long fileTimestamp = (f.getDate() / 1000) * 1000; // (ensure truncated to seconds, since some operation somewhere hear does this...) Date modDate = new Date(fileTimestamp); //XML Data gets placed into MetaData boolean bIsXml = false; boolean bIsJson = false; boolean bIsLineOriented = false; if ((null != source.getFileConfig()) && (null != source.getFileConfig().type)) { extension = source.getFileConfig().type; } bIsXml = extension.equalsIgnoreCase("xml"); bIsJson = extension.equalsIgnoreCase("json"); bIsLineOriented = extension.endsWith("sv"); if (bIsXml || bIsJson || bIsLineOriented) { int debugMaxDocs = Integer.MAX_VALUE; // by default don't set this, it's only for debug mode if (_context.isStandalone()) { // debug mode debugMaxDocs = maxDocsPerCycle; } //fast check to see if the file has changed before processing (or if it never existed) if (needsUpdated_SourceUrl(modDate, f.getUrlString(), source)) { if (0 != modDate.getTime()) { // if it ==0 then sourceUrl doesn't exist at all, no need to delete // This file already exists - in normal/managed mode will re-create // In streaming mode, simple skip over if (_streaming) { return; } //TESTED DocumentPojo docRepresentingSrcUrl = new DocumentPojo(); docRepresentingSrcUrl.setSourceUrl(f.getUrlString()); docRepresentingSrcUrl.setSourceKey(source.getKey()); docRepresentingSrcUrl.setCommunityId(source.getCommunityIds().iterator().next()); sourceUrlsGettingUpdated.add(docRepresentingSrcUrl.getSourceUrl()); 
this.docsToRemove.add(docRepresentingSrcUrl); // (can add documents with just source URL, are treated differently in the core libraries) } SourceFileConfigPojo fileSystem = source.getFileConfig(); if ((null == fileSystem) && (bIsXml || bIsJson)) { fileSystem = new SourceFileConfigPojo(); } XmlToMetadataParser xmlParser = null; JsonToMetadataParser jsonParser = null; String urlType = extension; if (bIsXml) { xmlParser = new XmlToMetadataParser(fileSystem.XmlRootLevelValues, fileSystem.XmlIgnoreValues, fileSystem.XmlSourceName, fileSystem.XmlPrimaryKey, fileSystem.XmlAttributePrefix, fileSystem.XmlPreserveCase, debugMaxDocs); } //TESTED else if (bIsJson) { jsonParser = new JsonToMetadataParser(fileSystem.XmlSourceName, fileSystem.XmlRootLevelValues, fileSystem.XmlPrimaryKey, fileSystem.XmlIgnoreValues, debugMaxDocs); } //TESTED List<DocumentPojo> partials = null; try { if (bIsXml) { XMLStreamReader xmlStreamReader = null; XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty(XMLInputFactory.IS_COALESCING, true); factory.setProperty(XMLInputFactory.SUPPORT_DTD, false); try { xmlStreamReader = factory.createXMLStreamReader(f.getInputStream()); partials = xmlParser.parseDocument(xmlStreamReader); long memUsage = xmlParser.getMemUsage(); _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); } finally { if (null != xmlStreamReader) xmlStreamReader.close(); } } //TESTED else if (bIsJson) { JsonReader jsonReader = null; try { jsonReader = new JsonReader(new InputStreamReader(f.getInputStream(), "UTF-8")); jsonReader.setLenient(true); partials = jsonParser.parseDocument(jsonReader); long memUsage = jsonParser.getMemUsage(); _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); } finally { if (null != jsonReader) jsonReader.close(); } } //TESTED else if (bIsLineOriented) { // Just generate a document for every line BufferedReader lineReader = null; try { lineReader = new BufferedReader(new InputStreamReader(f.getInputStream(), "UTF-8")); 
CsvToMetadataParser lineParser = new CsvToMetadataParser(debugMaxDocs); partials = lineParser.parseDocument(lineReader, source); long memUsage = lineParser.getMemUsage(); _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); } finally { if (null != lineReader) lineReader.close(); } } //TESTED MessageDigest md5 = null; // (generates unique urls if the user doesn't below) try { md5 = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { // Do nothing, unlikely to happen... } int nIndex = 0; int numPartials = partials.size(); for (DocumentPojo doctoAdd : partials) { nIndex++; doctoAdd.setSource(source.getTitle()); doctoAdd.setSourceKey(source.getKey()); doctoAdd.setMediaType(source.getMediaType()); doctoAdd.setModified(new Date(fileTimestamp)); doctoAdd.setCreated(new Date()); if (null == doctoAdd.getUrl()) { // Can be set in the parser or here doctoAdd.setHasDefaultUrl(true); // (ie cannot occur in a different src URL) if (1 == numPartials) { String urlString = f.getUrlString(); if (urlString.endsWith(urlType)) { doctoAdd.setUrl(urlString); } else { doctoAdd.setUrl( new StringBuffer(urlString).append('.').append(urlType).toString()); } // (we always set sourceUrl as the true url of the file, so want to differentiate the URL with // some useful information) } else if (null == doctoAdd.getMetadata()) { // Line oriented case doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/").append(nIndex) .append('.').append(urlType).toString()); } else { if (null == md5) { // Will never happen, MD5 always exists doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/") .append(doctoAdd.getMetadata().hashCode()).append('.').append(urlType) .toString()); } else { // This is the standard call if the XML parser has not been configured to build the URL doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/") .append(DigestUtils.md5Hex(doctoAdd.getMetadata().toString())) .append('.').append(urlType).toString()); } } //TESTED } 
doctoAdd.setTitle(f.getName().toString()); doctoAdd.setPublishedDate(new Date(fileTimestamp)); doctoAdd.setSourceUrl(f.getUrlString()); // Always add to files because I'm deleting the source URL files.add(doctoAdd); } //TESTED } catch (XMLStreamException e1) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); } catch (FactoryConfigurationError e1) { errors++; _context.getHarvestStatus().logMessage(e1.getMessage(), true); } catch (IOException e1) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); } catch (Exception e1) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); } } //(end if needs updated) } else //Tika supports Excel,Word,Powerpoint,Visio, & Outlook Documents { // (This dedup tells me if it's an add/update vs ignore - qr.isDuplicate higher up tells me if I need to add or update) if (needsUpdated_Url(modDate, f.getUrlString(), source)) { Metadata metadata = null; InputStream in = null; try { doc = new DocumentPojo(); // Create a tika object (first time only) if (null == _tika) { this.initializeTika(_context, source); } // BUGGERY // NEED TO LIKELY SET LIMIT TO BE 30MB or 50MB and BYPASS ANYTHING OVER THAT BELOW IS THE CODE TO DO THAT // tika.setMaxStringLength(30*1024*1024); // Disable the string length limit _tika.setMaxStringLength(-1); //input = new FileInputStream(new File(resourceLocation)); // Create a metadata object to contain the metadata metadata = new Metadata(); // Parse the file and get the text of the file doc.setSource(source.getTitle()); doc.setSourceKey(source.getKey()); doc.setMediaType(source.getMediaType()); String fullText = ""; in = f.getInputStream(); try { if (null == _tikaOutputFormat) { // text only fullText = _tika.parseToString(in, metadata); } //TESTED else { // XML/HMTL _tika.getParser().parse(in, _tikaOutputFormat, metadata, 
_tikaOutputParseContext); fullText = _tikaXmlFormatWriter.toString(); _tikaXmlFormatWriter.getBuffer().setLength(0); } //TESTED } finally { if (null != in) in.close(); } int descCap = 500; doc.setFullText(fullText); if (descCap > fullText.length()) { descCap = fullText.length(); } doc.setDescription(fullText.substring(0, descCap)); doc.setModified(new Date(fileTimestamp)); doc.setCreated(new Date()); doc.setUrl(f.getUrlString()); doc.setTitle(f.getName().toString()); doc.setPublishedDate(new Date(fileTimestamp)); long memUsage = (250L * (doc.getFullText().length() + doc.getDescription().length())) / 100L; // 25% overhead, 2x for string->byte _memUsage += memUsage; _totalMemUsage.addAndGet(memUsage); // If the metadata contains a more plausible date then use that try { String title = metadata.get(Metadata.TITLE); if (null != title) { doc.setTitle(title); } } catch (Exception e) { // Fine just carry on } try { Date date = metadata.getDate(Metadata.CREATION_DATE); // MS Word if (null != date) { doc.setPublishedDate(date); } else { date = metadata.getDate(Metadata.DATE); // Dublin if (null != date) { doc.setPublishedDate(date); } else { date = metadata.getDate(Metadata.ORIGINAL_DATE); if (null != date) { doc.setPublishedDate(date); } } } } catch (Exception e) { // Fine just carry on } //TESTED // If the metadata contains a geotag then apply that: try { String lat = metadata.get(Metadata.LATITUDE); String lon = metadata.get(Metadata.LONGITUDE); if ((null != lat) && (null != lon)) { GeoPojo gt = new GeoPojo(); gt.lat = Double.parseDouble(lat); gt.lon = Double.parseDouble(lon); doc.setDocGeo(gt); } } catch (Exception e) { // Fine just carry on } // Save the entire metadata: doc.addToMetadata("_FILE_METADATA_", metadata); for (ObjectId communityId : source.getCommunityIds()) { doc.setCommunityId(communityId); } files.add(doc); // Close the input stream in.close(); in = null; //TESTED } catch (SmbException e) { errors++; _context.getHarvestStatus() 
.logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } catch (MalformedURLException e) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } catch (UnknownHostException e) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } catch (IOException e) { errors++; _context.getHarvestStatus().logMessage(e.getMessage(), true); } catch (TikaException e) { errors++; _context.getHarvestStatus().logMessage(e.getMessage(), true); } catch (Exception e) { errors++; _context.getHarvestStatus() .logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } finally { // Close the input stream if an error occurs if (null != in) { try { in.close(); } catch (IOException e) { // All good, do nothing } } } // end exception handling } // end dedup check } // end XML vs "office" app //DEBUG //System.out.println("FILE=" + files.size() + " / MEM=" + _memUsage + " VS " + Runtime.getRuntime().totalMemory()); }
From source file:de.uzk.hki.da.model.ObjectPremisXmlWriter.java
/**
 * Copies the content of a JHOVE output file into the document being written by {@code writer}.
 *
 * <p>Every start element, its namespace declarations and attributes, every non-whitespace
 * text node and every end element of the JHOVE file is replayed on {@code writer}. Line breaks
 * (emitted through {@code writeDTD("\n")}) and indentation via {@code indent(tab)} are inserted
 * before each start element and before each end element that did not directly follow text.
 *
 * <p>The reader and the input stream are released even when reading or writing fails.
 *
 * @param jhoveFilePath the path of the JHOVE file to integrate
 * @param tab the indentation level to start at
 * @throws XMLStreamException if the JHOVE file cannot be parsed or the output cannot be written
 * @throws FileNotFoundException if the JHOVE file does not exist
 * @throws RuntimeException if the copy succeeded but the input stream could not be closed
 * @author Thomas Kleinke
 */
private void integrateJhoveData(String jhoveFilePath, int tab)
        throws XMLStreamException, FileNotFoundException {

    File jhoveFile = new File(jhoveFilePath);
    if (!jhoveFile.exists()) {
        throw new FileNotFoundException("file does not exist. " + jhoveFile);
    }

    FileInputStream inputStream = new FileInputStream(jhoveFile);
    boolean completed = false;
    try {
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        XMLStreamReader streamReader = inputFactory.createXMLStreamReader(inputStream);
        try {
            // True while the current element has had text written, so that its end tag
            // stays on the same line as the text.
            boolean textElement = false;

            while (streamReader.hasNext()) {
                int event = streamReader.next();

                switch (event) {
                case XMLStreamConstants.START_ELEMENT:
                    writer.writeDTD("\n");
                    indent(tab);
                    tab++;

                    String prefix = streamReader.getPrefix();
                    if (prefix != null && !prefix.equals("")) {
                        writer.setPrefix(prefix, streamReader.getNamespaceURI());
                        writer.writeStartElement(streamReader.getNamespaceURI(), streamReader.getLocalName());
                    } else {
                        writer.writeStartElement(streamReader.getLocalName());
                    }

                    for (int i = 0; i < streamReader.getNamespaceCount(); i++) {
                        writer.writeNamespace(streamReader.getNamespacePrefix(i), streamReader.getNamespaceURI(i));
                    }

                    for (int i = 0; i < streamReader.getAttributeCount(); i++) {
                        QName qname = streamReader.getAttributeName(i);
                        String attributeName = qname.getLocalPart();
                        String attributePrefix = qname.getPrefix();
                        // Prefixed attributes are written under their literal "prefix:name".
                        if (attributePrefix != null && !attributePrefix.equals("")) {
                            attributeName = attributePrefix + ":" + attributeName;
                        }

                        writer.writeAttribute(attributeName, streamReader.getAttributeValue(i));
                    }
                    break;

                case XMLStreamConstants.CHARACTERS:
                    if (!streamReader.isWhiteSpace()) {
                        writer.writeCharacters(streamReader.getText());
                        textElement = true;
                    }
                    break;

                case XMLStreamConstants.END_ELEMENT:
                    tab--;
                    if (!textElement) {
                        writer.writeDTD("\n");
                        indent(tab);
                    }
                    writer.writeEndElement();
                    textElement = false;
                    break;

                default:
                    break;
                }
            }
        } finally {
            // Frees parser resources; does not close the underlying stream.
            streamReader.close();
        }
        completed = true;
    } finally {
        try {
            inputStream.close();
        } catch (IOException e) {
            // Report a close failure only after a successful copy, so that it never hides
            // the exception that aborted the copy.
            if (completed) {
                throw new RuntimeException("Failed to close input stream", e);
            }
        }
    }
}
From source file:sdmx.net.service.nomis.NOMISRESTServiceRegistry.java
public static List<NOMISGeography> parseGeography(InputStream in, String cubeId, String cubeName) throws XMLStreamException { List<NOMISGeography> geogList = new ArrayList<NOMISGeography>(); String tagContent = null;//from w ww . j av a2 s .c om XMLInputFactory factory = XMLInputFactory.newInstance(); XMLStreamReader reader = factory.createXMLStreamReader(in); int state = 0; String lastLang = null; while (reader.hasNext()) { int event = reader.next(); switch (event) { case XMLStreamConstants.START_ELEMENT: if (reader.getLocalName().equals("Type")) { NOMISGeography geog = new NOMISGeography(); geog.setCubeId(cubeId); geog.setCubeName(cubeName); geog.setGeography(reader.getAttributeValue("", "value")); geog.setGeographyName(reader.getAttributeValue("", "name")); geogList.add(geog); } break; case XMLStreamConstants.END_ELEMENT: break; } } return geogList; }