List of usage examples for javax.xml.stream XMLInputFactory createXMLStreamReader
public abstract XMLStreamReader createXMLStreamReader(java.io.InputStream stream) throws XMLStreamException;
From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java
/** * processMeta - handle an individual field *//*w w w . j ava 2 s . c o m*/ private void processMeta(DocumentPojo f, metaField m, String text, SourcePojo source, UnstructuredAnalysisConfigPojo uap) { boolean bAllowDuplicates = false; if ((null != m.flags) && m.flags.contains("U")) { bAllowDuplicates = true; } if ((null == m.scriptlang) || m.scriptlang.equalsIgnoreCase("regex")) { Pattern metaPattern = createRegex(m.script, m.flags); int timesToRun = 1; Object[] currField = null; if ((null != m.flags) && m.flags.contains("c")) { currField = f.getMetadata().get(m.fieldName); } if (null != currField) { // chained metadata timesToRun = currField.length; text = (String) currField[0]; } //TESTED Matcher matcher = metaPattern.matcher(text); LinkedList<String> Llist = null; for (int ii = 0; ii < timesToRun; ++ii) { if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above) text = (String) currField[ii]; matcher = metaPattern.matcher(text); } //TESTED StringBuffer prefix = new StringBuffer(m.fieldName).append(':'); int nFieldNameLen = m.fieldName.length() + 1; try { while (matcher.find()) { if (null == Llist) { Llist = new LinkedList<String>(); } if (null == m.groupNum) { m.groupNum = 0; } String toAdd = matcher.group(m.groupNum); if (null != m.replace) { toAdd = metaPattern.matcher(toAdd).replaceFirst(m.replace); } if ((null != m.flags) && m.flags.contains("H")) { toAdd = StringEscapeUtils.unescapeHtml(toAdd); } prefix.setLength(nFieldNameLen); prefix.append(toAdd); String dupCheck = prefix.toString(); if (!regexDuplicates.contains(dupCheck)) { Llist.add(toAdd); if (!bAllowDuplicates) { regexDuplicates.add(dupCheck); } } } } catch (Exception e) { this._context.getHarvestStatus().logMessage("processMeta1: " + e.getMessage(), true); } } //(end metadata chaining handling) if (null != Llist) { if (null != currField) { // (overwrite) f.getMetadata().put(m.fieldName, Llist.toArray()); } else { f.addToMetadata(m.fieldName, Llist.toArray()); } } //TESTED } else if (m.scriptlang.equalsIgnoreCase("javascript")) { if (null == f.getMetadata()) { f.setMetadata(new LinkedHashMap<String, Object[]>()); } //set the script engine up if necessary if ((null != source) && (null != uap)) { //(these are null if called from new processing pipeline vs legacy code) intializeScriptEngine(source, uap); } try { //TODO (INF-2488): in new format, this should only happen in between contentMeta blocks/docs // (also should be able to use SAH _document object I think?) // Javascript: the user passes in Object[] currField = f.getMetadata().get(m.fieldName); if ((null == m.flags) || m.flags.isEmpty()) { if (null == currField) { engine.put("text", text); engine.put("_iterator", null); } //(otherwise will just pass the current fields in there) } else { // flags specified if (m.flags.contains("t")) { // text engine.put("text", text); } if (m.flags.contains("d")) { // entire document (minus ents and assocs) GsonBuilder gb = new GsonBuilder(); Gson g = gb.create(); List<EntityPojo> ents = f.getEntities(); List<AssociationPojo> assocs = f.getAssociations(); try { f.setEntities(null); f.setAssociations(null); engine.put("document", g.toJson(f)); securityManager.eval(engine, JavaScriptUtils.initScript); } finally { f.setEntities(ents); f.setAssociations(assocs); } } if (m.flags.contains("m")) { // metadata GsonBuilder gb = new GsonBuilder(); Gson g = gb.create(); engine.put("_metadata", g.toJson(f.getMetadata())); securityManager.eval(engine, JavaScriptUtils.iteratorMetaScript); } } //(end flags processing) if (null != currField) { f.getMetadata().remove(m.fieldName); GsonBuilder gb = new GsonBuilder(); Gson g = gb.create(); engine.put("_iterator", g.toJson(currField)); securityManager.eval(engine, JavaScriptUtils.iteratorDocScript); } //TESTED (handling of flags, and replacing of existing fields, including when field is null but specified) Object returnVal = securityManager.eval(engine, m.script); if (null != returnVal) { if (returnVal instanceof String) { // The only easy case Object[] array = new Object[1]; if ((null != m.flags) && m.flags.contains("H")) { returnVal = StringEscapeUtils.unescapeHtml((String) returnVal); } array[0] = returnVal; f.addToMetadata(m.fieldName, array); } else { // complex object or array - in either case the engine turns these into // internal.NativeArray or internal.NativeObject BasicDBList outList = JavaScriptUtils.parseNativeJsObject(returnVal, engine); f.addToMetadata(m.fieldName, outList.toArray()); } } } catch (ScriptException e) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); // Just do nothing and log // e.printStackTrace(); //DEBUG (don't output log messages per doc) //logger.error(e.getMessage()); } catch (Exception e) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); // Just do nothing and log // e.printStackTrace(); //DEBUG (don't output log messages per doc) //logger.error(e.getMessage()); } } else if (m.scriptlang.equalsIgnoreCase("xpath")) { String xpath = m.script; try { createHtmlCleanerIfNeeded(); int timesToRun = 1; Object[] currField = null; if ((null != m.flags) && m.flags.contains("c")) { currField = f.getMetadata().get(m.fieldName); } if (null != currField) { // chained metadata f.getMetadata().remove(m.fieldName); // (so will add to the end) timesToRun = currField.length; text = (String) currField[0]; } //TESTED for (int ii = 0; ii < timesToRun; ++ii) { if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above) text = (String) currField[ii]; } //TESTED TagNode node = cleaner.clean(new ByteArrayInputStream(text.getBytes())); //NewCode : Only use html cleaner for cleansing //use JAXP for full Xpath lib Document doc = new DomSerializer(new CleanerProperties()).createDOM(node); String extraRegex = extractRegexFromXpath(xpath); if (extraRegex != null) xpath = xpath.replace(extraRegex, ""); XPath xpa = XPathFactory.newInstance().newXPath(); NodeList res = (NodeList) xpa.evaluate(xpath, doc, XPathConstants.NODESET); if (res.getLength() > 0) { if ((null != m.flags) && (m.flags.contains("o"))) { // "o" for object m.groupNum = -1; // (see bConvertToObject below) } StringBuffer prefix = new StringBuffer(m.fieldName).append(':'); int nFieldNameLen = m.fieldName.length() + 1; ArrayList<Object> Llist = new ArrayList<Object>(res.getLength()); boolean bConvertToObject = ((m.groupNum != null) && (m.groupNum == -1)); boolean convertToXml = ((null != m.flags) && (m.flags.contains("x"))); for (int i = 0; i < res.getLength(); i++) { Node info_node = res.item(i); if ((null != m.flags) && (m.flags.contains("g"))) { Llist.add(parseHtmlTable(info_node, m.replace)); } else if (bConvertToObject || convertToXml) { // Try to create a JSON object out of this StringWriter writer = new StringWriter(); try { Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.transform(new DOMSource(info_node), new StreamResult(writer)); } catch (TransformerException e1) { continue; } if (bConvertToObject) { try { JSONObject subObj = XML.toJSONObject(writer.toString()); if (xpath.endsWith("*")) { // (can have any number of different names here) Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj)); } //TESTED else { String[] rootNames = JSONObject.getNames(subObj); if (1 == rootNames.length) { // (don't think it can't be any other number in fact) subObj = subObj.getJSONObject(rootNames[0]); } boolean bUnescapeHtml = ((null != m.flags) && m.flags.contains("H")); Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj, bUnescapeHtml)); } //TESTED } catch (JSONException e) { // Just carry on continue; } //TESTED } else { // leave in XML form Llist.add(writer.toString().substring(38)); // +38: (step over <?xml version="1.0" encoding="UTF-8"?>) } //TESTED (xpath_test.json) } else { // Treat this as string, either directly or via regex String info = info_node.getTextContent().trim(); if (extraRegex == null || extraRegex.isEmpty()) { prefix.setLength(nFieldNameLen); prefix.append(info); String dupCheck = prefix.toString(); if (!regexDuplicates.contains(dupCheck)) { if ((null != m.flags) && m.flags.contains("H")) { info = StringEscapeUtils.unescapeHtml(info); } Llist.add(info); if (!bAllowDuplicates) { regexDuplicates.add(dupCheck); } } } else { // Apply regex to the string Pattern dataRegex = createRegex(extraRegex, m.flags); Matcher dataMatcher = dataRegex.matcher(info); boolean result = dataMatcher.find(); while (result) { String toAdd; if (m.groupNum != null) toAdd = dataMatcher.group(m.groupNum); else toAdd = dataMatcher.group(); prefix.setLength(nFieldNameLen); prefix.append(toAdd); String dupCheck = prefix.toString(); if (!regexDuplicates.contains(dupCheck)) { if ((null != m.flags) && m.flags.contains("H")) { toAdd = StringEscapeUtils.unescapeHtml(toAdd); } Llist.add(toAdd); if (!bAllowDuplicates) { regexDuplicates.add(dupCheck); } } result = dataMatcher.find(); } } //(regex vs no regex) } //(end string vs object) } if (Llist.size() > 0) { f.addToMetadata(m.fieldName, Llist.toArray()); } } } //(end loop over metadata objects if applicable) } catch (IOException ioe) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(ioe).toString(), true); // Just do nothing and log //DEBUG (don't output log messages per doc) //logger.error(ioe.getMessage()); } catch (ParserConfigurationException e1) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); // Just do nothing and log //DEBUG (don't output log messages per doc) //logger.error(e1.getMessage()); } catch (XPathExpressionException e1) { _context.getHarvestStatus().logMessage("Error evaluating xpath expression: " + xpath, true); } } else if (m.scriptlang.equalsIgnoreCase("stream")) { // XML or JSON streaming interface // which one? try { boolean json = false; boolean xml = false; for (int i = 0; i < 128; ++i) { if ('<' == text.charAt(i)) { xml = true; break; } if ('{' == text.charAt(i) || '[' == text.charAt(i)) { json = true; break; } if (!Character.isSpaceChar(text.charAt(i))) { break; } } //TESTED (too many spaces: meta_stream_test, test4; incorrect chars: test3, xml: test1, json: test2) boolean textNotObject = m.flags == null || !m.flags.contains("o"); List<DocumentPojo> docs = new LinkedList<DocumentPojo>(); List<String> levelOneFields = null; if (null != m.script) { levelOneFields = Arrays.asList(m.script.split("\\s*,\\s*")); if ((1 == levelOneFields.size()) && levelOneFields.get(0).isEmpty()) { // convert [""] to null levelOneFields = null; } } //TESTED (json and xml) if (xml) { XmlToMetadataParser parser = new XmlToMetadataParser(levelOneFields, null, null, null, null, null, Integer.MAX_VALUE); XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty(XMLInputFactory.IS_COALESCING, true); factory.setProperty(XMLInputFactory.SUPPORT_DTD, false); XMLStreamReader reader = null; try { reader = factory.createXMLStreamReader(new ByteArrayInputStream(text.getBytes())); docs = parser.parseDocument(reader, textNotObject); } finally { if (null != reader) reader.close(); } } //TESTED (meta_stream_test, test1) if (json) { JsonReader jsonReader = null; try { JsonToMetadataParser parser = new JsonToMetadataParser(null, levelOneFields, null, null, Integer.MAX_VALUE); jsonReader = new JsonReader( new InputStreamReader(new ByteArrayInputStream(text.getBytes()), "UTF-8")); jsonReader.setLenient(true); docs = parser.parseDocument(jsonReader, textNotObject); } finally { if (null != jsonReader) jsonReader.close(); } } //TESTED (meta_stream_test test2) if (!docs.isEmpty()) { ArrayList<String> Llist = null; ArrayList<Object> LlistObj = null; if (textNotObject) { Llist = new ArrayList<String>(docs.size()); } else { LlistObj = new ArrayList<Object>(docs.size()); } for (DocumentPojo doc : docs) { if ((null != doc.getFullText()) || (null != doc.getMetadata())) { if (textNotObject) { Llist.add(doc.getFullText()); } //TESTED else if (xml) { LlistObj.add(doc.getMetadata()); } //TESTED else if (json) { Object o = doc.getMetadata(); if (null != o) { o = doc.getMetadata().get("json"); if (o instanceof Object[]) { LlistObj.addAll(Arrays.asList((Object[]) o)); } else if (null != o) { LlistObj.add(o); } //TESTED } } //TESTED } } //TESTED if ((null != Llist) && !Llist.isEmpty()) { f.addToMetadata(m.fieldName, Llist.toArray()); } //TESTED if ((null != LlistObj) && !LlistObj.isEmpty()) { f.addToMetadata(m.fieldName, LlistObj.toArray()); } //TESTED } //TESTED (meta_stream_test test1,test2) } //(end try) catch (Exception e) { // various parsing errors _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } } //TESTED (meta_stream_test) // (don't currently support other script types) }
From source file:net.xy.jcms.controller.configurations.parser.TranslationParser.java
/** * parses an xml configuration from an input streams. throwes * IllegalArgumentExceptions in case of syntax error. * //from w ww . j a v a 2 s. com * @param in * @return value * @throws XMLStreamException * @throws ClassNotFoundException * in case there are problems with an params type converter */ public static TranslationRule[] parse(final InputStream in, final ClassLoader loader) throws XMLStreamException, ClassNotFoundException { @SuppressWarnings("deprecation") final XMLInputFactory factory = XMLInputFactory.newInstance( "com.sun.xml.internal.stream.XMLInputFactoryImpl", TranslationParser.class.getClassLoader()); LOG.info("XMLInputFactory loaded: " + factory.getClass().getName()); factory.setProperty("javax.xml.stream.isCoalescing", true); // not supported be the reference implementation // factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.TRUE); final XMLStreamReader parser = factory.createXMLStreamReader(in); while (parser.hasNext()) { final int event = parser.next(); if (event == XMLStreamConstants.START_ELEMENT && parser.getName().getLocalPart().equals("rules")) { return parseRules(parser, loader); } } throw new IllegalArgumentException("No rules section found."); }
From source file:net.xy.jcms.controller.configurations.parser.TranslationParser.java
/** * parses an single file translation/*from w ww.j a va2 s . c o m*/ * * @param in * @param loader * @return value * @throws XMLStreamException * @throws ClassNotFoundException * in case there are problems with an params type converter */ public static TranslationRule parseSingle(final InputStream in, final ClassLoader loader) throws XMLStreamException, ClassNotFoundException { @SuppressWarnings("deprecation") final XMLInputFactory factory = XMLInputFactory.newInstance( "com.sun.xml.internal.stream.XMLInputFactoryImpl", TranslationParser.class.getClassLoader()); LOG.info("XMLInputFactory loaded: " + factory.getClass().getName()); factory.setProperty("javax.xml.stream.isCoalescing", true); final XMLStreamReader parser = factory.createXMLStreamReader(in); while (parser.hasNext()) { final int event = parser.next(); if (event == XMLStreamConstants.START_ELEMENT && parser.getName().getLocalPart().equals("rule")) { return parseRule(parser, loader); } } throw new IllegalArgumentException("No rules section found."); }
From source file:net.xy.jcms.controller.configurations.parser.UsecaseParser.java
/** * parses usecases out from an xml file/* w w w . j a v a2 s . co m*/ * * @param in * @param loader * used for retrieving configuration included resources and also * for retrieving the controllers * @return value * @throws XMLStreamException * @throws ClassNotFoundException */ public static Usecase[] parse(final InputStream in, final ClassLoader loader) throws XMLStreamException, ClassNotFoundException { final XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty("javax.xml.stream.isCoalescing", true); // not supported by the reference implementation // factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.TRUE); final XMLStreamReader parser = factory.createXMLStreamReader(in); while (parser.hasNext()) { final int event = parser.next(); if (event == XMLStreamConstants.START_ELEMENT && parser.getName().getLocalPart().equals("usecases")) { return parseUsecases(parser, loader); } } throw new IllegalArgumentException("No usecases section found. [" + parser.getLocation() + "]"); }
From source file:net.xy.jcms.controller.configurations.parser.UsecaseParser.java
/** * method for parsing single usecase xml files. one per file. * /*from www . ja v a 2s . c o m*/ * @param in * @param loader * @return parsed usecase * @throws XMLStreamException * @throws ClassNotFoundException */ public static Usecase parseSingle(final InputStream in, final ClassLoader loader) throws XMLStreamException, ClassNotFoundException { final XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty("javax.xml.stream.isCoalescing", true); // not supported by the reference implementation // factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.TRUE); final XMLStreamReader parser = factory.createXMLStreamReader(in); while (parser.hasNext()) { final int event = parser.next(); if (event == XMLStreamConstants.START_ELEMENT && parser.getName().getLocalPart().equals("usecase")) { return parseUsecase(parser, loader); } } throw new IllegalArgumentException("No usecases section found. [" + parser.getLocation() + "]"); }
From source file:nl.knaw.huygens.tei.xpath.XPathUtil.java
public static Map<String, String> getNamespaceInfo(String xml) { Map<String, String> namespaces = Maps.newIdentityHashMap(); XMLInputFactory inputFactory = XMLInputFactory.newInstance(); try {/*from w w w . j a v a2 s. com*/ XMLStreamReader xreader = inputFactory.createXMLStreamReader(IOUtils.toInputStream(xml, "UTF-8")); while (xreader.hasNext()) { if (xreader.next() == XMLStreamConstants.START_ELEMENT) { QName qName = xreader.getName(); if (qName != null) { addNamespace(namespaces, qName.getPrefix(), qName.getNamespaceURI()); for (int i = 0; i < xreader.getAttributeCount(); i++) { addNamespace(namespaces, xreader.getAttributePrefix(i), xreader.getAttributeNamespace(i)); } } } } } catch (XMLStreamException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return namespaces; }
From source file:org.activiti.app.service.editor.ActivitiDecisionTableService.java
public ModelRepresentation importDecisionTable(HttpServletRequest request, MultipartFile file) { String fileName = file.getOriginalFilename(); if (fileName != null && (fileName.endsWith(".dmn") || fileName.endsWith(".xml"))) { try {//from w w w. j ava2 s .co m XMLInputFactory xif = XmlUtil.createSafeXmlInputFactory(); InputStreamReader xmlIn = new InputStreamReader(file.getInputStream(), "UTF-8"); XMLStreamReader xtr = xif.createXMLStreamReader(xmlIn); DmnDefinition dmnDefinition = dmnXmlConverter.convertToDmnModel(xtr); ObjectNode editorJsonNode = dmnJsonConverter.convertToJson(dmnDefinition); // remove id to avoid InvalidFormatException when deserializing editorJsonNode.remove("id"); ModelRepresentation modelRepresentation = new ModelRepresentation(); modelRepresentation.setName(dmnDefinition.getName()); modelRepresentation.setDescription(dmnDefinition.getDescription()); modelRepresentation.setModelType(AbstractModel.MODEL_TYPE_DECISION_TABLE); Model model = modelService.createModel(modelRepresentation, editorJsonNode.toString(), SecurityUtils.getCurrentUserObject()); return new ModelRepresentation(model); } catch (Exception e) { logger.error("Could not import decision table model", e); throw new InternalServerErrorException("Could not import decision table model"); } } else { throw new BadRequestException( "Invalid file name, only .dmn or .xml files are supported not " + fileName); } }
From source file:org.activiti.app.service.editor.ModelServiceImpl.java
public ModelRepresentation importNewVersion(String modelId, String fileName, InputStream modelStream) { Model processModel = getModel(modelId); User currentUser = SecurityUtils.getCurrentUserObject(); if (fileName != null && (fileName.endsWith(".bpmn") || fileName.endsWith(".bpmn20.xml"))) { try {/*w w w.j a v a 2s. c om*/ XMLInputFactory xif = XmlUtil.createSafeXmlInputFactory(); InputStreamReader xmlIn = new InputStreamReader(modelStream, "UTF-8"); XMLStreamReader xtr = xif.createXMLStreamReader(xmlIn); BpmnModel bpmnModel = bpmnXMLConverter.convertToBpmnModel(xtr); if (CollectionUtils.isEmpty(bpmnModel.getProcesses())) { throw new BadRequestException("No process found in definition " + fileName); } if (bpmnModel.getLocationMap().size() == 0) { throw new BadRequestException( "No required BPMN DI information found in definition " + fileName); } ObjectNode modelNode = bpmnJsonConverter.convertToJson(bpmnModel); AbstractModel savedModel = saveModel(modelId, processModel.getName(), processModel.getKey(), processModel.getDescription(), modelNode.toString(), true, "Version import via REST service", currentUser); return new ModelRepresentation(savedModel); } catch (BadRequestException e) { throw e; } catch (Exception e) { throw new BadRequestException( "Import failed for " + fileName + ", error message " + e.getMessage()); } } else { throw new BadRequestException( "Invalid file name, only .bpmn and .bpmn20.xml files are supported not " + fileName); } }
From source file:org.activiti.bpmn.converter.BpmnXMLConverter.java
public BpmnModel convertToBpmnModel(InputStreamProvider inputStreamProvider, boolean validateSchema, boolean enableSafeBpmnXml, String encoding) { XMLInputFactory xif = XMLInputFactory.newInstance(); if (xif.isPropertySupported(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)) { xif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); }/*from ww w . j a v a 2 s . com*/ if (xif.isPropertySupported(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES)) { xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); } if (xif.isPropertySupported(XMLInputFactory.SUPPORT_DTD)) { xif.setProperty(XMLInputFactory.SUPPORT_DTD, false); } InputStreamReader in = null; try { in = new InputStreamReader(inputStreamProvider.getInputStream(), encoding); XMLStreamReader xtr = xif.createXMLStreamReader(in); try { if (validateSchema) { if (!enableSafeBpmnXml) { validateModel(inputStreamProvider); } else { validateModel(xtr); } // The input stream is closed after schema validation in = new InputStreamReader(inputStreamProvider.getInputStream(), encoding); xtr = xif.createXMLStreamReader(in); } } catch (Exception e) { throw new RuntimeException("Could not validate XML with BPMN 2.0 XSD", e); } // XML conversion return convertToBpmnModel(xtr); } catch (UnsupportedEncodingException e) { throw new RuntimeException("The bpmn 2.0 xml is not UTF8 encoded", e); } catch (XMLStreamException e) { throw new RuntimeException("Error while reading the BPMN 2.0 XML", e); } finally { if (in != null) { try { in.close(); } catch (IOException e) { LOGGER.debug("Problem closing BPMN input stream", e); } } } }