List of usage examples for org.jdom2 Namespace getNamespace
public static Namespace getNamespace(final String prefix, final String uri)
Retrieves (if in existence) or creates (if not) a Namespace for the supplied prefix and uri.
From source file:de.uniba.dsg.ppn.ba.validation.XmlLocator.java
License:Open Source License
/** * Searches the line of the given xpath expression in the given file and * returns either the line or -1. -1 means, that with the xpath expression * couldn't be determined a bpmn element. * * @param xmlFile//from w ww . j a v a 2 s.com * the xml file where the error has to be found * @param xpathExpression * the xpath expression to find the error in the file * @return line or -1 */ public int findLine(File xmlFile, String xpathExpression) { try { Document doc = saxBuilder.build(xmlFile); int bracketPosition = xpathExpression.lastIndexOf('['); int elementPosition = 0; try { elementPosition = Integer .parseInt(xpathExpression.substring(bracketPosition + 1, xpathExpression.lastIndexOf(']'))); xpathExpression = xpathExpression.substring(0, bracketPosition); } catch (NumberFormatException e) { // ignore, because then there's no position number in the xpath // expression and the expression needn't to be rewritten } XPathExpression<Element> xpath = xPathFactory.compile(xpathExpression, Filters.element(), null, Namespace.getNamespace("bpmn", ConstantHelper.BPMNNAMESPACE)); List<Element> foundElements = xpath.evaluate(doc); if (foundElements.size() > elementPosition) { return ((LocatedElement) foundElements.get(elementPosition)).getLine(); } } catch (IOException | JDOMException e) { LOGGER.debug(ConstantHelper.FILENOTFOUNDMESSAGEWITHCAUSE, xmlFile.getName(), e); } return -1; }
From source file:ec.edu.cedia.redi.ldclient.provider.ScopusAuthorProvider.java
License:Apache License
/**
 * Parses the XML result of a publication search and links every publication
 * found to its author. See
 * <a href="http://api.elsevier.com/documentation/SCOPUSSearchAPI.wadl">Scopus
 * Search API</a>.
 *
 * <p>The author id is taken from the {@code au-id(...)} term of the request
 * URL. For every {@code atom:link[@ref='self']/@href} found, a
 * {@code FOAF.PUBLICATIONS} triple is added to {@code triples} and the
 * publication URL, extended with the API key and accept parameters, is added
 * to the returned list.
 *
 * @param input XML response to parse
 * @param requestUrl URL of the request; must contain {@code au-id(<id>)&}
 * @param triples model receiving the author/publication triples
 * @return list of publication resources
 * @throws DataRetrievalException if the response cannot be read or parsed
 */
private List<String> parseSearchPub(InputStream input, String requestUrl, final Model triples)
        throws DataRetrievalException {
    try {
        final List<String> publicationUrls = new ArrayList<>();
        final ValueFactory valueFactory = ValueFactoryImpl.getInstance();

        // Extract the text between "au-id(" and ")&".
        final int idStart = requestUrl.indexOf("au-id(") + 6;
        final int idEnd = requestUrl.indexOf(")&");
        final String authorId = requestUrl.substring(idStart, idEnd);
        final URI author = valueFactory.createURI("http://api.elsevier.com/content/author/author_id/", authorId);

        final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(input);
        final Namespace atom = Namespace.getNamespace("atom", "http://www.w3.org/2005/Atom");
        final XPathExpression<Attribute> selfLinks = XPathFactory.instance().compile(
                "/atom:search-results/atom:entry/atom:link[@ref='self']/@href", Filters.attribute(), null, atom);

        for (Attribute href : selfLinks.evaluate(doc)) {
            final String pubResource = href.getValue();
            triples.add(author, FOAF.PUBLICATIONS, valueFactory.createURI(pubResource));
            publicationUrls.add(pubResource + "?apiKey=" + apiKey + "&httpAccept=application/rdf%2Bxml");
        }
        return publicationUrls;
    } catch (JDOMException | IOException ex) {
        throw new DataRetrievalException(ex);
    }
}
From source file:ec.edu.cedia.redi.ldclient.provider.ScopusAuthorProvider.java
License:Apache License
/**
 * Maps each author from XML to RDF using the default implementation of
 * {@link AbstractXMLDataProvider#parseResponse}.
 *
 * <p>The response is buffered in memory because it is read once to locate
 * the author URLs and then once more per author by {@code parseResponse}.
 *
 * @see <a href="http://api.elsevier.com/documentation/AUTHORSearchAPI.wadl">Authors
 *      search API.</a>
 *
 * @param input XML response of the author search
 * @param resource resource each found author is linked to via {@code OWL.ONEOF}
 * @param requestUrl URL of the request, passed through to {@code parseResponse}
 * @param triples model receiving the generated triples
 * @param contentType content type, passed through to {@code parseResponse}
 * @return list of resources of authors found.
 * @throws DataRetrievalException if the response cannot be read, parsed or mapped
 */
private List<String> parseResponseAuthorsSearch(InputStream input, String resource, String requestUrl,
        Model triples, String contentType) throws DataRetrievalException {
    try {
        // Authors whose profile information (publications, affiliations, etc.) is extracted later.
        List<String> authorResources = new ArrayList<>();
        ValueFactory vf = ValueFactoryImpl.getInstance();

        // Keep the stream content for several reads.
        byte[] response = IOUtils.toByteArray(input);
        final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(new ByteArrayInputStream(response));

        // Get only the URI of each author.
        XPathExpression<Text> path = XPathFactory.instance().compile(
                "/atom:search-results/atom:entry/prism:url/text()", Filters.textOnly(), null,
                Namespace.getNamespace("atom", "http://www.w3.org/2005/Atom"),
                Namespace.getNamespace("prism", "http://prismstandard.org/namespaces/basic/2.0/"));

        // Map each author XML to RDF using the default parseResponse implementation
        // from AbstractXMLDataProvider.
        List<Text> authorUrlNodes = path.evaluate(doc);
        for (int i = 0; i < authorUrlNodes.size(); i++) {
            // The XPath mappings are set per entry index before delegating.
            setAuthorXPathMappings(i);
            String authorsResource = authorUrlNodes.get(i).getValue();
            super.parseResponse(authorsResource, requestUrl, triples, new ByteArrayInputStream(response),
                    contentType);
            authorResources.add(
                    authorsResource + "?apiKey=" + apiKey + "&httpAccept=application/rdf%2Bxml&view=ENHANCED");
            triples.add(vf.createURI(authorsResource), OWL.ONEOF, vf.createURI(resource));
        }
        return authorResources;
    } catch (JDOMException | IOException | DataRetrievalException ex) {
        throw new DataRetrievalException(ex);
    }
}
From source file:es.upm.dit.xsdinferencer.generation.generatorimpl.schemageneration.XMLSchemaDocumentGenerator.java
License:Apache License
/** * It generates the XSD file of the targetNamespace given at the constructor, taking into account that * the main namespace is the one given at the constructor. * //from w ww .j a v a 2s .c o m * @param schema the schema object * @param configuration the inference configuration * * @return a JDOM2 {@link Document} object containing the XSD contents. * * @see SchemaDocumentGenerator#generateSchemaDocument(Schema, XSDInferenceConfiguration) */ @Override public Document generateSchemaDocument(Schema schema, XSDInferenceConfiguration configuration) { // if(!configuration.getElementsGlobal()==false || // !configuration.getComplexTypesGlobal()==true || // !configuration.getSimpleTypesGlobal()==true // ) // throw new UnsupportedOperationException("Not implemented yet."); // checkArgument(schema.getNamespacesToPossiblePrefixMappingModifiable().containsKey(mainNamespace), "The main namespace must be a known namespace"); checkArgument(schema.getNamespacesToPossiblePrefixMappingModifiable().containsKey(targetNamespace), "The target namespace must be a known namespace"); // checkArgument(!schema.getNamespacesToPossiblePrefixMappingModifiable().containsKey(XSD_NAMESPACE_URI),"The XSD namespace must not be a known namespace"); // checkArgument(!schema.getNamespacesToPossiblePrefixMappingModifiable().containsKey(XSI_NAMESPACE_URI),"The XSI namespace must not be a known namespace"); Map<String, String> namespaceURIToPrefixMappings = schema.getSolvedNamespaceMappings(); if (configuration.getSkipNamespaces().contains(targetNamespace)) { throw new IllegalArgumentException("This is an skipped namespace, so its XSD should not be generated"); } if (targetNamespace.equals(XSD_NAMESPACE_URI)) System.err.println( "The XML Schema namespace is being considered as a target namespace in your documents. 
Independing of the inferred schemas, the only valid XSD for an XSD would be the normative one present at its first RFC"); Namespace xsdNamespace = Namespace.getNamespace(XSD_NAMESPACE_PREFIX.replace(":", ""), XSD_NAMESPACE_URI); List<Namespace> namespaceDeclarations = getNamespaceDeclarations(namespaceURIToPrefixMappings, xsdNamespace); Element elementSchema = new Element("schema", xsdNamespace); for (int i = 0; i < namespaceDeclarations.size(); i++) { Namespace currentNamespace = namespaceDeclarations.get(i); elementSchema.addNamespaceDeclaration(currentNamespace); String currentNamespaceUri = currentNamespace.getURI(); if (!targetNamespace.equals(mainNamespace) && !currentNamespaceUri.equals(mainNamespace)) continue; if (currentNamespace.equals(Namespace.XML_NAMESPACE) && (!schema.getAttributes().containsRow(XSDInferenceConfiguration.XML_NAMESPACE_URI) && !schema.getElements().containsRow(XSDInferenceConfiguration.XML_NAMESPACE_URI))) { continue; } if (currentNamespaceUri.equals(XSD_NAMESPACE_URI) && !namespaceURIToPrefixMappings.containsKey(XSD_NAMESPACE_URI)) continue; if (targetNamespace.equals(currentNamespaceUri) || (currentNamespaceUri.equals("") && (fileNameGenerator == null))) continue; if (currentNamespaceUri.equals("") && !currentNamespaceUri.equals(mainNamespace) && !schema.getElements().containsRow("")) continue; Element importElement = new Element("import", xsdNamespace); if (!currentNamespaceUri.equals("")) { Attribute namespaceAttr = new Attribute("namespace", currentNamespaceUri); importElement.setAttribute(namespaceAttr); } if (fileNameGenerator != null && !configuration.getSkipNamespaces().contains(currentNamespaceUri)) { String fileName = fileNameGenerator.getSchemaDocumentFileName(currentNamespaceUri, namespaceURIToPrefixMappings); Attribute schemaLocationAttr = new Attribute("schemaLocation", fileName); importElement.setAttribute(schemaLocationAttr); } elementSchema.addContent(importElement); } if (!targetNamespace.equals("")) { Attribute 
targetNamespaceAttr = new Attribute("targetNamespace", targetNamespace); elementSchema.setAttribute(targetNamespaceAttr); } SortedSet<SimpleType> sortedSimpleTypes = new TreeSet<>(new SimpleTypeComparator()); sortedSimpleTypes.addAll(schema.getSimpleTypes().values()); SortedSet<ComplexType> sortedComplexTypes = new TreeSet<>(new ComplexTypeComparator()); sortedComplexTypes.addAll(schema.getComplexTypes().values()); //CONTINUE FROM HERE: Generate sorted sets for SchemaElement and SchemaAttribute objects and use them where needed. Attribute elementFormDefault = new Attribute("elementFormDefault", "qualified"); elementSchema.setAttribute(elementFormDefault); Document resultingDocument = new Document(elementSchema); if (targetNamespace.equals(mainNamespace)) { //First, we declare global SimpleTypes. //If simpleTypesGlobal is true, any enumeration will be declared as a global simple type. //if not, simple types of complex types which have attributes but not children will be declared globally //(due to limitations of XSD, they may not be declared locally together with the attributes info) if (configuration.getSimpleTypesGlobal()) { for (SimpleType simpleType : sortedSimpleTypes) { if (!simpleType.isEnum() || simpleType.isEmpty()) continue; Element simpleTypeElement = generateSimpleType(simpleType, false, configuration, xsdNamespace); elementSchema.addContent(simpleTypeElement); } } else { for (ComplexType complexType : sortedComplexTypes) { SimpleType simpleType = complexType.getTextSimpleType(); if (complexType.getAttributeList().isEmpty() || !(complexType.getAutomaton().nodeCount() == 0) || !simpleType.isEnum() || simpleType.isEmpty()) continue; Element simpleTypeElement = generateSimpleType(simpleType, false, configuration, xsdNamespace); elementSchema.addContent(simpleTypeElement); } } //Global complexType elements are only generated in the main schema (i.e. 
the one whose targetNamespace is equal to mainNamespace) if (configuration.getComplexTypesGlobal()) { for (ComplexType complexType : sortedComplexTypes) { boolean hasNoChildren = complexType.getRegularExpression().equals(new EmptyRegularExpression()); boolean hasNoAttributes = complexType.getAttributeList().size() == 0; boolean hasNoComments = complexType.getComments().size() == 0; // boolean simpleTypeIsNotEmpty = !complexType.getTextSimpleType().isEmpty(); boolean simpleTypeIsWhiteSpaceOnlyOrEmpty = !(complexType.getTextSimpleType().isEmpty() || complexType.getTextSimpleType().consistOnlyOfWhitespaceCharacters()); if (hasNoChildren && hasNoAttributes && simpleTypeIsWhiteSpaceOnlyOrEmpty && hasNoComments) continue; //Because the elements which are linked to this ComplexType at our internal model //will be linked to an XSD simple type elsewhere, either a builtin or a custom one. Element complexTypeElement = generateComplexType(configuration, complexType, false, targetNamespace, namespaceURIToPrefixMappings, mainNamespace, xsdNamespace); elementSchema.addContent(complexTypeElement); } } } //If there are many namespaces and the workaround is disabled, we must declare global attributes. //If the targetNamespace is not the mainNamespace, we must declare all the attributes. //if the target namespace is the main namespace, we do not need to declare anything, because the complex types which hold the attributes //are also in the main namespace. 
if ((namespaceURIToPrefixMappings.size() - configuration.getSkipNamespaces().size()) > 1) { SortedMap<String, SchemaAttribute> globalAttributeCandidates = new TreeMap<>( schema.getAttributes().row(targetNamespace)); if (!targetNamespace.equals(mainNamespace) && !targetNamespace.equals("")) { globalAttributesLoop: for (Map.Entry<String, SchemaAttribute> schemaAttributeEntry : globalAttributeCandidates .entrySet()) { SchemaAttribute schemaAttribute = schemaAttributeEntry.getValue(); //First, we check if the attribute has been already declared when the workaround is disabled. //If so, we update the "use" property. //The type should have been already merged. if (!configuration.getStrictValidRootDefinitionWorkaround()) { List<Element> alreadyGeneratedAttributeElements = elementSchema.getChildren("attribute", xsdNamespace); for (int i = 0; i < alreadyGeneratedAttributeElements.size(); i++) { Element currentAttributeElement = alreadyGeneratedAttributeElements.get(i); if (currentAttributeElement.getAttributeValue("name") .equals(schemaAttribute.getName())) { continue globalAttributesLoop; } } } Element attributeOrAttributeGroupElement = generateAttribute(schemaAttribute, true, configuration, namespaceURIToPrefixMappings, targetNamespace, mainNamespace, schemaAttributeEntry.getKey(), xsdNamespace); elementSchema.addContent(attributeOrAttributeGroupElement); } } } //Now, we declare global elements. //An element will be declared globally if and only if: //1-elementsGlobal is true in the configuration //2-The element is a valid root //3-The element is in a namespace other than the main namespace. Note that the element WILL be surrounded by the corresponding group if the workaround is enabled. 
//Another important remark: Iterating over a set copy implies iterating over DISTINCT SchemaElements, so if two keys pointed to equal SchemaElements, we would generate it only once- SortedSet<SchemaElement> schemaElementsAtTargetNamespace = new TreeSet<>(new SchemaElementComparator()); schemaElementsAtTargetNamespace.addAll(schema.getElements().row(targetNamespace).values()); globalSchemaElementsLoop: for (SchemaElement schemaElement : schemaElementsAtTargetNamespace) { // if(!configuration.getElementsGlobal()&& // !schemaElement.isValidRoot()&& // (targetNamespace.equals(mainNamespace)||configuration.getStrictValidRootDefinitionWorkaround())) if (!configuration.getElementsGlobal() && !schemaElement.isValidRoot() && (targetNamespace.equals(mainNamespace))) continue; // for(Element currentElement:elementSchema.getContent(Filters.element("element",xsdNamespace))){ // if(schemaElement.getName().equals(currentElement.getAttributeValue("name"))) // continue globalSchemaElementsLoop; // } String possibleGroupName = schemaElement.getName() + configuration.getTypeNamesAncestorsSeparator() + schemaElement.getType().getName(); for (Element currentElement : elementSchema.getContent(Filters.element("group", xsdNamespace))) { if (possibleGroupName.equals(currentElement.getAttributeValue("name"))) continue globalSchemaElementsLoop; } Element elementOrGroupElement = generateElement(schemaElement, true, configuration, targetNamespace, mainNamespace, null, namespaceURIToPrefixMappings, xsdNamespace); if (elementOrGroupElement.getName().equals("element")) { for (Element currentElement : elementSchema.getChildren("element", xsdNamespace)) { if (schemaElement.getName().equals(currentElement.getAttributeValue("name"))) continue globalSchemaElementsLoop; } } elementSchema.addContent(elementOrGroupElement); } return resultingDocument; }
From source file:esiptestbed.mudrod.ontology.pre.AggregateTriples.java
License:Apache License
/**
 * Extracts the subClassOf and equivalentClass triples from the OWL document
 * and writes one comma-separated line per triple to the output writer.
 *
 * <p>For every {@code owl:Class} child of the root node the class name is
 * read from {@code rdf:about}, falling back to {@code rdf:ID}. A
 * {@code rdfs:subClassOf} without an {@code rdf:resource} attribute is
 * resolved through its {@code allValuesFrom} child if one is found, and is
 * skipped otherwise.
 *
 * @throws IOException if writing a triple fails
 */
public void getAllClass() throws IOException {
    // Look the namespaces up once instead of on every attribute access.
    final Namespace owl = Namespace.getNamespace("owl", owl_namespace);
    final Namespace rdf = Namespace.getNamespace("rdf", rdf_namespace);
    final Namespace rdfs = Namespace.getNamespace("rdfs", rdfs_namespace);

    for (Element classElement : rootNode.getChildren("Class", owl)) {
        String className = classElement.getAttributeValue("about", rdf);
        if (className == null) {
            className = classElement.getAttributeValue("ID", rdf);
        }

        for (Element subclassElement : classElement.getChildren("subClassOf", rdfs)) {
            String subclassName = subclassElement.getAttributeValue("resource", rdf);
            if (subclassName == null) {
                Element allValuesFromEle = findChild("allValuesFrom", subclassElement);
                if (allValuesFromEle == null) {
                    // Nothing to relate this class to.
                    continue;
                }
                subclassName = allValuesFromEle.getAttributeValue("resource", rdf);
            }
            bw.write(cutString(className) + ",SubClassOf," + cutString(subclassName) + "\n");
        }

        for (Element equalClassElement : classElement.getChildren("equivalentClass", owl)) {
            String equalClassElementName = equalClassElement.getAttributeValue("resource", rdf);
            if (equalClassElementName != null) {
                bw.write(cutString(className) + ",equivalentClass," + cutString(equalClassElementName) + "\n");
            }
        }
    }
}
From source file:eu.himeros.hocr.FlatXml.java
License:Open Source License
/**
 * Reads the hOCR file {@code inFile}, flattens its structure and writes the
 * result to {@code outFile} as pretty-printed XHTML.
 *
 * <p>The head and body are moved into the XHTML namespace, a title "ocr" is
 * appended to the head, and the body content is replaced by a single
 * {@code div class="ocr_page"}. Inside that page the {@code ocr_carea} blocks
 * and their paragraphs are represented only by XML comments, while the line
 * elements are copied as direct children of the page.
 *
 * @param inFile the hOCR input file
 * @param outFile the file the flattened document is written to
 * @throws Exception if the input cannot be parsed or the output cannot be written
 */
private void init(File inFile, File outFile) throws Exception {
    SAXBuilder builder = new SAXBuilder();
    Document doc = builder.build(inFile);
    Element root = doc.getRootElement();
    Namespace oldns = root.getNamespace();
    Element newRoot = new Element("html", "http://www.w3.org/1999/xhtml");
    Namespace xmlns = newRoot.getNamespace();

    // Check for null BEFORE touching head; a document without <head> is tolerated.
    Element head = root.getChild("head", oldns);
    if (head != null) {
        head.setNamespace(xmlns);
        for (Element child : head.getChildren()) {
            child.setNamespace(xmlns);
        }
        Element title = new Element("title", xmlns);
        title.addContent("ocr");
        head.addContent(title);
    }

    // Same for body: without a <body> there is nothing to flatten.
    Element body = root.getChild("body", oldns);
    if (body != null) {
        body.setNamespace(xmlns);
        Element page = new Element("div", xmlns);
        page.setAttribute("class", "ocr_page");
        page.setAttribute("id", "i" + inFile.getName().substring(1).replace(".html", ".png"));

        XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@class='ocr_carea']",
                Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
        List<Element> careaElL = xpath.evaluate(body);
        for (Element careaEl : careaElL) {
            // The block and paragraph wrappers survive only as comments.
            page.addContent(new Comment("<div class=\"" + careaEl.getAttributeValue("class") + "\" title=\""
                    + careaEl.getAttributeValue("title") + "\">"));
            for (Element pEl : careaEl.getChildren()) {
                page.addContent(new Comment("<p>"));
                for (Element lineEl : pEl.getChildren()) {
                    lineEl.removeAttribute("id");
                    lineEl.setNamespace(xmlns);
                    for (Element child : lineEl.getChildren()) {
                        child.removeAttribute("id");
                        child.removeAttribute("lang");
                        child.removeAttribute("lang", xmlns);
                        child.setNamespace(xmlns);
                    }
                    // Clone because the line is still attached to its paragraph.
                    page.addContent(lineEl.clone());
                }
                page.addContent(new Comment("</p>"));
            }
            page.addContent(new Comment("</div>"));
        }
        body.removeContent();
        body.addContent(page);
    }

    newRoot.addContent(root.removeContent());
    doc.detachRootElement();
    doc.setRootElement(newRoot);

    XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
    // Close the writer deterministically and encode explicitly as UTF-8 rather
    // than with the platform default charset.
    try (BufferedWriter writer = java.nio.file.Files.newBufferedWriter(outFile.toPath(),
            java.nio.charset.StandardCharsets.UTF_8)) {
        xmlOutputter.output(doc, writer);
    }
}
From source file:eu.himeros.hocr.HocrInfoAggregator.java
License:Open Source License
/**
 * Annotates every XHTML {@code span} that has a non-empty {@code uc}
 * attribute with its occurrence count and, when that count is exactly one,
 * with its anchor.
 *
 * <p>The {@code occ} attribute is always set from {@code occHm}. For a count
 * of one, {@code anchor} and {@code anchor-id} are copied from the matching
 * entry in {@code nearGtHm}; for spans of class {@code CORRWORD} or
 * {@code UCWORD} the entry's {@code text} attribute followed by
 * {@code "\u261a "} is also prepended to the span's {@code title}.
 */
private void updateElements() {
    xpath = XPathFactory.instance().compile("//ns:span[@uc!='']", Filters.element(), null,
            Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    List<Element> elements = xpath.evaluate(root);
    for (Element element : elements) {
        String uc = element.getAttributeValue("uc");
        element.setAttribute("occ", "" + occHm.get(uc));
        try {
            if (occHm.get(uc) == 1) {
                element.setAttribute("anchor", nearGtHm.get(uc).getAttributeValue("uc"));
                element.setAttribute("anchor-id", nearGtHm.get(uc).getAttributeValue("id"));
                String cssClass = element.getAttributeValue("class");
                if ("CORRWORD".equals(cssClass) || "UCWORD".equals(cssClass)) {
                    String title = element.getAttributeValue("title");
                    title = nearGtHm.get(uc).getAttributeValue("text") + "\u261a " + title;
                    element.setAttribute("title", title);
                }
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: a span without an occurrence count or without
            // usable anchor data is left as it is and processing moves on to the next span.
        }
    }
}
From source file:eu.himeros.hocr.HocrInfoAggregator.java
License:Open Source License
/**
 * Aligns the OCR spans lying between two anchored spans with the
 * corresponding ground-truth range.
 *
 * <p>All XHTML {@code span} elements with an {@code id} are visited in
 * document order. Spans without an {@code anchor-id} are collected, except
 * those whose {@code uc} attribute is empty. When a span with an
 * {@code anchor-id} is reached, the collected spans are aligned against the
 * ground-truth elements from {@code start} to {@code anchor-id - 1} (at
 * least 1), the collection is reset and {@code start} moves to two past that
 * end.
 */
public void alignToGroundTruth() {
    ArrayList<Element> pendingOcr = new ArrayList<>();
    ArrayList<Element> nearGroundTruth;
    int start = 1;
    int end;
    xpath = XPathFactory.instance().compile("//ns:span[@id]", Filters.element(), null,
            Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    List<Element> spans = xpath.evaluate(root);
    for (Element span : spans) {
        String anchorId = span.getAttributeValue("anchor-id");
        if (anchorId != null) {
            // The range ends just before the anchor, but never before position 1.
            end = Math.max(1, Integer.parseInt(anchorId) - 1);
            nearGroundTruth = makeNearGtAl(start, end);
            makeAlignment(pendingOcr, nearGroundTruth);
            pendingOcr = new ArrayList<>();
            start = end + 2;
            continue;
        }
        if (!"".equals(span.getAttributeValue("uc"))) {
            pendingOcr.add(span);
        }
    }
}
From source file:eu.himeros.hocr.HocrInfoAggregator.java
License:Open Source License
private void makeCompliantHocr() { xpath = XPathFactory.instance().compile("//ns:span[@id|@idx]", Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml")); List<Element> elements = xpath.evaluate(root); int spanId = 0; for (Element span : elements) { if (span.getAttribute("idx") != null) { try { span = span.getChildren().get(0); } catch (Exception ex) { // }/* ww w . j av a 2s . co m*/ } LinkedList<Attribute> attributeLl = new LinkedList(span.getParentElement().getAttributes()); attributeLl.addFirst(new Attribute("id", "w_" + spanId++)); span.getParentElement().setAttributes(attributeLl); String[] suggestions = null; String title = span.getAttributeValue("title"); if (title != null) { suggestions = title.split(" "); } if (suggestions == null) { suggestions = new String[] { "" }; } Element ins = new Element("ins", xmlns); ins.setAttribute("class", "alt"); ins.setAttribute("title", makeNlp(span.getAttributeValue("class"))); ins.setText(span.getText()); span.removeContent(); span.addContent(ins); span.setAttribute("class", "alternatives"); span.removeAttribute("uc"); span.removeAttribute("occ"); span.removeAttribute("title"); span.removeAttribute("anchor"); span.removeAttribute("anchor-id"); span.removeAttribute("id"); span.getParentElement().removeAttribute("idx"); span.removeAttribute("whole"); span.getParentElement().removeAttribute("whole"); if (title == null || "".equals(title)) { continue; } double score = 0.90; for (String suggestion : suggestions) { if (suggestion == null || "".equals(suggestion)) { continue; } Element del = new Element("del", xmlns); del.setAttribute("title", "nlp " + String.format("%.2f", score).replaceAll(",", ".")); score = score - 0.01; suggestion = suggestion.replaceAll(l1PunctMarkFilter, ""); Matcher leftMatcher = l1LeftPunctMarkPattern.matcher(ins.getText()); if (leftMatcher.matches()) { suggestion = leftMatcher.group(1) + suggestion; } Matcher rightMatcher = l1RightPunctMarkPattern.matcher(ins.getText()); 
if (rightMatcher.matches()) { String ngtSymbol = ""; if (suggestion.endsWith("\u261a")) { ngtSymbol = "\u261a"; suggestion = suggestion.substring(0, suggestion.length() - 1); } suggestion = suggestion + rightMatcher.group(1) + ngtSymbol; } ///!!!! if (suggestion.endsWith("\u261a") && ins.getParentElement().getParentElement() .getAttributeValue("lang", Namespace.XML_NAMESPACE) != null) { String buff = suggestion.substring(0, suggestion.length() - 1); sa.align(buff, ins.getText()); double sim = 1 - sa.getEditDistance() / Math.max((double) buff.length(), (double) ins.getText().length()); if (sim > 0.6) { suggestion = ins.getText() + "\u261b"; ins.setText(buff); ins.setAttribute("title", "nlp 0.70"); } } del.addContent(suggestion); span.addContent(del); } } }
From source file:eu.himeros.hocr.NgtMaker.java
License:Open Source License
/**
 * Parses the given hOCR file and writes the resulting fragment.
 *
 * <p>After {@code adjustFile(file)} the parser state is reset, the output
 * file name is derived by replacing the last four characters of the absolute
 * path with {@code "ngt.xml"}, and every XHTML {@code span} of class
 * {@code ocr_word} is passed to {@code parseOcrWord}. Two {@code "%%%"}
 * entries are then appended to {@code ocrAl} before {@code findAnchors()} and
 * {@code writeFragment(start, end)} are called.
 *
 * @param file the hOCR file to parse
 * @throws Exception if the file cannot be adjusted, parsed or written
 */
public void parseDoc(File file) throws Exception {
    adjustFile(file);

    // Reset the state left over from a previous document.
    start = -1;
    end = -1;
    prevValue = -1;
    ocrAl = new ArrayList<>(1000);

    final String absolutePath = file.getAbsolutePath();
    outFileName = absolutePath.substring(0, absolutePath.length() - 4) + "ngt.xml";

    builder = new SAXBuilder();
    doc = builder.build(file);
    root = doc.getRootElement();
    xmlns = root.getNamespace();

    final Namespace xhtml = Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml");
    xpath = XPathFactory.instance().compile("//ns:span[@class='ocr_word']", Filters.element(), null, xhtml);
    final List<Element> ocrWords = xpath.evaluate(root);
    for (Element ocrWord : ocrWords) {
        parseOcrWord(ocrWord);
    }

    ocrAl.add("%%%");
    ocrAl.add("%%%");
    findAnchors();
    writeFragment(start, end);
}