List of usage examples for org.jdom2 Element getText
public String getText()
From source file:cz.muni.fi.mir.mathmlcanonicalization.modules.OperatorNormalizer.java
License:Apache License
/**
 * Tells whether the given operator element is considered spare.
 *
 * <p>An operator is spare when removal of empty operators is enabled and the
 * operator's untrimmed text is empty, or when its trimmed text is listed in
 * {@code spareOperators}.
 *
 * @param operator       the operator element to examine
 * @param spareOperators the texts of operators regarded as spare
 * @return {@code true} if the operator is spare, {@code false} otherwise
 */
private boolean isSpareOperator(final Element operator, final Collection<String> spareOperators) {
    assert operator != null && spareOperators != null && isOperator(operator);

    // The emptiness test deliberately looks at the raw (untrimmed) text.
    if (isEnabled(REMOVE_EMPTY_OPERATORS) && operator.getText().isEmpty()) {
        return true;
    }
    return spareOperators.contains(operator.getTextTrim());
}
From source file:cz.muni.fi.mir.mathmlcanonicalization.modules.OperatorNormalizer.java
License:Apache License
private void replaceIdentifiers(final Element ancestor, final Set<String> operators) { assert ancestor != null && operators != null; final List<Element> toReplace = new ArrayList<Element>(); for (Element element : ancestor.getDescendants(new ElementFilter(IDENTIFIER))) { // TODO: control whole ranges of symbols rather than listed ones if (operators.contains(element.getTextTrim())) { toReplace.add(element);// www .j ava2 s . co m } } for (Element element : toReplace) { LOGGER.log(Level.FINE, "Creating an operator from {0}", element.getText()); replaceElement(element, OPERATOR); } }
From source file:DataWeb.Code.java
License:Open Source License
/**
 * Builds a {@code Code} from its XML element.
 *
 * <p>The {@code id} is read from the element's attribute of the same name; the
 * remaining scalar fields come from the child elements {@code idProfesor},
 * {@code nombre}, {@code lenguaje} and {@code resaltar}. The text of every
 * {@code linea} child is collected into {@code linea} and the text of every
 * {@code idComentario} child into {@code comentario}.
 *
 * @param e  the XML element describing the code
 * @param ns the namespace used to look up the child elements
 */
public Code(Element e, Namespace ns) {
    this.id = e.getAttributeValue("id");
    this.idProfesor = e.getChildText("idProfesor", ns);
    this.nombre = e.getChildText("nombre", ns);
    this.lenguaje = e.getChildText("lenguaje", ns);
    this.resaltar = e.getChildText("resaltar", ns);

    this.linea = new ArrayList<String>();
    this.comentario = new ArrayList<String>();

    for (Element lineElement : e.getChildren("linea", ns)) {
        this.linea.add(lineElement.getText());
    }
    for (Element commentElement : e.getChildren("idComentario", ns)) {
        this.comentario.add(commentElement.getText());
    }
}
From source file:DataWeb.Course.java
License:Open Source License
/**
 * Builds a {@code Course} from its XML element.
 *
 * <p>The {@code id} is read from the element's attribute of the same name;
 * {@code idProfesor} and {@code nombre} come from the child elements with
 * those names. The text of every {@code idAlumno} child is collected into
 * {@code idAlumno} and the text of every {@code idCodigo} child into
 * {@code idCodigo}.
 *
 * @param e  the XML element describing the course
 * @param ns the namespace used to look up the child elements
 */
public Course(Element e, Namespace ns) {
    this.id = e.getAttributeValue("id");
    this.idProfesor = e.getChildText("idProfesor", ns);
    this.nombre = e.getChildText("nombre", ns);

    this.idAlumno = new ArrayList<String>();
    this.idCodigo = new ArrayList<String>();

    for (Element studentElement : e.getChildren("idAlumno", ns)) {
        this.idAlumno.add(studentElement.getText());
    }
    for (Element codeElement : e.getChildren("idCodigo", ns)) {
        this.idCodigo.add(codeElement.getText());
    }
}
From source file:DataWeb.User.java
License:Open Source License
/**
 * Builds a {@code User} from its XML element.
 *
 * <p>The {@code id} is read from the element's attribute of the same name.
 * The child elements {@code usuario}, {@code password}, {@code nombres},
 * {@code apPat}, {@code apMat} and {@code category} supply the scalar fields
 * (with {@code apPat}/{@code apMat} stored in {@code apPaterno}/
 * {@code apMaterno}). The text of every {@code idCurso} child is collected
 * into {@code idCurso}.
 *
 * @param e  the XML element describing the user
 * @param ns the namespace used to look up the child elements
 */
public User(Element e, Namespace ns) {
    this.id = e.getAttributeValue("id");
    this.usuario = e.getChildText("usuario", ns);
    this.password = e.getChildText("password", ns);
    this.nombres = e.getChildText("nombres", ns);
    this.apPaterno = e.getChildText("apPat", ns);
    this.apMaterno = e.getChildText("apMat", ns);
    this.category = e.getChildText("category", ns);

    this.idCurso = new ArrayList<String>();
    for (Element courseElement : e.getChildren("idCurso", ns)) {
        this.idCurso.add(courseElement.getText());
    }
}
From source file:dblp.xml.DBLPParserFirstSchema.java
private void extractElements(Element root, final String type) { try {/*from ww w .j a va 2 s. co m*/ // System.out.println("Root element :" + doc.getDocumentElement().getNodeName()); List<Element> nList = root.getChildren(type); // System.out.println(nList); System.out.println(type + " " + nList.size()); int index = 0; int ignore = 0; final Map<Integer, String> filteredConfs = readNodeFile(conf_filter_path); Set<String> filteredConfsSet = new HashSet<>(); filteredConfsSet.addAll(filteredConfs.values()); for (Element eElement : nList) { String booktitle = eElement.getChild("booktitle").getText(); //TODO: should comment out, now it doesn't filter conferences if (!filteredConfsSet.contains(booktitle)) { ignore++; continue; } String year = eElement.getChild("year").getText(); int year_int = Integer.parseInt(year.trim()); if (year_int < year_threshold) { ignore++; continue; } index++; if (index % 100 == 0) { System.out.println("Node#: " + index + " Ignore#:" + ignore); } String title = eElement.getChild("title").getText(); if (titleSet.contains(title)) { ignore++; continue; } titleSet.add(title); int titleId = titlesCollection.get(title); int yearId = yearsCollection.get(year); int confId = confsCollection.get(booktitle); title_conf_writer.write(titleId + " " + confId + "\n"); title_year_writer.write(titleId + " " + yearId + "\n"); final List<Element> authors = eElement.getChildren("author"); for (Element author : authors) { String author_str = author.getText(); int authorId = authorsCollection.get(author_str); title_author_writer.write(titleId + " " + authorId + "\n"); } } title_author_writer.close(); title_conf_writer.close(); title_year_writer.close(); } catch (IOException ex) { Logger.getLogger(DBLPParserFirstSchema.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:dblp.xml.DBLPParserSecondSchema.java
/**
 * Writes the conference-year, title-year, title-author and author-conference
 * edges for all children of {@code root} named {@code type}.
 *
 * <p>A record is skipped (and counted as ignored) when its {@code booktitle}
 * is not among the values returned by {@code readNodeFile(conf_filter_path)}
 * or when its {@code year} is below {@code year_threshold}. The year node is
 * keyed by the year text followed by a space and the booktitle. Progress is
 * printed every 100 accepted records.
 *
 * <p>The four writers are closed when the method finishes, whether it
 * completes normally or an exception is thrown part-way through, so that they
 * are not left open on failure. An {@link IOException} is logged and not
 * rethrown.
 *
 * <p>NOTE(review): every record is expected to have {@code booktitle},
 * {@code year} and {@code title} children; {@code getChild} yields
 * {@code null} for a missing child, which would surface here as a
 * {@link NullPointerException}.
 *
 * @param root the element whose children are processed
 * @param type the name of the child elements to process
 */
private void extractElements(Element root, final String type) {
    try {
        try {
            List<Element> nList = root.getChildren(type);
            System.out.println(type + " " + nList.size());
            int index = 0;
            int ignore = 0;
            final Map<Integer, String> filteredConfs = readNodeFile(conf_filter_path);
            final Set<String> filteredConfsSet = new HashSet<>(filteredConfs.values());
            for (Element eElement : nList) {
                String booktitle = eElement.getChild("booktitle").getText();
                if (!filteredConfsSet.contains(booktitle)) {
                    ignore++;
                    continue;
                }
                String year = eElement.getChild("year").getText();
                int year_int = Integer.parseInt(year.trim());
                if (year_int < year_threshold) {
                    ignore++;
                    continue;
                }
                index++;
                if (index % 100 == 0) {
                    System.out.println("Node#: " + index + " Ignore#:" + ignore);
                }
                String title = eElement.getChild("title").getText();
                int titleId = titlesCollection.get(title);
                int confId = confsCollection.get(booktitle);
                String yearConf = year + " " + booktitle;
                int yearConfId = yearConfsCollection.get(yearConf);
                conf_year_writer.write(confId + " " + yearConfId + "\n");
                title_year_writer.write(titleId + " " + yearConfId + "\n");
                final List<Element> authors = eElement.getChildren("author");
                for (Element author : authors) {
                    String author_str = author.getText();
                    int authorId = authorsCollection.get(author_str);
                    title_author_writer.write(titleId + " " + authorId + "\n");
                    author_conf_writer.write(authorId + " " + confId + "\n");
                }
            }
        } finally {
            // Nested finally blocks make sure each writer gets its close()
            // call even if closing an earlier one fails.
            try {
                title_author_writer.close();
            } finally {
                try {
                    author_conf_writer.close();
                } finally {
                    try {
                        title_year_writer.close();
                    } finally {
                        conf_year_writer.close();
                    }
                }
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(DBLPParserSecondSchema.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:dblp.xml.DBLPParserThirdSchema.java
private void extractElements(Element root, final String type) { try {/* w w w . ja v a 2 s.c o m*/ // System.out.println("Root element :" + doc.getDocumentElement().getNodeName()); List<Element> nList = root.getChildren(type); // System.out.println(nList); System.out.println(type + " " + nList.size()); int index = 0; int ignore = 0; final Map<Integer, String> filteredConfs = readNodeFile(conf_filter_path); Set<String> filteredConfsSet = new HashSet<>(); filteredConfsSet.addAll(filteredConfs.values()); for (Element eElement : nList) { String booktitle = eElement.getChild("booktitle").getText(); if (!filteredConfsSet.contains(booktitle)) { ignore++; continue; } String year = eElement.getChild("year").getText(); int year_int = Integer.parseInt(year.trim()); if (year_int < year_threshold) { ignore++; continue; } index++; if (index % 100 == 0) { System.out.println("Node#: " + index + " Ignore#:" + ignore); } String title = eElement.getChild("title").getText(); int titleId = titlesCollection.get(title); int confId = confsCollection.get(booktitle); String yearConf = year + " " + booktitle; int yearConfId = yearConfsCollection.get(yearConf); title_conf_writer.write(titleId + " " + confId + "\n"); conf_year_writer.write(confId + " " + yearConfId + "\n"); final List<Element> authors = eElement.getChildren("author"); for (Element author : authors) { String author_str = author.getText(); int authorId = authorsCollection.get(author_str); title_author_writer.write(titleId + " " + authorId + "\n"); } } title_author_writer.close(); title_conf_writer.close(); conf_year_writer.close(); } catch (IOException ex) { Logger.getLogger(DBLPParserThirdSchema.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:de.andreasschoknecht.LS3.PNMLReader.java
License:Open Source License
/** * Creates the term lists for a process model (LS3Document) in a model collection. Adds the terms to the document itself as Bag-of-Words and adds the terms to * the HashSet of terms of the document collection. This method is used when parsing a document collection. * * @param labels The labels contained in the PNML file * @param ls3Document The LS3Document representation of the PNML file for updating the term list of the document * @param documentCollection The DocumentCollection for updating the term list of the whole collection * @throws IOException if stop word file could not be read *//*ww w . ja va 2 s .c o m*/ private void createTermLists(List<Object> labels, LS3Document ls3Document, DocumentCollection documentCollection) throws IOException { initializeWordList(); ArrayList<String> tokens = new ArrayList<String>(); String label = ""; for (Object temp : labels) { Element value = (Element) temp; label = label + value.getText() + " "; } PTBTokenizer<CoreLabel> ptbt = new PTBTokenizer<>(new StringReader(label), new CoreLabelTokenFactory(), "untokenizable=allKeep"); while (ptbt.hasNext()) { tokens.add(ptbt.next().value()); } for (int i = 0, j = tokens.size(); i < j; i++) { String bereinigt = tokens.get(i).toLowerCase(); // Clear tokens of empty tokens, stop words, and automatic tool labels if (!bereinigt.matches("(p|t)*([0-9]+)") && !stopwords.contains(bereinigt) && !bereinigt.equals("")) { String term = bereinigt.replaceAll("[0-9]+", ""); ls3Document.addTerm(stemString(term)); documentCollection.addTerm(stemString(term)); } } }
From source file:de.andreasschoknecht.LS3.PNMLReader.java
License:Open Source License
/** * Creates the term list for a process model (LS3Document). It only adds the terms to the document itself as Bag-of-Words. * This method is used when parsing a query model. * * @param labels The labels contained in the PNML file * @param ls3Document The LS3Document representation of the PNML file for updating the term list of the document * @throws IOException if stop word file could not be read *//* w w w . j a v a 2 s.c om*/ private void createTermLists(List<Object> labels, LS3Document ls3Document) throws IOException { initializeWordList(); ArrayList<String> tokens = new ArrayList<String>(); String label = ""; for (Object temp : labels) { Element value = (Element) temp; label = label + value.getText() + " "; } PTBTokenizer<CoreLabel> ptbt = new PTBTokenizer<>(new StringReader(label), new CoreLabelTokenFactory(), "untokenizable=allKeep"); while (ptbt.hasNext()) { tokens.add(ptbt.next().value()); } for (int i = 0, j = tokens.size(); i < j; i++) { String bereinigt = tokens.get(i).toLowerCase(); // Clear tokens of empty tokens, stop words, and automatic tool labels if (!bereinigt.matches("(p|t)*([0-9]+)") && !stopwords.contains(bereinigt) && !bereinigt.equals("")) { String term = bereinigt.replaceAll("[0-9]+", ""); ls3Document.addTerm(stemString(term)); } } }