Example usage for org.jdom2 Element getChildText

List of usage examples for org.jdom2 Element getChildText

Introduction

On this page you can find example usages of org.jdom2 Element.getChildText.

Prototype

public String getChildText(final String cname) 

Document

Returns the textual content of the named child element, or null if there's no such child.
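
Before the full examples below, here is a minimal, self-contained sketch of the call; the file path "example.xml" and the child name "title" are illustrative placeholders, not taken from the examples on this page:

import java.io.IOException;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;

public class GetChildTextExample {
    public static void main(String[] args) throws JDOMException, IOException {
        // Parse an XML file; "example.xml" is a placeholder path.
        Document doc = new SAXBuilder().build("example.xml");
        Element root = doc.getRootElement();
        // getChildText returns the text of the named child, or null if no such child exists.
        String title = root.getChildText("title");
        if (title != null) {
            System.out.println("title: " + title);
        } else {
            System.out.println("no <title> child under <" + root.getName() + ">");
        }
    }
}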

Usage

From source file: es.ehu.si.ixa.pipe.convert.Convert.java

License: Apache License

public void absaSemEvalToNER(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {

            StringBuilder sb = new StringBuilder();
            String sentString = sent.getChildText("text");
            sb = sb.append(sentString);
            Element aspectTerms = sent.getChild("aspectTerms");
            if (aspectTerms != null) {
                List<List<Integer>> offsetList = new ArrayList<List<Integer>>();
                List<Integer> offsets = new ArrayList<Integer>();
                List<Element> aspectTermList = aspectTerms.getChildren();
                if (!aspectTermList.isEmpty()) {
                    for (Element aspectElem : aspectTermList) {
                        Integer offsetFrom = Integer.parseInt(aspectElem.getAttributeValue("from"));
                        Integer offsetTo = Integer.parseInt(aspectElem.getAttributeValue("to"));
                        offsets.add(offsetFrom);
                        offsets.add(offsetTo);
                    }
                }
                Collections.sort(offsets);
                for (int i = 0; i < offsets.size(); i++) {
                    List<Integer> offsetArray = new ArrayList<Integer>();
                    offsetArray.add(offsets.get(i++));
                    if (offsets.size() > i) {
                        offsetArray.add(offsets.get(i));
                    }
                    offsetList.add(offsetArray);
                }
                int counter = 0;
                for (List<Integer> offsetSent : offsetList) {
                    Integer offsetFrom = offsetSent.get(0);
                    Integer offsetTo = offsetSent.get(1);
                    String aspectString = sentString.substring(offsetFrom, offsetTo);
                    sb.replace(offsetFrom + counter, offsetTo + counter,
                            "<START:term> " + aspectString + " <END>");
                    counter += 19;
                }
            }
            System.out.println(sb.toString());
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file: es.ehu.si.ixa.pipe.convert.Convert.java

License: Apache License

public void absaSemEvalToNER2015(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {

            String sentString = sent.getChildText("text");
            StringBuilder sb = new StringBuilder();
            sb = sb.append(sentString);
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement != null) {
                List<List<Integer>> offsetList = new ArrayList<List<Integer>>();
                List<Integer> offsets = new ArrayList<Integer>();
                List<Element> oteList = opinionsElement.getChildren();
                for (Element aspectElem : oteList) {
                    if (!aspectElem.getAttributeValue("target").equals("NULL")) {
                        Integer offsetFrom = Integer.parseInt(aspectElem.getAttributeValue("from"));
                        Integer offsetTo = Integer.parseInt(aspectElem.getAttributeValue("to"));
                        offsets.add(offsetFrom);
                        offsets.add(offsetTo);
                    }
                }
                List<Integer> offsetsWithoutDuplicates = new ArrayList<Integer>(new HashSet<Integer>(offsets));
                Collections.sort(offsetsWithoutDuplicates);

                for (int i = 0; i < offsetsWithoutDuplicates.size(); i++) {
                    List<Integer> offsetArray = new ArrayList<Integer>();
                    offsetArray.add(offsetsWithoutDuplicates.get(i++));
                    if (offsetsWithoutDuplicates.size() > i) {
                        offsetArray.add(offsetsWithoutDuplicates.get(i));
                    }
                    offsetList.add(offsetArray);
                }
                int counter = 0;
                for (List<Integer> offsetSent : offsetList) {
                    Integer offsetFrom = offsetSent.get(0);
                    Integer offsetTo = offsetSent.get(1);
                    String aspectString = sentString.substring(offsetFrom, offsetTo);
                    sb.replace(offsetFrom + counter, offsetTo + counter,
                            "<START:target> " + aspectString + " <END>");
                    counter += 21;
                }
                System.out.println(sb.toString());
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file: es.ehu.si.ixa.pipe.convert.Convert.java

License: Apache License

public void absaSemEvalToMultiClassNER2015(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {

            String sentString = sent.getChildText("text");
            StringBuilder sb = new StringBuilder();
            sb = sb.append(sentString);
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement != null) {
                List<List<Integer>> offsetList = new ArrayList<List<Integer>>();
                HashSet<String> targetClassSet = new LinkedHashSet<String>();
                List<Integer> offsets = new ArrayList<Integer>();
                List<Element> opinionList = opinionsElement.getChildren();
                for (Element opinion : opinionList) {
                    if (!opinion.getAttributeValue("target").equals("NULL")) {
                        String className = opinion.getAttributeValue("category");
                        String targetString = opinion.getAttributeValue("target");
                        Integer offsetFrom = Integer.parseInt(opinion.getAttributeValue("from"));
                        Integer offsetTo = Integer.parseInt(opinion.getAttributeValue("to"));
                        offsets.add(offsetFrom);
                        offsets.add(offsetTo);
                        targetClassSet.add(targetString + "JAR!" + className + opinion.getAttributeValue("from")
                                + opinion.getAttributeValue("to"));
                    }
                }
                List<Integer> offsetsWithoutDuplicates = new ArrayList<Integer>(new HashSet<Integer>(offsets));
                Collections.sort(offsetsWithoutDuplicates);
                List<String> targetClassList = new ArrayList<String>(targetClassSet);

                for (int i = 0; i < offsetsWithoutDuplicates.size(); i++) {
                    List<Integer> offsetArray = new ArrayList<Integer>();
                    offsetArray.add(offsetsWithoutDuplicates.get(i++));
                    if (offsetsWithoutDuplicates.size() > i) {
                        offsetArray.add(offsetsWithoutDuplicates.get(i));
                    }
                    offsetList.add(offsetArray);
                }
                int counter = 0;
                for (int i = 0; i < offsetList.size(); i++) {
                    Integer offsetFrom = offsetList.get(i).get(0);
                    Integer offsetTo = offsetList.get(i).get(1);
                    String className = targetClassList.get(i);
                    String aspectString = sentString.substring(offsetFrom, offsetTo);
                    sb.replace(offsetFrom + counter, offsetTo + counter, "<START:"
                            + className.split("JAR!")[1].substring(0, 3) + "> " + aspectString + " <END>");
                    counter += 18;
                }
                System.out.println(sb.toString());
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file: es.ehu.si.ixa.pipe.convert.Convert.java

License: Apache License

public void absaSemEvalText(Reader reader) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(reader);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            String sentString = sent.getChildText("text");
            System.out.println(sentString);
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file: es.ehu.si.ixa.pipe.convert.Convert.java

License: Apache License

public String absa15testToNAF(String fileName) {
    KAFDocument kaf = new KAFDocument("en", "v1.naf");
    Segmenter segmenter = new Segmenter();
    TokenFactory tokenFactory = new TokenFactory();
    Properties properties = setAnnotateProperties();
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        int counter = 1;
        for (Element sent : sentences) {
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            StringReader stringReader = new StringReader(sentString);
            BufferedReader breader = new BufferedReader(stringReader);
            IxaPipeTokenizer<Token> tokenizer = new IxaPipeTokenizer<Token>(breader, tokenFactory, properties);
            List<Token> tokens = tokenizer.tokenize();
            List<List<Token>> segmentedSentences = segmenter.segment(tokens);
            for (List<Token> sentence : segmentedSentences) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.value(), token.startOffset(), counter);
                    wf.setXpath(sentId);
                }
            }
            counter++;
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    return kaf.toString();
}

From source file: eu.knux.passmanager.helper.FileHelper.java

License: Apache License

public static LinkedHashMap<String, Category> loadPassword(File f) {
    SAXBuilder builder = new SAXBuilder();
    Element racine = null;
    LinkedHashMap<String, Category> categoriesReturned = new LinkedHashMap<>();
    try {
        Document doc = builder.build(f);
        racine = doc.getRootElement();
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }

    if (racine != null) {
        List<Element> categories = racine.getChildren("category");
        categoriesReturned.put("root", new Category("root"));
        for (Element e : categories) {
            String name = e.getAttributeValue("name");
            List<Element> passes = e.getChildren("password");
            for (Element e2 : passes) {
                Category currCate = null;
                if (name != null && !categoriesReturned.containsKey(name)) {
                    categoriesReturned.put(name, new Category(name));
                }
                currCate = (name == null) ? categoriesReturned.get("root") : categoriesReturned.get(name);

                Password p = new Password();
                p.setName(e2.getChildText("name"));
                p.setPass(e2.getChildText("pass"));
                p.setComment(e2.getChildText("comment"));
                p.setEncrypted(true);
                currCate.addPassword(p);
            }
        }
    }

    return categoriesReturned;
}

From source file: eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License: Apache License

private static void absa2015ToNAFNER(KAFDocument kaf, String fileName, String language) {
    // reading the ABSA xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        // naf sentence counter
        int counter = 1;
        for (Element sent : sentences) {
            List<Integer> wfFromOffsets = new ArrayList<>();
            List<Integer> wfToOffsets = new ArrayList<>();
            List<WF> sentWFs = new ArrayList<>();
            List<Term> sentTerms = new ArrayList<>();
            // sentence id and original text
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            // the list contains just one list of tokens
            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentString, language);
            for (List<Token> sentence : segmentedSentence) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter);
                    wf.setXpath(sentId);
                    final List<WF> wfTarget = new ArrayList<>();
                    wfTarget.add(wf);
                    wfFromOffsets.add(wf.getOffset());
                    wfToOffsets.add(wf.getOffset() + wf.getLength());
                    sentWFs.add(wf);
                    Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
                    term.setPos("O");
                    term.setLemma(token.getTokenValue());
                    sentTerms.add(term);
                }
            }
            counter++;
            String[] tokenIds = new String[sentWFs.size()];
            for (int i = 0; i < sentWFs.size(); i++) {
                tokenIds[i] = sentWFs.get(i).getId();
            }
            // going through every opinion element for each sentence
            // each opinion element can contain one or more opinions
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement != null) {
                // iterating over every opinion in the opinions element
                List<Element> opinionList = opinionsElement.getChildren();
                for (Element opinion : opinionList) {
                    String category = opinion.getAttributeValue("category");
                    String targetString = opinion.getAttributeValue("target");
                    System.err.println("-> " + category + ", " + targetString);
                    // adding OTE
                    if (!targetString.equalsIgnoreCase("NULL")) {
                        int fromOffset = Integer.parseInt(opinion.getAttributeValue("from"));
                        int toOffset = Integer.parseInt(opinion.getAttributeValue("to"));
                        int startIndex = -1;
                        int endIndex = -1;
                        for (int i = 0; i < wfFromOffsets.size(); i++) {
                            if (wfFromOffsets.get(i) == fromOffset) {
                                startIndex = i;
                            }
                        }
                        for (int i = 0; i < wfToOffsets.size(); i++) {
                            if (wfToOffsets.get(i) == toOffset) {
                                // span is +1 with respect to the last token of the span
                                endIndex = i + 1;
                            }
                        }
                        // TODO remove this condition to correct manually offsets
                        if (startIndex != -1 && endIndex != -1) {
                            List<String> wfIds = Arrays
                                    .asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex));
                            List<String> wfTermIds = NAFUtils.getWFIdsFromTerms(sentTerms);
                            if (NAFUtils.checkTermsRefsIntegrity(wfIds, wfTermIds)) {
                                List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
                                ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
                                List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
                                references.add(neSpan);
                                Entity neEntity = kaf.newEntity(references);
                                neEntity.setType(category);
                            }
                        }
                    }
                }
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file: eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License: Apache License

public static String absa2015ToWFs(String fileName, String language) {
    KAFDocument kaf = new KAFDocument("en", "v1.naf");
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        int counter = 1;
        for (Element sent : sentences) {
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            List<List<Token>> segmentedSentences = StringUtils.tokenizeSentence(sentString, language);
            for (List<Token> sentence : segmentedSentences) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter);
                    wf.setXpath(sentId);
                }
            }
            counter++;
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    return kaf.toString();
}

From source file: eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License: Apache License

public static String absa2015ToDocCatFormatForPolarity(String fileName, String language, int windowMin,
        int windowMax) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    Document doc = null;
    String text = "";

    try {
        doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        for (Element sent : sentences) {
            Element opinionsElement = sent.getChild("Opinions");
            String sentStringTmp = sent.getChildText("text");

            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentStringTmp, language);
            List<Token> sentence = segmentedSentence.get(0);

            if (opinionsElement != null) {
                // iterating over every opinion in the opinions element
                List<Element> opinionList = opinionsElement.getChildren();

                for (Element opinion : opinionList) {

                    String sentString = "";

                    String targetString = opinion.getAttributeValue("target");
                    String polarityString = opinion.getAttributeValue("polarity");

                    if (targetString.equalsIgnoreCase("NULL") || opinionList.size() == 1) {
                        for (Token token : sentence) {
                            sentString += token.getTokenValue() + " ";
                        }
                        text += polarityString + "\t" + sentString + "\n";
                    } else {
                        int posTargetMin = -1;
                        int posTargetMax = -1;
                        // List<String> itemsTarget = Arrays.asList(targetString.split(" "));
                        List<List<Token>> segmentedtarget = StringUtils.tokenizeSentence(targetString,
                                language);
                        List<Token> target = segmentedtarget.get(0);
                        String targetMin = target.get(0).getTokenValue();
                        String targetMax = target.get(target.size() - 1).getTokenValue();
                        int count = 0;
                        for (Token token : sentence) {
                            if (token.getTokenValue().equals(targetMin)) {
                                posTargetMin = count;
                            }
                            if (token.getTokenValue().equals(targetMax) && posTargetMin > -1) {
                                posTargetMax = count;
                                break;
                            }
                            count++;
                        }
                        if (posTargetMin - windowMin >= 0) {
                            posTargetMin = posTargetMin - windowMin;
                        } else
                            posTargetMin = 0;
                        if (posTargetMax + windowMax < sentence.size()) {
                            posTargetMax = posTargetMax + windowMax;
                        } else
                            posTargetMax = sentence.size() - 1;
                        for (int x = posTargetMin; x <= posTargetMax; x++) {
                            sentString += sentence.get(x).getTokenValue() + " ";
                        }
                        text += polarityString + "\t" + sentString + "\n";
                    }
                }

            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }

    return text;
}

From source file: eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License: Apache License

private static void absa2014ToNAFNER(KAFDocument kaf, String fileName, String language) {
    // reading the ABSA xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        // naf sentence counter
        int counter = 1;
        for (Element sent : sentences) {
            List<Integer> wfFromOffsets = new ArrayList<>();
            List<Integer> wfToOffsets = new ArrayList<>();
            List<WF> sentWFs = new ArrayList<>();
            List<Term> sentTerms = new ArrayList<>();
            // sentence id and original text
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            // the list contains just one list of tokens
            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentString, language);
            for (List<Token> sentence : segmentedSentence) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter);
                    wf.setXpath(sentId);
                    final List<WF> wfTarget = new ArrayList<WF>();
                    wfTarget.add(wf);
                    wfFromOffsets.add(wf.getOffset());
                    wfToOffsets.add(wf.getOffset() + wf.getLength());
                    sentWFs.add(wf);
                    Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
                    term.setPos("O");
                    term.setLemma(token.getTokenValue());
                    sentTerms.add(term);
                }
            }
            counter++;
            String[] tokenIds = new String[sentWFs.size()];
            for (int i = 0; i < sentWFs.size(); i++) {
                tokenIds[i] = sentWFs.get(i).getId();
            }
            // going through every opinion element for each sentence
            // each opinion element can contain one or more opinions
            Element aspectTermsElem = sent.getChild("aspectTerms");

            if (aspectTermsElem != null) {

                List<Element> aspectTermsList = aspectTermsElem.getChildren();
                // iterating over every opinion in the opinions element
                if (!aspectTermsList.isEmpty()) {
                    for (Element aspectTerm : aspectTermsList) {
                        // String targetString = aspectTerm.getAttributeValue("term");
                        // System.err.println("-> " + targetString);
                        // adding OTE
                        int fromOffset = Integer.parseInt(aspectTerm.getAttributeValue("from"));
                        int toOffset = Integer.parseInt(aspectTerm.getAttributeValue("to"));
                        int startIndex = -1;
                        int endIndex = -1;
                        for (int i = 0; i < wfFromOffsets.size(); i++) {
                            if (wfFromOffsets.get(i) == fromOffset) {
                                startIndex = i;
                            }
                        }
                        for (int i = 0; i < wfToOffsets.size(); i++) {
                            if (wfToOffsets.get(i) == toOffset) {
                                // span is +1 with respect to the last token of the span
                                endIndex = i + 1;
                            }
                        }
                        // TODO remove this condition to correct manually offsets
                        if (startIndex != -1 && endIndex != -1) {
                            List<String> wfIds = Arrays
                                    .asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex));
                            List<String> wfTermIds = NAFUtils.getWFIdsFromTerms(sentTerms);
                            if (NAFUtils.checkTermsRefsIntegrity(wfIds, wfTermIds)) {
                                List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
                                ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
                                List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
                                references.add(neSpan);
                                Entity neEntity = kaf.newEntity(references);
                                neEntity.setType("term");
                            }
                        }
                    }
                }
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}