List of usage examples for org.jdom2 Element.getChildText
public String getChildText(final String cname)
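Before the longer real-world examples below, here is a minimal, self-contained sketch of the call itself; the XML snippet and element names are invented purely for illustration. getChildText returns the text content of the named child element, or null when no such child exists.

import java.io.IOException;
import java.io.StringReader;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;

public class GetChildTextDemo {
    public static void main(String[] args) throws JDOMException, IOException {
        // Hypothetical XML, only to illustrate the call.
        String xml = "<sentence id=\"s1\"><text>The fish was fresh.</text></sentence>";
        Document doc = new SAXBuilder().build(new StringReader(xml));
        Element sentence = doc.getRootElement();
        // Text of the named child element, or null when the child is absent.
        String text = sentence.getChildText("text");        // "The fish was fresh."
        String missing = sentence.getChildText("missing");  // null
        System.out.println(text);
        System.out.println(missing);
    }
}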
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
License: Apache License

public void absaSemEvalToNER(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            StringBuilder sb = new StringBuilder();
            String sentString = sent.getChildText("text");
            sb = sb.append(sentString);
            Element aspectTerms = sent.getChild("aspectTerms");
            if (aspectTerms != null) {
                List<List<Integer>> offsetList = new ArrayList<List<Integer>>();
                List<Integer> offsets = new ArrayList<Integer>();
                List<Element> aspectTermList = aspectTerms.getChildren();
                if (!aspectTermList.isEmpty()) {
                    for (Element aspectElem : aspectTermList) {
                        Integer offsetFrom = Integer.parseInt(aspectElem.getAttributeValue("from"));
                        Integer offsetTo = Integer.parseInt(aspectElem.getAttributeValue("to"));
                        offsets.add(offsetFrom);
                        offsets.add(offsetTo);
                    }
                }
                Collections.sort(offsets);
                for (int i = 0; i < offsets.size(); i++) {
                    List<Integer> offsetArray = new ArrayList<Integer>();
                    offsetArray.add(offsets.get(i++));
                    if (offsets.size() > i) {
                        offsetArray.add(offsets.get(i));
                    }
                    offsetList.add(offsetArray);
                }
                int counter = 0;
                for (List<Integer> offsetSent : offsetList) {
                    Integer offsetFrom = offsetSent.get(0);
                    Integer offsetTo = offsetSent.get(1);
                    String aspectString = sentString.substring(offsetFrom, offsetTo);
                    sb.replace(offsetFrom + counter, offsetTo + counter,
                            "<START:term> " + aspectString + " <END>");
                    counter += 19;
                }
            }
            System.out.println(sb.toString());
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
License: Apache License

public void absaSemEvalToNER2015(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            String sentString = sent.getChildText("text");
            StringBuilder sb = new StringBuilder();
            sb = sb.append(sentString);
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement != null) {
                List<List<Integer>> offsetList = new ArrayList<List<Integer>>();
                List<Integer> offsets = new ArrayList<Integer>();
                List<Element> oteList = opinionsElement.getChildren();
                for (Element aspectElem : oteList) {
                    if (!aspectElem.getAttributeValue("target").equals("NULL")) {
                        Integer offsetFrom = Integer.parseInt(aspectElem.getAttributeValue("from"));
                        Integer offsetTo = Integer.parseInt(aspectElem.getAttributeValue("to"));
                        offsets.add(offsetFrom);
                        offsets.add(offsetTo);
                    }
                }
                List<Integer> offsetsWithoutDuplicates = new ArrayList<Integer>(new HashSet<Integer>(offsets));
                Collections.sort(offsetsWithoutDuplicates);
                for (int i = 0; i < offsetsWithoutDuplicates.size(); i++) {
                    List<Integer> offsetArray = new ArrayList<Integer>();
                    offsetArray.add(offsetsWithoutDuplicates.get(i++));
                    if (offsetsWithoutDuplicates.size() > i) {
                        offsetArray.add(offsetsWithoutDuplicates.get(i));
                    }
                    offsetList.add(offsetArray);
                }
                int counter = 0;
                for (List<Integer> offsetSent : offsetList) {
                    Integer offsetFrom = offsetSent.get(0);
                    Integer offsetTo = offsetSent.get(1);
                    String aspectString = sentString.substring(offsetFrom, offsetTo);
                    sb.replace(offsetFrom + counter, offsetTo + counter,
                            "<START:target> " + aspectString + " <END>");
                    counter += 21;
                }
                System.out.println(sb.toString());
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
License: Apache License

public void absaSemEvalToMultiClassNER2015(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            String sentString = sent.getChildText("text");
            StringBuilder sb = new StringBuilder();
            sb = sb.append(sentString);
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement != null) {
                List<List<Integer>> offsetList = new ArrayList<List<Integer>>();
                HashSet<String> targetClassSet = new LinkedHashSet<String>();
                List<Integer> offsets = new ArrayList<Integer>();
                List<Element> opinionList = opinionsElement.getChildren();
                for (Element opinion : opinionList) {
                    if (!opinion.getAttributeValue("target").equals("NULL")) {
                        String className = opinion.getAttributeValue("category");
                        String targetString = opinion.getAttributeValue("target");
                        Integer offsetFrom = Integer.parseInt(opinion.getAttributeValue("from"));
                        Integer offsetTo = Integer.parseInt(opinion.getAttributeValue("to"));
                        offsets.add(offsetFrom);
                        offsets.add(offsetTo);
                        targetClassSet.add(targetString + "JAR!" + className
                                + opinion.getAttributeValue("from") + opinion.getAttributeValue("to"));
                    }
                }
                List<Integer> offsetsWithoutDuplicates = new ArrayList<Integer>(new HashSet<Integer>(offsets));
                Collections.sort(offsetsWithoutDuplicates);
                List<String> targetClassList = new ArrayList<String>(targetClassSet);
                for (int i = 0; i < offsetsWithoutDuplicates.size(); i++) {
                    List<Integer> offsetArray = new ArrayList<Integer>();
                    offsetArray.add(offsetsWithoutDuplicates.get(i++));
                    if (offsetsWithoutDuplicates.size() > i) {
                        offsetArray.add(offsetsWithoutDuplicates.get(i));
                    }
                    offsetList.add(offsetArray);
                }
                int counter = 0;
                for (int i = 0; i < offsetList.size(); i++) {
                    Integer offsetFrom = offsetList.get(i).get(0);
                    Integer offsetTo = offsetList.get(i).get(1);
                    String className = targetClassList.get(i);
                    String aspectString = sentString.substring(offsetFrom, offsetTo);
                    sb.replace(offsetFrom + counter, offsetTo + counter,
                            "<START:" + className.split("JAR!")[1].substring(0, 3) + "> " + aspectString + " <END>");
                    counter += 18;
                }
                System.out.println(sb.toString());
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
License: Apache License

public void absaSemEvalText(Reader reader) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(reader);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            String sentString = sent.getChildText("text");
            System.out.println(sentString);
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}
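The ABSA files parsed in these examples carry no XML namespace, so the plain getChildText("text") lookup succeeds. For namespaced documents JDOM2 also provides the getChildText(String cname, Namespace ns) overload; below is a minimal sketch, assuming an invented namespace URI and document.

import java.io.IOException;
import java.io.StringReader;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Namespace;
import org.jdom2.input.SAXBuilder;

public class NamespacedChildTextDemo {
    public static void main(String[] args) throws JDOMException, IOException {
        // Invented namespace URI and document, only to illustrate the overload.
        String xml = "<sentence xmlns=\"http://example.org/absa\">"
                + "<text>Service was slow.</text></sentence>";
        Document doc = new SAXBuilder().build(new StringReader(xml));
        Element sentence = doc.getRootElement();
        Namespace ns = Namespace.getNamespace("http://example.org/absa");
        // Without the namespace the child is not found and null comes back.
        System.out.println(sentence.getChildText("text"));      // null
        // With the namespace the child's text is returned.
        System.out.println(sentence.getChildText("text", ns));  // Service was slow.
    }
}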
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
License: Apache License

public String absa15testToNAF(String fileName) {
    KAFDocument kaf = new KAFDocument("en", "v1.naf");
    Segmenter segmenter = new Segmenter();
    TokenFactory tokenFactory = new TokenFactory();
    Properties properties = setAnnotateProperties();
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        int counter = 1;
        for (Element sent : sentences) {
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            StringReader stringReader = new StringReader(sentString);
            BufferedReader breader = new BufferedReader(stringReader);
            IxaPipeTokenizer<Token> tokenizer = new IxaPipeTokenizer<Token>(breader, tokenFactory, properties);
            List<Token> tokens = tokenizer.tokenize();
            List<List<Token>> segmentedSentences = segmenter.segment(tokens);
            for (List<Token> sentence : segmentedSentences) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.value(), token.startOffset(), counter);
                    wf.setXpath(sentId);
                }
            }
            counter++;
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    return kaf.toString();
}
From source file:eu.knux.passmanager.helper.FileHelper.java
License: Apache License

public static LinkedHashMap<String, Category> loadPassword(File f) {
    SAXBuilder builder = new SAXBuilder();
    Element racine = null;
    LinkedHashMap<String, Category> categoriesReturned = new LinkedHashMap<>();
    try {
        Document doc = builder.build(f);
        racine = doc.getRootElement();
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    if (racine != null) {
        List<Element> categories = racine.getChildren("category");
        categoriesReturned.put("root", new Category("root"));
        for (Element e : categories) {
            String name = e.getAttributeValue("name");
            List<Element> passes = e.getChildren("password");
            for (Element e2 : passes) {
                Category currCate = null;
                if (name != null && !categoriesReturned.containsKey(name)) {
                    categoriesReturned.put(name, new Category(name));
                }
                currCate = (name == null) ? categoriesReturned.get("root") : categoriesReturned.get(name);
                Password p = new Password();
                p.setName(e2.getChildText("name"));
                p.setPass(e2.getChildText("pass"));
                p.setComment(e2.getChildText("comment"));
                p.setEncrypted(true);
                currCate.addPassword(p);
            }
        }
    }
    return categoriesReturned;
}
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License: Apache License

private static void absa2015ToNAFNER(KAFDocument kaf, String fileName, String language) {
    // reading the ABSA xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        // naf sentence counter
        int counter = 1;
        for (Element sent : sentences) {
            List<Integer> wfFromOffsets = new ArrayList<>();
            List<Integer> wfToOffsets = new ArrayList<>();
            List<WF> sentWFs = new ArrayList<>();
            List<Term> sentTerms = new ArrayList<>();
            // sentence id and original text
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            // the list contains just one list of tokens
            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentString, language);
            for (List<Token> sentence : segmentedSentence) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter);
                    wf.setXpath(sentId);
                    final List<WF> wfTarget = new ArrayList<>();
                    wfTarget.add(wf);
                    wfFromOffsets.add(wf.getOffset());
                    wfToOffsets.add(wf.getOffset() + wf.getLength());
                    sentWFs.add(wf);
                    Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
                    term.setPos("O");
                    term.setLemma(token.getTokenValue());
                    sentTerms.add(term);
                }
            }
            counter++;
            String[] tokenIds = new String[sentWFs.size()];
            for (int i = 0; i < sentWFs.size(); i++) {
                tokenIds[i] = sentWFs.get(i).getId();
            }
            // going through every opinion element for each sentence
            // each opinion element can contain one or more opinions
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement != null) {
                // iterating over every opinion in the opinions element
                List<Element> opinionList = opinionsElement.getChildren();
                for (Element opinion : opinionList) {
                    String category = opinion.getAttributeValue("category");
                    String targetString = opinion.getAttributeValue("target");
                    System.err.println("-> " + category + ", " + targetString);
                    // adding OTE
                    if (!targetString.equalsIgnoreCase("NULL")) {
                        int fromOffset = Integer.parseInt(opinion.getAttributeValue("from"));
                        int toOffset = Integer.parseInt(opinion.getAttributeValue("to"));
                        int startIndex = -1;
                        int endIndex = -1;
                        for (int i = 0; i < wfFromOffsets.size(); i++) {
                            if (wfFromOffsets.get(i) == fromOffset) {
                                startIndex = i;
                            }
                        }
                        for (int i = 0; i < wfToOffsets.size(); i++) {
                            if (wfToOffsets.get(i) == toOffset) {
                                // span is +1 with respect to the last token of the span
                                endIndex = i + 1;
                            }
                        }
                        // TODO remove this condition to correct manually offsets
                        if (startIndex != -1 && endIndex != -1) {
                            List<String> wfIds = Arrays.asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex));
                            List<String> wfTermIds = NAFUtils.getWFIdsFromTerms(sentTerms);
                            if (NAFUtils.checkTermsRefsIntegrity(wfIds, wfTermIds)) {
                                List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
                                ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
                                List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
                                references.add(neSpan);
                                Entity neEntity = kaf.newEntity(references);
                                neEntity.setType(category);
                            }
                        }
                    }
                }
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License: Apache License

public static String absa2015ToWFs(String fileName, String language) {
    KAFDocument kaf = new KAFDocument("en", "v1.naf");
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        int counter = 1;
        for (Element sent : sentences) {
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            List<List<Token>> segmentedSentences = StringUtils.tokenizeSentence(sentString, language);
            for (List<Token> sentence : segmentedSentences) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter);
                    wf.setXpath(sentId);
                }
            }
            counter++;
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    return kaf.toString();
}
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License: Apache License

public static String absa2015ToDocCatFormatForPolarity(String fileName, String language, int windowMin,
        int windowMax) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    Document doc = null;
    String text = "";
    try {
        doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            Element opinionsElement = sent.getChild("Opinions");
            String sentStringTmp = sent.getChildText("text");
            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentStringTmp, language);
            List<Token> sentence = segmentedSentence.get(0);
            if (opinionsElement != null) {
                // iterating over every opinion in the opinions element
                List<Element> opinionList = opinionsElement.getChildren();
                for (Element opinion : opinionList) {
                    String sentString = "";
                    String targetString = opinion.getAttributeValue("target");
                    String polarityString = opinion.getAttributeValue("polarity");
                    if (targetString.equalsIgnoreCase("NULL") || opinionList.size() == 1) {
                        for (Token token : sentence) {
                            sentString += token.getTokenValue() + " ";
                        }
                        text += polarityString + "\t" + sentString + "\n";
                    } else {
                        int posTargetMin = -1;
                        int posTargetMax = -1;
                        // List<String> itemsTarget = Arrays.asList(targetString.split(" "));
                        List<List<Token>> segmentedtarget = StringUtils.tokenizeSentence(targetString, language);
                        List<Token> target = segmentedtarget.get(0);
                        String targetMin = target.get(0).getTokenValue();
                        String targetMax = target.get(target.size() - 1).getTokenValue();
                        int count = 0;
                        for (Token token : sentence) {
                            if (token.getTokenValue().equals(targetMin)) {
                                posTargetMin = count;
                            }
                            if (token.getTokenValue().equals(targetMax) && posTargetMin > -1) {
                                posTargetMax = count;
                                break;
                            }
                            count++;
                        }
                        if (posTargetMin - windowMin >= 0) {
                            posTargetMin = posTargetMin - windowMin;
                        } else {
                            posTargetMin = 0;
                        }
                        if (posTargetMax + windowMax < sentence.size()) {
                            posTargetMax = posTargetMax + windowMax;
                        } else {
                            posTargetMax = sentence.size() - 1;
                        }
                        for (int x = posTargetMin; x <= posTargetMax; x++) {
                            sentString += sentence.get(x).getTokenValue() + " ";
                        }
                        text += polarityString + "\t" + sentString + "\n";
                    }
                }
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    return text;
}
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License: Apache License

private static void absa2014ToNAFNER(KAFDocument kaf, String fileName, String language) {
    // reading the ABSA xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        // naf sentence counter
        int counter = 1;
        for (Element sent : sentences) {
            List<Integer> wfFromOffsets = new ArrayList<>();
            List<Integer> wfToOffsets = new ArrayList<>();
            List<WF> sentWFs = new ArrayList<>();
            List<Term> sentTerms = new ArrayList<>();
            // sentence id and original text
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            // the list contains just one list of tokens
            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentString, language);
            for (List<Token> sentence : segmentedSentence) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter);
                    wf.setXpath(sentId);
                    final List<WF> wfTarget = new ArrayList<WF>();
                    wfTarget.add(wf);
                    wfFromOffsets.add(wf.getOffset());
                    wfToOffsets.add(wf.getOffset() + wf.getLength());
                    sentWFs.add(wf);
                    Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
                    term.setPos("O");
                    term.setLemma(token.getTokenValue());
                    sentTerms.add(term);
                }
            }
            counter++;
            String[] tokenIds = new String[sentWFs.size()];
            for (int i = 0; i < sentWFs.size(); i++) {
                tokenIds[i] = sentWFs.get(i).getId();
            }
            // going through the aspectTerms element of each sentence
            // each aspectTerms element can contain one or more aspect terms
            Element aspectTermsElem = sent.getChild("aspectTerms");
            if (aspectTermsElem != null) {
                List<Element> aspectTermsList = aspectTermsElem.getChildren();
                // iterating over every aspect term in the aspectTerms element
                if (!aspectTermsList.isEmpty()) {
                    for (Element aspectTerm : aspectTermsList) {
                        // String targetString = aspectTerm.getAttributeValue("term");
                        // System.err.println("-> " + targetString);
                        // adding OTE
                        int fromOffset = Integer.parseInt(aspectTerm.getAttributeValue("from"));
                        int toOffset = Integer.parseInt(aspectTerm.getAttributeValue("to"));
                        int startIndex = -1;
                        int endIndex = -1;
                        for (int i = 0; i < wfFromOffsets.size(); i++) {
                            if (wfFromOffsets.get(i) == fromOffset) {
                                startIndex = i;
                            }
                        }
                        for (int i = 0; i < wfToOffsets.size(); i++) {
                            if (wfToOffsets.get(i) == toOffset) {
                                // span is +1 with respect to the last token of the span
                                endIndex = i + 1;
                            }
                        }
                        // TODO remove this condition to correct manually offsets
                        if (startIndex != -1 && endIndex != -1) {
                            List<String> wfIds = Arrays.asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex));
                            List<String> wfTermIds = NAFUtils.getWFIdsFromTerms(sentTerms);
                            if (NAFUtils.checkTermsRefsIntegrity(wfIds, wfTermIds)) {
                                List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
                                ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
                                List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
                                references.add(neSpan);
                                Entity neEntity = kaf.newEntity(references);
                                neEntity.setType("term");
                            }
                        }
                    }
                }
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}