Here you can find the source of normaliseConll(String input)
Parameter | Description |
---|---|
input | a CoNLL sentence: one token per line, columns separated by tabs, with the word form in the second column |
public static String normaliseConll(String input)
//package com.java2s;
/*
 * Copyright (C) 2015 ikonstas
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/**
 * String helpers for CoNLL-formatted sentences, where each line is one token
 * and the columns of a token are separated by tabs.
 */
public class Main {

    /** Separator between the tokens (lines) of a CoNLL sentence. */
    private static final String TOKEN_SEPARATOR = "\n";

    /** Separator between the columns of a single token line. */
    private static final String COLUMN_SEPARATOR = "\t";

    /** Zero-based index of the column holding the word form. */
    private static final int WORD_COLUMN = 1;

    /** Word forms that are dropped wherever they occur in the sentence. */
    private static final List<String> DISCARDED_WORDS =
            Arrays.asList("``", "`", "''", "{", "}", "(", ")");

    /** Single punctuation characters that are kept even in sentence-final position. */
    private static final List<String> KEPT_FINAL_PUNCTUATION = Arrays.asList("%", ":", ",");

    /** Matches a word consisting of exactly one ASCII punctuation character. */
    private static final Pattern SINGLE_PUNCTUATION = Pattern.compile("\\p{Punct}");

    /**
     * Removes quotation marks and brackets from anywhere in a CoNLL sentence
     * and strips sentence-final punctuation tokens. According to the original
     * author these tokens are discarded by the PLTAG parser.
     *
     * <p>A final token is stripped only when its word is a single punctuation
     * character other than {@code %}, {@code :} and {@code ,}; stripping
     * repeats until the last token no longer qualifies. Lines with fewer than
     * two columns carry no word and are left untouched. When no token
     * survives, the empty string is returned.
     *
     * @param input the sentence, one token per line with tab-separated columns
     *              and the word form in the second column; must not be null
     * @return the remaining token lines joined by newlines, possibly empty
     * @throws NullPointerException if {@code input} is null
     */
    public static String normaliseConll(String input) {
        List<String> tokens = unpackConllSentence(input);
        for (Iterator<String> iter = tokens.iterator(); iter.hasNext();) {
            if (DISCARDED_WORDS.contains(wordOf(iter.next()))) {
                iter.remove();
            }
        }
        // Guard on emptiness: a sentence made only of discardable tokens must
        // not make us read past the front of the list.
        while (!tokens.isEmpty()
                && isStrippableFinalWord(wordOf(tokens.get(tokens.size() - 1)))) {
            tokens.remove(tokens.size() - 1);
        }
        return repackConllSentence(tokens);
    }

    /** Returns the word column of a token line, or null if the line has fewer than two columns. */
    private static String wordOf(String token) {
        String[] columns = token.split(COLUMN_SEPARATOR);
        return columns.length > WORD_COLUMN ? columns[WORD_COLUMN] : null;
    }

    /** Tells whether a sentence-final word is a single punctuation character that should be dropped. */
    private static boolean isStrippableFinalWord(String word) {
        return word != null
                && SINGLE_PUNCTUATION.matcher(word).matches()
                && !KEPT_FINAL_PUNCTUATION.contains(word);
    }

    /**
     * Splits a CoNLL sentence into its token lines.
     *
     * @param input the sentence text; must not be null
     * @return a mutable list with one entry per line
     */
    public static List<String> unpackConllSentence(String input) {
        return unpack(input, TOKEN_SEPARATOR);
    }

    /**
     * Joins token lines back into a single CoNLL sentence.
     *
     * @param input the token lines; must not be null
     * @return the lines joined by newlines, or the empty string for an empty list
     */
    public static String repackConllSentence(List<String> input) {
        return repack(input, TOKEN_SEPARATOR);
    }

    /**
     * Splits a string into a mutable list of parts.
     *
     * <p>The delimiter is handed to {@link String#split(String)} and is
     * therefore interpreted as a regular expression; trailing empty parts are
     * dropped by that method.
     *
     * @param input     the string to split; must not be null
     * @param delimiter the delimiter, as a regular expression
     * @return a new, modifiable list of the parts
     */
    public static List<String> unpack(String input, String delimiter) {
        return new ArrayList<String>(Arrays.asList(input.split(delimiter)));
    }

    /**
     * Joins the elements of a list, placing the delimiter between neighbours.
     *
     * <p>Unlike {@link #unpack(String, String)}, the delimiter is used
     * literally here.
     *
     * @param input     the elements to join; must not be null
     * @param delimiter the text inserted between consecutive elements
     * @return the joined string, or the empty string for an empty list
     */
    public static String repack(List<String> input, String delimiter) {
        if (input.isEmpty()) {
            return "";
        }
        StringBuilder str = new StringBuilder(input.get(0));
        for (int i = 1; i < input.size(); i++) {
            str.append(delimiter).append(input.get(i));
        }
        return str.toString();
    }

    /**
     * Copies a primitive int array into a list of boxed integers.
     *
     * @param ar the array to copy; must not be null
     * @return a new, modifiable list holding the array's values in order
     */
    public static List<Integer> asList(int[] ar) {
        List<Integer> list = new ArrayList<Integer>(ar.length);
        for (int a : ar) {
            list.add(a);
        }
        return list;
    }
}