Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package opennlp.tools.postag; import java.io.Serializable; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; import opennlp.tools.tokenize.WhitespaceTokenizer; import opennlp.tools.util.InvalidFormatException; /** * Represents an pos-tagged sentence. */ public class POSSample implements Serializable { private List<String> sentence; private List<String> tags; private final String[][] additionalContext; public POSSample(String[] sentence, String[] tags) { this(sentence, tags, null); } public POSSample(List<String> sentence, List<String> tags) { this(sentence, tags, null); } public POSSample(List<String> sentence, List<String> tags, String[][] additionalContext) { this.sentence = Collections.unmodifiableList(sentence); this.tags = Collections.unmodifiableList(tags); checkArguments(); String[][] ac; if (additionalContext != null) { ac = new String[additionalContext.length][]; for (int i = 0; i < additionalContext.length; i++) { ac[i] = new String[additionalContext[i].length]; System.arraycopy(additionalContext[i], 0, ac[i], 0, additionalContext[i].length); } } else { ac = null; } this.additionalContext = ac; } public POSSample(String[] sentence, String[] tags, String[][] additionalContext) { this(Arrays.asList(sentence), Arrays.asList(tags), additionalContext); } private void checkArguments() { if (sentence.size() != tags.size()) { throw new IllegalArgumentException("There must be exactly one tag for each token. tokens: " + sentence.size() + ", tags: " + tags.size()); } if (sentence.contains(null)) { throw new IllegalArgumentException("null elements are not allowed in sentence tokens!"); } if (tags.contains(null)) { throw new IllegalArgumentException("null elements are not allowed in tags!"); } } public String[] getSentence() { return sentence.toArray(new String[sentence.size()]); } public String[] getTags() { return tags.toArray(new String[tags.size()]); } public String[][] getAddictionalContext() { return this.additionalContext; } @Override public String toString() { StringBuilder result = new StringBuilder(); for (int i = 0; i < getSentence().length; i++) { result.append(getSentence()[i]); result.append('_'); result.append(getTags()[i]); result.append(' '); } if (result.length() > 0) { // get rid of last space result.setLength(result.length() - 1); } return result.toString(); } public static POSSample parse(String sentenceString) throws InvalidFormatException { String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString); String[] sentence = new String[tokenTags.length]; String[] tags = new String[tokenTags.length]; for (int i = 0; i < tokenTags.length; i++) { int split = tokenTags[i].lastIndexOf("_"); if (split == -1) { throw new InvalidFormatException("Cannot find \"_\" inside token '" + tokenTags[i] + "'!"); } sentence[i] = tokenTags[i].substring(0, split); tags[i] = tokenTags[i].substring(split + 1); } return new POSSample(sentence, tags); } @Override public int hashCode() { return Objects.hash(Arrays.hashCode(getSentence()), Arrays.hashCode(getTags())); } @Override public boolean equals(Object obj) { if (obj == this) { return true; } if (obj instanceof POSSample) { POSSample a = (POSSample) obj; return Arrays.equals(getSentence(), a.getSentence()) && Arrays.equals(getTags(), a.getTags()); } return this == obj; } }