Java tutorial
/******************************************************************************* * Copyright (c) 2012 Gyrgy Orosz, Attila Novk. * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser Public License v3 * which accompanies this distribution, and is available at * http://www.gnu.org/licenses/ * * This file is part of PurePos. * * PurePos is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PurePos is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * Contributors: * Gyrgy Orosz - initial API and implementation ******************************************************************************/ package hu.ppke.itk.nlpg.purepos.common.lemma; import org.apache.commons.lang3.tuple.Pair; public class SuffixLemmaTransformation extends AbstractLemmaTransformation<Pair<String, Integer>> { private static final long serialVersionUID = 1160747425706872720L; public SuffixLemmaTransformation(String word, String lemma, Integer tag) { super(word, lemma, tag); } private static final int SHIFT = 100; @Override protected Pair<String, Integer> decode(String word, String stem, Integer tag) { int i; for (i = 0; i < word.length() && i < stem.length(); ++i) { if (word.charAt(i) != stem.charAt(i)) { break; } } String wordSuff = word.substring(i); int cutSize = wordSuff.length(); String lemmaSuff = stem.substring(i); int code = SHIFT * tag + cutSize; return Pair.of(lemmaSuff, code); } @Override protected Pair<String, Integer> encode(String word, Pair<String, Integer> representation) { int tagCode = representation.getRight() / SHIFT; int cutSize = representation.getRight() % SHIFT; String add = representation.getLeft(); String lemma = word.substring(0, word.length() - cutSize) + add; return Pair.of(lemma, tagCode); } @Override public int minimalCutLength() { return representation.getRight() % SHIFT; } }