Java tutorial
/******************************************************************************* * Copyright (c) 2012 Gyrgy Orosz, Attila Novk. * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Lesser Public License v3 * which accompanies this distribution, and is available at * http://www.gnu.org/licenses/ * * This file is part of PurePos. * * PurePos is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PurePos is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * Contributors: * Gyrgy Orosz - initial API and implementation ******************************************************************************/ package hu.ppke.itk.nlpg.purepos.model.internal; import hu.ppke.itk.nlpg.purepos.model.ISuffixGuesser; import hu.ppke.itk.nlpg.purepos.model.SuffixTree; import java.util.HashMap; import org.apache.commons.lang3.tuple.MutablePair; /** * Suffix tree implementation, representing nodes in a hash table. Edges are not * stored, they can be easily constant time calculated from the strings. * * @author Gyrgy Orosz * * @param <T> * Stored tags type */ public class HashSuffixTree<T> extends SuffixTree<String, T> { /** * */ private static final long serialVersionUID = 3763884096384725634L; /** * A node is: (suffix, ((tag, tag count), suffix count)) */ protected HashMap<String, MutablePair<HashMap<T, Integer>, Integer>> representation = new HashMap<String, MutablePair<HashMap<T, Integer>, Integer>>(); protected Integer totalTagCount = 0; public HashSuffixTree(int maxSuffixLength) { super(maxSuffixLength); } @Override public void addWord(String word, T tag, int count) { int end = word.length(); int start = Math.max(0, end - maxSuffixLength); for (int pointer = start; pointer <= end; pointer++) { String suffix = word.substring(pointer); increment(suffix, tag, count); } totalTagCount += count; } @Override public void addWord(String word, T tag, int count, int minLen) { int end = word.length() - minLen; int start = Math.max(0, end - maxSuffixLength); for (int pointer = start; pointer <= end; pointer++) { String suffix = word.substring(pointer); increment(suffix, tag, count); } totalTagCount += count; } protected void increment(String suffix, T tag, int count) { if (representation.containsKey(suffix)) { MutablePair<HashMap<T, Integer>, Integer> value = representation.get(suffix); HashMap<T, Integer> tagCounts = value.getLeft(); if (tagCounts.containsKey(tag)) { tagCounts.put(tag, tagCounts.get(tag) + count); } else { tagCounts.put(tag, count); } value.setRight(value.getRight() + count); } else { HashMap<T, Integer> tagCounts = new HashMap<T, Integer>(); tagCounts.put(tag, count); MutablePair<HashMap<T, Integer>, Integer> value = new MutablePair<HashMap<T, Integer>, Integer>( tagCounts, count); representation.put(suffix, value); } } @Override public ISuffixGuesser<String, T> createGuesser(double theta // Map<T, Double> aprioriProbs ) { return new HashSuffixGuesser<T>(representation, // aprioriProbs, theta); } }