// Java tutorial
/** * Copyright 2007 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cc.pp.analyzer.paoding.knife; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import cc.pp.analyzer.paoding.dictionary.BinaryDictionary; import cc.pp.analyzer.paoding.dictionary.Dictionary; import cc.pp.analyzer.paoding.dictionary.HashBinaryDictionary; import cc.pp.analyzer.paoding.dictionary.Hit; import cc.pp.analyzer.paoding.dictionary.Word; import cc.pp.analyzer.paoding.dictionary.support.detection.Detector; import cc.pp.analyzer.paoding.dictionary.support.detection.DifferenceListener; import cc.pp.analyzer.paoding.dictionary.support.detection.ExtensionFileFilter; import cc.pp.analyzer.paoding.dictionary.support.filewords.FileWordsReader; import cc.pp.analyzer.paoding.exception.PaodingAnalysisException; /** * ?,{@link CJKKnife}<br> * ????????????? * <p> * * @author Zhiliang Wang [qieqie.wang@gmail.com] * * @see CJKKnife * * @since 1.0 */ public class FileDictionaries implements Dictionaries { // ------------------------------------------------- protected Log log = LogFactory.getLog(this.getClass()); // ------------------------------------------------- /** * ? */ protected Dictionary vocabularyDictionary; /** * lantin+cjk? 
*/ protected Dictionary combinatoricsDictionary; /** * ? * */ protected Dictionary confucianFamilyNamesDictionary; /** * ? */ protected Dictionary noiseCharactorsDictionary; /** * ? * */ protected Dictionary noiseWordsDictionary; /** * ??? */ protected Dictionary unitsDictionary; // ------------------------------------------------- @SuppressWarnings("rawtypes") protected Map/* <String, Set<String>> */ allWords; protected String dicHome; protected String skipPrefix; protected String noiseCharactor; protected String noiseWord; protected String unit; protected String confucianFamilyName; protected String combinatorics; protected String charsetName; // ---------------------- public FileDictionaries() { } public FileDictionaries(String dicHome, String skipPrefix, String noiseCharactor, String noiseWord, String unit, String confucianFamilyName, String combinatorics, String charsetName) { this.dicHome = dicHome; this.skipPrefix = skipPrefix; this.noiseCharactor = noiseCharactor; this.noiseWord = noiseWord; this.unit = unit; this.confucianFamilyName = confucianFamilyName; this.combinatorics = combinatorics; this.charsetName = charsetName; } public String getDicHome() { return dicHome; } public void setDicHome(String dicHome) { this.dicHome = dicHome; } public String getSkipPrefix() { return skipPrefix; } public void setSkipPrefix(String skipPrefix) { this.skipPrefix = skipPrefix; } public String getNoiseCharactor() { return noiseCharactor; } public void setNoiseCharactor(String noiseCharactor) { this.noiseCharactor = noiseCharactor; } public String getNoiseWord() { return noiseWord; } public void setNoiseWord(String noiseWord) { this.noiseWord = noiseWord; } public String getUnit() { return unit; } public void setUnit(String unit) { this.unit = unit; } public String getConfucianFamilyName() { return confucianFamilyName; } public void setConfucianFamilyName(String confucianFamilyName) { this.confucianFamilyName = confucianFamilyName; } public String getCharsetName() { 
return charsetName; } public void setCharsetName(String charsetName) { this.charsetName = charsetName; } public void setLantinFllowedByCjk(String lantinFllowedByCjk) { this.combinatorics = lantinFllowedByCjk; } public String getLantinFllowedByCjk() { return combinatorics; } // ------------------------------------------------- /** * ? * * @return */ @Override public synchronized Dictionary getVocabularyDictionary() { if (vocabularyDictionary == null) { // 5639??0x2fff=x^13>8000>8000*0.75=6000>5639 vocabularyDictionary = new HashBinaryDictionary(getVocabularyWords(), 0x2fff, 0.75f); Dictionary noiseWordsDic = getNoiseWordsDictionary(); for (int i = 0; i < noiseWordsDic.size(); i++) { Hit hit = vocabularyDictionary.search(noiseWordsDic.get(i), 0, noiseWordsDic.get(i).length()); if (hit.isHit()) { hit.getWord().setNoiseWord(); } } Dictionary noiseCharactorsDic = getNoiseCharactorsDictionary(); for (int i = 0; i < noiseCharactorsDic.size(); i++) { Hit hit = vocabularyDictionary.search(noiseCharactorsDic.get(i), 0, noiseCharactorsDic.get(i).length()); if (hit.isHit()) { hit.getWord().setNoiseCharactor(); } } } return vocabularyDictionary; } /** * ? * * @return */ @Override public synchronized Dictionary getConfucianFamilyNamesDictionary() { if (confucianFamilyNamesDictionary == null) { confucianFamilyNamesDictionary = new BinaryDictionary(getConfucianFamilyNames()); } return confucianFamilyNamesDictionary; } /** * ? * * @return */ @Override public synchronized Dictionary getNoiseCharactorsDictionary() { if (noiseCharactorsDictionary == null) { noiseCharactorsDictionary = new HashBinaryDictionary(getNoiseCharactors(), 256, 0.75f); } return noiseCharactorsDictionary; } /** * ? * * @return */ @Override public synchronized Dictionary getNoiseWordsDictionary() { if (noiseWordsDictionary == null) { noiseWordsDictionary = new BinaryDictionary(getNoiseWords()); } return noiseWordsDictionary; } /** * ??? 
* * @return */ @Override public synchronized Dictionary getUnitsDictionary() { if (unitsDictionary == null) { unitsDictionary = new HashBinaryDictionary(getUnits(), 1024, 0.75f); } return unitsDictionary; } @Override public synchronized Dictionary getCombinatoricsDictionary() { if (combinatoricsDictionary == null) { combinatoricsDictionary = new BinaryDictionary(getCombinatoricsWords()); } return combinatoricsDictionary; } private Detector detector; @Override public synchronized void startDetecting(int interval, DifferenceListener l) { if (detector != null || interval < 0) { return; } Detector detector = new Detector(); detector.setHome(dicHome); detector.setFilter(new ExtensionFileFilter(".dic")); detector.setLastSnapshot(detector.flash()); detector.setListener(l); detector.setInterval(interval); detector.start(true); this.detector = detector; } @Override public synchronized void stopDetecting() { if (detector == null) { return; } detector.setStop(); detector = null; } /** * * @param dicName */ @SuppressWarnings({ "unchecked", "rawtypes" }) protected synchronized void refreshDicWords(String dicPath) { int index = dicPath.lastIndexOf(".dic"); String dicName = dicPath.substring(0, index); if (allWords != null) { try { Map/* <String, Set<String>> */ temp = FileWordsReader.readWords(dicHome + dicPath, charsetName); allWords.put(dicName, temp.values().iterator().next()); } catch (FileNotFoundException e) { // ???? allWords.remove(dicName); } catch (IOException e) { throw toRuntimeException(e); } if (!isSkipForVacabulary(dicName)) { this.vocabularyDictionary = null; } // ?noiseWord if (isNoiseWordDicFile(dicName)) { this.noiseWordsDictionary = null; // noiseWordvocabulary?vocabulary this.vocabularyDictionary = null; } // ?noiseCharactors else if (isNoiseCharactorDicFile(dicName)) { this.noiseCharactorsDictionary = null; // noiseCharactorsDictionaryvocabulary?vocabulary this.vocabularyDictionary = null; } // ?? 
else if (isUnitDicFile(dicName)) { this.unitsDictionary = null; } // ?? else if (isConfucianFamilyNameDicFile(dicName)) { this.confucianFamilyNamesDictionary = null; } // ??,?? else if (isLantinFollowedByCjkDicFile(dicName)) { this.combinatoricsDictionary = null; } } } // --------------------------------------------------------------- // ?-?package? @SuppressWarnings({ "unchecked", "rawtypes" }) protected Word[] getVocabularyWords() { Map/* <String, Set<Word>> */ dics = loadAllWordsIfNecessary(); Set/* <Word> */ set = null; Iterator/* <Word> */ iter = dics.keySet().iterator(); while (iter.hasNext()) { String name = (String) iter.next(); if (isSkipForVacabulary(name)) { continue; } Set/* <Word> */ dic = (Set/* <Word> */) dics.get(name); if (set == null) { set = new HashSet/* <Word> */(dic); } else { set.addAll(dic); } } Word[] words = (Word[]) set.toArray(new Word[set.size()]); Arrays.sort(words); return words; } protected Word[] getConfucianFamilyNames() { return getDictionaryWords(confucianFamilyName); } protected Word[] getNoiseWords() { return getDictionaryWords(noiseWord); } protected Word[] getNoiseCharactors() { return getDictionaryWords(noiseCharactor); } protected Word[] getUnits() { return getDictionaryWords(unit); } protected Word[] getCombinatoricsWords() { return getDictionaryWords(combinatorics); } @SuppressWarnings({ "unchecked", "rawtypes" }) protected Word[] getDictionaryWords(String dicNameRelativeDicHome) { Map dics; try { dics = FileWordsReader.readWords(dicHome + "/" + dicNameRelativeDicHome + ".dic", charsetName); } catch (IOException e) { throw toRuntimeException(e); } Set/* <Word> */<Word> set = (Set/* <Word> */<Word>) dics.get(dicNameRelativeDicHome); Word[] words = set.toArray(new Word[set.size()]); Arrays.sort(words); return words; } // ------------------------------------- /** * ???(?????)key * dicdivision/china.dickey"division/china" */ @SuppressWarnings("unchecked") protected synchronized Map/* <String, Set<String>> */<String, 
Set<String>> loadAllWordsIfNecessary() { if (allWords == null) { try { log.info("loading dictionaries from " + dicHome); allWords = FileWordsReader.readWords(dicHome, charsetName); if (allWords.size() == 0) { String message = "Not found any dictionary files, have you set the 'paoding.dic.home' right? (" + this.dicHome + ")"; log.error(message); throw new PaodingAnalysisException(message); } log.info("loaded success!"); } catch (IOException e) { throw toRuntimeException(e); } } return allWords; } // --------------------------------------- protected final boolean isSkipForVacabulary(String dicNameRelativeDicHome) { return dicNameRelativeDicHome.startsWith(skipPrefix) || dicNameRelativeDicHome.indexOf("/" + skipPrefix) != -1; } protected boolean isUnitDicFile(String dicName) { return dicName.equals(this.unit); } protected boolean isNoiseCharactorDicFile(String dicName) { return dicName.equals(this.noiseCharactor); } protected boolean isNoiseWordDicFile(String dicName) { return dicName.equals(this.noiseWord); } protected boolean isConfucianFamilyNameDicFile(String dicName) { return dicName.equals(this.confucianFamilyName); } protected boolean isLantinFollowedByCjkDicFile(String dicName) { return dicName.equals(this.combinatorics); } // -------------------------------------- protected RuntimeException toRuntimeException(IOException e) { return new PaodingAnalysisException(e); } }