Java tutorial
/** * IK ? 5.0 * IK Analyzer release 5.0 * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * ??(linliangyi2005@gmail.com)?? * ? 2012 * provided by Linliangyi and copyright 2012 by Oolong studio * * */ package org.wltea.analyzer.dic; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Collection; import java.util.HashSet; import java.util.List; import org.apache.log4j.Logger; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.ZooKeeper; import org.wltea.analyzer.cfg.Configuration; /** * ??,??? */ /** * @author kevin * */ public class Dictionary implements Watcher { private static Logger logger = Logger.getLogger(Dictionary.class); private static final String ZK_DIC_ROOT = "/dictionary"; private static final String ZK_DIC_EXT = ZK_DIC_ROOT + "/ext"; private static final String ZK_DIC_STOP = ZK_DIC_ROOT + "/stop"; /* * ??? */ private static Dictionary singleton; /* * ? */ private DictSegment _MainDict; /* * ??? */ private DictSegment _StopWordDict; /* * ??? */ private DictSegment _QuantifierDict; /** * ? */ private Configuration cfg; /** * */ private ZooKeeper zk; private Dictionary(Configuration cfg) { this.cfg = cfg; this.initZk(); this.loadMainDict(); //this.loadStopWordDict(); this.loadQuantifierDict(); } /** * ?? * IK Analyzer?Dictionary???? * ?Dictionary?? * ?? * ??? * @return Dictionary */ public static Dictionary initial(Configuration cfg) { if (singleton == null) { synchronized (Dictionary.class) { if (singleton == null) { singleton = new Dictionary(cfg); return singleton; } } } return singleton; } /** * ???? * @return Dictionary ? */ public static Dictionary getSingleton() { if (singleton == null) { throw new IllegalStateException("??initial"); } return singleton; } private void initZk() { String zkhost = System.getProperty("zkHost"); try { this.zk = new ZooKeeper(zkhost, 5 * 1000, this); if (zk.exists(ZK_DIC_ROOT, false) == null) { zk.create(ZK_DIC_ROOT, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } if (zk.exists(ZK_DIC_EXT + ".add", true) == null) { zk.create(ZK_DIC_EXT + ".add", null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } if (zk.exists(ZK_DIC_EXT + ".del", true) == null) { zk.create(ZK_DIC_EXT + ".del", null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } if (zk.exists(ZK_DIC_STOP + ".add", true) == null) { zk.create(ZK_DIC_STOP + ".add", null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } if (zk.exists(ZK_DIC_STOP + ".del", true) == null) { zk.create(ZK_DIC_STOP + ".del", null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (KeeperException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * ??? * @param words Collection<String>?? * @throws InterruptedException * @throws KeeperException */ public void addWordsToMainDict(Collection<String> words) throws KeeperException, InterruptedException { if (words != null) { StringBuffer buf = new StringBuffer(); for (String word : words) { if (word != null) { //???? buf.append(word).append("\n"); } } this.zk.setData(ZK_DIC_EXT + ".add", buf.toString().getBytes(), -1); } } /** * ???? * @param words * @throws InterruptedException * @throws KeeperException */ public void disableWordsFromMainDict(Collection<String> words) throws KeeperException, InterruptedException { if (words != null) { StringBuffer buf = new StringBuffer(); for (String word : words) { if (word != null) { //???? this.zk.setData(ZK_DIC_EXT + ".del", buf.toString().getBytes(), -1); } } } } /** * ??? * @param words Collection<String>?? * @throws InterruptedException * @throws KeeperException */ public void addWordsToStopDict(Collection<String> words) throws KeeperException, InterruptedException { if (words != null) { StringBuffer buf = new StringBuffer(); for (String word : words) { if (word != null) { //???? buf.append(word).append("\n"); } } this.zk.setData(ZK_DIC_STOP + ".add", buf.toString().getBytes(), -1); } } /** * ???? * @param words * @throws InterruptedException * @throws KeeperException */ public void disableWordsFromStopDict(Collection<String> words) throws KeeperException, InterruptedException { if (words != null) { StringBuffer buf = new StringBuffer(); for (String word : words) { if (word != null) { //???? this.zk.setData(ZK_DIC_STOP + ".del", buf.toString().getBytes(), -1); } } } } /** * ?? * @param charArray * @return Hit ??? */ public Hit matchInMainDict(char[] charArray) { return singleton._MainDict.match(charArray); } /** * ?? * @param charArray * @param begin * @param length * @return Hit ??? */ public Hit matchInMainDict(char[] charArray, int begin, int length) { return singleton._MainDict.match(charArray, begin, length); } /** * ???? * @param charArray * @param begin * @param length * @return Hit ??? */ public Hit matchInQuantifierDict(char[] charArray, int begin, int length) { return singleton._QuantifierDict.match(charArray, begin, length); } /** * ?Hit?DictSegment?? * @param charArray * @param currentIndex * @param matchedHit * @return Hit */ public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) { DictSegment ds = matchedHit.getMatchedDictSegment(); return ds.match(charArray, currentIndex, 1, matchedHit); } /** * ??? * @param charArray * @param begin * @param length * @return boolean */ public boolean isStopWord(char[] charArray, int begin, int length) { return singleton._StopWordDict.match(charArray, begin, length).isMatch(); } /** * ??? */ private void loadMainDict() { //? _MainDict = new DictSegment((char) 0); //?? InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary()); if (is == null) { throw new RuntimeException("Main Dictionary not found!!!"); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); } } while (theWord != null); } catch (IOException ioe) { System.err.println("Main Dictionary loading exception."); ioe.printStackTrace(); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { e.printStackTrace(); } } //? //this.loadExtDict(); } /** * ??? */ private void loadExtDict() { //?? List<String> extDictFiles = cfg.getExtDictionarys(); if (extDictFiles != null) { InputStream is = null; for (String extDictName : extDictFiles) { //?? System.out.println("?" + extDictName); is = this.getClass().getClassLoader().getResourceAsStream(extDictName); //? if (is == null) { continue; } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { //??? //System.out.println(theWord); _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); } } while (theWord != null); } catch (IOException ioe) { System.err.println("Extension Dictionary loading exception."); ioe.printStackTrace(); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { e.printStackTrace(); } } } } } /** * ??? */ private void loadStopWordDict() { //? _StopWordDict = new DictSegment((char) 0); //?? List<String> extStopWordDictFiles = cfg.getExtStopWordDictionarys(); if (extStopWordDictFiles != null) { InputStream is = null; for (String extStopWordDictName : extStopWordDictFiles) { System.out.println("??" + extStopWordDictName); //?? is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName); //? if (is == null) { continue; } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { //System.out.println(theWord); //??? _StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); } } while (theWord != null); } catch (IOException ioe) { System.err.println("Extension Stop word Dictionary loading exception."); ioe.printStackTrace(); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { e.printStackTrace(); } } } } } /** * ??? */ private void loadQuantifierDict() { //?? _QuantifierDict = new DictSegment((char) 0); //???? InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getQuantifierDicionary()); if (is == null) { throw new RuntimeException("Quantifier Dictionary not found!!!"); } try { BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { _QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); } } while (theWord != null); } catch (IOException ioe) { System.err.println("Quantifier Dictionary loading exception."); ioe.printStackTrace(); } finally { try { if (is != null) { is.close(); is = null; } } catch (IOException e) { e.printStackTrace(); } } } /** * ?? * @param words * @param update true:?false:? * @param dict */ private void updateWordsToDict(Collection<String> words, boolean update, DictSegment dict) { if (words != null) { logger.info("Words : " + words.toString()); for (String word : words) { if (word != null) { //???? if (update) { dict.fillSegment(word.trim().toLowerCase().toCharArray()); } else { dict.disableSegment(word.trim().toLowerCase().toCharArray()); } } } } } /** * zookeeper????? * @param path * @return */ private Collection<String> getDataFromZkFile(String path) { Collection<String> set = new HashSet<String>(); byte[] buf = null; try { buf = this.zk.getData(path, true, null); } catch (KeeperException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (buf != null) { String[] words = new String(buf).split("\n"); for (int i = 0; i < words.length; i++) { set.add(words[i]); } } return set; } /* (non-Javadoc) * @see org.apache.zookeeper.Watcher#process(org.apache.zookeeper.WatchedEvent) */ public void process(WatchedEvent event) { // TODO Auto-generated method stub logger.debug("path: " + event.getPath() + " EventType: " + event.getType()); if (event.getType() == Watcher.Event.EventType.NodeDataChanged && event.getPath() != null) { String path = event.getPath(); logger.debug("path: " + path); if (path.equals(ZK_DIC_EXT + ".add")) { Collection<String> words = this.getDataFromZkFile(path); this.updateWordsToDict(words, true, singleton._MainDict); } else if (path.equals(ZK_DIC_EXT + ".del")) { Collection<String> words = this.getDataFromZkFile(path); this.updateWordsToDict(words, false, singleton._MainDict); } else if (path.equals(ZK_DIC_STOP + ".add")) { Collection<String> words = this.getDataFromZkFile(path); this.updateWordsToDict(words, true, singleton._StopWordDict); } else if (path.equals(ZK_DIC_STOP + ".del")) { Collection<String> words = this.getDataFromZkFile(path); this.updateWordsToDict(words, true, singleton._StopWordDict); } } } }