// Java tutorial
/*
 * Project Name: DODDLE-OWL (a Domain Ontology rapiD DeveLopment Environment - OWL extension)
 * Project Website: http://doddle-owl.sourceforge.net/
 *
 * Copyright (C) 2004-2015 Yamaguchi Laboratory, Keio University. All rights reserved.
 *
 * This file is part of DODDLE-OWL.
 *
 * DODDLE-OWL is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * DODDLE-OWL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with DODDLE-OWL. If not, see <http://www.gnu.org/licenses/>.
 *
 */

package net.sourceforge.doddle_owl.data;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import net.sourceforge.doddle_owl.utils.*;

import org.apache.commons.io.FileUtils;

/**
 * Static lookup facade over the Japanese WordNet dictionary files used by DODDLE-OWL.
 *
 * <p>
 * Every lookup goes through a pair of files: an index file, which this class treats as a
 * sequence of 10-byte records each holding a file offset, and a data file whose lines are read
 * at those offsets. Identifiers and terms are located by binary search, so the lines referenced
 * by an index are assumed to be ordered by {@link String#compareTo(String)}.
 * </p>
 *
 * <p>
 * {@link #initJPNWNDic()} must be called before any lookup. It opens the tree, word and concept
 * files only; the relation files are never opened by this class, so relation lookups report
 * "not found".
 * </p>
 *
 * @author Takeshi Morita
 */
public class JpnWordNetDic {

    private static final String TREE_DATA_FILE = "tree.data";
    private static final String WORD_DATA_FILE = "word.data";
    private static final String CONCEPT_DATA_FILE = "concept.data";
    private static final String RELATION_DATA_FILE = "relation.data";

    private static final String TREE_INDEX_FILE = "tree.index";
    private static final String WORD_INDEX_FILE = "word.index";
    private static final String CONCEPT_INDEX_FILE = "concept.index";
    private static final String RELATION_INDEX_FILE = "relation.index";

    /** Number of bytes this class assumes for one record of an index file. */
    private static final int INDEX_RECORD_LENGTH = 10;

    /** Number of fields copied from a concept record into a {@link Concept}. */
    private static final int CONCEPT_FIELD_COUNT = 4;

    /**
     * Charset that maps each char returned by {@link RandomAccessFile#readLine()} back to the
     * byte it was read from.
     */
    private static final String RAW_BYTES_ENCODING = "ISO8859_1";

    private static final String UTF_8 = "UTF-8";

    private static RandomAccessFile jpnwnTreeDataFile;
    private static RandomAccessFile jpnwnWordDataFile;
    private static RandomAccessFile jpnwnConceptDataFile;
    private static RandomAccessFile jpnwnRelationDataFile;

    private static RandomAccessFile jpnwnTreeIndexFile;
    private static RandomAccessFile jpnwnWordIndexFile;
    private static RandomAccessFile jpnwnConceptIndexFile;
    private static RandomAccessFile jpnwnRelationIndexFile;

    /** Cache of concepts keyed by their URI. */
    private static Map<String, Concept> jpnwnURIConceptMap;

    /** Cache of synset local-name sets keyed by the looked-up word. */
    private static Map<String, Set<String>> jpnwnWordIDSetMap;

    /**
     * Opens the dictionary files and resets the caches.
     *
     * <p>
     * Returns immediately when the concept cache already holds entries. Otherwise any handles
     * left over from an earlier call are closed before the files are reopened, and all handles
     * are closed again if opening fails part-way, so no file descriptor is leaked.
     * </p>
     *
     * @return {@code true} when the dictionary is ready, {@code false} when a file could not be
     *         opened
     */
    public static boolean initJPNWNDic() {
        if (jpnwnURIConceptMap != null && 0 < jpnwnURIConceptMap.size()) {
            return true;
        }
        jpnwnURIConceptMap = new HashMap<String, Concept>();
        jpnwnWordIDSetMap = new HashMap<String, Set<String>>();
        closeDictionaryFiles();
        try {
            jpnwnTreeDataFile = new RandomAccessFile(Utils.getJPWNFile(TREE_DATA_FILE), "r");
            jpnwnWordDataFile = new RandomAccessFile(Utils.getJPWNFile(WORD_DATA_FILE), "r");
            jpnwnConceptDataFile = new RandomAccessFile(Utils.getJPWNFile(CONCEPT_DATA_FILE), "r");

            jpnwnTreeIndexFile = new RandomAccessFile(Utils.getJPWNFile(TREE_INDEX_FILE), "r");
            jpnwnWordIndexFile = new RandomAccessFile(Utils.getJPWNFile(WORD_INDEX_FILE), "r");
            jpnwnConceptIndexFile = new RandomAccessFile(Utils.getJPWNFile(CONCEPT_INDEX_FILE), "r");
        } catch (IOException ioe) {
            ioe.printStackTrace();
            closeDictionaryFiles();
            return false;
        }
        return true;
    }

    /** Closes every dictionary file handle that is currently open and clears the fields. */
    private static void closeDictionaryFiles() {
        closeQuietly(jpnwnTreeDataFile);
        closeQuietly(jpnwnWordDataFile);
        closeQuietly(jpnwnConceptDataFile);
        closeQuietly(jpnwnRelationDataFile);
        closeQuietly(jpnwnTreeIndexFile);
        closeQuietly(jpnwnWordIndexFile);
        closeQuietly(jpnwnConceptIndexFile);
        closeQuietly(jpnwnRelationIndexFile);
        jpnwnTreeDataFile = null;
        jpnwnWordDataFile = null;
        jpnwnConceptDataFile = null;
        jpnwnRelationDataFile = null;
        jpnwnTreeIndexFile = null;
        jpnwnWordIndexFile = null;
        jpnwnConceptIndexFile = null;
        jpnwnRelationIndexFile = null;
    }

    /** Closes {@code file} if it is non-null; a failure to close is only logged. */
    private static void closeQuietly(RandomAccessFile file) {
        if (file == null) {
            return;
        }
        try {
            file.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    /**
     * @return the number of records in the word index, or -1 when it is unavailable
     */
    private static long getIndexFpListSize() {
        return getIndexFileSize(jpnwnWordIndexFile);
    }

    /**
     * Reads the word-data offset stored at byte position {@code fp} of the word index.
     *
     * @return the offset, or -1 when it cannot be read
     */
    private static long getIndexFp(long fp) {
        return getDataFp(fp, jpnwnWordIndexFile);
    }

    /**
     * @param indexFile index file to measure; may be {@code null}
     * @return the file length divided by the record length, or -1 when the file is not open or
     *         its length cannot be read
     */
    private static long getIndexFileSize(RandomAccessFile indexFile) {
        if (indexFile == null) {
            return -1;
        }
        try {
            return indexFile.length() / INDEX_RECORD_LENGTH;
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        return -1;
    }

    /**
     * Reads the offset stored on the line starting at byte position {@code fp} of
     * {@code indexFile}.
     *
     * @param fp byte position inside the index file
     * @param indexFile index file to read from; may be {@code null}
     * @return the parsed offset, or -1 when the file is not open, the position is at or past
     *         the end of the file, or an I/O error occurs
     * @throws NumberFormatException if the line is not a decimal number
     */
    private static long getDataFp(long fp, RandomAccessFile indexFile) {
        if (indexFile == null) {
            return -1;
        }
        try {
            indexFile.seek(fp);
            String fpStr = indexFile.readLine();
            if (fpStr == null) {
                // readLine() reports end-of-file with null
                return -1;
            }
            return Long.valueOf(fpStr);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        return -1;
    }

    /**
     * Reads the word-data line at offset {@code ifp}, decoded as UTF-8.
     *
     * @return the line, or {@code null} when it cannot be read
     */
    private static String getTermAndIndexFpSet(long ifp) {
        return getData(ifp, jpnwnWordDataFile, UTF_8);
    }

    /**
     * Reads one line of {@code dataFile} starting at byte offset {@code dfp}.
     *
     * <p>
     * {@link RandomAccessFile#readLine()} widens each byte to a char, so the line is first
     * turned back into its original bytes and then decoded with {@code encoding}.
     * </p>
     *
     * @param dfp byte offset of the line
     * @param dataFile data file to read from; may be {@code null}
     * @param encoding charset name used to decode the line
     * @return the decoded line, or {@code null} when the file is not open, the offset is at or
     *         past the end of the file, or an I/O error occurs
     */
    private static String getData(long dfp, RandomAccessFile dataFile, String encoding) {
        if (dataFile == null) {
            return null;
        }
        try {
            dataFile.seek(dfp);
            String rawLine = dataFile.readLine();
            if (rawLine == null) {
                return null;
            }
            return new String(rawLine.getBytes(RAW_BYTES_ENCODING), encoding);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        return null;
    }

    /**
     * Binary-searches {@code indexFile} for the data line whose first tab-separated field equals
     * {@code id}.
     *
     * @param id identifier to look for
     * @param indexFile index file holding the offsets into {@code dataFile}; may be {@code null}
     * @param dataFile data file holding the records; may be {@code null}
     * @param encoding charset name used to decode data lines
     * @return the matching data line, or {@code null} when there is no match or a file cannot
     *         be read
     */
    private static String searchData(String id, RandomAccessFile indexFile, RandomAccessFile dataFile,
            String encoding) {
        long indexFileSize = getIndexFileSize(indexFile);
        long low = 0;
        long high = indexFileSize;
        while (low <= high) {
            long mid = low + (high - low) / 2;
            // Valid record numbers are 0 .. indexFileSize - 1, so the last record must stay
            // searchable; only positions beyond it end the search.
            if (indexFileSize - 1 < mid) {
                return null;
            }
            long dataFp = getDataFp(mid * INDEX_RECORD_LENGTH, indexFile);
            if (dataFp == -1) {
                return null;
            }
            String data = getData(dataFp, dataFile, encoding);
            if (data == null) {
                return null;
            }
            String searchedID = data.split("\t")[0];
            int order = searchedID.compareTo(id);
            if (order == 0) {
                return data;
            } else if (0 < order) {
                high = mid - 1;
            } else {
                low = mid + 1;
            }
        }
        return null;
    }

    /**
     * Looks up the raw concept record for {@code id}.
     *
     * @param id identifier compared against the first tab-separated field of each record
     * @return the record line decoded as UTF-8, or {@code null} when it is not found
     */
    public static String getConceptData(String id) {
        return searchData(id, jpnwnConceptIndexFile, jpnwnConceptDataFile, UTF_8);
    }

    /**
     * Looks up the raw tree record for {@code id}.
     *
     * @param id identifier compared against the first tab-separated field of each record
     * @return the record line, or {@code null} when it is not found
     */
    public static String getTreeData(String id) {
        return searchData(id, jpnwnTreeIndexFile, jpnwnTreeDataFile, RAW_BYTES_ENCODING);
    }

    /**
     * Looks up the raw relation record for {@code id}.
     *
     * <p>
     * The relation files are never opened by {@link #initJPNWNDic()}, so with this class alone
     * the result is always {@code null}.
     * </p>
     *
     * @param id identifier compared against the first tab-separated field of each record
     * @return the record line, or {@code null} when it is not found or the relation files are
     *         not open
     */
    public static String getRelationData(String id) {
        return searchData(id, jpnwnRelationIndexFile, jpnwnRelationDataFile, RAW_BYTES_ENCODING);
    }

    /**
     * Copies the fields that follow the identifier out of a concept record split on {@code ^}.
     *
     * @param dataArray the split record; element 0 is the identifier part
     * @return a new array holding elements 1 to 4 of {@code dataArray}
     * @throws IndexOutOfBoundsException if {@code dataArray} has fewer than five elements
     */
    private static String[] extractConceptFields(String[] dataArray) {
        String[] conceptData = new String[CONCEPT_FIELD_COUNT];
        System.arraycopy(dataArray, 1, conceptData, 0, conceptData.length);
        return conceptData;
    }

    /**
     * Builds the concept stored at byte offset {@code dfp} of the concept data file and puts it
     * into the URI cache.
     *
     * @return the concept, or {@code null} when the record cannot be read
     */
    private static Concept getConcept(long dfp) {
        String data = getData(dfp, jpnwnConceptDataFile, UTF_8);
        if (data == null) {
            return null;
        }
        String[] dataArray = data.split("\\^");
        String[] conceptData = extractConceptFields(dataArray);
        String id = dataArray[0].replaceAll("\t", "");
        String uri = DODDLEConstants.JPN_WN_URI + id;
        Concept c = new Concept(uri, conceptData);
        jpnwnURIConceptMap.put(uri, c);
        return c;
    }

    /**
     * Binary-searches the word index for {@code term} and collects the concept-data offsets
     * listed on its word-data line.
     *
     * @param high upper bound of the search, as a record number of the word index
     * @param term word to look for
     * @return the offsets listed after the term; empty when the term is not found or a record
     *         cannot be read
     */
    private static Set<Long> getdataFpSet(long high, String term) {
        long low = 0;
        Set<Long> dataFpSet = new HashSet<Long>();
        while (low <= high) {
            long mid = low + (high - low) / 2;
            long indexFP = getIndexFp(mid * INDEX_RECORD_LENGTH);
            if (indexFP == -1) {
                return dataFpSet;
            }
            String line = getTermAndIndexFpSet(indexFP);
            if (line == null) {
                return dataFpSet;
            }
            String[] fields = line.split("\t");
            int order = fields[0].compareTo(term);
            if (order == 0) {
                for (int i = 1; i < fields.length; i++) {
                    dataFpSet.add(Long.valueOf(fields[i]));
                }
                return dataFpSet;
            } else if (0 < order) {
                high = mid - 1;
            } else {
                low = mid + 1;
            }
        }
        return dataFpSet;
    }

    /**
     * Returns the local names of the concepts registered for {@code word}.
     *
     * <p>
     * Results, including empty ones, are cached per word. {@link #initJPNWNDic()} must have
     * been called first.
     * </p>
     *
     * @param word word to look up
     * @return the set of concept local names; empty when the word is unknown
     */
    public static Set<String> getSynsetSet(String word) {
        Map<String, Set<String>> wordIDSetMap = jpnwnWordIDSetMap;
        Set<String> cachedIdSet = wordIDSetMap.get(word);
        if (cachedIdSet != null) {
            return cachedIdSet;
        }
        Set<String> idSet = new HashSet<String>();
        for (Long dfp : getdataFpSet(getIndexFpListSize(), word)) {
            Concept c = getConcept(dfp);
            if (c != null) {
                // an unreadable concept record is skipped instead of failing the whole lookup
                idSet.add(c.getLocalName());
            }
        }
        wordIDSetMap.put(word, idSet);
        return idSet;
    }

    /**
     * Alias of {@link #getSynsetSet(String)}.
     */
    public static Set<String> getJPNWNSynsetSet(String word) {
        return getSynsetSet(word);
    }

    /**
     * Adds to {@code uriSet} the URIs of the identifiers listed in the {@code relation} section
     * of a relation record.
     *
     * <p>
     * The section is the text after the first {@code |relation} marker; its tab-separated
     * entries are read until one contains {@code |}. Nothing is added when no text follows the
     * marker.
     * </p>
     *
     * @param data raw relation record
     * @param relation relation name, matched literally
     * @param uriSet set receiving the URIs
     */
    private static void addURISet(String data, String relation, Set<String> uriSet) {
        String[] sections = data.split(Pattern.quote("|" + relation));
        if (sections.length < 2) {
            return;
        }
        for (String id : sections[1].split("\t")) {
            if (id.indexOf("|") != -1) {
                break;
            }
            if (!id.equals("")) {
                uriSet.add(DODDLEConstants.JPN_WN_URI + id);
            }
        }
    }

    /**
     * Selects the concepts whose relation record contains an {@code |agent} or {@code |object}
     * section.
     *
     * @param inputConceptSet concepts to filter
     * @return the subset of {@code inputConceptSet} that has such a relation record
     */
    public static Set<Concept> getVerbConceptSet(Set<Concept> inputConceptSet) {
        Set<Concept> verbConceptSet = new HashSet<Concept>();
        for (Concept c : inputConceptSet) {
            String data = getRelationData(c.getLocalName());
            if (data == null) {
                continue;
            }
            if (data.indexOf("|agent") != -1 || data.indexOf("|object") != -1) {
                verbConceptSet.add(c);
            }
        }
        return verbConceptSet;
    }

    /**
     * Collects the URIs listed under {@code relation} for {@code vid} and for every concept in
     * {@code trimmedConceptList}.
     *
     * <p>
     * When {@code vid} has a relation record without a {@code relation} section, an empty set
     * is returned without looking at {@code trimmedConceptList}.
     * </p>
     *
     * @param relation relation name
     * @param vid identifier whose relation record is inspected first
     * @param trimmedConceptList lists of concepts whose relation records are inspected as well
     * @return the collected URIs
     */
    public static Set<String> getRelationValueSet(String relation, String vid,
            List<List<Concept>> trimmedConceptList) {
        Set<String> uriSet = new HashSet<String>();
        String marker = "|" + relation;
        String data = getRelationData(vid);
        if (data != null) {
            if (data.indexOf(marker) == -1) {
                return uriSet;
            }
            addURISet(data, relation, uriSet);
        }
        for (List<Concept> conceptList : trimmedConceptList) {
            for (Concept c : conceptList) {
                data = getRelationData(c.getLocalName());
                if (data != null && data.indexOf(marker) != -1) {
                    addURISet(data, relation, uriSet);
                }
            }
        }
        return uriSet;
    }

    /**
     * Returns the concept for {@code id}, reading it from the concept files on a cache miss.
     *
     * <p>
     * {@link #initJPNWNDic()} must have been called first.
     * </p>
     *
     * @param id concept identifier (local name)
     * @return the concept, or {@code null} when no record exists for {@code id}
     */
    public static Concept getConcept(String id) {
        Map<String, Concept> uriConceptMap = jpnwnURIConceptMap;
        String uri = DODDLEConstants.JPN_WN_URI + id;
        Concept cachedConcept = uriConceptMap.get(uri);
        if (cachedConcept != null) {
            return cachedConcept;
        }
        String data = getConceptData(id);
        if (data == null) {
            return null;
        }
        Concept c = new Concept(uri, extractConceptFields(data.split("\\^")));
        uriConceptMap.put(uri, c);
        return c;
    }

    /**
     * Alias of {@link #getConcept(String)}.
     */
    public static Concept getJPNWNConcept(String synset) {
        return getConcept(synset);
    }

    /**
     * Manual check: prints one concept, then every concept whose tree record mentions it.
     */
    public static void main(String[] args) throws Exception {
        JpnWordNetDic.initJPNWNDic();
        String id1 = "08675967-n";
        Concept c = JpnWordNetDic.getConcept(id1);
        System.out.println(c);

        Set<String> idSet = new HashSet<String>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(DODDLEConstants.JPWN_HOME + TREE_DATA_FILE), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.indexOf(id1) != -1) {
                    idSet.add(line.split("\t\\|")[0]);
                }
            }
        } finally {
            reader.close();
        }
        System.out.println(idSet);
        for (String id : idSet) {
            c = JpnWordNetDic.getConcept(id);
            System.out.println(c);
        }
    }
}