Java tutorial
/* * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package edu.toronto.cs.ontools.taxonomy; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; import java.net.URL; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.TreeSet; import org.apache.commons.io.IOUtils; import edu.toronto.cs.ontools.main.LocalFileUtils; import edu.toronto.cs.ontools.utils.graph.DAG; import edu.toronto.cs.ontools.utils.graph.DAGNode; import edu.toronto.cs.ontools.utils.graph.IDAGNode; import edu.toronto.cs.ontools.utils.maps.CounterMap; public abstract class AbstractTaxonomy extends DAG<TaxonomyTerm> implements Taxonomy { public final static String PARENT_ID_REGEX = "^([A-Z]{2}\\:[0-9]{7})\\s*!\\s*.*"; private final static String TERM_MARKER = "[Term]"; private final static String END_OF_TERM_LIST_MARKER = "[Typedef]"; private final static String FIELD_NAME_VALUE_SEPARATOR = "\\s*:\\s+"; private final Map<String, String> alternateIdMapping = new HashMap<String, String>(); private IDAGNode root; private final Map<String, Set<String>> ancestorCache = new HashMap<String, Set<String>>(); /* * (non-Javadoc) * * @see edu.toronto.cs.cidb.hpoa.taxonomy.Taxonomyy#load(java.io.File) */ public int load(File source) { // Make sure we can read the data if (source == null) { return -1; } // Load data clear(); TermData data = new TermData(); try { BufferedReader in = new BufferedReader(new FileReader(source)); String line; while ((line = in.readLine()) != null) { if (line.trim().equalsIgnoreCase(END_OF_TERM_LIST_MARKER)) { break; } if (line.trim().equalsIgnoreCase(TERM_MARKER)) { if (data.isValid()) { this.createTaxonomyTerm(data); } else if (data.getReplacement() != null) { this.alternateIdMapping.put(data.getId(), data.getReplacement()); } data.clear(); continue; } String pieces[] = line.split(FIELD_NAME_VALUE_SEPARATOR, 2); if (pieces.length != 2) { continue; } String name = pieces[0], value = pieces[1]; data.addTo(name, value); } if (data.isValid()) { this.createTaxonomyTerm(data); } else if (data.getReplacement() != null) { this.alternateIdMapping.put(data.getId(), data.getReplacement()); } in.close(); } catch (NullPointerException ex) { ex.printStackTrace(); System.err.println("File does not exist"); } catch (FileNotFoundException ex) { ex.printStackTrace(); System.err.println("Could not locate source file: " + source.getAbsolutePath()); } catch (IOException ex) { // TODO Auto-generated catch block ex.printStackTrace(); } cleanArcs(); // How much did we load: return size(); } private void cleanArcs() { Set<IDAGNode> roots = new HashSet<IDAGNode>(); // Redo all links for (DAGNode n : getNodes()) { if (n.getParents().size() == 0) { roots.add(n); continue; } for (String parentId : n.getParents()) { DAGNode p = getTerm(parentId); if (p != null) { p.addChild(n); } else { System.err.println("[WARNING] Node with id " + n.getId() + " has parent " + parentId + ", but no node " + parentId + " exists in the graph!\n"); } } } if (roots.size() == 0) { System.err.println("Something's wrong, this directed graph is DEFINITELY not acyclic!"); } else if (roots.size() == 1) { for (IDAGNode n : roots) { this.root = n; } } else { this.root = new TaxonomyTerm("", "FAKE ROOT"); this.addNode((TaxonomyTerm) this.root); this.alternateIdMapping.put(this.root.getId(), this.root.getId()); for (IDAGNode n : roots) { this.root.addChild(n); n.addParent(this.root); } } } protected void createTaxonomyTerm(TermData data) { TaxonomyTerm term = new TaxonomyTerm(data); this.addNode(term); this.alternateIdMapping.put(term.getId(), term.getId()); for (String altId : data.safeGet(TermData.ALT_ID_FIELD_NAME)) { this.alternateIdMapping.put(altId, term.getId()); } } /* * (non-Javadoc) * * @see * edu.toronto.cs.cidb.hpoa.taxonomy.Taxonomy#getRealId(java.lang.String) */ public String getRealId(String id) { return this.alternateIdMapping.get(id); } /* * (non-Javadoc) * * @see edu.toronto.cs.cidb.hpoa.taxonomy.Taxonomy#getTerm(java.lang.String) */ public TaxonomyTerm getTerm(String id) { String realId = this.getRealId(id); if (realId != null) { return (TaxonomyTerm) this.getNode(realId); } return null; } /* * (non-Javadoc) * * @see edu.toronto.cs.cidb.hpoa.taxonomy.Taxonomy#getName(java.lang.String) */ public String getName(String id) { DAGNode node = this.getTerm(id); if (node != null) { return node.getName(); } return id; } public void printAltMapping(PrintStream out) { printAltMapping(out, false); } public void printAltMapping(PrintStream out, boolean all) { for (String key : this.alternateIdMapping.keySet()) { if (all || !key.equals(this.alternateIdMapping.get(key))) { out.println(key + " -> " + this.alternateIdMapping.get(key)); } } } /* * (non-Javadoc) * * @see edu.toronto.cs.cidb.hpoa.taxonomy.Taxonomy#getRootId() */ public String getRootId() { return this.root.getId(); } /* * (non-Javadoc) * * @see edu.toronto.cs.cidb.hpoa.taxonomy.Taxonomy#getRoot() */ public TaxonomyTerm getRoot() { return (TaxonomyTerm) this.root; } @Override public boolean removeNode(String id) { return super.removeNode(id); } @Override public boolean removeNode(TaxonomyTerm node) { return super.removeNode(node); } protected Set<String> findAncestors(String id) { Set<String> result = new HashSet<String>(); if (this.getTerm(id) == null) { return result; } Set<String> front = new HashSet<String>(); Set<String> newFront = new HashSet<String>(); front.add(this.getRealId(id)); result.add(this.getRealId(id)); while (!front.isEmpty()) { for (String nextTermId : front) { for (String parentTermId : this.getTerm(nextTermId).getParents()) { if (!result.contains(parentTermId)) { newFront.add(parentTermId); result.add(parentTermId); } } } front.clear(); front.addAll(newFront); newFront.clear(); } return result; } /* * (non-Javadoc) * * @see * edu.toronto.cs.cidb.hpoa.taxonomy.Taxonomy#getAncestors(java.lang.String) */ public Set<String> getAncestors(String termId) { if (this.ancestorCache.get(termId) == null) { this.ancestorCache.put(termId, this.findAncestors(termId)); } return this.ancestorCache.get(termId); } public File getInputFileHandler(String inputLocation, boolean forceUpdate) { try { File result = new File(inputLocation); if (!result.exists()) { String name = inputLocation.substring(inputLocation.lastIndexOf('/') + 1); result = LocalFileUtils.getTemporaryFile(name); if (!result.exists()) { result.createNewFile(); BufferedInputStream in = new BufferedInputStream((new URL(inputLocation)).openStream()); OutputStream out = new FileOutputStream(result); IOUtils.copy(in, out); out.flush(); out.close(); } } return result; } catch (IOException ex) { ex.printStackTrace(); return null; } } public void display() { display(System.out); } public void display(File out) { try { PrintStream p = new PrintStream(out); display(p); p.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); display(System.out); } } protected void display(PrintStream out) { Map<String, Boolean> visited = new HashMap<String, Boolean>(); Set<String> crt = new TreeSet<String>(); Set<String> next = new TreeSet<String>(); crt.add(this.getRootId()); CounterMap<Integer> h = new CounterMap<Integer>(); int min = this.size(), max = 0; double avg = 0; while (!crt.isEmpty()) { for (String id : crt) { if (Boolean.TRUE.equals(visited.get(id))) { continue; } TaxonomyTerm term = this.getTerm(id); int p = term.getParents().size(); h.addTo(p); if (min > p) { min = p; } if (max < p) { max = p; } avg += p; out.println(term); visited.put(id, true); next.addAll(term.getChildren()); } crt.clear(); crt.addAll(next); next.clear(); } avg /= this.size(); out.println(h); out.println(); out.println("SIZE " + this.size()); out.println("MIN: " + min); out.println("MAX: " + max); out.println("AVG: " + avg); } }