// Java tutorial
/******************************************************************************* * Copyright 2010 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universitt Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package de.tudarmstadt.ukp.dkpro.core.io.mmax2; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.HashMap; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.eml.MMAX2.annotation.markables.Markable; import org.eml.MMAX2.annotation.markables.MarkableLevel; import org.eml.MMAX2.annotation.markables.MarkablePointer; import org.eml.MMAX2.discourse.MMAX2Discourse; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import 
org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * Writes the CAS into a MMAX2 project. * The MMAX2 project can then be used to annotate the annotations further. * * All the settings (which annotations should be included, * how they should be treated in the MMAX2 project, etc.) * are to be made in the empty (but fully configured) MMAX2 project that is then * filled with the data from the CAS. * This requires some knowledge of MMAX2. * * @author zesch * @author ferschke * */ // TODO add parameter for path to MMAX project, as this needs to be adapted to the task anyway public class MMAXWriter { private final Log log = LogFactory.getLog(getClass()); public enum StartUpMode { visible, inactive } public static final String BASEDATA_PATH = "Basedata" + File.separator; public static final String CUSTOMIZATION_PATH = "Customizations" + File.separator; public static final String MARKABLE_PATH = "Markables" + File.separator; public static final String SCHEME_PATH = "Schemes" + File.separator; public static final String STYLE_PATH = "Styles" + File.separator; // The path to the source mmax project template private static final String SOURCE_PATH_STRING = "resource/mmax/"; private File basedataPath; private File customizationPath; private File markablePath; private File schemePath; private File stylePath; private Basedata basedata; // The path to the mmax project private File projectPath; // the common_paths.xml file private File commonPathsFile; private MMAX2Discourse discourse; public MMAXWriter(File outputPath) throws MMAXWriterException { this.projectPath = outputPath; this.basedata = new Basedata(); commonPathsFile = new File(this.projectPath, "common_paths.xml"); if (!commonPathsFile.exists()) { try { copy(new File(SOURCE_PATH_STRING), projectPath); } catch (IOException e) { throw new MMAXWriterException(e); } } basedataPath = new File(projectPath.getPath(), BASEDATA_PATH); customizationPath = new File(projectPath.getPath(), CUSTOMIZATION_PATH); schemePath = 
new File(projectPath.getPath(), SCHEME_PATH); stylePath = new File(projectPath.getPath(), STYLE_PATH); markablePath = new File(projectPath.getPath(), MARKABLE_PATH); } public void clearFiles() { for (File f : basedataPath.listFiles()) { if (!f.getName().endsWith(".dtd")) { log.info("Deleting: " + f.getPath().toString()); f.delete(); } } for (File f : projectPath.listFiles()) { if (f.getName().endsWith(".mmax")) { log.info("Deleting: " + f.getPath().toString()); f.delete(); } } for (File f : markablePath.listFiles()) { if (!f.getName().endsWith(".dtd")) { log.info("Deleting: " + f.getPath().toString()); f.delete(); } } for (File f : projectPath.listFiles()) { if (f.isDirectory() && !(f.getName().equals(basedataPath.getName()) || f.getName().equals(customizationPath.getName()) || f.getName().equals(schemePath.getName()) || f.getName().equals(stylePath.getName()) || f.getName().equals(markablePath.getName()))) { log.info("Deleting:" + f.getPath().toString()); deleteDir(f); } } } public void clearBasedata() { this.basedata = new Basedata(); } public void appendBasedata(String term, int startOffset) { basedata.append(term, startOffset); } public String getBasedataId(int offset) { return basedata.getId(offset); } public String createMMAXFile(String mmaxFilename) throws MMAXWriterException { log.info("Writing Basedata"); File basedataFile = new File(basedataPath, mmaxFilename + ".xml"); basedata.save(basedataFile); Document doc = createXML(); Element toplevel = doc.createElement("mmax_project"); Element elem = doc.createElement("words"); elem.appendChild(doc.createTextNode(mmaxFilename + ".xml")); toplevel.appendChild(elem); File mmaxFile = new File(projectPath, mmaxFilename + ".mmax"); doc.appendChild(toplevel); saveXML(doc, mmaxFile, null, null); // load the current discourse loadDiscourse(mmaxFile.getPath()); return basedataFile.getName(); } private void loadDiscourse(String infile) { log.info("Loading discourse from: " + infile); discourse = 
MMAX2Discourse.buildDiscourse(infile, commonPathsFile.getPath()); } public void registerMarkableLevel(String levelname, String schemeFilename, String customizationFilename) throws MMAXWriterException { Document compath = loadXML(commonPathsFile); Element annotations = (Element) compath.getElementsByTagName("annotations").item(0); NodeList annos = annotations.getElementsByTagName("level"); boolean found = false; for (int j = 0; j < annos.getLength(); j++) { if (annos.item(j).getAttributes().getNamedItem("name").getTextContent().equals(levelname)) { found = true; System.err.println("Level " + levelname + " already exists."); break; } } // if the level is not already present in the commons_path file, add it if (!found) { Element anno = compath.createElement("level"); anno.setAttribute("name", levelname); anno.setAttribute("schemefile", schemeFilename); anno.setAttribute("customization_file", customizationFilename); anno.appendChild(compath.createTextNode("$_" + levelname + ".xml")); annotations.appendChild(anno); } saveXML(compath, commonPathsFile, null, null); } private MarkableLevel getMarkableLevel(String levelName) { return discourse.getMarkableLevelByName(levelName, false); } public Markable addMarkable(Node node, String id, MarkableLevel level) { return new Markable(node, id, null, null, level); } public Markable addMarkable(String levelName, String[] ids, HashMap attributes) throws MMAXWriterException { MarkableLevel level = getMarkableLevel(levelName); if (level == null) { throw new MMAXWriterException("Could not get level: " + levelName); } Markable markable = level.addMarkable(ids, attributes); return markable; } public MarkablePointer addMarkablePointer(Markable m1, Markable m2) { return null; } public void saveMarkables(String levelName) { getMarkableLevel(levelName).saveMarkables(""); } private Document loadXML(File file) throws MMAXWriterException { DocumentBuilder parser; Document doc; try { parser = 
DocumentBuilderFactory.newInstance().newDocumentBuilder(); doc = parser.parse(file); } catch (ParserConfigurationException e) { throw new MMAXWriterException(e); } catch (SAXException e) { throw new MMAXWriterException(e); } catch (IOException e) { throw new MMAXWriterException(e); } return doc; } private void saveXML(Document doc, File file, String outputkey, String doctype) throws MMAXWriterException { try { DOMSource source = new DOMSource(doc); FileOutputStream stream = new FileOutputStream(file); StreamResult result = new StreamResult(stream); Transformer trans = TransformerFactory.newInstance().newTransformer(); if (doctype != null) { trans.setOutputProperty(outputkey, doctype); } trans.transform(source, result); stream.close(); } catch (FileNotFoundException e) { throw new MMAXWriterException(e); } catch (TransformerConfigurationException e) { throw new MMAXWriterException(e); } catch (TransformerFactoryConfigurationError e) { throw new MMAXWriterException(e); } catch (TransformerException e) { throw new MMAXWriterException(e); } catch (IOException e) { throw new MMAXWriterException(e); } } public Document createXML() throws MMAXWriterException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder; try { builder = factory.newDocumentBuilder(); } catch (ParserConfigurationException e) { throw new MMAXWriterException(e); } return builder.newDocument(); } /** * Copies a source folder/file to a target folder/file. Used to duplicate the template project and template files. 
* @param source * @param target * @throws IOException */ private void copy(File source, File target) throws IOException { if (source.isFile() && !target.exists()) { BufferedInputStream in = new BufferedInputStream(new FileInputStream(source)); BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(target)); int i; while ((i = in.read()) != -1) { out.write(i); } in.close(); out.close(); log.trace(target.getName() + " copied."); } if (source.isDirectory()) { target.mkdirs(); File[] files = source.listFiles(); for (File file : files) { if (!file.getName().endsWith(".svn")) { // do not copy svn files! copy(file, new File(target, file.getName())); } } } } private static boolean deleteDir(File dir) { if (dir.isDirectory()) { for (File element : dir.listFiles()) { boolean success = deleteDir(element); if (!success) { return false; } } } return dir.delete(); } }