Java tutorial
/* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universitt Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.experiments.argumentation.sequence.writers; import de.tudarmstadt.ukp.dkpro.argumentation.io.writer.ArgumentDumpWriter; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiReader; import org.apache.commons.io.IOUtils; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.component.JCasAnnotator_ImplBase; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.factory.AnalysisEngineFactory; import org.apache.uima.fit.factory.CollectionReaderFactory; import org.apache.uima.fit.pipeline.SimplePipeline; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import java.io.*; /** * @author Ivan Habernal */ public class PlainTextWriter extends JCasAnnotator_ImplBase { /** * Output file. If multiple CASes as processed, their contents are concatenated into this file. * When this file is set to "-", the dump does to {@link System#out} (default). */ public static final String PARAM_OUTPUT_FOLDER = "outputFolder"; @ConfigurationParameter(name = PARAM_OUTPUT_FOLDER, mandatory = true, defaultValue = "-") private File outputFolder; @Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); if (!outputFolder.exists()) { outputFolder.mkdirs(); } } @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { try { File outFile = new File(outputFolder, DocumentMetaData.get(aJCas).getDocumentId() + ".txt"); PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(outFile), "UTF-8")); for (Sentence sentence : JCasUtil.select(aJCas, Sentence.class)) { // print sentence coordinates out.print(sentence.getBegin() + "," + sentence.getEnd() + "\t"); // print tokens delimited by tab space for (Token token : JCasUtil.selectCovered(Token.class, sentence)) { out.print(token.getCoveredText()); out.print("\t"); } out.println(); } out.flush(); IOUtils.closeQuietly(out); } catch (IOException e) { throw new AnalysisEngineProcessException(e); } } public static void main(String[] args) throws Exception { String goldDataDir = args[0]; SimplePipeline.runPipeline( CollectionReaderFactory.createReaderDescription(XmiReader.class, XmiReader.PARAM_SOURCE_LOCATION, goldDataDir, XmiReader.PARAM_PATTERNS, "[+]*.xmi", XmiReader.PARAM_LENIENT, false), // AnalysisEngineFactory.createEngineDescription( // PlainTextWriter.class, // PlainTextWriter.PARAM_OUTPUT_FOLDER, "/tmp/out" // ) // , AnalysisEngineFactory.createEngineDescription(ArgumentDumpWriter.class, ArgumentDumpWriter.PARAM_OUTPUT_FILE, "/tmp/out.txt", ArgumentDumpWriter.PARAM_INCLUDE_RELATIONS, false)); } }