Java tutorial
/*******************************************************************************
 * Copyright 2015
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.dkpro.tc.core.task;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.resource.ResourceInitializationException;

import de.tudarmstadt.ukp.dkpro.core.io.bincas.BinaryCasReader;
import de.tudarmstadt.ukp.dkpro.lab.engine.TaskContext;
import de.tudarmstadt.ukp.dkpro.lab.storage.StorageService.AccessMode;
import de.tudarmstadt.ukp.dkpro.lab.task.Discriminator;
import de.tudarmstadt.ukp.dkpro.lab.uima.task.impl.UimaTaskBase;
import de.tudarmstadt.ukp.dkpro.tc.api.exception.TextClassificationException;
import de.tudarmstadt.ukp.dkpro.tc.api.features.meta.MetaCollector;
import de.tudarmstadt.ukp.dkpro.tc.core.Constants;
import de.tudarmstadt.ukp.dkpro.tc.core.feature.SequenceContextMetaCollector;
import de.tudarmstadt.ukp.dkpro.tc.core.feature.UnitContextMetaCollector;
import de.tudarmstadt.ukp.dkpro.tc.core.util.TaskUtils;

/**
 * Iterates over all documents and stores required collection-level meta data, e.g. which n-grams
 * appear in the documents.
 */
public class MetaInfoTask
    extends UimaTaskBase
{
    /**
     * Public name of the task key
     */
    public static final String META_KEY = "meta";

    /**
     * Public name of the folder where meta information will be stored within the task
     */
    public static final String INPUT_KEY = "input";

    private List<String> operativeViews;

    @Discriminator
    protected List<String> featureSet;

    @Discriminator
    private String featureMode;

    @Discriminator
    protected List<Object> pipelineParameters;

    private Set<Class<? extends MetaCollector>> metaCollectorClasses;

    @Discriminator
    private File filesRoot;

    @Discriminator
    private Collection<String> files_training;

    @Override
    public CollectionReaderDescription getCollectionReaderDescription(TaskContext aContext)
        throws ResourceInitializationException, IOException
    {
        // TrainTest setup: input files are set as imports
        if (filesRoot == null || files_training == null) {
            File root = aContext.getStorageLocation(INPUT_KEY, AccessMode.READONLY);
            Collection<File> files = FileUtils.listFiles(root, new String[] { "bin" }, true);
            return createReaderDescription(BinaryCasReader.class, BinaryCasReader.PARAM_PATTERNS,
                    files);
        }
        // CV setup: filesRoot and files_training have to be set as dimensions
        else {
            return createReaderDescription(BinaryCasReader.class, BinaryCasReader.PARAM_PATTERNS,
                    files_training);
        }
    }

    @Override
    public AnalysisEngineDescription getAnalysisEngineDescription(TaskContext aContext)
        throws ResourceInitializationException, IOException
    {
        // check for error conditions
        if (featureSet == null) {
            throw new ResourceInitializationException(new TextClassificationException(
                    "No feature extractors have been added to the experiment."));
        }

        // automatically determine the required metaCollector classes from the provided feature
        // extractors
        try {
            metaCollectorClasses = TaskUtils.getMetaCollectorsFromFeatureExtractors(featureSet);
        }
        catch (ClassNotFoundException e) {
            throw new ResourceInitializationException(e);
        }
        catch (InstantiationException e) {
            throw new ResourceInitializationException(e);
        }
        catch (IllegalAccessException e) {
            throw new ResourceInitializationException(e);
        }

        if (featureMode.equals(Constants.FM_UNIT)) {
            // add an additional unit context meta collector that extracts the context around
            // text classification units; mainly used for error analysis purposes
            metaCollectorClasses.add(UnitContextMetaCollector.class);
        }

        if (featureMode.equals(Constants.FM_SEQUENCE)) {
            metaCollectorClasses.add(SequenceContextMetaCollector.class);
        }

        // collect parameter/key pairs that need to be set
        Map<String, String> parameterKeyPairs = new HashMap<String, String>();
        for (Class<? extends MetaCollector> metaCollectorClass : metaCollectorClasses) {
            try {
                parameterKeyPairs.putAll(metaCollectorClass.newInstance().getParameterKeyPairs());
            }
            catch (InstantiationException e) {
                throw new ResourceInitializationException(e);
            }
            catch (IllegalAccessException e) {
                throw new ResourceInitializationException(e);
            }
        }

        List<Object> parameters = new ArrayList<Object>();
        if (pipelineParameters != null) {
            parameters.addAll(pipelineParameters);
        }

        // make sure that the meta key import can be resolved (even when no meta features have
        // been extracted, as in the regression demo)
        // TODO better way to do this?
        if (parameterKeyPairs.size() == 0) {
            File file = new File(aContext.getStorageLocation(META_KEY, AccessMode.READONLY)
                    .getPath());
            file.mkdir();
        }

        for (Entry<String, String> entry : parameterKeyPairs.entrySet()) {
            File file = new File(aContext.getStorageLocation(META_KEY, AccessMode.READONLY),
                    entry.getValue());
            parameters.addAll(Arrays.asList(entry.getKey(), file.getAbsolutePath()));
        }

        AggregateBuilder builder = new AggregateBuilder();
        for (Class<? extends MetaCollector> metaCollectorClass : metaCollectorClasses) {
            if (operativeViews != null) {
                for (String viewName : operativeViews) {
                    builder.add(createEngineDescription(metaCollectorClass, parameters.toArray()),
                            CAS.NAME_DEFAULT_SOFA, viewName);
                }
            }
            else {
                builder.add(createEngineDescription(metaCollectorClass, parameters.toArray()));
            }
        }
        return builder.createAggregateDescription();
    }

    /**
     * @param operativeViews
     *            the CAS views on which the meta collectors should operate
     */
    public void setOperativeViews(List<String> operativeViews)
    {
        this.operativeViews = operativeViews;
    }
}
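
For context, the task above is not run on its own; it is wired into a DKPro Lab batch experiment, where its @Discriminator fields (featureSet, featureMode, pipelineParameters, filesRoot, files_training) are filled from the experiment's ParameterSpace and its INPUT_KEY/META_KEY folders are connected to neighboring tasks via imports. The following is only a minimal sketch under assumptions: the InitTask and ExtractFeaturesTask classes, the OUTPUT_KEY_TRAIN constant, and the exact addImport/addTask/run signatures are taken from the surrounding DKPro TC/Lab framework as I understand it and may differ between versions.

// Hypothetical wiring sketch -- class names and constants other than MetaInfoTask's own
// are assumptions and may not match your DKPro TC version.
InitTask initTask = new InitTask();                       // produces preprocessed binary CASes

MetaInfoTask metaTask = new MetaInfoTask();
metaTask.setOperativeViews(operativeViews);               // optional: restrict to specific CAS views
// let MetaInfoTask read the binary CASes written by the init task (TrainTest setup)
metaTask.addImport(initTask, InitTask.OUTPUT_KEY_TRAIN, MetaInfoTask.INPUT_KEY);

ExtractFeaturesTask extractTask = new ExtractFeaturesTask();
// downstream feature extraction consumes the collected meta data stored under META_KEY
extractTask.addImport(metaTask, MetaInfoTask.META_KEY, MetaInfoTask.META_KEY);

BatchTask batch = new BatchTask();
batch.setParameterSpace(pSpace);                          // supplies the @Discriminator values
batch.addTask(initTask);
batch.addTask(metaTask);
batch.addTask(extractTask);
Lab.getInstance().run(batch);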