eu.project.ttc.tools.TermSuitePipeline.java Source code

Introduction

Here is the source code for eu.project.ttc.tools.TermSuitePipeline.java

Source

/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package eu.project.ttc.tools;

import java.io.File;
import java.io.Serializable;
import java.math.BigInteger;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.SecureRandom;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.BlockingQueue;

import org.apache.commons.lang.mutable.MutableInt;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ExternalResourceDescription;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

import eu.project.ttc.engines.CasStatCounter;
import eu.project.ttc.engines.Contextualizer;
import eu.project.ttc.engines.DocumentLogger;
import eu.project.ttc.engines.EvalEngine;
import eu.project.ttc.engines.ExtensionDetecter;
import eu.project.ttc.engines.FixedExpressionSpotter;
import eu.project.ttc.engines.FixedExpressionTermMarker;
import eu.project.ttc.engines.GraphicalVariantGatherer;
import eu.project.ttc.engines.MateLemmaFixer;
import eu.project.ttc.engines.MateLemmatizerTagger;
import eu.project.ttc.engines.Merger;
import eu.project.ttc.engines.PipelineObserver;
import eu.project.ttc.engines.PrimaryOccurrenceDetector;
import eu.project.ttc.engines.Ranker;
import eu.project.ttc.engines.RegexSpotter;
import eu.project.ttc.engines.ScorerAE;
import eu.project.ttc.engines.StringRegexFilter;
import eu.project.ttc.engines.SyntacticTermGatherer;
import eu.project.ttc.engines.TermClassifier;
import eu.project.ttc.engines.TermIndexBlacklistWordFilterAE;
import eu.project.ttc.engines.TermOccAnnotationImporter;
import eu.project.ttc.engines.TermSpecificityComputer;
import eu.project.ttc.engines.TreeTaggerLemmaFixer;
import eu.project.ttc.engines.cleaner.AbstractTermIndexCleaner;
import eu.project.ttc.engines.cleaner.MaxSizeThresholdCleaner;
import eu.project.ttc.engines.cleaner.TermIndexThresholdCleaner;
import eu.project.ttc.engines.cleaner.TermIndexTopNCleaner;
import eu.project.ttc.engines.cleaner.TermProperty;
import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.engines.desc.TermSuiteCollection;
import eu.project.ttc.engines.desc.TermSuitePipelineException;
import eu.project.ttc.engines.exporter.CompoundExporterAE;
import eu.project.ttc.engines.exporter.EvalExporterAE;
import eu.project.ttc.engines.exporter.ExportVariationRuleExamplesAE;
import eu.project.ttc.engines.exporter.JsonCasExporter;
import eu.project.ttc.engines.exporter.JsonExporterAE;
import eu.project.ttc.engines.exporter.SpotterTSVWriter;
import eu.project.ttc.engines.exporter.TSVExporterAE;
import eu.project.ttc.engines.exporter.TbxExporterAE;
import eu.project.ttc.engines.exporter.TermsuiteJsonCasExporter;
import eu.project.ttc.engines.exporter.VariantEvalExporterAE;
import eu.project.ttc.engines.exporter.VariationExporterAE;
import eu.project.ttc.engines.exporter.XmiCasExporter;
import eu.project.ttc.engines.morpho.CompostAE;
import eu.project.ttc.engines.morpho.ManualCompositionSetter;
import eu.project.ttc.engines.morpho.ManualPrefixSetter;
import eu.project.ttc.engines.morpho.PrefixSplitter;
import eu.project.ttc.engines.morpho.SuffixDerivationDetecter;
import eu.project.ttc.engines.morpho.SuffixDerivationExceptionSetter;
import eu.project.ttc.history.TermHistory;
import eu.project.ttc.history.TermHistoryResource;
import eu.project.ttc.metrics.LogLikelihood;
import eu.project.ttc.models.OccurrenceStore;
import eu.project.ttc.models.OccurrenceType;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.models.VariationType;
import eu.project.ttc.models.index.MemoryTermIndex;
import eu.project.ttc.models.occstore.MemoryOccurrenceStore;
import eu.project.ttc.models.occstore.MongoDBOccurrenceStore;
import eu.project.ttc.readers.AbstractToTxtSaxHandler;
import eu.project.ttc.readers.CollectionDocument;
import eu.project.ttc.readers.EmptyCollectionReader;
import eu.project.ttc.readers.GenericXMLToTxtCollectionReader;
import eu.project.ttc.readers.IstexCollectionReader;
import eu.project.ttc.readers.JsonCollectionReader;
import eu.project.ttc.readers.QueueRegistry;
import eu.project.ttc.readers.StreamingCollectionReader;
import eu.project.ttc.readers.StringCollectionReader;
import eu.project.ttc.readers.TeiCollectionReader;
import eu.project.ttc.readers.TxtCollectionReader;
import eu.project.ttc.readers.XmiCollectionReader;
import eu.project.ttc.resources.CharacterFootprintTermFilter;
import eu.project.ttc.resources.CompostInflectionRules;
import eu.project.ttc.resources.EvalTrace;
import eu.project.ttc.resources.FixedExpressionResource;
import eu.project.ttc.resources.GeneralLanguageResource;
import eu.project.ttc.resources.ManualSegmentationResource;
import eu.project.ttc.resources.MateLemmatizerModel;
import eu.project.ttc.resources.MateTaggerModel;
import eu.project.ttc.resources.ObserverResource;
import eu.project.ttc.resources.PrefixTree;
import eu.project.ttc.resources.ReferenceTermList;
import eu.project.ttc.resources.SimpleWordSet;
import eu.project.ttc.resources.SuffixDerivationList;
import eu.project.ttc.resources.TermIndexResource;
import eu.project.ttc.resources.TermSuitePipelineObserver;
import eu.project.ttc.resources.YamlVariantRules;
import eu.project.ttc.stream.CasConsumer;
import eu.project.ttc.stream.ConsumerRegistry;
import eu.project.ttc.stream.DocumentProvider;
import eu.project.ttc.stream.DocumentStream;
import eu.project.ttc.stream.StreamingCasConsumer;
import eu.project.ttc.types.FixedExpression;
import eu.project.ttc.types.TermOccAnnotation;
import eu.project.ttc.types.WordAnnotation;
import eu.project.ttc.utils.FileUtils;
import eu.project.ttc.utils.OccurrenceBuffer;
import fr.free.rocheteau.jerome.engines.Stemmer;
import fr.univnantes.julestar.uima.resources.MultimapFlatResource;
import fr.univnantes.lina.uima.ChineseSegmenterResourceHelper;
import fr.univnantes.lina.uima.engines.ChineseSegmenter;
import fr.univnantes.lina.uima.engines.TreeTaggerWrapper;
import fr.univnantes.lina.uima.models.ChineseSegmentResource;
import fr.univnantes.lina.uima.models.TreeTaggerParameter;
import fr.univnantes.lina.uima.tkregex.ae.RegexListResource;
import fr.univnantes.lina.uima.tkregex.ae.TokenRegexAE;
import uima.sandbox.filter.resources.DefaultFilterResource;
import uima.sandbox.filter.resources.FilterResource;
import uima.sandbox.lexer.engines.Lexer;
import uima.sandbox.lexer.resources.SegmentBank;
import uima.sandbox.lexer.resources.SegmentBankResource;
import uima.sandbox.mapper.engines.Mapper;
import uima.sandbox.mapper.resources.Mapping;
import uima.sandbox.mapper.resources.MappingResource;

/*
 * TODO Integrate frozen expressions
 * TODO Integrate the Sonar runner
 * TODO Add functional pipeline test cases for each collection type and for different pipeline configs
 */

/**
 * A collection reader and AE aggregator (builder pattern) that 
 * creates and runs a full pipeline.
 *  
 * @author Damien Cram
 *
 */
public class TermSuitePipeline {

    /* The Logger */
    private static final Logger LOGGER = LoggerFactory.getLogger(TermSuitePipeline.class);

    /* ******************************
     * MAIN PIPELINE PARAMETERS
     */
    private OccurrenceStore occurrenceStore = new MemoryOccurrenceStore();
    private Optional<? extends TermIndex> termIndex = Optional.absent();
    private Lang lang;
    private CollectionReaderDescription crDescription;
    private String pipelineObserverName;
    private AggregateBuilder aggregateBuilder;
    private String termHistoryResourceName = "PipelineHistory";

    /*
     * POS Tagger parameters
     */
    private Optional<String> mateModelsPath = Optional.absent();
    private Optional<String> treeTaggerPath = Optional.absent();

    /*
     * Regex Spotter params
     */
    private boolean addSpottedAnnoToTermIndex = true;
    private boolean spotWithOccurrences = true;
    private Optional<Boolean> logOverlappingRules = Optional.absent();
    private Optional<String> postProcessingStrategy = Optional.absent();
    private boolean enableSyntacticLabels = false;

    /*
     * Contextualizer options
     */
    private OccurrenceType contextualizeCoTermsType = OccurrenceType.SINGLE_WORD;
    private boolean contextualizeWithTermClasses = false;
    private int contextualizeWithCoOccurrenceFrequencyThreshhold = 1;
    private String contextAssocRateMeasure = LogLikelihood.class.getName();

    /*
     * Cleaner properties
     */
    private boolean keepVariantsWhileCleaning = false;

    /*
     * Compost Params
     */
    private Optional<Float> alpha = Optional.absent();
    private Optional<Float> beta = Optional.absent();
    private Optional<Float> gamma = Optional.absent();
    private Optional<Float> delta = Optional.absent();
    private Optional<Float> compostScoreThreshold = Optional.absent();
    private Optional<Integer> compostMinComponentSize = Optional.absent();
    private Optional<Integer> compostMaxComponentNum = Optional.absent();
    private Optional<Float> compostSegmentSimilarityThreshold = Optional.of(1f);

    /*
     * Graphical Variant Gatherer parameters
     */
    private Optional<Float> graphicalVariantSimilarityThreshold = Optional.absent();

    /* JSON */
    private boolean exportJsonWithOccurrences = true;
    private boolean exportJsonWithContext = false;
    private boolean linkMongoStore = false;
    /* TSV */
    private String tsvExportProperties = "groupingKey,wr";
    private boolean tsvWithVariantScores = false;
    private boolean tsvWithHeaders = true;

    /*
     * Streaming parameters
     */
    private Thread streamThread = null;
    private DocumentProvider documentProvider;

    /* *******************
     * CONSTRUCTORS
     */
    private TermSuitePipeline(String lang, String urlPrefix) {
        this.lang = Lang.forName(lang);
        this.aggregateBuilder = new AggregateBuilder();
        this.pipelineObserverName = PipelineObserver.class.getSimpleName() + "-" + Thread.currentThread().getId()
                + "-" + System.currentTimeMillis();

        TermSuiteResourceManager.getInstance().register(pipelineObserverName, new TermSuitePipelineObserver(2, 1));

        this.termHistoryResourceName = TermHistory.class.getSimpleName() + "-" + Thread.currentThread().getId()
                + "-" + System.currentTimeMillis();
        TermSuiteResourceManager.getInstance().register(termHistoryResourceName, new TermHistory());

        initUIMALogging();
    }

    private void initUIMALogging() {
        System.setProperty("org.apache.uima.logger.class", UIMASlf4jWrapperLogger.class.getName());
    }

    /**
     * 
     * Starts a chaining {@link TermSuitePipeline} builder. 
     * 
     * @param lang
     *          The language of the pipeline, as accepted by {@link Lang#forName(String)} (e.g. "en")
     * @return
     *          The chaining builder.
     * 
     */
    public static TermSuitePipeline create(String lang) {
        return new TermSuitePipeline(lang, null);
    }

    public static TermSuitePipeline create(TermIndex termIndex) {
        Preconditions.checkNotNull(termIndex.getName(),
                "The term index must have a name before it can be used in TermSuitePipeline");

        if (!TermSuiteResourceManager.getInstance().contains(termIndex.getName()))
            TermSuiteResourceManager.getInstance().register(termIndex.getName(), termIndex);

        TermSuitePipeline pipeline = create(termIndex.getLang().getCode());
        pipeline.emptyCollection();
        pipeline.setTermIndex(termIndex);

        return pipeline;
    }
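
    /*
     * A minimal end-to-end usage sketch built only from methods shown in this
     * listing; the corpus and output paths are hypothetical, and the TreeTagger
     * install path must be set before aeTreeTagger() is invoked (its setter is
     * not shown in this excerpt):
     *
     *   TermSuitePipeline.create("en")
     *       .setResourceDir("/opt/termsuite/resources/")
     *       .setCollection(TermSuiteCollection.TXT, "/data/corpus/", "UTF-8")
     *       .aeWordTokenizer()
     *       .aeTreeTagger()
     *       .aeRegexSpotter()
     *       .haeTsvExporter("/data/terminology.tsv")
     *       .run();
     */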

    /* *******************************
     * RUNNERS
     */

    /**
     * Runs the pipeline with {@link SimplePipeline} on the {@link CollectionReader} that must have been defined.
     * 
     * @throws TermSuitePipelineException if no {@link CollectionReader} has been declared on this pipeline
     */
    public TermSuitePipeline run() {
        checkCR();
        runPipeline();
        return this;
    }

    private void runPipeline() {
        try {
            SimplePipeline.runPipeline(this.crDescription, createDescription());
            terminates();
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public DocumentStream stream(CasConsumer consumer) {
        try {
            String id = new BigInteger(130, new SecureRandom()).toString(8);
            String casConsumerName = "pipeline-" + id + "-consumer";
            ConsumerRegistry.getInstance().registerConsumer(casConsumerName, consumer);
            String queueName = "pipeline-" + id + "-queue";
            final BlockingQueue<CollectionDocument> q = QueueRegistry.getInstance().registerQueue(queueName, 10);

            /*
             * 1- Creates the streaming collection reader desc
             */
            this.crDescription = CollectionReaderFactory.createReaderDescription(StreamingCollectionReader.class,
                    StreamingCollectionReader.PARAM_LANGUAGE, this.lang.getCode(),
                    StreamingCollectionReader.PARAM_NAME, queueName, StreamingCollectionReader.PARAM_QUEUE_NAME,
                    queueName);

            /*
             * 2- Aggregate the consumer AE
             */
            AnalysisEngineDescription consumerAE = AnalysisEngineFactory.createEngineDescription(
                    StreamingCasConsumer.class, StreamingCasConsumer.PARAM_CONSUMER_NAME, casConsumerName);
            this.aggregateBuilder.add(consumerAE);

            /*
             * 3- Starts the pipeline in a separate Thread 
             */
            this.streamThread = new Thread() {
                @Override
                public void run() {
                    runPipeline();
                }
            };
            this.streamThread.start();

            /*
             * 4- Bind user inputs to the queue
             */
            documentProvider = new DocumentProvider() {
                @Override
                public void provide(CollectionDocument doc) {
                    try {
                        q.put(doc);
                    } catch (InterruptedException e) {
                        LOGGER.warn("Interrupted while there were more documents waiting.");
                    }
                }
            };
            return new DocumentStream(streamThread, documentProvider, consumer, queueName);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
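
    /*
     * A hedged sketch of the streaming entry point; the exact CasConsumer
     * contract and how documents are fed to the returned DocumentStream are
     * not shown in this listing, so the lines below are assumptions:
     *
     *   CasConsumer consumer = ...; // callback invoked for each processed CAS
     *   DocumentStream stream = pipeline.stream(consumer);
     *   // documents handed to the stream are queued and picked up by the
     *   // StreamingCollectionReader running on the stream thread
     */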

    public Thread getStreamThread() {
        return streamThread;
    }

    private void checkCR() {
        if (crDescription == null)
            throw new TermSuitePipelineException("No collection reader has been declared on this pipeline.");
    }

    private void terminates() {
        if (termIndex.isPresent() && termIndex.get().getOccurrenceStore() instanceof MongoDBOccurrenceStore)
            ((MongoDBOccurrenceStore) termIndex.get().getOccurrenceStore()).close();

    }

    /**
     * Registers a pipeline listener.
     * 
     * @param pipelineListener
     *             the listener that gets notified of the pipeline progress
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline addPipelineListener(PipelineListener pipelineListener) {
        TermSuiteResourceManager manager = TermSuiteResourceManager.getInstance();
        ((TermSuitePipelineObserver) manager.get(pipelineObserverName)).registerListener(pipelineListener);
        return this;
    }

    /**
     * Runs the pipeline with {@link SimplePipeline} without requiring a {@link CollectionReader}
     * to be defined.
     * @param cas the {@link JCas} on which the pipeline operates.
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline run(JCas cas) {
        try {
            SimplePipeline.runPipeline(cas, createDescription());
            terminates();
            return this;
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline setInlineString(String text) {
        try {
            this.crDescription = CollectionReaderFactory.createReaderDescription(StringCollectionReader.class,
                    StringCollectionReader.PARAM_TEXT, text, StringCollectionReader.PARAM_LANGUAGE,
                    this.lang.getCode());
            return this;
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline setIstexCollection(String apiURL, List<String> documentsIds) {
        try {
            this.crDescription = CollectionReaderFactory.createReaderDescription(IstexCollectionReader.class,
                    IstexCollectionReader.PARAM_IGNORE_LANGUAGE_ERRORS, true, IstexCollectionReader.PARAM_LANGUAGE,
                    this.lang.getCode(), IstexCollectionReader.PARAM_ID_LIST, Joiner.on(",").join(documentsIds),
                    IstexCollectionReader.PARAM_API_URL, apiURL);
            return this;
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Creates a collection reader for this pipeline.
     * 
     * @param termSuiteCollection
     *             the type of collection to read (TEI, TXT, XMI, JSON, or EMPTY)
     * @param collectionPath
     *             the path to the input collection directory
     * @param collectionEncoding
     *             the character encoding of the collection files, e.g. "UTF-8"
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setCollection(TermSuiteCollection termSuiteCollection, String collectionPath,
            String collectionEncoding) {
        Preconditions.checkNotNull(termSuiteCollection);
        Preconditions.checkNotNull(collectionPath);
        Preconditions.checkNotNull(collectionEncoding);
        try {
            switch (termSuiteCollection) {
            case TEI:
                this.crDescription = CollectionReaderFactory.createReaderDescription(TeiCollectionReader.class,
                        TeiCollectionReader.PARAM_INPUTDIR, collectionPath,
                        TxtCollectionReader.PARAM_COLLECTION_TYPE, termSuiteCollection,
                        TeiCollectionReader.PARAM_ENCODING, collectionEncoding, TeiCollectionReader.PARAM_LANGUAGE,
                        this.lang.getCode());
                break;
            case TXT:
                this.crDescription = CollectionReaderFactory.createReaderDescription(TxtCollectionReader.class,
                        TxtCollectionReader.PARAM_INPUTDIR, collectionPath,
                        TxtCollectionReader.PARAM_COLLECTION_TYPE, termSuiteCollection,
                        TxtCollectionReader.PARAM_ENCODING, collectionEncoding, TxtCollectionReader.PARAM_LANGUAGE,
                        this.lang.getCode());
                break;
            case XMI:
                this.crDescription = CollectionReaderFactory.createReaderDescription(XmiCollectionReader.class,
                        XmiCollectionReader.PARAM_INPUTDIR, collectionPath,
                        XmiCollectionReader.PARAM_COLLECTION_TYPE, termSuiteCollection,
                        XmiCollectionReader.PARAM_ENCODING, collectionEncoding, XmiCollectionReader.PARAM_LANGUAGE,
                        this.lang.getCode());
                break;
            case JSON:
                this.crDescription = CollectionReaderFactory.createReaderDescription(JsonCollectionReader.class,
                        JsonCollectionReader.PARAM_INPUTDIR, collectionPath,
                        JsonCollectionReader.PARAM_COLLECTION_TYPE, termSuiteCollection,
                        JsonCollectionReader.PARAM_ENCODING, collectionEncoding,
                        JsonCollectionReader.PARAM_LANGUAGE, this.lang.getCode());
                break;
            case EMPTY:
                this.crDescription = CollectionReaderFactory.createReaderDescription(EmptyCollectionReader.class);
                break;
            default:
                throw new IllegalArgumentException("No such collection: " + termSuiteCollection);
            }
            return this;
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
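
    /*
     * Example (hypothetical path): reading a UTF-8 plain-text corpus.
     *
     *   pipeline.setCollection(TermSuiteCollection.TXT, "/data/corpus/txt/", "UTF-8");
     */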

    /**
     * Creates a collection reader of type {@link GenericXMLToTxtCollectionReader} for this pipeline.
     * 
     * Requires a list of dropped tags and txt tags for collection parsing. 
     * 
     * @see AbstractToTxtSaxHandler
     * 
     * @param termSuiteCollection
     *             the type of collection to read
     * @param collectionPath
     *             the path to the input collection directory
     * @param collectionEncoding
     *             the character encoding of the collection files
     * @param droppedTags
     *             the XML tags whose content is dropped during parsing
     * @param txtTags
     *             the XML tags whose content is extracted as plain text
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setCollection(TermSuiteCollection termSuiteCollection, String collectionPath,
            String collectionEncoding, String droppedTags, String txtTags) {
        try {
            this.crDescription = CollectionReaderFactory.createReaderDescription(
                    GenericXMLToTxtCollectionReader.class, GenericXMLToTxtCollectionReader.PARAM_COLLECTION_TYPE,
                    termSuiteCollection, GenericXMLToTxtCollectionReader.PARAM_DROPPED_TAGS, droppedTags,
                    GenericXMLToTxtCollectionReader.PARAM_TXT_TAGS, txtTags,
                    GenericXMLToTxtCollectionReader.PARAM_INPUTDIR, collectionPath,
                    GenericXMLToTxtCollectionReader.PARAM_ENCODING, collectionEncoding,
                    GenericXMLToTxtCollectionReader.PARAM_LANGUAGE, this.lang.getCode());
            return this;
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Invoke this method if TermSuite resources are accessible via 
     * a "file:/path/to/res/" URL, i.e. when they can be found locally.
     * 
     * @param resourceDir
     *             the local directory containing the TermSuite resources
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setResourceDir(String resourceDir) {
        Preconditions.checkArgument(new File(resourceDir).isDirectory(), "Not a directory: %s", resourceDir);

        if (!resourceDir.endsWith(File.separator))
            resourceDir = resourceDir + File.separator;
        //      TermSuiteUtils.addToClasspath(resourceDir);
        try {
            this.resourceUrlPrefix = Optional.of(new URL("file:" + resourceDir));
            LOGGER.info("Resource URL prefix is: {}", this.resourceUrlPrefix.get());
        } catch (MalformedURLException e) {
            throw new TermSuitePipelineException(e);
        }
        return this;
    }

    public TermSuitePipeline setResourceJar(String resourceJar) {
        Preconditions.checkArgument(FileUtils.isJar(resourceJar), "Not a jar file: %s", resourceJar);
        try {
            this.resourceUrlPrefix = Optional.of(new URL("jar:file:" + resourceJar + "!/"));
            LOGGER.info("Resource URL prefix is: {}", this.resourceUrlPrefix.get());
        } catch (MalformedURLException e) {
            throw new TermSuitePipelineException(e);
        }
        return this;
    }

    private Optional<URL> resourceUrlPrefix = Optional.absent();

    public TermSuitePipeline setResourceUrlPrefix(String urlPrefix) {
        try {
            this.resourceUrlPrefix = Optional.of(new URL(urlPrefix));
        } catch (MalformedURLException e) {
            throw new TermSuitePipelineException("Bad url: " + urlPrefix, e);
        }
        return this;
    }
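
    /*
     * The three ways of locating TermSuite resources, shown with hypothetical
     * paths. If none is invoked, resources are loaded from the classpath (see
     * getResUrl below):
     *
     *   pipeline.setResourceDir("/opt/termsuite/resources/");
     *   pipeline.setResourceJar("/opt/termsuite/termsuite-resources.jar");
     *   pipeline.setResourceUrlPrefix("http://example.org/termsuite/resources/");
     */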

    public TermSuitePipeline setContextAssocRateMeasure(String contextAssocRateMeasure) {
        this.contextAssocRateMeasure = contextAssocRateMeasure;
        return this;
    }

    public TermSuitePipeline emptyCollection() {
        return setCollection(TermSuiteCollection.EMPTY, "", "UTF-8");
    }

    public AnalysisEngineDescription createDescription() {
        try {
            return this.aggregateBuilder.createAggregateDescription();
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline setHistory(TermHistory history) {
        TermSuiteResourceManager.getInstance().remove(termHistoryResourceName);
        TermSuiteResourceManager.getInstance().register(termHistoryResourceName, history);
        return this;
    }

    public TermSuitePipeline watch(String... termKeys) {
        TermHistory termHistory = (TermHistory) TermSuiteResourceManager.getInstance().get(termHistoryResourceName);
        termHistory.addWatchedTerms(termKeys);
        return this;
    }
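
    /*
     * Example with hypothetical term keys: record the history of two terms
     * across the whole pipeline.
     *
     *   pipeline.watch("n: energy", "an: renewable energy");
     */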

    public String getHistoryResourceName() {
        return termHistoryResourceName;
    }

    public TermSuitePipeline aeWordTokenizer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(Lexer.class,
                    Lexer.PARAM_TYPE, "eu.project.ttc.types.WordAnnotation");

            ExternalResourceDescription segmentBank = ExternalResourceFactory.createExternalResourceDescription(
                    SegmentBankResource.class, getResUrl(TermSuiteResource.SEGMENT_BANK));

            ExternalResourceFactory.bindResource(ae, SegmentBank.KEY_SEGMENT_BANK, segmentBank);

            return aggregateAndReturn(ae, "Word tokenizer", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }

    }

    //   private TermSuitePipeline aggregateAndReturn(AnalysisEngineDescription ae) {
    //      return aggregateAndReturn(ae, null, 0);
    //   }

    private Map<String, MutableInt> taskNumbers = Maps.newHashMap();

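    // Numbers repeated task names: the first call with "Exporting" yields
    // "Exporting-1", the second "Exporting-2", and so on.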
    private String getNumberedTaskName(String taskName) {
        if (!taskNumbers.containsKey(taskName))
            taskNumbers.put(taskName, new MutableInt(0));
        taskNumbers.get(taskName).increment();
        return String.format("%s-%d", taskName, taskNumbers.get(taskName).intValue());
    }

    private TermSuitePipeline aggregateAndReturn(AnalysisEngineDescription ae, String taskName, int ccWeight) {
        Preconditions.checkNotNull(taskName);

        // Add the pre-task observer
        this.aggregateBuilder.add(aeObserver(taskName, ccWeight, PipelineObserver.TASK_STARTED));

        // Add the ae itself
        this.aggregateBuilder.add(ae);

        // Add the post-task observer
        this.aggregateBuilder.add(aeObserver(taskName, ccWeight, PipelineObserver.TASK_ENDED));
        return this;
    }

    private AnalysisEngineDescription aeObserver(String taskName, int weight, String hook) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(PipelineObserver.class,
                    PipelineObserver.TASK_NAME, taskName, PipelineObserver.HOOK, hook, PipelineObserver.WEIGHT,
                    weight);

            ExternalResourceFactory.bindResource(ae, resObserver());

            return ae;
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }

    }

    public TermSuitePipeline aeTreeTagger() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(TreeTaggerWrapper.class,
                    TreeTaggerWrapper.PARAM_ANNOTATION_TYPE, "eu.project.ttc.types.WordAnnotation",
                    TreeTaggerWrapper.PARAM_TAG_FEATURE, "tag", TreeTaggerWrapper.PARAM_LEMMA_FEATURE, "lemma",
                    TreeTaggerWrapper.PARAM_UPDATE_ANNOTATION_FEATURES, true,
                    TreeTaggerWrapper.PARAM_TT_HOME_DIRECTORY, this.treeTaggerPath.get());

            ExternalResourceDescription ttParam = ExternalResourceFactory.createExternalResourceDescription(
                    TreeTaggerParameter.class, getResUrl(TermSuiteResource.TREETAGGER_CONFIG, Tagger.TREE_TAGGER));

            ExternalResourceFactory.bindResource(ae, TreeTaggerParameter.KEY_TT_PARAMETER, ttParam);

            return aggregateAndReturn(ae, "POS Tagging (TreeTagger)", 0).ttLemmaFixer().ttNormalizer();
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /*
     * Builds the resource url for this pipeline
     */
    private URL getResUrl(TermSuiteResource tsResource, Tagger tagger) {
        if (!resourceUrlPrefix.isPresent())
            return tsResource.fromClasspath(lang, tagger);
        else
            return tsResource.fromUrlPrefix(this.resourceUrlPrefix.get(), lang, tagger);

    }

    /*
     * Builds the resource url for this pipeline
     */
    private URL getResUrl(TermSuiteResource tsResource) {
        if (!resourceUrlPrefix.isPresent())
            return tsResource.fromClasspath(lang);
        else
            return tsResource.fromUrlPrefix(this.resourceUrlPrefix.get(), lang);
    }

    public TermSuitePipeline setMateModelPath(String path) {
        this.mateModelsPath = Optional.of(path);
        Preconditions.checkArgument(Files.exists(Paths.get(path)), "Directory %s does not exist", path);
        Preconditions.checkArgument(Files.isDirectory(Paths.get(path)), "File %s is not a directory", path);
        return this;
    }

    public TermSuitePipeline aeMateTaggerLemmatizer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(MateLemmatizerTagger.class);

            Preconditions.checkState(mateModelsPath.isPresent(),
                    "The path to mate models must be explicitely given. See method #setMateModelPath");
            String lemmatizerModel = Paths.get(mateModelsPath.get(), "mate-lemma-" + lang.getCode() + ".model")
                    .toString();
            String taggerModel = Paths.get(mateModelsPath.get(), "mate-pos-" + lang.getCode() + ".model")
                    .toString();
            Preconditions.checkArgument(Files.exists(Paths.get(lemmatizerModel)),
                    "Lemmatizer model does not exist: %s", lemmatizerModel);
            Preconditions.checkArgument(Files.exists(Paths.get(taggerModel)), "Tagger model does not exist: %s",
                    taggerModel);

            ExternalResourceFactory.createDependencyAndBind(ae, MateLemmatizerTagger.LEMMATIZER,
                    MateLemmatizerModel.class, lemmatizerModel);
            ExternalResourceFactory.createDependencyAndBind(ae, MateLemmatizerTagger.TAGGER, MateTaggerModel.class,
                    taggerModel);

            return aggregateAndReturn(ae, "POS Tagging (Mate)", 0).mateLemmaFixer().mateNormalizer();
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
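
    /*
     * Example (hypothetical model directory). The models are looked up under
     * the given path as mate-lemma-<lang>.model and mate-pos-<lang>.model:
     *
     *   pipeline.setMateModelPath("/opt/mate/models").aeMateTaggerLemmatizer();
     */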

    /**
     * Defines the term properties that appear in the TSV export file.
     * 
     * @see #haeTsvExporter(String)
     * @param properties
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setTsvExportProperties(TermProperty... properties) {
        this.tsvExportProperties = Joiner.on(",").join(properties);
        return this;
    }
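
    /*
     * Example, assuming TermProperty defines GROUPING_KEY and FREQUENCY
     * constants (the default export is "groupingKey,wr"):
     *
     *   pipeline.setTsvExportProperties(TermProperty.GROUPING_KEY, TermProperty.FREQUENCY)
     *           .haeTsvExporter("/data/terminology.tsv");
     */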

    /**
     * Exports the {@link TermIndex} in TSV format.
     * 
     * @see #setTsvExportProperties(TermProperty...)
     * @param toFilePath
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeTsvExporter(String toFilePath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(TSVExporterAE.class,
                    TSVExporterAE.TO_FILE_PATH, toFilePath, TSVExporterAE.TERM_PROPERTIES, this.tsvExportProperties,
                    TSVExporterAE.SHOW_HEADERS, tsvWithHeaders, TSVExporterAE.SHOW_SCORES, tsvWithVariantScores);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, getNumberedTaskName("Exporting the terminology to " + toFilePath), 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * 
     * Exports examples of matching pairs for each variation rule.
     * 
     * @param toFilePath
     *             the file path where the examples for each variation rule are written
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeExportVariationRuleExamples(String toFilePath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                    ExportVariationRuleExamplesAE.class, ExportVariationRuleExamplesAE.TO_FILE_PATH, toFilePath);
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resSyntacticVariantRules());

            return aggregateAndReturn(ae, "Exporting variation rules examples", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * 
     * Exports all compound words of the terminology to the given file path.
     * 
     * @param toFilePath
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeCompoundExporter(String toFilePath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(CompoundExporterAE.class,
                    CompoundExporterAE.TO_FILE_PATH, toFilePath);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Exporting compounds", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline haeVariationExporter(String toFilePath, VariationType... vTypes) {
        try {
            String typeStrings = Joiner.on(",").join(vTypes);
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(VariationExporterAE.class,
                    VariationExporterAE.TO_FILE_PATH, toFilePath, VariationExporterAE.VARIATION_TYPES, typeStrings);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            String taskName = "Exporting variations " + typeStrings + " to file " + toFilePath;
            return aggregateAndReturn(ae, taskName, 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline haeTbxExporter(String toFilePath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(TbxExporterAE.class,
                    TbxExporterAE.TO_FILE_PATH, toFilePath);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, getNumberedTaskName("Exporting the terminology to " + toFilePath), 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline haeEvalExporter(String toFilePath, boolean withVariants) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(EvalExporterAE.class,
                    EvalExporterAE.TO_FILE_PATH, toFilePath, EvalExporterAE.WITH_VARIANTS, withVariants);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Exporting evaluation files", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline setExportJsonWithOccurrences(boolean exportJsonWithOccurrences) {
        this.exportJsonWithOccurrences = exportJsonWithOccurrences;
        return this;
    }

    public TermSuitePipeline setExportJsonWithContext(boolean b) {
        this.exportJsonWithContext = b;
        return this;
    }

    public TermSuitePipeline haeJsonExporter(String toFilePath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(JsonExporterAE.class,
                    JsonExporterAE.TO_FILE_PATH, toFilePath, JsonExporterAE.WITH_OCCURRENCE,
                    exportJsonWithOccurrences, JsonExporterAE.WITH_CONTEXTS, exportJsonWithContext,
                    JsonExporterAE.LINKED_MONGO_STORE, this.linkMongoStore);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, getNumberedTaskName("Exporting the terminology to " + toFilePath), 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
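
    /*
     * Example (hypothetical output path): export the terminology to JSON with
     * occurrences but without contexts.
     *
     *   pipeline.setExportJsonWithOccurrences(true)
     *           .setExportJsonWithContext(false)
     *           .haeJsonExporter("/data/terminology.json");
     */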

    /**
     * 
     * Creates a TSV output with:
     *  - the occurrence list of each term and their in-text contexts,
     *  - a JSON structure for the evaluation of each variant.
     * 
     * @param toFilePath
     *          The output file path
     * @param topN
     *          The number of variants to keep in the file
     * @param maxVariantsPerTerm
     *          The maximum number of variants to eval for each term
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeVariantEvalExporter(String toFilePath, int topN, int maxVariantsPerTerm) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                    VariantEvalExporterAE.class, VariantEvalExporterAE.TO_FILE_PATH, toFilePath,
                    VariantEvalExporterAE.TOP_N, topN, VariantEvalExporterAE.NB_VARIANTS_PER_TERM,
                    maxVariantsPerTerm);

            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Exporting variant evaluation files", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    private void addParameters(AnalysisEngineDescription ae, Object... parameters) {
        if (parameters.length % 2 == 1)
            throw new IllegalArgumentException(
                    "Expecting even number of arguements for key-value pairs: " + parameters.length);
        for (int i = 0; i < parameters.length; i += 2)
            ae.getMetaData().getConfigurationParameterSettings().setParameterValue((String) parameters[i],
                    parameters[i + 1]);
    }
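
    // Usage note: addParameters(ae, RegexSpotter.LOG_OVERLAPPING_RULES, true)
    // sets a single key-value pair on an existing description, as done in
    // aeRegexSpotter() below.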

    private TermSuitePipeline subNormalizer(String target, URL mappingFile) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(Mapper.class,
                    Mapper.PARAM_SOURCE, "eu.project.ttc.types.WordAnnotation:tag", Mapper.PARAM_TARGET, target,
                    Mapper.PARAM_UPDATE, true);

            ExternalResourceDescription mappingRes = ExternalResourceFactory
                    .createExternalResourceDescription(MappingResource.class, mappingFile);

            ExternalResourceFactory.bindResource(ae, Mapping.KEY_MAPPING, mappingRes);

            return aggregateAndReturn(ae, "Normalizing " + mappingFile, 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    private TermSuitePipeline caseNormalizer(Tagger tagger) {
        return subNormalizer("eu.project.ttc.types.WordAnnotation:case",
                getResUrl(TermSuiteResource.TAGGER_CASE_MAPPING, tagger));
    }

    private TermSuitePipeline categoryNormalizer(Tagger tagger) {
        return subNormalizer("eu.project.ttc.types.WordAnnotation:category",
                getResUrl(TermSuiteResource.TAGGER_CATEGORY_MAPPING, tagger));
    }

    private TermSuitePipeline tenseNormalizer(Tagger tagger) {
        return subNormalizer("eu.project.ttc.types.WordAnnotation:tense",
                getResUrl(TermSuiteResource.TAGGER_TENSE_MAPPING, tagger));
    }

    private TermSuitePipeline subCategoryNormalizer(Tagger tagger) {
        return subNormalizer("eu.project.ttc.types.WordAnnotation:subCategory",
                getResUrl(TermSuiteResource.TAGGER_SUBCATEGORY_MAPPING, tagger));
    }

    private TermSuitePipeline moodNormalizer(Tagger tagger) {
        return subNormalizer("eu.project.ttc.types.WordAnnotation:mood",
                getResUrl(TermSuiteResource.TAGGER_MOOD_MAPPING, tagger));
    }

    private TermSuitePipeline numberNormalizer(Tagger tagger) {
        return subNormalizer("eu.project.ttc.types.WordAnnotation:number",
                getResUrl(TermSuiteResource.TAGGER_NUMBER_MAPPING, tagger));
    }

    private TermSuitePipeline genderNormalizer(Tagger tagger) {
        return subNormalizer("eu.project.ttc.types.WordAnnotation:gender",
                getResUrl(TermSuiteResource.TAGGER_GENDER_MAPPING, tagger));
    }

    private TermSuitePipeline mateNormalizer() {
        return normalizer(Tagger.MATE);
    }

    private TermSuitePipeline ttNormalizer() {
        return normalizer(Tagger.TREE_TAGGER);
    }

    private TermSuitePipeline normalizer(Tagger tagger) {
        categoryNormalizer(tagger);
        subCategoryNormalizer(tagger);
        moodNormalizer(tagger);
        tenseNormalizer(tagger);
        genderNormalizer(tagger);
        numberNormalizer(tagger);
        return caseNormalizer(tagger);
    }

    public TermSuitePipeline aeStemmer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(Stemmer.class,
                    Stemmer.PARAM_FEATURE, "eu.project.ttc.types.WordAnnotation:stem", Stemmer.PARAM_LANGUAGE, lang,
                    Stemmer.PARAM_UPDATE, true);

            return aggregateAndReturn(ae, "Stemming", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    private TermSuitePipeline ttLemmaFixer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(TreeTaggerLemmaFixer.class,
                    TreeTaggerLemmaFixer.LANGUAGE, lang.getCode());

            return aggregateAndReturn(ae, "Fixing lemmas", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    private TermSuitePipeline mateLemmaFixer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(MateLemmaFixer.class,
                    MateLemmaFixer.LANGUAGE, lang.getCode());

            return aggregateAndReturn(ae, "Fixing lemmas", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Iterates over the {@link TermIndex} and marks terms as
     * "fixed expressions" when their lemmas are found in the 
     * {@link FixedExpressionResource}.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeFixedExpressionTermMarker() {
        /*
         * TODO Check if resource is present for that current language.
         */
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(FixedExpressionTermMarker.class);

            ExternalResourceDescription fixedExprRes = ExternalResourceFactory.createExternalResourceDescription(
                    FixedExpressionResource.class, getResUrl(TermSuiteResource.FIXED_EXPRESSIONS));

            ExternalResourceFactory.bindResource(ae, FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
                    fixedExprRes);

            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Marking fixed expression terms", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Spots fixed expressions in the CAS and creates a {@link FixedExpression}
     * annotation whenever one is found.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeFixedExpressionSpotter() {
        /*
         * TODO Check if resource is present for that current language.
         */
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                    FixedExpressionSpotter.class, FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
                    FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, false,
                    FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, true);

            ExternalResourceDescription fixedExprRes = ExternalResourceFactory.createExternalResourceDescription(
                    FixedExpressionResource.class, getResUrl(TermSuiteResource.FIXED_EXPRESSIONS));

            ExternalResourceFactory.bindResource(ae, FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
                    fixedExprRes);

            return aggregateAndReturn(ae, "Spotting fixed expressions", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * The single-word and multi-word term spotter AE
     * based on UIMA Tokens Regex.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeRegexSpotter() {
        try {
            Serializable postProcStrategy = this.postProcessingStrategy.isPresent()
                    ? this.postProcessingStrategy.get()
                    : lang.getRegexPostProcessingStrategy();
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(RegexSpotter.class,
                    TokenRegexAE.PARAM_ALLOW_OVERLAPPING_OCCURRENCES, true, RegexSpotter.POST_PROCESSING_STRATEGY,
                    postProcStrategy);

            if (enableSyntacticLabels)
                addParameters(ae, TokenRegexAE.PARAM_SET_LABELS, "labels");

            if (logOverlappingRules.isPresent())
                addParameters(ae, RegexSpotter.LOG_OVERLAPPING_RULES, logOverlappingRules.get());

            ExternalResourceDescription mwtRules = ExternalResourceFactory.createExternalResourceDescription(
                    RegexListResource.class, getResUrl(TermSuiteResource.MWT_RULES));

            ExternalResourceFactory.bindResource(ae, RegexListResource.KEY_TOKEN_REGEX_RULES, mwtRules);

            ExternalResourceFactory.bindResource(ae, resHistory());

            ExternalResourceDescription allowedCharsRes = ExternalResourceFactory.createExternalResourceDescription(
                    CharacterFootprintTermFilter.class, getResUrl(TermSuiteResource.ALLOWED_CHARS));

            ExternalResourceFactory.bindResource(ae, RegexSpotter.CHARACTER_FOOTPRINT_TERM_FILTER, allowedCharsRes);

            if (this.addSpottedAnnoToTermIndex)
                ExternalResourceFactory.bindResource(ae, resTermIndex());

            ExternalResourceDescription stopWordsRes = ExternalResourceFactory.createExternalResourceDescription(
                    DefaultFilterResource.class, getResUrl(TermSuiteResource.STOP_WORDS_FILTER));

            ExternalResourceFactory.bindResource(ae, RegexSpotter.STOP_WORD_FILTER, stopWordsRes);

            return aggregateAndReturn(ae, "Spotting terms", 0).aeTermOccAnnotationImporter();
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * An AE that imports all {@link TermOccAnnotation}s from the CAS into a {@link TermIndex}.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeTermOccAnnotationImporter() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                    TermOccAnnotationImporter.class, TermOccAnnotationImporter.KEEP_OCCURRENCES_IN_TERM_INDEX,
                    spotWithOccurrences);
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, "TermOccAnnotation importer", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Naive morphological analysis of prefix compounds based on a 
     * prefix dictionary resource
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aePrefixSplitter() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(PrefixSplitter.class);

            ExternalResourceDescription prefixTreeRes = ExternalResourceFactory
                    .createExternalResourceDescription(PrefixTree.class, getResUrl(TermSuiteResource.PREFIX_BANK));

            ExternalResourceFactory.bindResource(ae, PrefixTree.PREFIX_TREE, prefixTreeRes);

            ExternalResourceFactory.bindResource(ae, resHistory());

            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Splitting prefixes", 0).aePrefixExceptionsSetter();
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline aeSuffixDerivationDetector() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(SuffixDerivationDetecter.class);

            ExternalResourceDescription suffixDerivationsRes = ExternalResourceFactory
                    .createExternalResourceDescription(SuffixDerivationList.class,
                            getResUrl(TermSuiteResource.SUFFIX_DERIVATIONS));

            ExternalResourceFactory.bindResource(ae, SuffixDerivationList.SUFFIX_DERIVATIONS, suffixDerivationsRes);

            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, "Detecting suffix derivations prefixes", 0).aeSuffixDerivationException();
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    private TermSuitePipeline aeSuffixDerivationException() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(SuffixDerivationExceptionSetter.class);

            ExternalResourceDescription suffixDerivationsExceptionsRes = ExternalResourceFactory
                    .createExternalResourceDescription(MultimapFlatResource.class,
                            getResUrl(TermSuiteResource.SUFFIX_DERIVATION_EXCEPTIONS));

            ExternalResourceFactory.bindResource(ae, SuffixDerivationExceptionSetter.SUFFIX_DERIVATION_EXCEPTION,
                    suffixDerivationsExceptionsRes);

            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, "Setting suffix derivation exceptions", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }

    }

    private TermSuitePipeline aeManualCompositionSetter() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(ManualCompositionSetter.class);

            ExternalResourceDescription manualCompositionListRes = ExternalResourceFactory
                    .createExternalResourceDescription(ManualSegmentationResource.class,
                            getResUrl(TermSuiteResource.MANUAL_COMPOSITIONS));

            ExternalResourceFactory.bindResource(ae, ManualCompositionSetter.MANUAL_COMPOSITION_LIST,
                    manualCompositionListRes);

            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Setting manual composition", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }

    }

    private TermSuitePipeline aePrefixExceptionsSetter() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(ManualPrefixSetter.class);

            ExternalResourceDescription prefixExceptionsRes = ExternalResourceFactory
                    .createExternalResourceDescription(ManualSegmentationResource.class,
                            getResUrl(TermSuiteResource.PREFIX_EXCEPTIONS));

            ExternalResourceFactory.bindResource(ae, ManualPrefixSetter.PREFIX_EXCEPTIONS, prefixExceptionsRes);

            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, "Setting prefix exceptions", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }

    }

    /**
     * Removes from the term index any term having a 
     * stop word at its boundaries.
     * 
     * @see TermIndexBlacklistWordFilterAE
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeStopWordsFilter() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(TermIndexBlacklistWordFilterAE.class);

            ExternalResourceDescription stopWordsFilterResourceRes = ExternalResourceFactory
                    .createExternalResourceDescription(DefaultFilterResource.class,
                            getResUrl(TermSuiteResource.STOP_WORDS_FILTER));

            ExternalResourceFactory.bindResource(ae, FilterResource.KEY_FILTERS, stopWordsFilterResourceRes);

            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Filtering stop words", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Exports all CAS as XMI files to a given directory.
     * 
     * @param toDirectoryPath
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeXmiCasExporter(String toDirectoryPath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(XmiCasExporter.class,
                    XmiCasExporter.OUTPUT_DIRECTORY, toDirectoryPath);

            return aggregateAndReturn(ae, "Exporting XMI Cas files", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Exports all CAS as JSON files to a given directory.
     *
     * @param toDirectoryPath
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeTermsuiteJsonCasExporter(String toDirectoryPath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                    TermsuiteJsonCasExporter.class, TermsuiteJsonCasExporter.OUTPUT_DIRECTORY, toDirectoryPath);

            return aggregateAndReturn(ae, "Exporting Json Cas files", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Exports all CAS in TSV format to a given directory.
     * 
     * @see SpotterTSVWriter
     * @param toDirectoryPath
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeSpotterTSVWriter(String toDirectoryPath) {
        try {
            // SpotterTSVWriter accepts the same OUTPUT_DIRECTORY parameter as XmiCasExporter
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(SpotterTSVWriter.class,
                    XmiCasExporter.OUTPUT_DIRECTORY, toDirectoryPath);

            return aggregateAndReturn(ae, "Exporting annotations in TSV to " + toDirectoryPath, 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
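
    /*
     * Usage sketch (paths are illustrative): the hae* exporters chain like
     * any other engine of this builder.
     *
     *   pipeline
     *       .haeXmiCasExporter("output/xmi")
     *       .haeTermsuiteJsonCasExporter("output/json");
     */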

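    /**
     * Logs the progress of document processing.
     * 
     * @see DocumentLogger
     * @param nbDocument
     *          the total number of documents in the collection
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */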
    public TermSuitePipeline aeDocumentLogger(long nbDocument) {

        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(DocumentLogger.class,
                    DocumentLogger.NB_DOCUMENTS, nbDocument);

            return aggregateAndReturn(ae, "Document logging", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Tokenizer for Chinese collections.
     * @see ChineseSegmenter
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeChineseTokenizer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(ChineseSegmenter.class,
                    ChineseSegmenter.ANNOTATION_TYPE, "eu.project.ttc.types.WordAnnotation");
            ExternalResourceFactory.createDependencyAndBind(ae, ChineseSegmenter.CHINESE_WORD_SEGMENTS,
                    ChineseSegmentResource.class, ChineseSegmenterResourceHelper.getChineseWordSegments());
            ExternalResourceFactory.createDependencyAndBind(ae, ChineseSegmenter.CHINESE_FOREIGN_NAME_SEGMENTS,
                    ChineseSegmentResource.class, ChineseSegmenterResourceHelper.getForeignNameSegments());
            ExternalResourceFactory.createDependencyAndBind(ae, ChineseSegmenter.CHINESE_NUMBER_SEGMENTS,
                    ChineseSegmentResource.class, ChineseSegmenterResourceHelper.getNumberSegments());

            return aggregateAndReturn(ae, "Word tokenizing", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

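    /*
     * The res*() accessors below share the same lazy-initialization pattern:
     * each ExternalResourceDescription is built on first use and cached, so
     * every AE added afterwards binds to one and the same resource instance.
     */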
    private ExternalResourceDescription termIndexResourceDesc;

    public ExternalResourceDescription resTermIndex() {
        if (termIndexResourceDesc == null) {
            if (!termIndex.isPresent())
                emptyTermIndex(UUID.randomUUID().toString());

            termIndexResourceDesc = ExternalResourceFactory
                    .createExternalResourceDescription(TermIndexResource.class, termIndex.get().getName());

            TermSuiteResourceManager manager = TermSuiteResourceManager.getInstance();

            // register the term index if not in term index manager
            if (!manager.contains(termIndex.get().getName()))
                manager.register(termIndex.get().getName(), termIndex.get());
        }
        return termIndexResourceDesc;

    }

    private ExternalResourceDescription pipelineObserverResource;

    public ExternalResourceDescription resObserver() {
        if (pipelineObserverResource == null) {
            pipelineObserverResource = ExternalResourceFactory
                    .createExternalResourceDescription(ObserverResource.class, this.pipelineObserverName);
        }
        return pipelineObserverResource;

    }

    private ExternalResourceDescription termHistoryResource;

    public ExternalResourceDescription resHistory() {
        if (termHistoryResource == null) {
            termHistoryResource = ExternalResourceFactory
                    .createExternalResourceDescription(TermHistoryResource.class, this.termHistoryResourceName);
        }
        return termHistoryResource;

    }

    private ExternalResourceDescription syntacticVariantRules;

    public ExternalResourceDescription resSyntacticVariantRules() {
        if (syntacticVariantRules == null) {
            syntacticVariantRules = ExternalResourceFactory.createExternalResourceDescription(
                    YamlVariantRules.class, getResUrl(TermSuiteResource.VARIANTS));
        }
        return syntacticVariantRules;

    }

    /**
     * Returns the term index produced (or last modified) by this pipeline.
     * @return
     *       The term index processed by this pipeline
     */
    public TermIndex getTermIndex() {
        return this.termIndex.get();
    }

    /**
     * Sets the term index on which this pipeline will run.
     * 
     * @param termIndex
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setTermIndex(TermIndex termIndex) {
        this.termIndex = Optional.of(termIndex);
        return this;
    }

    /**
     * Creates a new in-memory {@link TermIndex} on which this 
     * pipeline will run.
     * 
     * @param name
     *          the name of the new term index
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline emptyTermIndex(String name) {
        MemoryTermIndex termIndex = new MemoryTermIndex(name, this.lang, this.occurrenceStore);
        LOGGER.info("Creating TermIndex {}", termIndex.getName());
        this.termIndex = Optional.of(termIndex);
        return this;
    }
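
    /*
     * Usage sketch (names are illustrative): start from a fresh in-memory
     * index, or reuse one produced by a previous pipeline.
     *
     *   pipeline.emptyTermIndex("my-corpus");
     *   // ...or...
     *   pipeline.setTermIndex(otherPipeline.getTermIndex());
     */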

    private ExternalResourceDescription generalLanguageResourceDesc;

    private ExternalResourceDescription resGeneralLanguage() {
        if (generalLanguageResourceDesc == null)
            generalLanguageResourceDesc = ExternalResourceFactory.createExternalResourceDescription(
                    GeneralLanguageResource.class, getResUrl(TermSuiteResource.GENERAL_LANGUAGE));
        return generalLanguageResourceDesc;
    }

    /**
     * Computes {@link TermProperty#WR} values (and additional 
     * term properties of type {@link TermProperty} in the future).
     * 
     * @see TermSpecificityComputer
     * @see TermProperty
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeSpecificityComputer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(TermSpecificityComputer.class);
            ExternalResourceFactory.bindResource(ae, resGeneralLanguage());
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, "Computing term specificities", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
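
    /*
     * Chaining sketch (illustrative): specificity must be computed before
     * ranking on it.
     *
     *   pipeline
     *       .aeSpecificityComputer()
     *       .aeRanker(TermProperty.WR, true);   // true = descending order
     */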

    public TermSuitePipeline setContextualizeCoTermsType(OccurrenceType contextualizeCoTermsType) {
        this.contextualizeCoTermsType = contextualizeCoTermsType;
        return this;
    }

    public TermSuitePipeline setContextualizeWithTermClasses(boolean contextualizeWithTermClasses) {
        this.contextualizeWithTermClasses = contextualizeWithTermClasses;
        return this;
    }

    public TermSuitePipeline setContextualizeWithCoOccurrenceFrequencyThreshhold(
            int contextualizeWithCoOccurrenceFrequencyThreshhold) {
        this.contextualizeWithCoOccurrenceFrequencyThreshhold = contextualizeWithCoOccurrenceFrequencyThreshhold;
        return this;
    }

    /**
     * Computes the context vectors (see {@link Contextualizer}) of all 
     * single-word terms in the term index.
     * 
     * @see Contextualizer
     * @param scope
     *          the size of the co-occurrence window used to build the vectors
     * @param allTerms
     *          if <code>true</code>, computes context vectors for all terms 
     *          rather than single-word terms only
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeContextualizer(int scope, boolean allTerms) {
        AnalysisEngineDescription ae;
        try {
            ae = AnalysisEngineFactory.createEngineDescription(Contextualizer.class,
                    Contextualizer.NORMALIZE_ASSOC_RATE, true, Contextualizer.SCOPE, scope,
                    Contextualizer.CO_TERMS_TYPE, contextualizeCoTermsType,
                    Contextualizer.COMPUTE_CONTEXTS_FOR_ALL_TERMS, allTerms, Contextualizer.ASSOCIATION_RATE,
                    contextAssocRateMeasure, Contextualizer.USE_TERM_CLASSES, contextualizeWithTermClasses,
                    Contextualizer.MINIMUM_COOCC_FREQUENCY_THRESHOLD,
                    contextualizeWithCoOccurrenceFrequencyThreshhold);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Build context vectors", 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
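
    /*
     * Configuration sketch (values are illustrative): the setContextualize*()
     * setters must be called before aeContextualizer() to take effect.
     *
     *   pipeline
     *       .setContextualizeWithCoOccurrenceFrequencyThreshhold(2)
     *       .aeContextualizer(3, false);   // scope 3, single-word terms only
     */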

    public TermSuitePipeline aeMaxSizeThresholdCleaner(TermProperty property, int maxSize) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                    MaxSizeThresholdCleaner.class, AbstractTermIndexCleaner.CLEANING_PROPERTY, property,
                    MaxSizeThresholdCleaner.MAX_SIZE, maxSize);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Cleaning TermIndex on property " + property.toString().toLowerCase()
                    + " with maxSize=" + maxSize, 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }

    }

    public TermSuitePipeline aeThresholdCleaner(TermProperty property, float threshold, boolean isPeriodic,
            int cleaningPeriod, int termIndexSizeTrigger) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                    TermIndexThresholdCleaner.class, AbstractTermIndexCleaner.CLEANING_PROPERTY, property,
                    AbstractTermIndexCleaner.NUM_TERMS_CLEANING_TRIGGER, termIndexSizeTrigger,
                    AbstractTermIndexCleaner.KEEP_VARIANTS, this.keepVariantsWhileCleaning,
                    TermIndexThresholdCleaner.THRESHOLD, threshold);
            setPeriodic(isPeriodic, cleaningPeriod, ae);

            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, getNumberedTaskName("Cleaning"), 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline aePrimaryOccurrenceDetector(int detectionStrategy) {
        // NOTE: the detectionStrategy parameter is currently ignored; the
        // PrimaryOccurrenceDetector engine is created with its default configuration.
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(PrimaryOccurrenceDetector.class);

            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Detecting primary occurrences", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    private void setPeriodic(boolean isPeriodic, int cleaningPeriod, AnalysisEngineDescription ae) {
        if (isPeriodic)
            addParameters(ae, AbstractTermIndexCleaner.PERIODIC_CAS_CLEAN_ON, true,
                    AbstractTermIndexCleaner.CLEANING_PERIOD, cleaningPeriod);
    }

    /**
     * Periodic variant of the threshold cleaner: cleans the term index 
     * every <code>cleaningPeriod</code> documents.
     * 
     * @param property
     *          the term property the threshold applies to
     * @param threshold
     *          the threshold value applied to the cleaning property
     * @param cleaningPeriod
     *          the number of documents between two cleanings
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeThresholdCleanerPeriodic(TermProperty property, float threshold,
            int cleaningPeriod) {
        return aeThresholdCleaner(property, threshold, true, cleaningPeriod, 0);
    }

    public TermSuitePipeline aeThresholdCleanerSizeTrigger(TermProperty property, float threshold,
            int termIndexSizeTrigger) {
        return aeThresholdCleaner(property, threshold, false, 0, termIndexSizeTrigger);
    }

    public TermSuitePipeline setKeepVariantsWhileCleaning(boolean keepVariantsWhileCleaning) {
        this.keepVariantsWhileCleaning = keepVariantsWhileCleaning;
        return this;
    }

    public TermSuitePipeline aeThresholdCleaner(TermProperty property, float threshold) {
        return aeThresholdCleaner(property, threshold, false, 0, 0);
    }

    public TermSuitePipeline aeTopNCleaner(TermProperty property, int n) {
        return aeTopNCleanerPeriodic(property, n, false, 0);
    }
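
    /*
     * Cleaning sketch (property and threshold values are illustrative):
     *
     *   pipeline
     *       .setKeepVariantsWhileCleaning(true)
     *       .aeThresholdCleaner(TermProperty.FREQUENCY, 2f)   // drop rare terms
     *       .aeTopNCleaner(TermProperty.WR, 10000);           // keep the 10000 best
     */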

    /**
     * Cleans the term index by keeping only the top <code>n</code> terms 
     * on the given property, optionally on a periodic basis.
     * 
     * @param property
     *          the term property used to rank terms
     * @param n
     *          the number of terms to keep
     * @param isPeriodic
     *          whether the cleaning is re-run periodically
     * @param cleaningPeriod
     *          the number of documents between two cleanings (ignored when 
     *          <code>isPeriodic</code> is <code>false</code>)
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeTopNCleanerPeriodic(TermProperty property, int n, boolean isPeriodic,
            int cleaningPeriod) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(TermIndexTopNCleaner.class,
                    AbstractTermIndexCleaner.CLEANING_PROPERTY, property, TermIndexTopNCleaner.TOP_N, n);
            setPeriodic(isPeriodic, cleaningPeriod, ae);
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, "Cleaning TermIndex. Keepings only top " + n + " terms on property "
                    + property.toString().toLowerCase(), 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline setGraphicalVariantSimilarityThreshold(float th) {
        this.graphicalVariantSimilarityThreshold = Optional.of(th);
        return this;
    }

    public TermSuitePipeline aeGraphicalVariantGatherer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(GraphicalVariantGatherer.class, GraphicalVariantGatherer.LANG,
                            lang.getCode(), GraphicalVariantGatherer.SIMILARITY_THRESHOLD,
                            graphicalVariantSimilarityThreshold.isPresent()
                                    ? graphicalVariantSimilarityThreshold.get()
                                    : 0.9f);
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resObserver());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, GraphicalVariantGatherer.TASK_NAME, 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
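
    /*
     * Sketch: override the default 0.9 similarity threshold before gathering
     * graphical variants (the value is illustrative).
     *
     *   pipeline
     *       .setGraphicalVariantSimilarityThreshold(0.95f)
     *       .aeGraphicalVariantGatherer();
     */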

    /**
     * Filters out URLs from CAS.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeUrlFilter() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(StringRegexFilter.class);

            return aggregateAndReturn(ae, "Filtering URLs", 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Gathers terms according to their syntactic structures.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeSyntacticVariantGatherer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory
                    .createEngineDescription(SyntacticTermGatherer.class);

            ExternalResourceFactory.bindResource(ae, resSyntacticVariantRules());
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resObserver());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, SyntacticTermGatherer.TASK_NAME, 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Detects all inclusion/extension relations between terms of size >= 2.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeExtensionDetector() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(ExtensionDetecter.class);

            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, "Detecting term extensions", 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Transforms the {@link TermIndex} into a flat one-n scored model.
     * 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeScorer() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(ScorerAE.class);

            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resObserver());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, ScorerAE.TASK_NAME, 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     *  Merges the variants of a term (only those that are extensions 
     *  of the base term) by graphical variation.
     *  
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeMerger() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(Merger.class,
                    Merger.SIMILARITY_THRESHOLD, 0.9f);

            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resObserver());

            return aggregateAndReturn(ae, Merger.TASK_NAME, 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Sets the rank ({@link Term#setRank(int)}) of all terms of the {@link TermIndex}
     * given a {@link TermProperty}.
     * 
     * @param property
     *          the term property to rank on
     * @param desc
     *          <code>true</code> for descending order, <code>false</code> for ascending
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeRanker(TermProperty property, boolean desc) {
        Preconditions.checkArgument(property != TermProperty.RANK, "Cannot rank on property %s", TermProperty.RANK);
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(Ranker.class,
                    Ranker.RANKING_PROPERTY, property, Ranker.DESC, desc);
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resObserver());
            ExternalResourceFactory.bindResource(ae, resHistory());

            return aggregateAndReturn(ae, Ranker.TASK_NAME, 1);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    public TermSuitePipeline setTreeTaggerHome(String treeTaggerPath) {
        this.treeTaggerPath = Optional.of(treeTaggerPath);
        return this;
    }

    public TermSuitePipeline haeLogOverlappingRules() {
        this.logOverlappingRules = Optional.of(true);
        return this;
    }

    public TermSuitePipeline enableSyntacticLabels() {
        this.enableSyntacticLabels = true;
        return this;
    }

    public TermSuitePipeline setCompostCoeffs(float alpha, float beta, float gamma, float delta) {
        Preconditions.checkArgument(alpha + beta + gamma + delta == 1.0f,
                "The sum of the coefficients must be 1.0");
        this.alpha = Optional.of(alpha);
        this.beta = Optional.of(beta);
        this.gamma = Optional.of(gamma);
        this.delta = Optional.of(delta);
        return this;
    }
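
    /*
     * Example (values are illustrative): the four coefficients must sum to
     * exactly 1.0f. Since the check is an exact float equality, prefer values
     * that are exactly representable as floats, such as 0.25f, to avoid
     * rounding surprises:
     *
     *   pipeline.setCompostCoeffs(0.25f, 0.25f, 0.25f, 0.25f);
     */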

    public TermSuitePipeline setCompostMaxComponentNum(int compostMaxComponentNum) {
        this.compostMaxComponentNum = Optional.of(compostMaxComponentNum);
        return this;
    }

    public TermSuitePipeline setCompostMinComponentSize(int compostMinComponentSize) {
        this.compostMinComponentSize = Optional.of(compostMinComponentSize);
        return this;
    }

    public TermSuitePipeline setCompostScoreThreshold(float compostScoreThreshold) {
        this.compostScoreThreshold = Optional.of(compostScoreThreshold);
        return this;
    }

    public TermSuitePipeline setCompostSegmentSimilarityThreshold(float compostSegmentSimilarityThreshold) {
        this.compostSegmentSimilarityThreshold = Optional.of(compostSegmentSimilarityThreshold);
        return this;
    }

    public TermSuitePipeline aeCompostSplitter() {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(CompostAE.class,
                    CompostAE.SCORE_THRESHOLD,
                    this.compostScoreThreshold.isPresent() ? this.compostScoreThreshold.get()
                            : this.lang.getCompostScoreThreshold(),
                    CompostAE.ALPHA, alpha.isPresent() ? alpha.get() : lang.getCompostAlpha(), CompostAE.BETA,
                    beta.isPresent() ? beta.get() : lang.getCompostBeta(), CompostAE.GAMMA,
                    gamma.isPresent() ? gamma.get() : lang.getCompostGamma(), CompostAE.DELTA,
                    delta.isPresent() ? delta.get() : lang.getCompostDelta(), CompostAE.MIN_COMPONENT_SIZE,
                    this.compostMinComponentSize.isPresent() ? this.compostMinComponentSize.get()
                            : this.lang.getCompostMinComponentSize(),
                    CompostAE.MAX_NUMBER_OF_COMPONENTS,
                    this.compostMaxComponentNum.isPresent() ? this.compostMaxComponentNum.get()
                            : this.lang.getCompostMaxComponentNumber(),
                    CompostAE.SEGMENT_SIMILARITY_THRESHOLD,
                    // guard against an absent Optional: fall back to 1f (an assumed
                    // default) instead of calling get() unconditionally
                    this.compostSegmentSimilarityThreshold.or(1f));
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.bindResource(ae, resObserver());

            ExternalResourceDescription langDicoRes = ExternalResourceFactory
                    .createExternalResourceDescription(SimpleWordSet.class, getResUrl(TermSuiteResource.DICO));

            ExternalResourceFactory.bindResource(ae, CompostAE.LANGUAGE_DICO, langDicoRes);

            ExternalResourceDescription compostInflectionRulesRes = ExternalResourceFactory
                    .createExternalResourceDescription(CompostInflectionRules.class,
                            getResUrl(TermSuiteResource.COMPOST_INFLECTION_RULES));

            ExternalResourceFactory.bindResource(ae, CompostAE.INFLECTION_RULES, compostInflectionRulesRes);

            ExternalResourceDescription transformationRulesRes = ExternalResourceFactory
                    .createExternalResourceDescription(CompostInflectionRules.class,
                            getResUrl(TermSuiteResource.COMPOST_TRANSFORMATION_RULES));

            ExternalResourceFactory.bindResource(ae, CompostAE.TRANSFORMATION_RULES, transformationRulesRes);

            ExternalResourceDescription compostStopListRes = ExternalResourceFactory
                    .createExternalResourceDescription(SimpleWordSet.class,
                            getResUrl(TermSuiteResource.COMPOST_STOP_LIST));

            ExternalResourceFactory.bindResource(ae, CompostAE.STOP_LIST, compostStopListRes);

            ExternalResourceDescription neoClassicalPrefixesRes = ExternalResourceFactory
                    .createExternalResourceDescription(SimpleWordSet.class,
                            getResUrl(TermSuiteResource.NEOCLASSICAL_PREFIXES));

            ExternalResourceFactory.bindResource(ae, CompostAE.NEOCLASSICAL_PREFIXES, neoClassicalPrefixesRes);

            ExternalResourceFactory.bindResource(ae, resHistory());

            return aeManualCompositionSetter().aggregateAndReturn(ae, CompostAE.TASK_NAME, 2);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
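
    /*
     * Configuration sketch (threshold values are illustrative): tune Compost
     * before adding the splitter; unset parameters fall back to the
     * language-specific defaults.
     *
     *   pipeline
     *       .setCompostScoreThreshold(0.7f)
     *       .setCompostMinComponentSize(3)
     *       .aeCompostSplitter();
     */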

    public TermSuitePipeline haeCasStatCounter(String statName) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(CasStatCounter.class,
                    CasStatCounter.STAT_NAME, statName);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, getNumberedTaskName("Counting stats [" + statName + "]"), 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Exports the time performance trace to a TSV file.
     * 
     * Columns are:
     * <ul>
     * <li>elapsed time since initialization, in milliseconds</li>
     * <li>number of documents processed</li>
     * <li>cumulative size of the data processed</li>
     * <li>number of terms in the term index</li>
     * <li>number of {@link WordAnnotation}s processed</li>
     * </ul>
     * 
     * @param toFile
     *          the TSV file the trace is written to
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeTraceTimePerf(String toFile) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(CasStatCounter.class,
                    CasStatCounter.DOCUMENT_PERIOD, 1, CasStatCounter.TO_TRACE_FILE, toFile);
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae, "Exporting time performances to file " + toFile, 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Classifies terms into term classes.
     * 
     * @see TermClassifier
     * @param sortingProperty
     *          the term property used to order terms before they are classified. 
     *          Given this order, the first term of each class is considered 
     *          the head of the class.
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline aeTermClassifier(TermProperty sortingProperty) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(TermClassifier.class,
                    TermClassifier.CLASSIFYING_PROPERTY, sortingProperty

            );
            ExternalResourceFactory.bindResource(ae, resTermIndex());

            return aggregateAndReturn(ae,
                    "Classifying terms on property " + sortingProperty.toString().toLowerCase(), 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Evaluates the extracted terminology against a reference terminology.
     * 
     * @param refFileURI
     *          the path to the reference terminology file
     * @param outputFile
     *          the path to the output log file
     * @param customLogHeader
     *          a custom string added to the header of the output log file
     * @param rFile
     *          the path to the output R file
     * @param evalTraceName
     *          the name of the eval trace
     * @param rtlWithVariants
     *          <code>true</code> if the variants of the reference terminology 
     *          should be kept during the eval
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline haeEval(String refFileURI, String outputFile, String customLogHeader, String rFile,
            String evalTraceName, boolean rtlWithVariants) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(EvalEngine.class,
                    EvalEngine.OUTPUT_LOG_FILE, outputFile, EvalEngine.OUTPUT_R_FILE, rFile,
                    EvalEngine.CUSTOM_LOG_HEADER_STRING, customLogHeader,
                    //            EvalEngine.LC_WITH_VARIANTS, extractedTerminoWithVariants,
                    EvalEngine.RTL_WITH_VARIANTS, rtlWithVariants

            );
            ExternalResourceFactory.bindResource(ae, resTermIndex());
            ExternalResourceFactory.createDependencyAndBind(ae, EvalEngine.EVAL_TRACE, EvalTrace.class,
                    evalTraceName);
            ExternalResourceFactory.createDependencyAndBind(ae, EvalEngine.REFERENCE_LIST, ReferenceTermList.class,
                    "file:" + refFileURI);

            return aggregateAndReturn(ae, "Evaluating " + evalTraceName, 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Configures the pipeline to store occurrences in MongoDB.
     * 
     * @param mongoDBUri
     *          the MongoDB connection URI
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setMongoDBOccurrenceStore(String mongoDBUri) {
        this.occurrenceStore = new MongoDBOccurrenceStore(mongoDBUri);
        return this;
    }

    /**
     * @deprecated Use TermSuitePipeline#setOccurrenceStoreMode instead.
     * 
     * @param activate
     *          whether occurrences are kept while spotting
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    @Deprecated
    public TermSuitePipeline setSpotWithOccurrences(boolean activate) {
        this.spotWithOccurrences = activate;
        return this;
    }

    /**
     * Configures {@link RegexSpotter}. If <code>true</code>, 
     * adds all spotted occurrences to the {@link TermIndex}.
     * 
     * @see #aeRegexSpotter()
     * 
     * @param addToTermIndex
     *          the value of the parameter
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setAddSpottedAnnoToTermIndex(boolean addToTermIndex) {
        this.addSpottedAnnoToTermIndex = addToTermIndex;
        return this;
    }

    /**
     * Sets the post-processing strategy for the {@link RegexSpotter} 
     * analysis engine.
     * 
     * @see #aeRegexSpotter()
     * @see OccurrenceBuffer#NO_CLEANING
     * @see OccurrenceBuffer#KEEP_PREFIXES
     * @see OccurrenceBuffer#KEEP_SUFFIXES
     * 
     * @param postProcessingStrategy
     *          the name of the strategy (see the OccurrenceBuffer constants above)
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setPostProcessingStrategy(String postProcessingStrategy) {
        this.postProcessingStrategy = Optional.of(postProcessingStrategy);
        return this;
    }

    /**
     * Configures the TSV exporter to show (or hide) headers on the 
     * first line.
     * 
     * @param tsvWithHeaders
     *          the flag
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setTsvShowHeaders(boolean tsvWithHeaders) {
        this.tsvWithHeaders = tsvWithHeaders;
        return this;
    }

    /**
     * Configures the TSV exporter to show (or hide) variant scores with the
     * "V" label.
     * 
     * @param tsvWithVariantScores
     *          the flag
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline setTsvShowScores(boolean tsvWithVariantScores) {
        this.tsvWithVariantScores = tsvWithVariantScores;
        return this;
    }

    public TermSuitePipeline haeJsonCasExporter(String toDirectoryPath) {
        try {
            AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(JsonCasExporter.class,
                    JsonCasExporter.OUTPUT_DIRECTORY, toDirectoryPath);
            return aggregateAndReturn(ae, getNumberedTaskName("Exporting CAS to JSON files"), 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }

    /**
     * Configures the {@link JsonExporterAE} not to embed the occurrences 
     * in the JSON file, but to link to the MongoDB occurrence store instead.
     * 
     * @see #haeJsonExporter(String) 
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline linkMongoStore() {
        this.linkMongoStore = true;
        return this;
    }
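
    /*
     * Sketch (the URI is illustrative): store occurrences in MongoDB and have
     * the JSON exporter reference that store instead of embedding occurrences.
     *
     *   pipeline
     *       .setMongoDBOccurrenceStore("mongodb://localhost:27017/termsuite")
     *       .linkMongoStore();
     */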

    /**
     * Aggregates a custom AE into the TermSuite pipeline.
     * 
     * @param ae
     *          the description of the analysis engine to add
     * @param taskName
     *          a user-readable name for the AE task (intended to 
     *          be displayed in progress views)
     * @return
     *       This chaining {@link TermSuitePipeline} builder object
     */
    public TermSuitePipeline customAE(AnalysisEngineDescription ae, String taskName) {
        try {
            return aggregateAndReturn(ae, taskName, 0);
        } catch (Exception e) {
            throw new TermSuitePipelineException(e);
        }
    }
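
    /*
     * Sketch: plug an arbitrary uimaFIT engine into the pipeline
     * (MyCustomAE is a hypothetical user-provided annotator).
     *
     *   AnalysisEngineDescription desc =
     *       AnalysisEngineFactory.createEngineDescription(MyCustomAE.class);
     *   pipeline.customAE(desc, "My custom step");
     */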

}