de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Writer.java Source code

Introduction

Here is the source code for de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Writer.java
Source

/*******************************************************************************
 * Copyright 2014
 * Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.clarin.webanno.tsv;

import static org.apache.commons.io.IOUtils.closeQuietly;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.CasUtil.selectFS;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.selectCovered;

import java.io.IOException;
import java.io.OutputStream;
import java.util.*;

import org.apache.commons.io.IOUtils;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.ArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.jcas.JCas;

import de.tudarmstadt.ukp.clarin.webanno.tsv.util.AnnotationUnit;
import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;

/**
 * Exports annotations in a TAB-separated format. The header includes information
 * about the UIMA types and features. The number of columns depends on the number
 * of types/features that exist. All the spans are written first and subsequently
 * all the relations. A relation is given in the form Source--&gt;Target and
 * the RelationType is added to the Target token. The next column indicates the
 * source of the relation (the source of the arc drawn).
 *
 *
 */

public class WebannoTsv3Writer extends JCasFileWriter_ImplBase {

    /**
     * Name of the configuration parameter that holds the character encoding.
     * Although the constant is bound to the "source encoding" parameter name,
     * this writer passes the value to every {@code IOUtils.write} call, i.e. it
     * is the encoding of the file being written.
     */
    public static final String PARAM_ENCODING = ComponentParameters.PARAM_SOURCE_ENCODING;
    @ConfigurationParameter(name = PARAM_ENCODING, mandatory = true, defaultValue = "UTF-8")
    private String encoding;

    /** Suffix handed to {@code getOutputStream} when the output file is opened. */
    public static final String PARAM_FILENAME_SUFFIX = "filenameSuffix";
    @ConfigurationParameter(name = PARAM_FILENAME_SUFFIX, mandatory = true, defaultValue = ".tsv")
    private String filenameSuffix;

    /** Type names of the span layers to export; each one is resolved against the CAS type system. */
    public static final String PARAM_SPAN_LAYERS = "spanLayers";
    @ConfigurationParameter(name = PARAM_SPAN_LAYERS, mandatory = true, defaultValue = {})
    private List<String> spanLayers;

    /** Names of slot features; they are compared against {@code Feature.getName()}. */
    public static final String PARAM_SLOT_FEATS = "slotFeatures";
    @ConfigurationParameter(name = PARAM_SLOT_FEATS, mandatory = true, defaultValue = {})
    private List<String> slotFeatures;

    /** Link type names, paired by index with the entries of {@link #slotFeatures}. */
    public static final String PARAM_LINK_TYPES = "linkTypes";
    @ConfigurationParameter(name = PARAM_LINK_TYPES, mandatory = true, defaultValue = {})
    private List<String> linkTypes;

    /**
     * Type names of the slot targets. They are consumed in order, one per slot
     * feature encountered while walking the features of the span layers.
     */
    public static final String PARAM_SLOT_TARGETS = "slotTargets";
    @ConfigurationParameter(name = PARAM_SLOT_TARGETS, mandatory = true, defaultValue = {})
    private List<String> slotTargets;

    /**
     * Base names of the chain layers; the suffixes "Chain" and "Link" are
     * appended to obtain the actual chain and link type names.
     */
    public static final String PARAM_CHAIN_LAYERS = "chainLayers";
    @ConfigurationParameter(name = PARAM_CHAIN_LAYERS, mandatory = true, defaultValue = {})
    private List<String> chainLayers;

    /** Type names of the relation layers to export. */
    public static final String PARAM_RELATION_LAYERS = "relationLayers";
    @ConfigurationParameter(name = PARAM_RELATION_LAYERS, mandatory = true, defaultValue = {})
    private List<String> relationLayers;

    // Column and line separators of the TSV output.
    private static final String TAB = "\t";
    private static final String LF = "\n";
    // Short names of the two end-point features of a relation annotation.
    private static final String DEPENDENT = "Dependent";
    private static final String GOVERNOR = "Governor";
    // Short name of the chain-link feature whose value is written as "value->chainNo-linkNo".
    private static final String REF_REL = "referenceRelation";
    // Suffixes appended to a chain layer name to obtain the chain / link type names.
    private static final String CHAIN = "Chain";
    private static final String LINK = "Link";
    // Base names of the features that point to the first link of a chain and to the next link.
    private static final String FIRST = "first";
    private static final String NEXT = "next";
    // Prefixes of the header lines, written as "#<prefix>=<type>|<features>".
    public static final String SP = "T_SP"; // span annotation type
    public static final String CH = "T_CH"; // chain annotation type
    public static final String RL = "T_RL"; // relation annotation type
    // Prefix of the header column name generated for a slot feature's role column.
    public static final String ROLE = "ROLE_";
    public static final String BT = "BT_"; // base type for the relation annotation
    // All annotation units in output order; one output line is written per unit.
    // Sub-token units are inserted into this list while annotations are processed.
    private List<AnnotationUnit> units = new ArrayList<>();
    // number of subunits under this Annotation Unit
    private Map<AnnotationUnit, Integer> subUnits = new HashMap<>();
    // Layer type name -> ordered column (feature) names; drives the header and the column order.
    private Map<String, Set<String>> featurePerLayer = new LinkedHashMap<>();
    // Unit -> identifier written in the first column; a sub-unit gets "<parent id>.<n>".
    private Map<AnnotationUnit, String> unitsLineNumber = new HashMap<>();
    // Unit -> sentence text; a "#Text=" block is written before every unit that is a key here.
    // Populated outside the code visible in this section.
    private Map<AnnotationUnit, String> sentenceUnits = new HashMap<>();
    // Layer type name -> unit -> one list of column values per annotation on that unit.
    private Map<String, Map<AnnotationUnit, List<List<String>>>> annotationsPerPostion = new HashMap<>();
    // Slot feature -> type of its slot target.
    private Map<Feature, Type> slotFeatureTypes = new HashMap<>();

    // Type -> annotation -> reference number used in "[n]" suffixes.
    // NOTE(review): presumably filled by getRefId, which is not visible here - confirm.
    private Map<Type, Map<FeatureStructure, Integer>> annotaionRefPerType = new HashMap<>();

    // Type name -> unit -> true when the unit needs a disambiguating reference
    // (stacked annotation or part of a multi-token annotation), false otherwise.
    private Map<String, Map<AnnotationUnit, Boolean>> ambigUnits = new HashMap<>();
    // NOTE(review): not referenced in this section; presumably used by getRefId - confirm.
    private Map<Type, Map<AnnotationUnit, Map<FeatureStructure, Integer>>> multiAnnosPerUnit = new HashMap<>();
    // Slot feature name -> link type name.
    private Map<String, String> slotLinkTypes = new HashMap<>();
    // Layer type -> 1-based number in registration order (span, then chain link, then relation layers).
    private Map<Type, Integer> layerMaps = new LinkedHashMap<>();

    /**
     * Writes the annotations of one CAS as a WebAnno TSV 3 document.
     * <p>
     * The annotation state is collected first (slot/link maps, token and
     * sentence units, ambiguity flags, span, chain and relation annotations),
     * then the header is written, followed by one line per annotation unit.
     * Each line consists of the unit id, its {@code begin-end} offsets, its text
     * and the columns of every layer in header order. Several annotations of the
     * same layer on one unit are joined column-wise with {@code |}; a layer
     * without an annotation on the unit is filled with {@code _}.
     *
     * @param aJCas
     *            the CAS to export
     * @throws AnalysisEngineProcessException
     *             wrapping any failure while collecting or writing
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        // NOTE(review): the collections filled below are instance fields and are not
        // cleared here; confirm that one writer instance never processes more than one
        // CAS, or that the state is reset elsewhere.
        OutputStream docOS = null;
        try {
            docOS = getOutputStream(aJCas, filenameSuffix);
            setSlotLinkTypes();
            setLinkMaps(aJCas);
            setTokenSentenceAddress(aJCas);
            setAmbiguity(aJCas);
            setSpanAnnotation(aJCas);
            setChainAnnotation(aJCas);
            setRelationAnnotation(aJCas);
            writeHeader(docOS);
            for (AnnotationUnit unit : units) {
                if (sentenceUnits.containsKey(unit)) {
                    // GITHUB ISSUE 318: New line in sentence should be exported as is,
                    // i.e. every line of the sentence gets its own "#Text=" line.
                    String[] sentWithNl = sentenceUnits.get(unit).split("\n");
                    // split() yields an empty array for a text made of newlines only
                    String firstLine = sentWithNl.length > 0 ? sentWithNl[0] : "";
                    IOUtils.write(LF + "#Text=" + firstLine + LF, docOS, encoding);
                    for (int i = 1; i < sentWithNl.length; i++) {
                        IOUtils.write("#Text=" + sentWithNl[i] + LF, docOS, encoding);
                    }
                }
                // Tokens and sub-tokens share the same line layout.
                IOUtils.write(
                        unitsLineNumber.get(unit) + TAB + unit.begin + "-" + unit.end + TAB + unit.token + TAB,
                        docOS, encoding);

                for (Map.Entry<String, Set<String>> layer : featurePerLayer.entrySet()) {
                    Map<AnnotationUnit, List<List<String>>> annosOfLayer = annotationsPerPostion
                            .get(layer.getKey());
                    List<List<String>> annos = annosOfLayer == null ? null : annosOfLayer.get(unit);

                    List<String> merged = null;
                    if (annos != null) {
                        for (List<String> annofs : annos) {
                            if (merged == null) {
                                // copy, so that joining does not alter the collected annotations
                                merged = new ArrayList<>(annofs);
                            } else {
                                for (int i = 0; i < annofs.size(); i++) {
                                    merged.set(i, merged.get(i) + "|" + annofs.get(i));
                                }
                            }
                        }
                    }

                    if (merged != null) {
                        for (String anno : merged) {
                            IOUtils.write(anno + TAB, docOS, encoding);
                        }
                    } else {
                        // No annotation of this layer on this unit: one "_" per column;
                        // a layer without any feature still occupies one column.
                        int emptyColumns = Math.max(1, layer.getValue().size());
                        for (int i = 0; i < emptyColumns; i++) {
                            IOUtils.write("_" + TAB, docOS, encoding);
                        }
                    }
                }
                IOUtils.write(LF, docOS, encoding);
            }
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        } finally {
            closeQuietly(docOS);
        }
    }

    /**
     * Records, for every configured slot feature, the link type found at the
     * same position in {@code linkTypes}.
     */
    private void setSlotLinkTypes() {
        for (int position = 0; position < slotFeatures.size(); position++) {
            String slotFeature = slotFeatures.get(position);
            String linkType = linkTypes.get(position);
            slotLinkTypes.put(slotFeature, linkType);
        }
    }

    /**
     * Assigns every exported layer type a running number, starting at one:
     * first the span layers (the Token type is left out), then the link types
     * of the chain layers, then the relation layers.
     *
     * @param aJCas
     *            the CAS whose type system is used to resolve the layer names
     */
    private void setLinkMaps(JCas aJCas) {
        String tokenTypeName = Token.class.getName();
        for (String spanLayer : spanLayers) {
            if (!spanLayer.equals(tokenTypeName)) {
                Type spanType = getType(aJCas.getCas(), spanLayer);
                layerMaps.put(spanType, layerMaps.size() + 1);
            }
        }
        for (String chainLayer : chainLayers) {
            Type linkType = getType(aJCas.getCas(), chainLayer + LINK);
            layerMaps.put(linkType, layerMaps.size() + 1);
        }
        for (String relationLayer : relationLayers) {
            Type relationType = getType(aJCas.getCas(), relationLayer);
            layerMaps.put(relationType, layerMaps.size() + 1);
        }
    }

    /**
     * Writes the file header: the format line, then one line per layer of the
     * form {@code #<kind>=<type>|<feature>|<feature>...}, and finally an empty
     * line. {@code <kind>} is the span prefix for configured span layers, the
     * relation prefix for configured relation layers and the chain prefix for
     * everything else.
     * 
     * @param docOS
     *            the stream the header is written to
     * @throws IOException
     *             if writing to the stream fails
     */
    private void writeHeader(OutputStream docOS) throws IOException {
        IOUtils.write("#FORMAT=WebAnno TSV 3" + LF, docOS, encoding);
        for (Map.Entry<String, Set<String>> layer : featurePerLayer.entrySet()) {
            String type = layer.getKey();
            String annoType;
            if (spanLayers.contains(type)) {
                annoType = SP;
            } else if (relationLayers.contains(type)) {
                annoType = RL;
            } else {
                annoType = CH;
            }
            IOUtils.write("#" + annoType + "=" + type + "|", docOS, encoding);
            // String.join always separates the names with "|"; the former
            // length-based "first element" check lost the separator after an
            // empty first name.
            IOUtils.write(String.join("|", layer.getValue()) + LF, docOS, encoding);
        }
        IOUtils.write(LF, docOS, encoding);
    }

    /**
     * Determines, for every span layer and for the Token type, which annotation
     * units are ambiguous and therefore need a disambiguating reference.
     * <ul>
     * <li>every unit covered by an annotation that spans several tokens is
     * marked {@code true};</li>
     * <li>a unit that already carries an annotation of the same type (stacked
     * annotation) is marked {@code true};</li>
     * <li>a unit seen for the first time is marked {@code false}.</li>
     * </ul>
     *
     * @param aJCas
     *            the CAS whose annotations are inspected
     */
    private void setAmbiguity(JCas aJCas) {
        // Work on a private, duplicate-free copy. Adding the Token type to the
        // configured list itself would grow that list on every call and, once the
        // Token type is contained twice, flag every token unit as stacked.
        Set<String> spanAndTokenLayers = new LinkedHashSet<>(spanLayers);
        spanAndTokenLayers.add(Token.class.getName());
        for (String l : spanAndTokenLayers) {
            Type type = getType(aJCas.getCas(), l);
            Map<AnnotationUnit, Boolean> ambiguity = ambigUnits.computeIfAbsent(type.getName(),
                    key -> new HashMap<>());
            for (AnnotationFS fs : CasUtil.select(aJCas.getCas(), type)) {
                AnnotationUnit unit = getFirstUnit(fs);
                if (isMultipleTokenAnnotation(fs.getBegin(), fs.getEnd())) {
                    // multiple token anno: every unit it is split into is ambiguous
                    SubTokenAnno sta = new SubTokenAnno();
                    sta.setBegin(fs.getBegin());
                    sta.setEnd(fs.getEnd());
                    sta.setText(fs.getCoveredText());
                    Set<AnnotationUnit> sus = new LinkedHashSet<>();
                    for (AnnotationUnit newUnit : getSubUnits(sta, sus)) {
                        ambiguity.put(newUnit, true);
                    }
                } else {
                    // stacked anno if the unit was seen before, otherwise the single
                    // or first occurrence on this unit
                    ambiguity.put(unit, ambiguity.get(unit) != null);
                }
            }
        }
    }

    /**
     * Collects the column values of all span annotations.
     * <p>
     * A first pass resolves the target type of every slot feature found on the
     * span layers, taking the target names from {@code slotTargets} in order.
     * A second pass walks the annotations of every span layer except Token: an
     * annotation matching an existing unit is recorded on that unit, any other
     * annotation is split into sub-units and recorded on each of them.
     *
     * @param aJCas
     *            the CAS to read the annotations from
     */
    private void setSpanAnnotation(JCas aJCas) {
        // store slot targets for each slot features
        int nextSlotTarget = 0;
        for (String layerName : spanLayers) {
            for (Feature candidate : getType(aJCas.getCas(), layerName).getFeatures()) {
                if (slotFeatures == null || !slotFeatures.contains(candidate.getName())) {
                    continue;
                }
                Type targetType = getType(aJCas.getCas(), slotTargets.get(nextSlotTarget));
                slotFeatureTypes.put(candidate, targetType);
                nextSlotTarget++;
            }
        }

        String tokenTypeName = Token.class.getName();
        for (String layerName : spanLayers) {
            if (layerName.equals(tokenTypeName)) {
                continue;
            }
            Map<AnnotationUnit, List<List<String>>> perUnit = annotationsPerPostion.get(layerName);
            if (perUnit == null) {
                perUnit = new HashMap<>();
            }
            Type layerType = getType(aJCas.getCas(), layerName);
            for (AnnotationFS span : CasUtil.select(aJCas.getCas(), layerType)) {
                AnnotationUnit exactUnit = new AnnotationUnit(span.getBegin(), span.getEnd(), false,
                        span.getCoveredText());
                if (units.contains(exactUnit)) {
                    // annotation is per Token
                    setSpanAnnoPerFeature(perUnit, layerType, span, exactUnit, false, false);
                    continue;
                }
                // Annotation is on sub-token or multiple tokens
                SubTokenAnno pending = new SubTokenAnno();
                pending.setBegin(span.getBegin());
                pending.setEnd(span.getEnd());
                pending.setText(span.getCoveredText());
                // must be evaluated before the units are split below
                boolean spansSeveralTokens = isMultiToken(span);
                Set<AnnotationUnit> collected = new LinkedHashSet<>();
                boolean first = true;
                for (AnnotationUnit part : getSubUnits(pending, collected)) {
                    setSpanAnnoPerFeature(perUnit, layerType, span, part, spansSeveralTokens, first);
                    first = false;
                }
            }
            if (!perUnit.isEmpty()) {
                annotationsPerPostion.put(layerName, perUnit);
            }
        }
    }

    /**
     * Collects the column values of all chain annotations.
     * <p>
     * For every chain layer the chain type ({@code <layer>Chain}) is resolved and
     * each chain is followed from its first link through the {@code next}
     * feature. Chains and the links inside a chain are numbered from one. The
     * values are stored under the name of the link type.
     *
     * @param aJCas
     *            the CAS to read the chains from
     */
    private void setChainAnnotation(JCas aJCas) {
        for (String l : chainLayers) {
            if (l.equals(Token.class.getName())) {
                continue;
            }

            Type type = getType(aJCas.getCas(), l + CHAIN);
            Feature chainFirst = type.getFeatureByBaseName(FIRST);
            int chainNo = 1;
            for (FeatureStructure chainFs : selectFS(aJCas.getCas(), type)) {
                AnnotationFS linkFs = (AnnotationFS) chainFs.getFeatureValue(chainFirst);
                if (linkFs == null) {
                    // A chain without a first link has nothing to export; skipping it
                    // (without consuming a chain number) avoids a NullPointerException.
                    continue;
                }
                Type lType = linkFs.getType();

                // this is the layer with annotations
                String linkLayer = lType.getName();
                Map<AnnotationUnit, List<List<String>>> annotationsPertype = annotationsPerPostion
                        .get(linkLayer);
                if (annotationsPertype == null) {
                    annotationsPertype = new HashMap<>();
                }

                Feature linkNext = lType.getFeatureByBaseName(NEXT);
                int linkNo = 1;
                while (linkFs != null) {
                    // Look the unit up only now: handling the previous link may have
                    // added sub-token units.
                    AnnotationUnit unit = getUnit(linkFs.getBegin(), linkFs.getEnd(), linkFs.getCoveredText());
                    addChinFeatureAnno(annotationsPertype, lType, linkFs, unit, linkNo, chainNo);
                    linkFs = (AnnotationFS) linkFs.getFeatureValue(linkNext);
                    linkNo++;
                }
                if (!annotationsPertype.isEmpty()) {
                    annotationsPerPostion.put(linkLayer, annotationsPertype);
                }
                chainNo++;
            }
        }
    }

    /**
     * Collects the column values of all relation annotations.
     * <p>
     * For every relation layer except Token the governor and dependent features
     * are located by their short names. Each relation is then recorded together
     * with the first units of its two end points and, where an end point sits on
     * an ambiguous unit, the reference number of that end point.
     *
     * @param aJCas
     *            the CAS to read the relations from
     */
    private void setRelationAnnotation(JCas aJCas) {
        String tokenTypeName = Token.class.getName();
        for (String layerName : relationLayers) {
            if (layerName.equals(tokenTypeName)) {
                continue;
            }
            Map<AnnotationUnit, List<List<String>>> perUnit = annotationsPerPostion.get(layerName);
            if (perUnit == null) {
                perUnit = new HashMap<>();
            }

            Type relationType = getType(aJCas.getCas(), layerName);
            Feature dependentFeature = null;
            Feature governorFeature = null;
            for (Feature candidate : relationType.getFeatures()) {
                String shortName = candidate.getShortName();
                if (shortName.equals(DEPENDENT)) {
                    dependentFeature = candidate;
                } else if (shortName.equals(GOVERNOR)) {
                    governorFeature = candidate;
                }
            }

            for (AnnotationFS relation : CasUtil.select(aJCas.getCas(), relationType)) {
                AnnotationFS depFs = (AnnotationFS) relation.getFeatureValue(dependentFeature);
                AnnotationFS govFs = (AnnotationFS) relation.getFeatureValue(governorFeature);

                AnnotationUnit govUnit = getFirstUnit(
                        getUnit(govFs.getBegin(), govFs.getEnd(), govFs.getCoveredText()));
                AnnotationUnit depUnit = getFirstUnit(
                        getUnit(depFs.getBegin(), depFs.getEnd(), depFs.getCoveredText()));

                // A Dependency relation is drawn over POS annotations, which are themselves
                // attached to tokens, so the POS type is the one to look references up with.
                Type govType = govFs.getType();
                boolean isDependency = relationType.getName().equals(Dependency.class.getName());
                if (isDependency) {
                    govType = aJCas.getCas().getTypeSystem().getType(POS.class.getName());
                }

                int govRef = 0;
                int depRef = 0;
                // Relations directly over Token occur in a unit test only and never need a
                // reference; WebAnno itself does not treat Token as an annotation.
                if (!govType.getName().equals(tokenTypeName)) {
                    Map<AnnotationUnit, Boolean> ambiguity = ambigUnits.get(govType.getName());
                    if (ambiguity.get(govUnit).booleanValue()) {
                        govRef = annotaionRefPerType.get(govType).get(govFs);
                    }
                    if (ambiguity.get(depUnit).booleanValue()) {
                        depRef = annotaionRefPerType.get(govType).get(depFs);
                    }
                }

                setRelationAnnoPerFeature(perUnit, relationType, relation, depUnit, govUnit, govRef, depRef,
                        govType);
            }
            if (!perUnit.isEmpty()) {
                annotationsPerPostion.put(layerName, perUnit);
            }
        }
    }

    /**
     * Tells whether the annotation starts inside a known unit and extends
     * beyond the end of that unit.
     *
     * @param aFs
     *            the annotation to check
     * @return {@code true} if some unit contains the annotation's begin offset
     *         and ends before the annotation does
     */
    private boolean isMultiToken(AnnotationFS aFs) {
        return units.stream().anyMatch(candidate -> candidate.begin <= aFs.getBegin()
                && candidate.end > aFs.getBegin() && candidate.end < aFs.getEnd());
    }

    /**
     * Looks up the known unit with exactly the given offsets.
     *
     * @param aBegin
     *            begin offset
     * @param aEnd
     *            end offset
     * @param aText
     *            text used only when a new unit has to be created
     * @return the first known unit with these offsets, or a fresh unit (not
     *         flagged as sub-token) if there is none
     */
    private AnnotationUnit getUnit(int aBegin, int aEnd, String aText) {
        return units.stream()
                .filter(known -> known.begin == aBegin && known.end == aEnd)
                .findFirst()
                .orElseGet(() -> new AnnotationUnit(aBegin, aEnd, false, aText));
    }

    /**
     * Splits the span described by {@code aSTA} into annotation units relative
     * to the currently known units, registers every newly created unit through
     * {@code updateUnitLists} and collects the units in {@code aSubUnits}.
     * <p>
     * Side effects: {@code aSTA} is modified while the span is consumed (its
     * begin offset and text are advanced), and the {@code units} field is
     * replaced by a copy of the working list that contains the inserted units.
     * The method calls itself for the remainder of a span that crosses a unit
     * boundary.
     *
     * @param aSTA
     *            the span to split; modified by this method
     * @param aSubUnits
     *            accumulator the resulting units are added to
     * @return {@code aSubUnits}
     */
    private Set<AnnotationUnit> getSubUnits(SubTokenAnno aSTA, Set<AnnotationUnit> aSubUnits) {
        AnnotationUnit prevUnit = null;
        // Insertions go into this working copy, so the loops below can keep iterating
        // over the list instance that "units" referred to on entry.
        List<AnnotationUnit> tmpUnits = new ArrayList<>(units);
        // Zero-width span: attach an empty unit to the first unit that does not start
        // or end before that position.
        if (aSTA.getBegin() == aSTA.getEnd()) {

            AnnotationUnit newUnit = new AnnotationUnit(aSTA.getBegin(), aSTA.getEnd(), false, "");
            for (AnnotationUnit unit : units) {
                if (unit.begin >= newUnit.begin && unit.end >= newUnit.end) {
                    updateUnitLists(tmpUnits, unit, newUnit);
                    aSubUnits.add(newUnit);
                    units = new ArrayList<>(tmpUnits);
                    return aSubUnits;
                }
            }
            // no such unit found: fall through to the general handling below
        }

        for (AnnotationUnit unit : units) {
            // Stop at the first unit that ends beyond the span; the branches below are
            // not evaluated for that unit.
            // NOTE(review): this also stops at a token that contains a span ending
            // before the token does - confirm that this is intended.
            if (unit.end > aSTA.end) {
                break;
            }
            // this is a sub-token annotation
            if (unit.begin <= aSTA.getBegin() && aSTA.getBegin() <= unit.end && aSTA.getEnd() <= unit.end) {
                AnnotationUnit newUnit = new AnnotationUnit(aSTA.getBegin(), aSTA.getEnd(), false, aSTA.getText());

                updateUnitLists(tmpUnits, unit, newUnit);

                aSubUnits.add(newUnit);
            }
            // if sub-token annotation crosses multiple tokens
            else if ((unit.begin <= aSTA.getBegin() && aSTA.getBegin() < unit.end && aSTA.getEnd() > unit.end)) {

                // length of the part of the span that lies inside this unit
                int thisSubTextLen = unit.end - aSTA.begin;

                AnnotationUnit newUnit = new AnnotationUnit(aSTA.getBegin(), unit.end, false,
                        aSTA.getText().substring(0, thisSubTextLen));
                aSubUnits.add(newUnit);

                updateUnitLists(tmpUnits, unit, newUnit);

                // continue with the rest of the span, starting at the next non-sub-token unit
                aSTA.setBegin(getNextUnitBegin(aSTA.getBegin()));

                aSTA.setText(aSTA.getText().trim().substring(thisSubTextLen));
                // NOTE(review): the recursive call reassigns "units" from its own working
                // copy; this invocation later overwrites it with its own tmpUnits while
                // still iterating the list it started with - confirm this is intended.
                getSubUnits(aSTA, aSubUnits);
            }
            // empty annotation between tokens
            else if (aSTA.getBegin() <= unit.begin && prevUnit != null && prevUnit.end < unit.begin) {
                // length of the part of the span that lies before this unit
                int thisSubTextLen = unit.begin - aSTA.begin;

                AnnotationUnit newUnit = new AnnotationUnit(aSTA.getBegin(), unit.begin, false,
                        aSTA.getText().substring(0, thisSubTextLen));
                aSubUnits.add(newUnit);

                // the gap unit is attached to the unit preceding the gap
                updateUnitLists(tmpUnits, prevUnit, newUnit);

                aSTA.setBegin(unit.begin);

                aSTA.setText(aSTA.getText().trim().substring(thisSubTextLen));
                getSubUnits(aSTA, aSubUnits);
            } else {
                // unit is unrelated to the span; remember it as predecessor for gap detection
                prevUnit = unit;
            }
        }
        // publish the working copy including all units inserted by this invocation
        units = new ArrayList<>(tmpUnits);
        return aSubUnits;
    }

    /**
     * Finds where the next full (non-sub-token) unit after the given offset
     * starts.
     *
     * @param aSTABegin
     *            the offset to search from
     * @return the begin offset of the first non-sub-token unit, in list order,
     *         that starts after {@code aSTABegin}; {@code aSTABegin} itself if
     *         there is no such unit
     */
    private int getNextUnitBegin(int aSTABegin) {
        return units.stream()
                .filter(candidate -> candidate.begin > aSTABegin && !candidate.isSubtoken)
                .mapToInt(candidate -> candidate.begin)
                .findFirst()
                .orElse(aSTABegin);
    }

    /**
     * Tells whether a span covers the start of at least one further token: the
     * span counts as a multiple-token (or token-crossing) annotation if some
     * non-sub-token unit begins strictly inside it.
     * 
     * @param aBegin
     *            begin offset of the span
     * @param aEnd
     *            end offset of the span
     * @return {@code true} if a non-sub-token unit begins after {@code aBegin}
     *         and before {@code aEnd}, {@code false} otherwise
     */
    private boolean isMultipleTokenAnnotation(int aBegin, int aEnd) {
        return units.stream()
                .anyMatch(candidate -> !candidate.isSubtoken && aBegin < candidate.begin && candidate.begin < aEnd);
    }

    /**
     * Registers {@code newUnit} as a sub-unit of {@code unit} unless it is
     * already known. A registered unit is flagged as sub-token, inserted right
     * after its parent, counted for the parent and given the id
     * {@code <parent id>.<running number>}.
     *
     * @param tmpUnits
     *            the working list of units the new unit is inserted into
     * @param unit
     *            the parent unit
     * @param newUnit
     *            the unit to register
     */
    private void updateUnitLists(List<AnnotationUnit> tmpUnits, AnnotationUnit unit, AnnotationUnit newUnit) {
        if (tmpUnits.contains(newUnit)) {
            return;
        }
        newUnit.isSubtoken = true;
        // is this sub-token already there
        if (tmpUnits.contains(newUnit)) {
            return;
        }
        int insertAt = tmpUnits.indexOf(unit) + 1;
        tmpUnits.add(insertAt, newUnit);
        int ordinal = subUnits.merge(unit, 1, Integer::sum);
        unitsLineNumber.put(newUnit, unitsLineNumber.get(unit) + "." + ordinal);
    }

    /**
     * Builds the column values of one span annotation for one annotation unit,
     * appends them to the values already stored for that unit and registers the
     * corresponding column names for the layer.
     * <p>
     * A slot feature produces two columns: the roles and the targets of its
     * links, each joined with {@code ;}. A target is written as the id of its
     * first unit, followed by {@code -<layer number>} when the slot target type
     * is the generic annotation type, and by {@code [ref]} when the target sits
     * on an ambiguous unit. Every other feature produces one column holding the
     * escaped value ({@code *} for no value) plus {@code [ref]} when this
     * annotation itself sits on an ambiguous unit. A layer without any feature
     * gets a single {@code *} column.
     *
     * @param aAnnotationsPertype
     *            the per-unit values of the layer; extended by this call
     * @param aType
     *            the type of the layer
     * @param aFs
     *            the annotation whose features are read
     * @param aUnit
     *            the unit the values are recorded on
     * @param aIsMultiToken
     *            passed on to {@code setAnnoFeature}
     * @param aIsFirst
     *            passed on to {@code setAnnoFeature}
     */
    private void setSpanAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType,
            AnnotationFS aFs, AnnotationUnit aUnit, boolean aIsMultiToken, boolean aIsFirst) {
        List<String> annoPerFeatures = new ArrayList<>();
        featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());
        Set<String> layerColumns = featurePerLayer.get(aType.getName());
        int ref = getRefId(aType, aFs, aUnit);

        if (ambigUnits.get(aType.getName()).get(getFirstUnit(aUnit)).equals(false)) {
            ref = 0;
        }
        // Reference suffix of THIS annotation. It is kept apart from the references of
        // slot targets so that columns following a slot feature still carry the right one.
        String refSuffix = ref > 0 ? "[" + ref + "]" : "";

        for (Feature feature : aType.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa")
                    || feature.toString().equals("uima.tcas.Annotation:begin")
                    || feature.toString().equals("uima.tcas.Annotation:end")
                    || feature.getShortName().equals(GOVERNOR) || feature.getShortName().equals(DEPENDENT)
                    || feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
                continue;
            }

            // if slot feature
            if (slotFeatures != null && slotFeatures.contains(feature.getName())) {
                FeatureStructure slotValue = aFs.getFeatureValue(feature);
                if (slotValue != null) {
                    ArrayFS array = (ArrayFS) slotValue;
                    StringBuilder sbRole = new StringBuilder();
                    StringBuilder sbTarget = new StringBuilder();
                    // explicit flag: an empty role must not make the next link look like the first
                    boolean firstLink = true;
                    for (FeatureStructure linkFS : array.toArray()) {
                        String role = linkFS.getStringValue(linkFS.getType().getFeatureByBaseName("role"));
                        AnnotationFS targetFs = (AnnotationFS) linkFS
                                .getFeatureValue(linkFS.getType().getFeatureByBaseName("target"));
                        Type tType = targetFs.getType();

                        AnnotationUnit firstUnit = getFirstUnit(targetFs);
                        int targetRef = getRefId(tType, targetFs, firstUnit);
                        // Check if the target is ambiguous or not
                        if (ambigUnits.get(tType.getName()).get(firstUnit).equals(false)) {
                            targetRef = 0;
                        }
                        // Escape special character
                        role = role == null ? "*" : replaceEscapeChars(role);

                        if (!firstLink) {
                            sbRole.append(";");
                            sbTarget.append(";");
                        }
                        firstLink = false;
                        sbRole.append(role);

                        // record the actual target type column number if slot target is
                        // uima.tcas.Annotation
                        int targetTypeNumber = 0;
                        if (slotFeatureTypes.get(feature).getName().equals(CAS.TYPE_NAME_ANNOTATION)) {
                            targetTypeNumber = layerMaps.get(tType);
                        }
                        sbTarget.append(unitsLineNumber.get(firstUnit)
                                + (targetTypeNumber == 0 ? "" : "-" + targetTypeNumber)
                                + (targetRef > 0 ? "[" + targetRef + "]" : ""));
                    }
                    annoPerFeatures.add(sbRole.length() == 0 ? "_" : sbRole.toString());
                    annoPerFeatures.add(sbTarget.length() == 0 ? "_" : sbTarget.toString());
                } else {
                    // no slot value: both columns stay empty
                    annoPerFeatures.add("_");
                    annoPerFeatures.add("_");
                }
                layerColumns.add(ROLE + feature.getName() + "_" + slotLinkTypes.get(feature.getName()));
                layerColumns.add(slotFeatureTypes.get(feature).getName());
            } else {
                String annotation = aFs.getFeatureValueAsString(feature);
                if (annotation == null) {
                    annotation = "*";
                } else {
                    // Escape special character
                    annotation = replaceEscapeChars(annotation);
                }
                annotation = annotation + refSuffix;
                // only add BIO markers to multiple annotations
                setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, annotation);

                layerColumns.add(feature.getShortName());
            }
        }
        aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
        // If the layer do not have a feature at all, add dummy * as a place holder
        if (annoPerFeatures.isEmpty()) {
            setAnnoFeature(aIsMultiToken, aIsFirst, annoPerFeatures, "*" + refSuffix);
        }
        aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
    }

    /**
     * Records one link of a coreference chain. A link that matches a known
     * unit is recorded on that unit; any other link is split into sub-units
     * and recorded on each of them.
     * 
     * @param aAnnotationsPertype
     *            store annotations per type associated with the annotation
     *            units
     * @param aType
     *            the coreference annotation type
     * @param aFs
     *            the feature structure
     * @param aUnit
     *            the current annotation unit of the coreference chain
     * @param aLinkNo
     *            a reference to the link in a chain, starting at one for the
     *            first link and n for the last link in the chain
     * @param achainNo
     *            a reference to the chain, starting at 1 for the first chain
     *            and n for the last chain where n is the number of coreference
     *            chains the document
     */

    private void addChinFeatureAnno(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType,
            AnnotationFS aFs, AnnotationUnit aUnit, int aLinkNo, int achainNo) {
        featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());

        if (units.contains(aUnit)) {
            // annotation is per Token
            setChainAnnoPerFeature(aAnnotationsPertype, aType, aFs, aUnit, aLinkNo, achainNo, false, false);
            return;
        }

        // Annotation is on sub-token or multiple tokens
        SubTokenAnno pending = new SubTokenAnno();
        pending.setBegin(aFs.getBegin());
        pending.setEnd(aFs.getEnd());
        pending.setText(aFs.getCoveredText());
        // must be evaluated before the units are split below
        boolean spansSeveralTokens = isMultiToken(aFs);
        Set<AnnotationUnit> collected = new LinkedHashSet<>();
        boolean first = true;
        for (AnnotationUnit part : getSubUnits(pending, collected)) {
            setChainAnnoPerFeature(aAnnotationsPertype, aType, aFs, part, aLinkNo, achainNo,
                    spansSeveralTokens, first);
            first = false;
        }
    }

    /**
     * Builds the feature values of one coreference chain link for a single annotation unit and
     * appends them to the annotations stored for that unit.
     * <p>
     * The sofa/begin/end features and the governor, dependent, first and next features are
     * skipped. The {@code REF_REL} feature value is suffixed with {@code ->chainNo-linkNo}; every
     * other value is suffixed with {@code [chainNo]}. A {@code null} value is written as
     * {@code *}. The unit is always flagged as ambiguous for this type.
     *
     * @param aAnnotationsPertype
     *            annotations collected so far for this type, keyed by annotation unit
     * @param aType
     *            the coreference annotation type
     * @param aFs
     *            the feature structure of the chain link
     * @param aUnit
     *            the unit the values are attached to
     * @param aLinkNo
     *            number of the link inside its chain
     * @param achainNo
     *            number of the chain inside the document
     * @param aMultiUnit
     *            whether the link spans several units; currently has no effect on the output
     * @param aFirst
     *            whether {@code aUnit} is the first unit of the link; currently unused
     */
    private void setChainAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype, Type aType,
            AnnotationFS aFs, AnnotationUnit aUnit, int aLinkNo, int achainNo, boolean aMultiUnit, boolean aFirst) {
        // Make sure the layer is registered even when this method is reached without the
        // caller having done so (no-op when the entry already exists)
        featurePerLayer.putIfAbsent(aType.getName(), new LinkedHashSet<>());

        List<String> annoPerFeatures = new ArrayList<>();
        for (Feature feature : aType.getFeatures()) {
            // Identify features through their declared names; toString() is a debugging aid
            // whose format is not part of the Feature contract
            String fullName = feature.getName();
            String shortName = feature.getShortName();
            if (fullName.equals("uima.cas.AnnotationBase:sofa")
                    || fullName.equals("uima.tcas.Annotation:begin")
                    || fullName.equals("uima.tcas.Annotation:end")
                    || shortName.equals(GOVERNOR) || shortName.equals(DEPENDENT)
                    || shortName.equals(FIRST) || shortName.equals(NEXT)) {
                continue;
            }

            String annotation = aFs.getFeatureValueAsString(feature);
            annotation = (annotation == null) ? "*" : replaceEscapeChars(annotation);

            if (shortName.equals(REF_REL)) {
                // the relation column points to the chain and the link within it
                annotation = annotation + "->" + achainNo + "-" + aLinkNo;
            } else {
                // every other column carries the chain number, single- or multi-unit alike
                annotation = annotation + "[" + achainNo + "]";
            }
            featurePerLayer.get(aType.getName()).add(shortName);

            annoPerFeatures.add(annotation);
        }
        aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
        ambigUnits.putIfAbsent(aType.getName(), new HashMap<>());
        ambigUnits.get(aType.getName()).put(aUnit, true); // coref are always ambig

        // A layer without any writable feature still needs a placeholder value
        if (annoPerFeatures.isEmpty()) {
            annoPerFeatures.add("*" + "[" + achainNo + "]");
        }
        aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
    }

    /**
     * Builds the feature values of one relation annotation and appends them to the annotations
     * stored for the dependent unit.
     * <p>
     * The sofa/begin/end features and the governor, dependent, first and next features are
     * skipped; a {@code null} feature value is written as {@code *}. After the feature values,
     * one more value is always appended: the line address of the governor unit, followed by
     * {@code [govRef_depRef]} when at least one of the two reference numbers is positive.
     *
     * @param annotationsPertype
     *            annotations collected so far for this type, keyed by annotation unit
     * @param type
     *            the relation annotation type
     * @param fs
     *            the relation annotation
     * @param depUnit
     *            the unit of the dependent; the values are attached to this unit
     * @param govUnit
     *            the unit of the governor, whose line address is written
     * @param aGovRef
     *            reference number of the governor annotation
     * @param aDepRef
     *            reference number of the dependent annotation
     * @param aDepType
     *            the type of the annotation the relation is attached to; its name (prefixed
     *            with {@code BT}) is registered as the column for the governor address
     */
    private void setRelationAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> annotationsPertype, Type type,
            AnnotationFS fs, AnnotationUnit depUnit, AnnotationUnit govUnit, int aGovRef, int aDepRef,
            Type aDepType) {
        List<String> annoPerFeatures = new ArrayList<>();
        featurePerLayer.putIfAbsent(type.getName(), new LinkedHashSet<>());
        for (Feature feature : type.getFeatures()) {
            // Identify features through their declared names; toString() is a debugging aid
            // whose format is not part of the Feature contract
            String fullName = feature.getName();
            String shortName = feature.getShortName();
            if (fullName.equals("uima.cas.AnnotationBase:sofa")
                    || fullName.equals("uima.tcas.Annotation:begin")
                    || fullName.equals("uima.tcas.Annotation:end")
                    || shortName.equals(GOVERNOR) || shortName.equals(DEPENDENT)
                    || shortName.equals(FIRST) || shortName.equals(NEXT)) {
                continue;
            }
            // The returned number is not written for relations, but the call is kept because
            // getRefId records the annotation in the reference bookkeeping maps
            getRefId(type, fs, depUnit);

            String annotation = fs.getFeatureValueAsString(feature);
            annoPerFeatures.add(annotation == null ? "*" : replaceEscapeChars(annotation));
            featurePerLayer.get(type.getName()).add(shortName);
        }

        // add the governor unit address, with the governor and dependent references separated by _
        String govRef = unitsLineNumber.get(govUnit)
                + ((aDepRef > 0 || aGovRef > 0) ? "[" + aGovRef + "_" + aDepRef + "]" : "");
        annoPerFeatures.add(govRef);
        featurePerLayer.get(type.getName()).add(BT + aDepType.getName());

        // annoPerFeatures always holds at least the governor reference here, so no
        // placeholder value is needed
        annotationsPertype.computeIfAbsent(depUnit, key -> new ArrayList<>()).add(annoPerFeatures);
    }

    /**
     * Escapes the characters and character sequences that have a special meaning in the written
     * annotation values by prefixing them with a backslash. Tab and line feed are turned into the
     * two-character sequences {@code \t} and {@code \n}.
     *
     * @param annotation
     *            the raw feature value
     * @return the escaped value
     */
    private String replaceEscapeChars(String annotation) {
        // Applied strictly in this order: the backslash has to be escaped before any of the
        // other rules introduces new backslashes
        final String[][] rules = {
                { "\\", "\\\\" },
                { "[", "\\[" },
                { "]", "\\]" },
                { "|", "\\|" },
                { "_", "\\_" },
                { "->", "\\->" },
                { ";", "\\;" },
                { "\t", "\\t" },
                { "\n", "\\n" },
                { "*", "\\*" } };

        String escaped = annotation;
        for (String[] rule : rules) {
            escaped = escaped.replace(rule[0], rule[1]);
        }
        return escaped;
    }

    /**
     * Appends an annotation value to the list of per-feature values.
     *
     * @param aIsMultiToken
     *            whether the annotation spans several units; does not influence the value added
     * @param aIsFirst
     *            whether this is the first unit of the annotation; does not influence the value
     *            added
     * @param aAnnoPerFeatures
     *            the list receiving the value
     * @param annotation
     *            the value to append
     */
    private void setAnnoFeature(boolean aIsMultiToken, boolean aIsFirst, List<String> aAnnoPerFeatures,
            String annotation) {
        // The value is stored unchanged for single-unit, first-unit and follow-up-unit cases alike
        aAnnoPerFeatures.add(annotation);
    }

    /**
     * Resolves the span of an annotation into sub-units and returns the first one.
     *
     * @param targetFs
     *            the annotation whose span is resolved
     * @return the first unit yielded by {@code getSubUnits}, or {@code null} if it yields none
     */
    private AnnotationUnit getFirstUnit(AnnotationFS targetFs) {
        SubTokenAnno span = new SubTokenAnno();
        span.setBegin(targetFs.getBegin());
        span.setEnd(targetFs.getEnd());
        span.setText(targetFs.getCoveredText());

        Set<AnnotationUnit> collected = new LinkedHashSet<>();
        for (AnnotationUnit candidate : getSubUnits(span, collected)) {
            // only the very first unit is of interest
            return candidate;
        }
        return null;
    }

    /**
     * Resolves the span of an annotation unit into sub-units and returns the first one. For a
     * relation annotation drawn on a multi-unit span annotation, the information is put only on
     * the first unit.
     *
     * @param aUnit
     *            the unit whose span is resolved
     * @return the first unit yielded by {@code getSubUnits}, or {@code null} if it yields none
     */
    private AnnotationUnit getFirstUnit(AnnotationUnit aUnit) {
        SubTokenAnno span = new SubTokenAnno();
        span.setBegin(aUnit.begin);
        span.setEnd(aUnit.end);
        span.setText(aUnit.token);

        Set<AnnotationUnit> collected = new LinkedHashSet<>();
        for (AnnotationUnit candidate : getSubUnits(span, collected)) {
            // only the very first unit is of interest
            return candidate;
        }
        return null;
    }

    /**
     * Returns the reference number of an annotation, assigning a new one if the annotation has
     * not been seen before.
     * <p>
     * Reference numbers are kept per annotation type and are incremental: the first annotation
     * of a type gets 1, every further new annotation gets the highest number handed out so far
     * plus one. An annotation that already has a number (for example because it spans several
     * units) gets the same number again. Newly assigned numbers are additionally recorded per
     * unit. The numbers let annotations that span several units, or that are stacked on the same
     * unit, be told apart when the file is re-imported or processed outside WebAnno; this matters
     * most for relation/slot and chain annotations.
     *
     * @param type
     *            the annotation type
     * @param fs
     *            the annotation
     * @param unit
     *            the annotation element (token or sub-token)
     * @return the reference number to be attached to this annotation value
     */
    private int getRefId(Type type, AnnotationFS fs, AnnotationUnit unit) {
        Map<FeatureStructure, Integer> refsOfType = annotaionRefPerType.get(type);

        // Nothing of this type has been numbered yet: start counting at one
        if (refsOfType == null) {
            refsOfType = new HashMap<>();
            refsOfType.put(fs, 1);
            annotaionRefPerType.put(type, refsOfType);

            multiAnnosPerUnit.putIfAbsent(type, new HashMap<>());
            Map<FeatureStructure, Integer> firstOnUnit = new HashMap<>();
            firstOnUnit.put(fs, 1);
            multiAnnosPerUnit.get(type).put(unit, firstOnUnit);
            return 1;
        }

        // Annotation seen before (multiple token annotation): re-use its reference number
        Integer known = refsOfType.get(fs);
        if (known != null) {
            return known;
        }

        // New annotation: continue after the last reference number handed out so far
        int ref = Collections.max(refsOfType.values()) + 1;
        refsOfType.put(fs, ref);

        // Record the number for the unit; an existing entry means the annotation is stacked
        // on a unit that already carries annotations of this type
        Map<FeatureStructure, Integer> refsOnUnit = multiAnnosPerUnit.get(type).get(unit);
        if (refsOnUnit == null) {
            refsOnUnit = new HashMap<>();
        }
        refsOnUnit.put(fs, ref);
        multiAnnosPerUnit.get(type).put(unit, refsOnUnit);
        return ref;
    }

    /**
     * Registers every token of the document as an annotation unit and gives it an address of the
     * form {@code sentenceNumber-tokenNumber}, both counted from 1. The unit of the first token
     * of each sentence is additionally mapped to the text of that sentence.
     *
     * @param aJCas
     *            the document whose sentences and tokens are addressed
     */
    private void setTokenSentenceAddress(JCas aJCas) {
        int sentenceNo = 0;
        for (Sentence sentence : select(aJCas, Sentence.class)) {
            sentenceNo++;
            int tokenNo = 0;
            for (Token token : selectCovered(Token.class, sentence)) {
                tokenNo++;
                AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(), false,
                        token.getCoveredText());
                units.add(tokenUnit);

                // the sentence text is remembered once, on the sentence's first token
                boolean opensSentence = tokenNo == 1;
                if (opensSentence) {
                    sentenceUnits.put(tokenUnit, sentence.getCoveredText());
                }

                String address = sentenceNo + "-" + tokenNo;
                unitsLineNumber.put(tokenUnit, address);
            }
        }
    }

    /**
     * Mutable holder for a character span (begin and end offsets) together with a text value.
     */
    class SubTokenAnno {
        /** Begin offset of the span. */
        int begin;
        /** End offset of the span. */
        int end;
        /** Text associated with the span. */
        String text;

        /** @return the begin offset */
        public int getBegin() {
            return this.begin;
        }

        /**
         * @param begin
         *            the begin offset to store
         */
        public void setBegin(int begin) {
            this.begin = begin;
        }

        /** @return the end offset */
        public int getEnd() {
            return this.end;
        }

        /**
         * @param end
         *            the end offset to store
         */
        public void setEnd(int end) {
            this.end = end;
        }

        /** @return the stored text, or {@code null} if none has been set */
        public String getText() {
            return this.text;
        }

        /**
         * @param text
         *            the text to store
         */
        public void setText(String text) {
            this.text = text;
        }
    }
}