edu.cmu.lti.oaqa.framework.eval.gs.PassageGoldStandardFilePersistenceProvider.java Source code

Java tutorial

Introduction

Here is the source code for edu.cmu.lti.oaqa.framework.eval.gs.PassageGoldStandardFilePersistenceProvider.java

Source

/*
 *  Copyright 2013 Carnegie Mellon University
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package edu.cmu.lti.oaqa.framework.eval.gs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;

import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceSpecifier;
import org.oaqa.model.Passage;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;

/**
 * A gold standard persistence provider that reads files containing gold standard passage
 * annotations into memory, stores them in a map keyed by (dataset, sequence id), and populates
 * gold standard passages for each input element.
 * 
 * Required parameters:
 * <ul>
 * <li>DataSet: the dataset name used as the first half of the lookup key.</li>
 * <li>LineSyntax: a regular expression describing one annotation line. It must define five
 * capturing groups, read in this order: (1) sequence id, (2) document id, (3) begin offset,
 * (4) end offset, (5) aspects. For example, <code>(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(.*)</code>
 * accepts white-space separated fields. Groups 3 and 4 are parsed with
 * {@link Integer#parseInt(String)}.</li>
 * <li>PathPattern: the location pattern of the annotation files (refer to the
 * PathMatchingResourcePatternResolver in the spring framework for more detail).</li>
 * </ul>
 * 
 * Lines in which the LineSyntax pattern cannot be found are skipped.
 * 
 * @author Zi Yang <ziy@cs.cmu.edu>
 * 
 */
public class PassageGoldStandardFilePersistenceProvider extends AbstractGoldStandardPersistenceProvider {

    private static final PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();

    /** Gold standard spans loaded during initialize(), grouped by (dataset, sequence id). */
    private final Map<DatasetSequenceId, List<GoldStandardSpan>> id2gsSpans = new HashMap<DatasetSequenceId, List<GoldStandardSpan>>();

    /**
     * Initializes the provider and loads every resource matched by the "PathPattern" parameter into
     * the in-memory map.
     * 
     * @param aSpecifier
     *            passed through to the superclass
     * @param aAdditionalParams
     *            passed through to the superclass
     * @return the value returned by the superclass initialization
     * @throws ResourceInitializationException
     *             if the superclass fails, or if a gold standard resource cannot be resolved or read
     *             (the underlying IOException is attached as the cause)
     */
    @Override
    public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams)
            throws ResourceInitializationException {
        boolean ret = super.initialize(aSpecifier, aAdditionalParams);
        String dataset = (String) getParameterValue("DataSet");
        Pattern lineSyntaxPattern = Pattern.compile((String) getParameterValue("LineSyntax"));
        try {
            Resource[] resources = resolver.getResources((String) getParameterValue("PathPattern"));
            for (Resource resource : resources) {
                readGoldStandardSpans(resource, dataset, lineSyntaxPattern);
            }
        } catch (IOException e) {
            // Do not continue with a missing or partial gold standard: report the failure to the
            // caller instead of only printing a stack trace.
            throw new ResourceInitializationException(e);
        }
        return ret;
    }

    /**
     * Reads one gold standard resource line by line and adds every matching line to the map.
     * 
     * @param resource
     *            the resource to read
     * @param dataset
     *            the dataset name used in the lookup key
     * @param lineSyntaxPattern
     *            the compiled "LineSyntax" pattern with five capturing groups
     * @throws IOException
     *             if the resource cannot be opened or a read error occurs while scanning
     */
    private void readGoldStandardSpans(Resource resource, String dataset, Pattern lineSyntaxPattern)
            throws IOException {
        Scanner scanner = new Scanner(resource.getInputStream());
        try {
            while (scanner.hasNextLine()) {
                // findInLine() returns null and leaves the position unchanged when the current line
                // does not match; such lines are skipped rather than ending the whole parse.
                if (scanner.findInLine(lineSyntaxPattern) != null) {
                    MatchResult result = scanner.match();
                    DatasetSequenceId id = new DatasetSequenceId(dataset, result.group(1));
                    List<GoldStandardSpan> list = id2gsSpans.get(id);
                    if (list == null) {
                        list = new ArrayList<GoldStandardSpan>();
                        id2gsSpans.put(id, list);
                    }
                    list.add(new GoldStandardSpan(result.group(2), Integer.parseInt(result.group(3)),
                            Integer.parseInt(result.group(4)), result.group(5)));
                }
                // Drop the rest of the current line. The guard is needed because a match on an
                // unterminated last line consumes all remaining input.
                if (scanner.hasNextLine()) {
                    scanner.nextLine();
                }
            }
            // Scanner suppresses read errors and behaves as if the input ended; surface them.
            IOException readFailure = scanner.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        } finally {
            // Closing the scanner also closes the underlying input stream.
            scanner.close();
        }
    }

    /**
     * Creates one Passage in the given view for each gold standard span stored for the given
     * dataset and sequence id.
     * 
     * @param dataset
     *            the dataset name of the lookup key
     * @param sequenceId
     *            the sequence id of the lookup key
     * @param gsView
     *            the view in which the passages are created
     * @return the created passages; empty if nothing is stored for the key
     */
    @Override
    public List<Passage> populateRetrievalGS(String dataset, String sequenceId, JCas gsView) {
        List<Passage> gsAnnotations = new ArrayList<Passage>();
        List<GoldStandardSpan> gsSpans = id2gsSpans.get(new DatasetSequenceId(dataset, sequenceId));
        if (gsSpans != null) {
            for (GoldStandardSpan gsSpan : gsSpans) {
                Passage passage = new Passage(gsView);
                passage.setUri(gsSpan.docId);
                passage.setBegin(gsSpan.begin);
                passage.setEnd(gsSpan.end);
                passage.setAspects(gsSpan.aspects);
                gsAnnotations.add(passage);
            }
        }
        return gsAnnotations;
    }

    /**
     * A dataset, sequenceId pair used as the key of the GSProvider to populate gold-standards wrt
     * particular input. Equality and hash code depend only on the two strings (either may be
     * null), not on the enclosing provider instance.
     * 
     * @author Zi Yang <ziy@cs.cmu.edu>
     * 
     */
    public class DatasetSequenceId {
        final String dataset;

        final String sequenceId;

        public DatasetSequenceId(String dataset, String sequenceId) {
            this.dataset = dataset;
            this.sequenceId = sequenceId;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + ((dataset == null) ? 0 : dataset.hashCode());
            result = prime * result + ((sequenceId == null) ? 0 : sequenceId.hashCode());
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof DatasetSequenceId)) {
                return false;
            }
            DatasetSequenceId other = (DatasetSequenceId) obj;
            if (dataset == null) {
                if (other.dataset != null) {
                    return false;
                }
            } else if (!dataset.equals(other.dataset)) {
                return false;
            }
            if (sequenceId == null) {
                return other.sequenceId == null;
            }
            return sequenceId.equals(other.sequenceId);
        }

        @Override
        public String toString() {
            return String.format("[%s:%s]", dataset, sequenceId);
        }

    }

    /**
     * Equivalent to Annotation, without the requirement to specify a CAS for the annotation, since
     * during initialize(), the CAS is still not clear to the pipeline.
     * 
     * Equality and hash code are based on the enclosing provider instance, the document id, and the
     * begin and end offsets; the aspects string is not part of the comparison.
     * 
     * @author Zi Yang <ziy@cs.cmu.edu>
     * 
     */
    public class GoldStandardSpan {
        String docId;

        int begin, end;

        String aspects;

        public GoldStandardSpan(String docId, int begin, int end, String aspects) {
            this.docId = docId;
            this.begin = begin;
            this.end = end;
            this.aspects = aspects;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + getOuterType().hashCode();
            result = prime * result + begin;
            result = prime * result + ((docId == null) ? 0 : docId.hashCode());
            result = prime * result + end;
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            GoldStandardSpan other = (GoldStandardSpan) obj;
            if (!getOuterType().equals(other.getOuterType())) {
                return false;
            }
            if (begin != other.begin) {
                return false;
            }
            if (docId == null) {
                if (other.docId != null) {
                    return false;
                }
            } else if (!docId.equals(other.docId)) {
                return false;
            }
            if (end != other.end) {
                return false;
            }
            return true;
        }

        private PassageGoldStandardFilePersistenceProvider getOuterType() {
            return PassageGoldStandardFilePersistenceProvider.this;
        }

    }

}