org.apache.ctakes.temporal.data.analysis.CompareFeatureStructures.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.ctakes.temporal.data.analysis.CompareFeatureStructures.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.temporal.data.analysis;

import java.io.File;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import javax.annotation.Nullable;

import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.NonEmptyFSList;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;

import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Iterators;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.lexicalscope.jewel.cli.CliFactory;
import com.lexicalscope.jewel.cli.Option;

import difflib.Chunk;
import difflib.Delta;
import difflib.Patch;
import difflib.myers.Equalizer;
import difflib.myers.MyersDiff;

/**
 * Command line tool that compares the feature structures stored in two
 * directories of XMI files and prints the differences to standard error in a
 * unified-diff-like format.
 *
 * <p>
 * The comparison proceeds in stages and stops at the first stage that differs:
 * the (sorted) file names of the two directories, then, per file, the view
 * names of the two CASes, then, per view, the feature structures reachable
 * from the index for each of the configured root annotation classes.
 */
public class CompareFeatureStructures {
    /**
     * Command line options: the two directories to compare ({@code --dir1},
     * {@code --dir2}) and the names of the classes whose indexed feature
     * structures should be compared ({@code --roots}).
     */
    static interface Options {
        @Option(longName = "dir1")
        public File getDirectory1();

        @Option(longName = "dir2")
        public File getDirectory2();

        @Option(longName = "roots", defaultValue = {
                "org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation",
                "org.apache.ctakes.typesystem.type.relation.Relation" })
        public List<String> getAnnotationClassNames();
    }

    /**
     * Parses the command line, compares the two directories and logs every
     * difference found.
     *
     * @param args
     *            command line arguments as described by {@link Options}
     * @throws Exception
     *             if a root class cannot be loaded, a directory cannot be
     *             listed, or an XMI file cannot be read
     */
    public static void main(String[] args) throws Exception {
        Options options = CliFactory.parseArguments(Options.class, args);
        List<Class<?>> annotationClasses = Lists.newArrayList();
        for (String annotationClassName : options.getAnnotationClassNames()) {
            annotationClasses.add(Class.forName(annotationClassName));
        }

        MyersDiff<String> stringDiff = new MyersDiff<String>();
        MyersDiff<FeatureStructure> fsDiff = new MyersDiff<FeatureStructure>(new FeatureStructureEqualizer());

        File originalDir = options.getDirectory1();
        File revisedDir = options.getDirectory2();
        // File.list() gives no ordering guarantee, so both listings are sorted
        // before diffing; otherwise identical directories could appear different.
        List<String> originalNames = listFileNames(originalDir);
        List<String> revisedNames = listFileNames(revisedDir);
        Patch<String> dirPatch = stringDiff.diff(originalNames, revisedNames);
        if (!dirPatch.getDeltas().isEmpty()) {
            log("--- %s files\n", originalDir);
            log("+++ %s files\n", revisedDir);
            log(dirPatch);
            return;
        }
        for (String fileName : originalNames) {
            compareFiles(new File(originalDir, fileName), new File(revisedDir, fileName), annotationClasses,
                    stringDiff, fsDiff);
        }
    }

    /**
     * Returns the names of the entries of a directory in natural (sorted) order.
     *
     * @param directory
     *            the directory to list
     * @return the sorted entry names
     * @throws IllegalArgumentException
     *             if {@link File#list()} returns {@code null} for the given path
     */
    private static List<String> listFileNames(File directory) {
        String[] names = directory.list();
        if (names == null) {
            throw new IllegalArgumentException("Cannot list directory: " + directory);
        }
        return Ordering.<String>natural().sortedCopy(Lists.newArrayList(names));
    }

    /**
     * Compares one pair of XMI files: first their view names, and, only if
     * those match, the feature structures of every view.
     *
     * @throws Exception
     *             if either file cannot be deserialized or a view cannot be
     *             accessed
     */
    private static void compareFiles(File originalFile, File revisedFile, List<Class<?>> annotationClasses,
            MyersDiff<String> stringDiff, MyersDiff<FeatureStructure> fsDiff) throws Exception {
        JCas originalJCas = readXMI(originalFile);
        JCas revisedJCas = readXMI(revisedFile);
        List<String> originalViews = getViewNames(originalJCas);
        List<String> revisedViews = getViewNames(revisedJCas);
        Patch<String> viewsPatch = stringDiff.diff(originalViews, revisedViews);
        if (!viewsPatch.getDeltas().isEmpty()) {
            log("--- %s views\n", originalFile);
            log("+++ %s views\n", revisedFile);
            log(viewsPatch);
            return;
        }
        for (String viewName : originalViews) {
            compareViews(originalFile, revisedFile, viewName, originalJCas.getView(viewName),
                    revisedJCas.getView(viewName), annotationClasses, stringDiff, fsDiff);
        }
    }

    /**
     * Compares the feature structures of one view and logs the differences.
     * Inserted or deleted feature structures are printed whole; changed ones
     * are printed as a line-by-line diff of their string forms.
     *
     * @throws Exception
     *             propagated from the underlying diff or CAS access
     */
    private static void compareViews(File originalFile, File revisedFile, String viewName, JCas originalView,
            JCas revisedView, List<Class<?>> annotationClasses, MyersDiff<String> stringDiff,
            MyersDiff<FeatureStructure> fsDiff) throws Exception {
        List<FeatureStructure> originalFSes = toFeatureStructures(originalView, annotationClasses);
        List<FeatureStructure> revisedFSes = toFeatureStructures(revisedView, annotationClasses);
        Patch<FeatureStructure> fsPatch = fsDiff.diff(originalFSes, revisedFSes);
        if (fsPatch.getDeltas().isEmpty()) {
            return;
        }
        log("--- %s view %s\n", originalFile, viewName);
        log("+++ %s view %s\n", revisedFile, viewName);
        for (Delta<FeatureStructure> fsDelta : fsPatch.getDeltas()) {
            logHeader(fsDelta);
            switch (fsDelta.getType()) {
            case DELETE:
            case INSERT:
                log(fsDelta);
                break;
            case CHANGE:
                logLineDiff(toLines(fsDelta.getOriginal().getLines()), toLines(fsDelta.getRevised().getLines()),
                        stringDiff);
                break;
            }
        }
    }

    /**
     * Logs a line-level diff of two texts: unchanged lines are prefixed with a
     * space, removed lines with {@code -} and added lines with {@code +}.
     *
     * @param originalLines
     *            lines of the original text
     * @param revisedLines
     *            lines of the revised text
     * @param stringDiff
     *            the diff algorithm to apply
     * @throws Exception
     *             propagated from the underlying diff
     */
    private static void logLineDiff(List<String> originalLines, List<String> revisedLines,
            MyersDiff<String> stringDiff) throws Exception {
        Patch<String> linesPatch = stringDiff.diff(originalLines, revisedLines);
        // Both maps are keyed by the position of the delta in the ORIGINAL text.
        ListMultimap<Integer, String> deletes = ArrayListMultimap.create();
        ListMultimap<Integer, String> inserts = ArrayListMultimap.create();
        // Original line indices that were deleted/replaced and so are not context.
        Set<Integer> skips = Sets.newHashSet();
        for (Delta<String> linesDelta : linesPatch.getDeltas()) {
            Chunk<String> originalChunk = linesDelta.getOriginal();
            Chunk<String> revisedChunk = linesDelta.getRevised();
            int start = originalChunk.getPosition();
            deletes.putAll(start, originalChunk.getLines());
            inserts.putAll(start, revisedChunk.getLines());
            for (int i = start; i < start + originalChunk.size(); ++i) {
                skips.add(i);
            }
        }
        // Iterate one position past the last line: an insertion at the very end
        // is keyed by originalLines.size() and would otherwise never be printed.
        for (int i = 0; i <= originalLines.size(); ++i) {
            // Changes anchored at position i come before original line i, so they
            // are emitted ahead of the context line.
            for (String line : deletes.get(i)) {
                log("-%s\n", line);
            }
            for (String line : inserts.get(i)) {
                log("+%s\n", line);
            }
            if (i < originalLines.size() && !skips.contains(i)) {
                log(" %s\n", originalLines.get(i));
            }
        }
    }

    /** Writes a formatted message to standard error. */
    private static void log(String message, Object... args) {
        System.err.printf(message, args);
    }

    /** Logs every delta of a patch, each preceded by its {@code @@} header. */
    private static <T> void log(Patch<T> patch) {
        for (Delta<T> delta : patch.getDeltas()) {
            logHeader(delta);
            log(delta);
        }
    }

    /**
     * Logs the {@code @@ -position,size +position,size @@} header of a delta,
     * using the positions and sizes of its original and revised chunks.
     */
    private static <T> void logHeader(Delta<T> delta) {
        Chunk<T> original = delta.getOriginal();
        Chunk<T> revised = delta.getRevised();
        log("@@ -%d,%d +%d,%d @@\n", original.getPosition(), original.size(), revised.getPosition(),
                revised.size());
    }

    /**
     * Logs the original elements of a delta prefixed with {@code -} and the
     * revised elements prefixed with {@code +}. Multi-line string forms get the
     * prefix repeated on every line.
     */
    private static <T> void log(Delta<T> delta) {
        Chunk<T> original = delta.getOriginal();
        Chunk<T> revised = delta.getRevised();
        for (T line : original.getLines()) {
            log("-%s\n", line.toString().replaceAll("\n", "\n-"));
        }
        for (T line : revised.getLines()) {
            log("+%s\n", line.toString().replaceAll("\n", "\n+"));
        }
    }

    /**
     * Creates a new JCas and fills it by deserializing the given XMI file.
     *
     * @param xmiFile
     *            the XMI file to read
     * @return the populated JCas
     * @throws Exception
     *             if the JCas cannot be created or the file cannot be read or
     *             deserialized
     */
    private static JCas readXMI(File xmiFile) throws Exception {
        JCas jCas = JCasFactory.createJCas();
        FileInputStream inputStream = new FileInputStream(xmiFile);
        try {
            XmiCasDeserializer.deserialize(inputStream, jCas.getCas());
        } finally {
            inputStream.close();
        }
        return jCas;
    }

    /**
     * Collects the names of all views of a JCas, in view-iterator order.
     *
     * @throws CASException
     *             if the views cannot be iterated
     */
    private static List<String> getViewNames(JCas jCas) throws CASException {
        List<String> viewNames = Lists.newArrayList();
        Iterator<JCas> viewIter = jCas.getViewIterator();
        while (viewIter.hasNext()) {
            viewNames.add(viewIter.next().getViewName());
        }
        return viewNames;
    }

    /**
     * Gathers every indexed feature structure of the given classes from a view
     * and returns them sorted by {@link #BY_TYPE_AND_OFFSETS}, so that the two
     * sides of a comparison are listed in a comparable order.
     */
    private static List<FeatureStructure> toFeatureStructures(JCas jCas, List<Class<?>> annotationClasses) {
        List<FeatureStructure> fsList = Lists.newArrayList();
        for (Class<?> annotationClass : annotationClasses) {
            Type type = JCasUtil.getType(jCas, annotationClass);
            Iterators.addAll(fsList, jCas.getFSIndexRepository().getAllIndexedFS(type));
        }
        return BY_TYPE_AND_OFFSETS.sortedCopy(fsList);
    }

    /**
     * Orders feature structures by type name first and then, lexicographically,
     * by the sorted list of begin/end offsets of the annotations found by
     * walking the structure.
     */
    private static final Ordering<FeatureStructure> BY_TYPE_AND_OFFSETS = Ordering.natural()
            .<Comparable<?>>lexicographical().onResultOf(new Function<FeatureStructure, Iterable<Comparable<?>>>() {
                @Override
                public Iterable<Comparable<?>> apply(@Nullable FeatureStructure input) {
                    List<Integer> offsets = Lists.newArrayList();
                    this.findOffsets(input, offsets);
                    // Sort key: [type name, offset, offset, ...]
                    List<Comparable<?>> result = Lists.<Comparable<?>>newArrayList(input.getType().getName());
                    result.addAll(Ordering.natural().sortedCopy(offsets));
                    return result;
                }

                /**
                 * Appends the begin and end of every annotation reachable from
                 * the input. Annotations end the walk; arrays, lists and other
                 * feature structures are descended into.
                 * NOTE(review): there is no guard against reference cycles
                 * among non-Annotation feature structures - confirm the type
                 * system cannot produce them.
                 */
                private void findOffsets(FeatureStructure input, List<Integer> offsets) {
                    if (input != null) {
                        if (input instanceof Annotation) {
                            Annotation annotation = (Annotation) input;
                            offsets.add(annotation.getBegin());
                            offsets.add(annotation.getEnd());
                        } else if (input instanceof FSArray) {
                            FSArray fsArray = (FSArray) input;
                            for (int i = 0; i < fsArray.size(); ++i) {
                                this.findOffsets(fsArray.get(i), offsets);
                            }
                        } else if (input instanceof NonEmptyFSList) {
                            NonEmptyFSList fsList = (NonEmptyFSList) input;
                            this.findOffsets(fsList.getHead(), offsets);
                            this.findOffsets(fsList.getTail(), offsets);
                        } else {
                            for (Feature feature : input.getType().getFeatures()) {
                                if (!feature.getRange().isPrimitive()) {
                                    this.findOffsets(input.getFeatureValue(feature), offsets);
                                }
                            }
                        }
                    }
                }
            });

    /**
     * Converts feature structures to text lines by splitting the string form of
     * each one on newlines and concatenating the results in order.
     *
     * @param fsList
     *            the feature structures to render
     * @return all lines of all string forms, in list order
     */
    public static List<String> toLines(List<FeatureStructure> fsList) {
        List<String> lines = Lists.newArrayList();
        for (FeatureStructure fs : fsList) {
            for (String line : fs.toString().split("\n")) {
                lines.add(line);
            }
        }
        return lines;
    }

    /**
     * Decides whether two feature structures are equal by comparing their type
     * names and, recursively, the values of their features. The
     * {@code uima.cas.AnnotationBase:sofa} feature is ignored.
     */
    static class FeatureStructureEqualizer implements Equalizer<FeatureStructure> {

        @Override
        public boolean equals(FeatureStructure original, FeatureStructure revised) {
            return this.equals(original, revised, Lists.<FeatureStructure>newArrayList());
        }

        /**
         * Recursive comparison.
         *
         * @param seen
         *            feature structures already visited during this comparison;
         *            a pair in which either side was visited before is treated
         *            as equal, which stops the recursion on reference cycles
         */
        private boolean equals(FeatureStructure original, FeatureStructure revised, List<FeatureStructure> seen) {
            // Only the features of the original's type are walked below, so
            // structures of different types must be rejected up front; otherwise
            // features declared only on the revised type would never be examined.
            // Names are compared because the two sides come from different CASes.
            if (!original.getType().getName().equals(revised.getType().getName())) {
                return false;
            }
            if (!seen.contains(original) && !seen.contains(revised)) {
                seen.add(original);
                seen.add(revised);
                for (Feature feature : original.getType().getFeatures()) {
                    if (feature.getName().equals("uima.cas.AnnotationBase:sofa")) {
                        continue;
                    }
                    if (feature.getRange().isPrimitive()) {
                        String originalValue = original.getFeatureValueAsString(feature);
                        String revisedValue = revised.getFeatureValueAsString(feature);
                        if (!Objects.equal(originalValue, revisedValue)) {
                            return false;
                        }
                    } else {
                        FeatureStructure originalValue = original.getFeatureValue(feature);
                        FeatureStructure revisedValue = revised.getFeatureValue(feature);
                        if (originalValue == null || revisedValue == null
                                || !originalValue.getType().getName().equals(revisedValue.getType().getName())) {
                            // A null or differently-typed value: fall back to plain
                            // equality (true only for two nulls or equal objects).
                            if (!Objects.equal(originalValue, revisedValue)) {
                                return false;
                            }
                        } else {
                            if (!this.equals(originalValue, revisedValue, seen)) {
                                return false;
                            }
                        }
                    }
                }
            }
            return true;
        }
    }
}