eu.project.ttc.readers.TermSuiteJsonCasSerializer.java Source code

Java tutorial

Introduction

Here is the source code for eu.project.ttc.readers.TermSuiteJsonCasSerializer.java

Source

/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/

package eu.project.ttc.readers;

import static eu.project.ttc.readers.JsonCasConstants.F_BEGIN;
import static eu.project.ttc.readers.JsonCasConstants.F_CASE;
import static eu.project.ttc.readers.JsonCasConstants.F_CATEGORY;
import static eu.project.ttc.readers.JsonCasConstants.F_CORPUS_SIZE;
import static eu.project.ttc.readers.JsonCasConstants.F_CUMULATED_DOCUMENT_SIZE;
import static eu.project.ttc.readers.JsonCasConstants.F_DEGREE;
import static eu.project.ttc.readers.JsonCasConstants.F_DOCUMENT_INDEX;
import static eu.project.ttc.readers.JsonCasConstants.F_DOCUMENT_SIZE;
import static eu.project.ttc.readers.JsonCasConstants.F_END;
import static eu.project.ttc.readers.JsonCasConstants.F_FIXED_EXPRESSIONS;
import static eu.project.ttc.readers.JsonCasConstants.F_FORMATION;
import static eu.project.ttc.readers.JsonCasConstants.F_GENDER;
import static eu.project.ttc.readers.JsonCasConstants.F_LABELS;
import static eu.project.ttc.readers.JsonCasConstants.F_LAST_SEGMENT;
import static eu.project.ttc.readers.JsonCasConstants.F_LEMMA;
import static eu.project.ttc.readers.JsonCasConstants.F_MOOD;
import static eu.project.ttc.readers.JsonCasConstants.F_NB_DOCUMENTS;
import static eu.project.ttc.readers.JsonCasConstants.F_NUMBER;
import static eu.project.ttc.readers.JsonCasConstants.F_OFFSET_IN_SOURCE;
import static eu.project.ttc.readers.JsonCasConstants.F_PATTERN;
import static eu.project.ttc.readers.JsonCasConstants.F_PERSON;
import static eu.project.ttc.readers.JsonCasConstants.F_REGEX_LABEL;
import static eu.project.ttc.readers.JsonCasConstants.F_SDI;
import static eu.project.ttc.readers.JsonCasConstants.F_SPOTTING_RULE_NAME;
import static eu.project.ttc.readers.JsonCasConstants.F_STEM;
import static eu.project.ttc.readers.JsonCasConstants.F_SUB_CATEGORY;
import static eu.project.ttc.readers.JsonCasConstants.F_TAG;
import static eu.project.ttc.readers.JsonCasConstants.F_TENSE;
import static eu.project.ttc.readers.JsonCasConstants.F_TERM_KEY;
import static eu.project.ttc.readers.JsonCasConstants.F_TERM_OCC_ANNOTATIONS;
import static eu.project.ttc.readers.JsonCasConstants.F_TEXT;
import static eu.project.ttc.readers.JsonCasConstants.F_URI;
import static eu.project.ttc.readers.JsonCasConstants.F_WORDS;
import static eu.project.ttc.readers.JsonCasConstants.F_WORD_ANNOTATIONS;

import java.io.IOException;
import java.io.Writer;
import java.util.NoSuchElementException;

import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.tcas.Annotation;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.google.common.base.Joiner;

import eu.project.ttc.types.FixedExpression;
import eu.project.ttc.types.SourceDocumentInformation;
import eu.project.ttc.types.TermOccAnnotation;
import eu.project.ttc.types.WordAnnotation;

/**
 * Created by smeoni on 27/05/16.
 */
public class TermSuiteJsonCasSerializer {

    public static void serialize(Writer writer, JCas jCas) throws IOException {

        JsonFactory jsonFactory = new JsonFactory();
        JsonGenerator jg = jsonFactory.createGenerator(writer);
        jg.useDefaultPrettyPrinter();
        jg.writeStartObject();
        jg.writeFieldName(F_SDI);
        writeSDI(jg, jCas);
        jg.writeFieldName(F_WORD_ANNOTATIONS);
        writeWordAnnotations(jg, jCas);
        jg.writeFieldName(F_TERM_OCC_ANNOTATIONS);
        writeTermOccAnnotations(jg, jCas);
        jg.writeFieldName(F_FIXED_EXPRESSIONS);
        writeFixedExpressions(jg, jCas);
        writeCoveredText(jg, jCas);
        jg.writeEndObject();
        jg.flush();
        writer.close();
    }

    private static void writeCoveredText(JsonGenerator jg, JCas jCas) throws IOException {
        String text = jCas.getDocumentText();
        writeStringField(jg, F_TEXT, text);
    }

    private static void writeSDI(JsonGenerator jg, JCas jCas) throws IOException {
        SourceDocumentInformation sdi = (SourceDocumentInformation) jCas
                .getAnnotationIndex(SourceDocumentInformation.type).iterator().next();
        jg.writeStartObject();
        writeStringField(jg, F_URI, sdi.getUri());
        writeIntField(jg, F_OFFSET_IN_SOURCE, sdi.getOffsetInSource());
        writeIntField(jg, F_DOCUMENT_INDEX, sdi.getDocumentIndex());
        writeIntField(jg, F_NB_DOCUMENTS, sdi.getNbDocuments());
        writeIntField(jg, F_DOCUMENT_SIZE, sdi.getDocumentSize());
        writeLongField(jg, F_CUMULATED_DOCUMENT_SIZE, sdi.getCumulatedDocumentSize());
        writeLongField(jg, F_CORPUS_SIZE, sdi.getCorpusSize());
        writeBooleanField(jg, F_LAST_SEGMENT, sdi.getLastSegment());
        writeOffsets(jg, sdi);
        jg.writeEndObject();
    }

    private static void writeWordAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
        jg.writeStartArray();
        FSIterator<Annotation> it = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
        while (it.hasNext()) {
            WordAnnotation wa = (WordAnnotation) it.next();
            jg.writeStartObject();
            writeStringField(jg, F_CATEGORY, wa.getCategory());
            writeStringField(jg, F_LEMMA, wa.getLemma());
            writeStringField(jg, F_STEM, wa.getStem());
            writeStringField(jg, F_TAG, wa.getTag());
            writeStringField(jg, F_SUB_CATEGORY, wa.getSubCategory());
            writeStringField(jg, F_REGEX_LABEL, wa.getRegexLabel());
            writeStringField(jg, F_NUMBER, wa.getNumber());
            writeStringField(jg, F_GENDER, wa.getGender());
            writeStringField(jg, F_CASE, wa.getCase());
            writeStringField(jg, F_MOOD, wa.getMood());
            writeStringField(jg, F_TENSE, wa.getTense());
            writeStringField(jg, F_PERSON, wa.getPerson());
            writeStringField(jg, F_DEGREE, wa.getDegree());
            writeStringField(jg, F_FORMATION, wa.getFormation());
            writeStringField(jg, F_LABELS, wa.getLabels());
            writeOffsets(jg, wa);
            jg.writeEndObject();
        }
        jg.writeEndArray();
    }

    private static void writeOffsets(JsonGenerator jg, Annotation a) throws IOException {
        writeIntField(jg, F_BEGIN, a.getBegin());
        writeIntField(jg, F_END, a.getEnd());
    }

    private static void writeTermOccAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
        jg.writeStartArray();
        FSIterator<Annotation> it = jCas.getAnnotationIndex(TermOccAnnotation.type).iterator();
        while (it.hasNext()) {
            TermOccAnnotation toa = (TermOccAnnotation) it.next();
            jg.writeStartObject();
            writeStringFSArrayField(jg, F_PATTERN, toa.getPattern());
            writeStringField(jg, F_SPOTTING_RULE_NAME, toa.getSpottingRuleName());
            writeStringField(jg, F_TERM_KEY, toa.getTermKey());
            writeIntFSArrayField(jg, F_WORDS, toa.getWords());
            writeOffsets(jg, toa);
            jg.writeEndObject();
        }
        jg.writeEndArray();
    }

    private static void writeFixedExpressions(JsonGenerator jg, JCas jCas) throws IOException {
        jg.writeStartArray();
        FSIterator<Annotation> it = jCas.getAnnotationIndex(FixedExpression.type).iterator();
        while (it.hasNext()) {
            FixedExpression fe = (FixedExpression) it.next();
            jg.writeStartObject();
            writeOffsets(jg, fe);
            jg.writeEndObject();
        }
        jg.writeEndArray();
    }

    private static void writeIntFSArrayField(JsonGenerator jg, String fieldName, FSArray words) throws IOException {
        if (words == null)
            return;
        jg.writeArrayFieldStart(fieldName);

        for (int i = 0; i < words.size(); i++) {
            WordAnnotation wa = (WordAnnotation) words.get(i);
            jg.writeStartArray();
            jg.writeNumber(wa.getBegin());
            jg.writeNumber(wa.getEnd());
            jg.writeEndArray();
        }
        jg.writeEndArray();
    }

    private static void writeIntField(JsonGenerator jg, String fieldName, Integer value) throws IOException {
        writeLongField(jg, fieldName, value.longValue());
    }

    private static void writeLongField(JsonGenerator jg, String fieldName, Long value) throws IOException {
        if (value == null)
            return;
        jg.writeNumberField(fieldName, value);
    }

    private static void writeStringFSArrayField(JsonGenerator jg, String fieldName, StringArray value)
            throws IOException {
        if (value == null)
            return;
        jg.writeStringField(fieldName, Joiner.on(" ").join(value.toArray()));
    }

    private static void writeBooleanField(JsonGenerator jg, String fieldName, Boolean value) throws IOException {
        if (value == null)
            return;
        jg.writeBooleanField(fieldName, value);
    }

    private static void writeStringField(JsonGenerator jg, String fieldName, String value) throws IOException {
        if (value == null)
            return;
        jg.writeStringField(fieldName, value);
    }

}