Java tutorial
/******************************************************************************* * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique) * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *******************************************************************************/ package eu.project.ttc.readers; import static eu.project.ttc.readers.JsonCasConstants.F_BEGIN; import static eu.project.ttc.readers.JsonCasConstants.F_CASE; import static eu.project.ttc.readers.JsonCasConstants.F_CATEGORY; import static eu.project.ttc.readers.JsonCasConstants.F_CORPUS_SIZE; import static eu.project.ttc.readers.JsonCasConstants.F_CUMULATED_DOCUMENT_SIZE; import static eu.project.ttc.readers.JsonCasConstants.F_DEGREE; import static eu.project.ttc.readers.JsonCasConstants.F_DOCUMENT_INDEX; import static eu.project.ttc.readers.JsonCasConstants.F_DOCUMENT_SIZE; import static eu.project.ttc.readers.JsonCasConstants.F_END; import static eu.project.ttc.readers.JsonCasConstants.F_FIXED_EXPRESSIONS; import static eu.project.ttc.readers.JsonCasConstants.F_FORMATION; import static eu.project.ttc.readers.JsonCasConstants.F_GENDER; import static eu.project.ttc.readers.JsonCasConstants.F_LABELS; import static eu.project.ttc.readers.JsonCasConstants.F_LAST_SEGMENT; 
import static eu.project.ttc.readers.JsonCasConstants.F_LEMMA; import static eu.project.ttc.readers.JsonCasConstants.F_MOOD; import static eu.project.ttc.readers.JsonCasConstants.F_NB_DOCUMENTS; import static eu.project.ttc.readers.JsonCasConstants.F_NUMBER; import static eu.project.ttc.readers.JsonCasConstants.F_OFFSET_IN_SOURCE; import static eu.project.ttc.readers.JsonCasConstants.F_PATTERN; import static eu.project.ttc.readers.JsonCasConstants.F_PERSON; import static eu.project.ttc.readers.JsonCasConstants.F_REGEX_LABEL; import static eu.project.ttc.readers.JsonCasConstants.F_SDI; import static eu.project.ttc.readers.JsonCasConstants.F_SPOTTING_RULE_NAME; import static eu.project.ttc.readers.JsonCasConstants.F_STEM; import static eu.project.ttc.readers.JsonCasConstants.F_SUB_CATEGORY; import static eu.project.ttc.readers.JsonCasConstants.F_TAG; import static eu.project.ttc.readers.JsonCasConstants.F_TENSE; import static eu.project.ttc.readers.JsonCasConstants.F_TERM_KEY; import static eu.project.ttc.readers.JsonCasConstants.F_TERM_OCC_ANNOTATIONS; import static eu.project.ttc.readers.JsonCasConstants.F_TEXT; import static eu.project.ttc.readers.JsonCasConstants.F_URI; import static eu.project.ttc.readers.JsonCasConstants.F_WORDS; import static eu.project.ttc.readers.JsonCasConstants.F_WORD_ANNOTATIONS; import java.io.IOException; import java.io.Writer; import org.apache.uima.cas.FSIterator; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.FSArray; import org.apache.uima.jcas.cas.StringArray; import org.apache.uima.jcas.tcas.Annotation; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.google.common.base.Joiner; import eu.project.ttc.types.FixedExpression; import eu.project.ttc.types.SourceDocumentInformation; import eu.project.ttc.types.TermOccAnnotation; import eu.project.ttc.types.WordAnnotation; /** * Created by smeoni on 27/05/16. 
*/
/**
 * Serializes the content of a {@link JCas} as a single pretty-printed JSON object.
 *
 * <p>The emitted object contains, in this order: the source document information,
 * the word annotations, the term occurrence annotations, the fixed expressions and
 * the document text. Field names come from {@code JsonCasConstants}.
 */
public class TermSuiteJsonCasSerializer {

    /**
     * Writes the given CAS as JSON to {@code writer}, then closes the writer.
     *
     * <p>The writer is closed whether serialization succeeds or fails, so that a
     * failure in the middle of the document does not leak the underlying resource.
     *
     * @param writer the destination of the JSON document; closed by this method
     * @param jCas   the CAS to serialize
     * @throws IOException if writing to or closing the writer fails
     */
    public static void serialize(Writer writer, JCas jCas) throws IOException {
        try {
            JsonFactory jsonFactory = new JsonFactory();
            JsonGenerator jg = jsonFactory.createGenerator(writer);
            jg.useDefaultPrettyPrinter();

            jg.writeStartObject();

            jg.writeFieldName(F_SDI);
            writeSDI(jg, jCas);

            jg.writeFieldName(F_WORD_ANNOTATIONS);
            writeWordAnnotations(jg, jCas);

            jg.writeFieldName(F_TERM_OCC_ANNOTATIONS);
            writeTermOccAnnotations(jg, jCas);

            jg.writeFieldName(F_FIXED_EXPRESSIONS);
            writeFixedExpressions(jg, jCas);

            writeCoveredText(jg, jCas);

            jg.writeEndObject();
            // Push everything buffered by the generator to the writer before closing it.
            jg.flush();
        } finally {
            writer.close();
        }
    }

    /**
     * Writes the document text of the CAS as the {@code F_TEXT} field.
     * Nothing is written when the CAS has no document text.
     */
    private static void writeCoveredText(JsonGenerator jg, JCas jCas) throws IOException {
        String text = jCas.getDocumentText();
        writeStringField(jg, F_TEXT, text);
    }

    /**
     * Writes the first {@link SourceDocumentInformation} annotation of the CAS as a
     * JSON object.
     *
     * <p>NOTE(review): this assumes the CAS holds at least one such annotation; on an
     * empty index the call to {@code next()} is expected to fail - confirm against
     * callers.
     */
    private static void writeSDI(JsonGenerator jg, JCas jCas) throws IOException {
        SourceDocumentInformation sdi = (SourceDocumentInformation) jCas
                .getAnnotationIndex(SourceDocumentInformation.type).iterator().next();

        jg.writeStartObject();
        writeStringField(jg, F_URI, sdi.getUri());
        writeIntField(jg, F_OFFSET_IN_SOURCE, sdi.getOffsetInSource());
        writeIntField(jg, F_DOCUMENT_INDEX, sdi.getDocumentIndex());
        writeIntField(jg, F_NB_DOCUMENTS, sdi.getNbDocuments());
        writeIntField(jg, F_DOCUMENT_SIZE, sdi.getDocumentSize());
        writeLongField(jg, F_CUMULATED_DOCUMENT_SIZE, sdi.getCumulatedDocumentSize());
        writeLongField(jg, F_CORPUS_SIZE, sdi.getCorpusSize());
        writeBooleanField(jg, F_LAST_SEGMENT, sdi.getLastSegment());
        writeOffsets(jg, sdi);
        jg.writeEndObject();
    }

    /**
     * Writes every {@link WordAnnotation} of the CAS as an element of a JSON array.
     * Features whose value is {@code null} are omitted from the element.
     */
    private static void writeWordAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
        jg.writeStartArray();
        FSIterator<Annotation> it = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
        while (it.hasNext()) {
            WordAnnotation wa = (WordAnnotation) it.next();
            jg.writeStartObject();
            writeStringField(jg, F_CATEGORY, wa.getCategory());
            writeStringField(jg, F_LEMMA, wa.getLemma());
            writeStringField(jg, F_STEM, wa.getStem());
            writeStringField(jg, F_TAG, wa.getTag());
            writeStringField(jg, F_SUB_CATEGORY, wa.getSubCategory());
            writeStringField(jg, F_REGEX_LABEL, wa.getRegexLabel());
            writeStringField(jg, F_NUMBER, wa.getNumber());
            writeStringField(jg, F_GENDER, wa.getGender());
            writeStringField(jg, F_CASE, wa.getCase());
            writeStringField(jg, F_MOOD, wa.getMood());
            writeStringField(jg, F_TENSE, wa.getTense());
            writeStringField(jg, F_PERSON, wa.getPerson());
            writeStringField(jg, F_DEGREE, wa.getDegree());
            writeStringField(jg, F_FORMATION, wa.getFormation());
            writeStringField(jg, F_LABELS, wa.getLabels());
            writeOffsets(jg, wa);
            jg.writeEndObject();
        }
        jg.writeEndArray();
    }

    /** Writes the begin and end offsets of an annotation as two numeric fields. */
    private static void writeOffsets(JsonGenerator jg, Annotation a) throws IOException {
        writeIntField(jg, F_BEGIN, a.getBegin());
        writeIntField(jg, F_END, a.getEnd());
    }

    /**
     * Writes every {@link TermOccAnnotation} of the CAS as an element of a JSON array.
     */
    private static void writeTermOccAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
        jg.writeStartArray();
        FSIterator<Annotation> it = jCas.getAnnotationIndex(TermOccAnnotation.type).iterator();
        while (it.hasNext()) {
            TermOccAnnotation toa = (TermOccAnnotation) it.next();
            jg.writeStartObject();
            writeStringFSArrayField(jg, F_PATTERN, toa.getPattern());
            writeStringField(jg, F_SPOTTING_RULE_NAME, toa.getSpottingRuleName());
            writeStringField(jg, F_TERM_KEY, toa.getTermKey());
            writeIntFSArrayField(jg, F_WORDS, toa.getWords());
            writeOffsets(jg, toa);
            jg.writeEndObject();
        }
        jg.writeEndArray();
    }

    /**
     * Writes every {@link FixedExpression} of the CAS as an element of a JSON array;
     * only the offsets of each expression are written.
     */
    private static void writeFixedExpressions(JsonGenerator jg, JCas jCas) throws IOException {
        jg.writeStartArray();
        FSIterator<Annotation> it = jCas.getAnnotationIndex(FixedExpression.type).iterator();
        while (it.hasNext()) {
            FixedExpression fe = (FixedExpression) it.next();
            jg.writeStartObject();
            writeOffsets(jg, fe);
            jg.writeEndObject();
        }
        jg.writeEndArray();
    }

    /**
     * Writes an array field holding one {@code [begin, end]} pair per word annotation
     * contained in {@code words}. The field is omitted when {@code words} is null.
     */
    private static void writeIntFSArrayField(JsonGenerator jg, String fieldName, FSArray words)
            throws IOException {
        if (words == null) {
            return;
        }
        jg.writeArrayFieldStart(fieldName);
        for (int i = 0; i < words.size(); i++) {
            WordAnnotation wa = (WordAnnotation) words.get(i);
            jg.writeStartArray();
            jg.writeNumber(wa.getBegin());
            jg.writeNumber(wa.getEnd());
            jg.writeEndArray();
        }
        jg.writeEndArray();
    }

    /**
     * Writes an integer field; the field is omitted when {@code value} is null,
     * consistently with the other {@code write*Field} helpers of this class.
     */
    private static void writeIntField(JsonGenerator jg, String fieldName, Integer value)
            throws IOException {
        if (value == null) {
            return;
        }
        writeLongField(jg, fieldName, value.longValue());
    }

    /** Writes a long field; the field is omitted when {@code value} is null. */
    private static void writeLongField(JsonGenerator jg, String fieldName, Long value)
            throws IOException {
        if (value == null) {
            return;
        }
        jg.writeNumberField(fieldName, value.longValue());
    }

    /**
     * Writes the elements of {@code value}, joined with single spaces, as one string
     * field. The field is omitted when {@code value} is null.
     */
    private static void writeStringFSArrayField(JsonGenerator jg, String fieldName,
            StringArray value) throws IOException {
        if (value == null) {
            return;
        }
        jg.writeStringField(fieldName, Joiner.on(" ").join(value.toArray()));
    }

    /** Writes a boolean field; the field is omitted when {@code value} is null. */
    private static void writeBooleanField(JsonGenerator jg, String fieldName, Boolean value)
            throws IOException {
        if (value == null) {
            return;
        }
        jg.writeBooleanField(fieldName, value.booleanValue());
    }

    /** Writes a string field; the field is omitted when {@code value} is null. */
    private static void writeStringField(JsonGenerator jg, String fieldName, String value)
            throws IOException {
        if (value == null) {
            return;
        }
        jg.writeStringField(fieldName, value);
    }
}