uk.ac.kcl.service.GateService.java Source code

Java tutorial

Introduction

Here is the source code for uk.ac.kcl.service.GateService.java

Source

/* 
 * Copyright 2016 King's College London, Richard Jackson <richgjackson@gmail.com>.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package uk.ac.kcl.service;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import gate.*;
import gate.creole.ExecutionException;
import gate.util.GateException;
import gate.util.persistence.PersistenceManager;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Profile;
import org.springframework.core.env.Environment;
import org.springframework.stereotype.Service;
import uk.ac.kcl.exception.DeIdentificationFailedException;

import javax.annotation.PostConstruct;
import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.LinkedBlockingQueue;

/**
 * Spring service that runs GATE applications over documents using pools of
 * pre-loaded pipelines.
 *
 * <p>On start-up ({@link #init()}) GATE is initialised if necessary and one pool of
 * {@link CorpusController} instances is built per active profile: the generic
 * application ({@code gateApp}) for the {@code gate} profile and the
 * de-identification application ({@code deIdApp}) for the {@code deid} profile.
 * Callers borrow a controller from the relevant pool, run it over a single
 * document and hand it back.
 *
 * <p>Configuration properties read from the Spring {@link Environment}:
 * {@code gateHome}, {@code poolSize}, {@code gateApp}, {@code deIdApp},
 * {@code gateAnnotationSets} and {@code gateAnnotationTypes}.
 *
 * @author rich
 */
@Service("gateService")
@Profile({ "gate" })
public class GateService {

    private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(GateService.class);

    /**
     * Shared JSON mapper, reused for every conversion instead of building a new
     * one per document. Jackson documents fully configured mappers as thread-safe.
     */
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    /** Pool of controllers for the generic GATE application. */
    private LinkedBlockingQueue<CorpusController> genericQueue;
    /** Number of controllers held in each pool; always at least 1 after {@link #init()}. */
    private int poolSize;
    /** Annotation set names from {@code gateAnnotationSets}; empty when none are configured. */
    private Collection<String> annotationSets;
    /** Annotation type names from {@code gateAnnotationTypes}; empty means "all types". */
    private Collection<String> annotationTypes;

    /** Pool of controllers for the de-identification application; null unless profile {@code deid} is active. */
    private LinkedBlockingQueue<CorpusController> deIdQueue;

    @Autowired
    private Environment env;

    private GateService() {
    }

    /**
     * Reads the pool size, initialises GATE if no other context has done so yet,
     * and loads the configured applications.
     *
     * @throws GateException            if GATE cannot be initialised or an application cannot be loaded
     * @throws IOException              if an application file cannot be read
     * @throws IllegalStateException    if a required property is missing
     * @throws IllegalArgumentException if {@code poolSize} is smaller than 1
     * @throws NumberFormatException    if {@code poolSize} is not an integer
     */
    @PostConstruct
    public void init() throws GateException, IOException {
        poolSize = Integer.parseInt(env.getRequiredProperty("poolSize").trim());
        if (poolSize < 1) {
            // A pool always contains the originally loaded pipeline, so sizes below 1 cannot be honoured.
            throw new IllegalArgumentException("poolSize must be at least 1 but was " + poolSize);
        }
        //in case called by other contexts
        if (!Gate.isInitialised()) {
            // gateHome is only needed when this service is the one initialising GATE.
            Gate.setGateHome(new File(env.getRequiredProperty("gateHome")));
            Gate.runInSandbox(true);
            Gate.init();
        }

        loadresources();
    }

    /**
     * Deletes every GATE resource currently registered and rebuilds the
     * controller pools for the active profiles.
     *
     * <p>NOTE(review): this also deletes resources that other threads may still be
     * using (documents and borrowed controllers); callers invoke it as a
     * last-resort recovery step.
     *
     * @throws GateException if an application cannot be loaded or duplicated
     * @throws IOException   if an application file cannot be read
     */
    private void loadresources() throws GateException, IOException {
        List<String> activeProfiles = Arrays.asList(env.getActiveProfiles());
        //if called assume resources are either new/damaged/stale etc and delete all
        Gate.getCreoleRegister().getAllInstances("gate.Resource").forEach(Factory::deleteResource);

        if (activeProfiles.contains("gate")) {
            annotationSets = readListProperty("gateAnnotationSets");
            if (annotationSets.isEmpty()) {
                LOG.info("No annotation sets listed for extraction. Using default set");
            }
            annotationTypes = readListProperty("gateAnnotationTypes");
            if (annotationTypes.isEmpty()) {
                LOG.info("No annotation types listed for extraction. Extracting all types");
            }
            genericQueue = buildPool(new File(env.getRequiredProperty("gateApp")));
        }

        if (activeProfiles.contains("deid")) {
            deIdQueue = buildPool(new File(env.getRequiredProperty("deIdApp")));
        }
    }

    /**
     * Reads a comma-separated property into a list, trimming each entry and
     * dropping blank ones.
     *
     * @param key the property name
     * @return the configured values; empty (never null) when the property is absent or blank
     */
    private Collection<String> readListProperty(String key) {
        List<String> values = new ArrayList<>();
        String raw = env.getProperty(key);
        if (raw != null) {
            for (String token : raw.split(",")) {
                String value = token.trim();
                if (!value.isEmpty()) {
                    values.add(value);
                }
            }
        }
        return values;
    }

    /**
     * Loads a saved GATE application and fills a pool with it plus duplicates
     * until the pool holds {@code poolSize} controllers.
     *
     * @param appFile the saved application file
     * @return a fully populated pool
     * @throws GateException if the application cannot be loaded or duplicated
     * @throws IOException   if the file cannot be read
     */
    private LinkedBlockingQueue<CorpusController> buildPool(File appFile) throws GateException, IOException {
        LinkedBlockingQueue<CorpusController> pool = new LinkedBlockingQueue<>();
        Corpus corpus = Factory.newCorpus("Corpus");
        CorpusController pipeline = (CorpusController) PersistenceManager.loadObjectFromFile(appFile);
        pipeline.setCorpus(corpus);
        pool.add(pipeline);
        // "<" rather than "!=" so a pool size at or below the current size can never loop forever.
        while (pool.size() < poolSize) {
            pool.add((CorpusController) Factory.duplicate(pipeline));
        }
        return pool;
    }

    /**
     * Runs the generic GATE application over a single document, blocking until
     * a controller is available.
     *
     * @param doc the document to process; annotations are added to it in place
     * @throws ExecutionException if the application fails, or if the calling thread is
     *                            interrupted while waiting for a controller (the interrupt
     *                            flag is restored in that case)
     */
    public void processDoc(Document doc) throws ExecutionException {
        // Keep a reference to the pool we borrow from: a concurrent reload replaces the
        // field, and a controller must go back to the queue it was taken from.
        final LinkedBlockingQueue<CorpusController> pool = genericQueue;
        final CorpusController controller;
        try {
            controller = pool.take();
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            LOG.warn("GATE app execution interrupted", ex);
            ExecutionException failure = new ExecutionException("GATE app execution interrupted");
            failure.initCause(ex);
            throw failure;
        }
        try {
            controller.getCorpus().add(doc);
            controller.execute();
        } finally {
            // Always empty the corpus and return the controller, even when execution
            // fails; otherwise every failure permanently shrinks the pool.
            try {
                controller.getCorpus().clear();
            } finally {
                // The queue is unbounded, so add() neither blocks nor fails.
                pool.add(controller);
            }
        }
    }

    /**
     * Runs the de-identification application over a piece of text and returns
     * the resulting document content.
     *
     * <p>If the application fails, all GATE resources are reloaded before the
     * failure is reported. An interruption while waiting for a controller does
     * not trigger a reload; the interrupt flag is restored instead.
     *
     * @param text                 the text to de-identify
     * @param primaryKeyFieldValue stored on the document as feature {@code primaryKeyFieldValue}
     * @return the document content after the application has run
     * @throws DeIdentificationFailedException if de-identification is not configured, is
     *                                         interrupted, or fails
     */
    public String deIdentifyString(String text, String primaryKeyFieldValue)
            throws DeIdentificationFailedException {
        final LinkedBlockingQueue<CorpusController> pool = deIdQueue;
        if (pool == null) {
            // Without this guard the resulting NullPointerException would trigger a
            // pointless (and destructive) reload of every GATE resource.
            LOG.error("GATE app execution error: no de-identification pipeline loaded (profile 'deid' inactive?)");
            throw new DeIdentificationFailedException("GATE app execution error");
        }

        Document doc = null;
        try {
            doc = Factory.newDocument(text);
            doc.getFeatures().put("primaryKeyFieldValue", primaryKeyFieldValue);

            CorpusController controller = pool.take();

            controller.getCorpus().add(doc);
            controller.execute();
            controller.getCorpus().clear();

            // The queue is unbounded, so add() neither blocks nor fails.
            pool.add(controller);

            String result = doc.getContent().toString();
            Factory.deleteResource(doc);
            return result;
        } catch (InterruptedException ex) {
            // Only take() can raise this, so no controller is held and nothing is damaged.
            Thread.currentThread().interrupt();
            LOG.warn("GATE app execution interrupted", ex);
            if (doc != null) {
                Factory.deleteResource(doc);
            }
            throw new DeIdentificationFailedException("GATE app execution error");
        } catch (Exception ex) {
            LOG.error("GATE app execution error", ex);
            try {
                loadresources();
            } catch (GateException | IOException e) {
                LOG.error("could not reload resources", e);
            }
            throw new DeIdentificationFailedException("GATE app execution error");
        }
    }

    /**
     * Serialises a processed document to GATE's JSON format and parses it back
     * into a generic object tree.
     *
     * @param doc the processed document
     * @return the parsed JSON, as produced by Jackson for target type {@code Object}
     * @throws IOException if the document cannot be serialised or the JSON cannot be parsed
     */
    public Object convertDocToJSON(gate.Document doc) throws IOException {
        Map<String, Collection<Annotation>> gateMap = new HashMap<>();

        // NOTE(review): the configured annotationSets are not consulted here; annotations
        // are always read from the document's default set. The previous code branched on
        // annotationSets.size() but both branches were identical, so behaviour is unchanged.
        addTypes(doc, gateMap);

        return JSON_MAPPER.readValue(gate.corpora.DocumentJsonUtils.toJson(doc, gateMap), Object.class);
    }

    /**
     * Fills {@code map} with the annotations to export from the document's
     * default annotation set: everything under the key {@code "default"} when no
     * types are configured, otherwise one entry per configured type.
     *
     * @param doc the document to read annotations from
     * @param map the map to populate, keyed by label
     */
    private void addTypes(Document doc, Map<String, Collection<Annotation>> map) {
        if (annotationTypes.isEmpty()) {
            map.put("default", doc.getAnnotations());
        } else {
            for (String type : annotationTypes) {
                map.put(type, doc.getAnnotations().get(type));
            }
        }
    }
}