pl.edu.pwr.litmap.textobjects.Textobject.java Source code

Java tutorial

Introduction

Here is the source code for pl.edu.pwr.litmap.textobjects.Textobject.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package pl.edu.pwr.litmap.textobjects;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;

import pl.edu.pwr.litmap.ccl.Annotation;
import pl.edu.pwr.litmap.ccl.Sentence;
import pl.edu.pwr.litmap.ccl.Tag;
import pl.edu.pwr.litmap.ccl.Token;
import pl.edu.pwr.litmap.exceptions.OverQueryLimitException;
import pl.edu.pwr.litmap.exceptions.UnrecognizedWebserviceResponseException;
import pl.edu.pwr.litmap.heuristics.LocationNameHeuristicResult;
import pl.edu.pwr.litmap.heuristics.LocationNameRecognize;
import pl.edu.pwr.litmap.objectrecognize.LocationRecognize;
import pl.edu.pwr.litmap.objectrecognize.LocationRecognizeResult;
import pl.edu.pwr.litmap.relations.SerelRelation;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.code.geocoder.model.LatLng;

/**
 *
 * @author Wojciech Gawe
 */
public class Textobject {

    /**
     * Text containing this textobject
     */
    protected final Text text;
    protected final Annotation chunk;

    private LocationRecognizeResult locationRecognizeResult = LocationRecognizeResult.EMPTY;
    private LocationGeocodeStatus locationGeocodeStatus = LocationGeocodeStatus.UNCHECKED;

    public Textobject(Text text, Annotation chunk) {
        this.text = text;
        this.chunk = chunk;
    }

    /**
     * @return Text (fragment of sentence)
     */
    public String getRawText() {
        return chunk.getRawText();
    }

    public String getRawSentenceText() {
        return chunk.getSentence().getRawText();
    }

    @JsonIgnore
    public Sentence getSentence() {
        return chunk.getSentence();
    }

    /*
     * @return Text of sentence with this texobject 
     * and textobject from relations surrounded by tag <span>
     * with appropriate classes
     */
    public String getSentenceTextWithSpan() {
        String openTagThisText = "<span class=\"litmap-textobject\">";
        String closeTagThisText = "</span>";
        String openTagRelFromText = "<span class=\"litmap-textobject-rel-from\">";
        String closeTagRelFromText = "</span>";
        String openTagRelToText = "<span class=\"litmap-textobject-rel-to\">";
        String closeTagRelToText = "</span>";
        StringBuilder sb = new StringBuilder();
        boolean tagThisOpen = false;
        boolean tagRelFromOpen = false;
        boolean tagRelToOpen = false;
        int token_index = 0;
        for (Token token : chunk.getSentence().getTokens()) {
            Annotation chunkFrom = null;
            Annotation chunkTo = null;

            if (chunk.getTokens().contains(token_index)) {
                if (!tagThisOpen) {
                    sb.append(openTagThisText);
                    tagThisOpen = true;
                }
            }
            for (SerelRelation sr : chunk.getRelationsFrom()) {
                if (sr.getFrom().getTokens().contains(token_index)) {
                    chunkFrom = sr.getFrom();
                    if (!tagRelFromOpen) {
                        sb.append(openTagRelFromText);
                        tagRelFromOpen = true;
                    }
                    break;
                }
            }
            for (SerelRelation sr : chunk.getRelationsTo()) {
                if (sr.getTo().getTokens().contains(token_index)) {
                    chunkTo = sr.getTo();
                    if (!tagRelToOpen) {
                        sb.append(openTagRelToText);
                        tagRelToOpen = true;
                    }
                    break;
                }
            }

            sb.append(token.toStringSimple());

            if (tagThisOpen && this.chunk.getEnd() == token_index) {
                sb.append(closeTagThisText);
                tagThisOpen = false;
            }
            if (tagRelFromOpen && chunkFrom.getEnd() == token_index) {
                sb.append(closeTagRelFromText);
                tagRelFromOpen = false;
            }
            if (tagRelToOpen && chunkTo.getEnd() == token_index) {
                sb.append(closeTagRelToText);
                tagRelToOpen = false;
            }

            if (!token.getNoSpaceAfter()) {
                sb.append(' ');
            }
            token_index++;
        }
        return sb.toString();
    }

    /**
     * W obiektach, ktrych nazwa skada si z wielu wyrazw czasami sam .getBase() daje bdne wyniki, np 
     * "Dawid Podsiado" -> "Dawid podsi"
     * "Syryjskiego Obserwatorium Praw Czowieka" -> "syryjski obserwatorium prawo czowiek"
     * ale
     * "Sebastian Nowak: -> "Sebastian Nowak"
     * 
     * Bdy rwnie w jednowyrazowych:
     * "[w] Opolu" -> "opole" (brak pierwszej duej litery)
     * Dlatego jeeli wykryje zmian wielkoci pierwszej litery (lub inn cech wskazujc - np. nazwa ulicy, ktej forma podstawowa zazwyczaj jest bdna)
     * na moliwo wystpienia bedu pozostawia wersje z tekstu (zamiast nieprawidowej formy podstawowej)
     * @return the baseName
     */
    public String getBaseName() {
        String baseName;
        StringBuilder sb = new StringBuilder();
        boolean possibleErrorInRecognize = false;

        if (this.getNameClass().equals(NameClass.ROAD_NAM)
                || this.getNameClass().equals(NameClass.ADDRESS_STREET_NAM)) {
            possibleErrorInRecognize = true;
        } else if (chunk.getTokens().size() > 1) {
            possibleErrorInRecognize = true;
        } else {

            for (Integer token_index : chunk.getTokens()) {
                Token token = chunk.getSentence().getTokens().get(token_index);
                Tag tag = token.getTags().get(0);

                if (Character.isUpperCase(token.getFirstValue().charAt(0))
                        && !Character.isUpperCase(tag.getBase().charAt(0))) {
                    possibleErrorInRecognize = true;
                    break;
                }

                sb.append(tag.getBase());
                if (!token.getNoSpaceAfter()) {
                    sb.append(' ');
                }
            }
        }

        baseName = possibleErrorInRecognize ? getRawText() : sb.toString().trim();

        return baseName;
    }

    public String getForceBaseName() {
        StringBuilder sb = new StringBuilder();
        for (Integer token_index : chunk.getTokens()) {
            Token token = chunk.getSentence().getTokens().get(token_index);
            Tag tag = token.getTags().get(0);
            sb.append(tag.getBase());
            if (!token.getNoSpaceAfter()) {
                sb.append(' ');
            }
        }
        return sb.toString().trim();
    }

    /**
     * @return the nameClass
     */
    public NameClass getNameClass() {
        NameClass result = null;
        try {
            result = NameClass.valueOf(chunk.getType().toUpperCase());
        } catch (IllegalArgumentException e) {
            System.out.println("Class " + NameClass.class + " does not contain \"" + chunk.getType().toUpperCase()
                    + "\". Textobject raw-text = \"" + this.getRawText() + "\".");
            throw e;
        }
        return result;
    }

    /**
     * 
     * @return location lat and lng text | empty String when cannot get location
     * @throws IOException 
     * @throws OverQueryLimitException 
     */
    public String getLocationText() {
        String locationText = "";
        if (getLocation() != null) {
            locationText = getLocation().toUrlValue(6);
        }
        return locationText;
    }

    public double getLocationLat() throws OverQueryLimitException, IOException {
        double locationLat = 0.0;
        if (getLocation() != null) {
            String[] arr = getLocationText().split(",");
            if (arr.length == 2)
                locationLat = Double.parseDouble(arr[0]);
        }
        return locationLat;
    }

    public double getLocationLng() throws OverQueryLimitException, IOException {
        double locationLat = 0.0;
        if (getLocation() != null) {
            String[] arr = getLocationText().split(",");
            if (arr.length == 2)
                locationLat = Double.parseDouble(arr[1]);
        }
        return locationLat;
    }

    public LocationRecognizeResult getLocationRecognizeResult() {
        if (locationGeocodeStatus.equals(LocationGeocodeStatus.OVER_QUERY_LIMIT)) { // if over req/sec
            locationGeocodeStatus = LocationGeocodeStatus.UNCHECKED;
        }
        if (locationGeocodeStatus.equals(LocationGeocodeStatus.UNCHECKED)) {
            try {
                LocationNameHeuristicResult locationNameHeuristicResult = LocationNameRecognize.getInstance()
                        .doHeuristic(this);
                locationRecognizeResult = LocationRecognize.process(locationNameHeuristicResult);
                if (locationRecognizeResult.getList().isEmpty()) {
                    if (locationRecognizeResult.getSearchString().isEmpty()) {
                        locationGeocodeStatus = LocationGeocodeStatus.EMPTY_INPUT;
                    } else {
                        locationGeocodeStatus = LocationGeocodeStatus.ZERO_RESULTS;
                    }
                } else {
                    locationGeocodeStatus = LocationGeocodeStatus.SUCCESS;
                }
            } catch (OverQueryLimitException e) {
                locationGeocodeStatus = LocationGeocodeStatus.OVER_QUERY_LIMIT;
            } catch (UnrecognizedWebserviceResponseException e) {
                locationGeocodeStatus = LocationGeocodeStatus.WEBSERVICE_UNKNOWN_ERROR;
            } catch (IOException e) {
                locationGeocodeStatus = LocationGeocodeStatus.IO_EXCEPTION;
            }
        }
        if (locationRecognizeResult == null) {
            locationRecognizeResult = LocationRecognizeResult.EMPTY;
        }
        return locationRecognizeResult;
    }

    /**
     * 
     * @return LatLng | null when cannot get location
     */
    public LatLng getLocation() {
        return getLocationRecognizeResult().getList().isEmpty() ? null
                : locationRecognizeResult.getList().get(0).getGeometry().getLocation();
    }

    public boolean hasChunk(Annotation chunk) {
        return chunk == this.chunk;
    }

    @JsonIgnore
    public Text getTextContainingThisTextobject() {
        return this.text;
    }

    @JsonIgnore
    public ArrayList<Textobject> getRelationFromTextobjects() {
        ArrayList<Textobject> relFromTextobjects = new ArrayList<>();
        for (SerelRelation sr : this.chunk.getRelationsFrom()) {
            relFromTextobjects.add(text.getTextobjectByChunk(sr.getFrom()));
        }
        return relFromTextobjects;
    }

    @JsonIgnore
    public ArrayList<Textobject> getRelationToTextobjects() {
        ArrayList<Textobject> relToTextobjects = new ArrayList<>();
        for (SerelRelation sr : this.chunk.getRelationsTo()) {
            relToTextobjects.add(text.getTextobjectByChunk(sr.getTo()));
        }
        return relToTextobjects;
    }

    public LocationGeocodeStatus getLocationGeocodeStatus() {
        return this.locationGeocodeStatus;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("baseName: ");
        sb.append(getBaseName());
        sb.append("; nameClass: ");
        sb.append(getNameClass().toString());
        sb.append("; text: ");
        sb.append(getRawText());
        sb.append("; locationText: ");
        sb.append(getLocationText());

        return sb.toString();
    }

}