pl.edu.icm.cermine.metadata.model.DocumentAffiliation.java Source code

Java tutorial

Introduction

Here is the source code for pl.edu.icm.cermine.metadata.model.DocumentAffiliation.java

Source

/**
 * This file is part of CERMINE project.
 * Copyright (c) 2011-2013 ICM-UW
 *
 * CERMINE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CERMINE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with CERMINE. If not, see <http://www.gnu.org/licenses/>.
 */

package pl.edu.icm.cermine.metadata.model;

import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import pl.edu.icm.cermine.content.cleaning.ContentCleaner;
import pl.edu.icm.cermine.metadata.tools.MetadataTools;
import pl.edu.icm.cermine.parsing.model.ParsableString;
import pl.edu.icm.cermine.parsing.model.Token;

/**
 * Represents a document affiliation as a parsable string.
 *
 * @author Dominika Tkaczyk
 * @author Bartosz Tarnawski
 */
public class DocumentAffiliation implements ParsableString<Token<AffiliationLabel>> {

    private String id;

    private String index;

    private String rawText;

    private List<Token<AffiliationLabel>> tokens;

    public DocumentAffiliation(String rawText) {
        this("id", rawText);
    }

    public DocumentAffiliation(String id, String rawText) {
        this(id, null, rawText);
    }

    public DocumentAffiliation(String id, String index, String rawText) {
        this.id = id;
        this.index = ContentCleaner.clean(index);
        if (rawText.matches(".+ \\([^\\(\\)]*\\)$")) {
            rawText = StringUtils.reverse(rawText).replaceFirst("\\)", "").replaceFirst("\\( ", " ,");
            rawText = StringUtils.reverse(rawText);
        }
        this.rawText = MetadataTools.cleanAndNormalize(rawText);
        this.tokens = new ArrayList<Token<AffiliationLabel>>();
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getIndex() {
        return index;
    }

    public void setIndex(String index) {
        this.index = index;
    }

    @Override
    public String getRawText() {
        return rawText;
    }

    public void setRawText(String rawText) {
        this.rawText = rawText;
    }

    @Override
    public List<Token<AffiliationLabel>> getTokens() {
        return tokens;
    }

    @Override
    public void setTokens(List<Token<AffiliationLabel>> tokens) {
        this.tokens = tokens;
    }

    @Override
    public void addToken(Token<AffiliationLabel> token) {
        this.tokens.add(token);
    }

    public void mergeTokens() {
        if (tokens == null || tokens.isEmpty()) {
            return;
        }
        Token<AffiliationLabel> actToken = null;
        List<Token<AffiliationLabel>> newTokens = new ArrayList<Token<AffiliationLabel>>();
        for (Token<AffiliationLabel> token : tokens) {
            if (actToken == null) {
                actToken = new Token<AffiliationLabel>(token.getText(), token.getStartIndex(), token.getEndIndex(),
                        token.getLabel());

            } else if (actToken.getLabel().equals(token.getLabel())) {
                actToken.setEndIndex(token.getEndIndex());
            } else {
                newTokens.add(actToken);
                actToken = new Token<AffiliationLabel>(token.getText(), token.getStartIndex(), token.getEndIndex(),
                        token.getLabel());
            }
        }
        newTokens.add(actToken);
        for (Token<AffiliationLabel> token : newTokens) {
            int i = newTokens.indexOf(token);
            if (i + 1 == newTokens.size()) {
                token.setEndIndex(rawText.length());
            } else {
                token.setEndIndex(newTokens.get(i + 1).getStartIndex());
            }
            token.setText(rawText.substring(token.getStartIndex(), token.getEndIndex()));
        }
        tokens = newTokens;
    }

    @Override
    public void appendText(String text) {
        this.rawText += text;
    }

    @Override
    public void clean() {
        index = ContentCleaner.clean(index);
        rawText = ContentCleaner.cleanAllAndBreaks(rawText);
    }

    public String getOrganization() {
        for (Token<AffiliationLabel> token : tokens) {
            if (token.getLabel().equals(AffiliationLabel.INST)) {
                return Normalizer.normalize(token.getText(), Normalizer.Form.NFC);
            }
        }
        return null;
    }

    public String getCountry() {
        for (Token<AffiliationLabel> token : tokens) {
            if (token.getLabel().equals(AffiliationLabel.COUN)) {
                return token.getText().replaceAll("[^ a-zA-Z]$", "");
            }
        }
        return null;
    }

}