de.faustedition.genesis.lines.VerseManager.java Source code

Java tutorial

Introduction

Here is the source code for de.faustedition.genesis.lines.VerseManager.java

Source

/*
 * Copyright (c) 2014 Faust Edition development team.
 *
 * This file is part of the Faust Edition.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.faustedition.genesis.lines;

import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.*;
import de.faustedition.document.MaterialUnit;
import de.faustedition.graph.FaustGraph;
import de.faustedition.search.Normalization;
import de.faustedition.transcript.TranscriptManager;
import eu.interedition.text.*;
import eu.interedition.text.neo4j.LayerNode;
import eu.interedition.text.neo4j.Neo4jTextRepository;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.codehaus.jackson.JsonNode;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.index.IndexHits;
import org.neo4j.graphdb.index.IndexManager;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.index.lucene.ValueContext;
import org.restlet.data.Status;
import org.restlet.resource.ResourceException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.DependsOn;
import org.springframework.stereotype.Component;

import javax.annotation.Nullable;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;

import static eu.interedition.text.Query.*;

/**
 * User: moz
 * Date: 09.07.14
 * Time: 17:03
 */

@Component
@DependsOn(value = "transcriptManager")
public class VerseManager {

    private static final String INDEX_VERSE_INTERVAL = "index-verse-interval";

    private static final String INDEX_VERSE_FULLTEXT = "index-verse-text";

    private static final Pattern VERSE_NUMBER_PATTERN = Pattern.compile("[0-9]+");

    private static final Logger LOG = LoggerFactory.getLogger(VerseManager.class);

    @Autowired
    private eu.interedition.text.neo4j.Neo4jTextRepository<JsonNode> textRepository;

    @Autowired
    private FaustGraph faustGraph;

    @Autowired
    private TranscriptManager transcriptManager;

    public void register(FaustGraph faustGraph, Neo4jTextRepository<JsonNode> textRepo, MaterialUnit mu,
            LayerNode<JsonNode> transcript) {

        for (VerseInterval vi : registeredFor(transcript)) {
            ((GraphVerseInterval) vi).node.delete();
        }
        Index<Node> verseTextIndex = faustGraph.getDb().index().forNodes(INDEX_VERSE_FULLTEXT,
                MapUtil.stringMap(IndexManager.PROVIDER, "lucene", "type", "fulltext"));

        final SortedSet<Long> verses = Sets.newTreeSet();
        for (Layer<JsonNode> verse : textRepo
                .query(and(text(transcript), name(new Name(TextConstants.TEI_NS, "l"))))) {

            //final Matcher verseNumberMatcher = VERSE_NUMBER_PATTERN.matcher(Objects.firstNonNull(verse.data().path("n").getTextValue(), ""));
            long lineNum = verse.data().get("n") != null ? verse.data().get("n").asLong(-1) : -1;

            if (lineNum >= 0) {
                verses.add(lineNum);
            }

            Anchor<JsonNode> anchor = Iterables.getOnlyElement(verse.getAnchors());
            try {
                String verseText = anchor.getText().read(anchor.getRange());
                LOG.trace("Indexing verse: " + verseText);
                verseTextIndex.add(((LayerNode) verse).node, "fulltext", Normalization.normalize(verseText));

            } catch (IOException e) {
                LOG.error("Error indexing line " + lineNum);
            }
        }

        Index<Node> verseIndex = faustGraph.getDb().index().forNodes(INDEX_VERSE_INTERVAL);

        long start = -1;
        long next = -1;
        for (Iterator<Long> it = verses.iterator(); it.hasNext();) {
            final long verse = it.next();
            if (verse == next) {
                next++;
            } else if (verse > next) {
                if (start >= 0) {
                    final GraphVerseInterval vi = new GraphVerseInterval(faustGraph.getDb(), (int) start,
                            (int) next - 1);
                    vi.setTranscript(transcript);
                    verseIndex.add(vi.node, "start", ValueContext.numeric(vi.getStart()));
                    verseIndex.add(vi.node, "end", ValueContext.numeric(vi.getEnd()));

                }

                start = verse;
                next = verse + 1;
            }

            if (!it.hasNext() && start >= 0) {
                final GraphVerseInterval vi = new GraphVerseInterval(faustGraph.getDb(), (int) start, (int) verse);
                vi.setTranscript(transcript);
                verseIndex.add(vi.node, "start", ValueContext.numeric(vi.getStart()));
                verseIndex.add(vi.node, "end", ValueContext.numeric(vi.getEnd()));

            }
        }
        if (LOG.isDebugEnabled()) {
            // TODO LOG.debug("Registered verse intervals {} for {}", Iterables.toString(registeredFor(session, transcript)), transcript);
        }
    }

    public Iterable<VerseInterval> registeredFor(Layer<JsonNode> transcript) {

        Iterable<Relationship> relationshipsToVerseIntervals = ((LayerNode) transcript).node
                .getRelationships(GraphVerseInterval.VERSE_INTERVAL_IN_TRANSCRIPT_RT);
        Function<Relationship, VerseInterval> getAttachedNode = new Function<Relationship, VerseInterval>() {
            @Override
            public VerseInterval apply(@Nullable Relationship input) {
                return new GraphVerseInterval(input.getEndNode());
            }
        };
        return Iterables.transform(relationshipsToVerseIntervals, getAttachedNode);
    }

    /*
       public static Iterable<TranscribedVerseInterval> all(Session session) {
          return iterateResults(session.createCriteria(TranscribedVerseInterval.class), TranscribedVerseInterval.class);
       }
    */

    public Iterable<GraphVerseInterval> forInterval(VerseInterval verseInterval) {

        // find all intervals for which start < verseInterval.getEnd() and end > verseInterval.getStart()
        Index<Node> verseIndex = faustGraph.getDb().index().forNodes(INDEX_VERSE_INTERVAL);
        BooleanQuery andQuery = new BooleanQuery();
        NumericRangeQuery<Integer> startQuery = NumericRangeQuery.newIntRange("start", 0, verseInterval.getEnd(),
                true, true);
        NumericRangeQuery<Integer> endQuery = NumericRangeQuery.newIntRange("end", verseInterval.getStart(),
                Integer.MAX_VALUE, true, true);
        andQuery.add(startQuery, BooleanClause.Occur.MUST);
        andQuery.add(endQuery, BooleanClause.Occur.MUST);
        IndexHits<Node> resultNodes = verseIndex.query(andQuery);
        Function<Node, GraphVerseInterval> verseIntervalForNode = new Function<Node, GraphVerseInterval>() {
            @Override
            public GraphVerseInterval apply(@Nullable Node input) {
                return new GraphVerseInterval(input);
            }
        };
        return Iterables.transform(resultNodes, verseIntervalForNode);
    }

    public ImmutableListMultimap<MaterialUnit, GraphVerseInterval> indexByMaterialUnit(
            Iterable<GraphVerseInterval> verseIntervals) {

        Function materialUnitOfVerseInterval = new Function<GraphVerseInterval, MaterialUnit>() {
            @Override
            public MaterialUnit apply(@Nullable GraphVerseInterval input) {
                return transcriptManager.materialUnitForTranscript(input.getTranscript(textRepository));
            }
        };

        return Multimaps.index(verseIntervals, materialUnitOfVerseInterval);
    }

    public static SortedSet<VerseInterval> scenesOf(int part) {
        final SortedSet<VerseInterval> scenes = Sets.newTreeSet(INTERVAL_COMPARATOR);
        switch (part) {
        case 0:
            scenes.addAll(PROLOGUE_SCENES.values());
            break;
        case 1:
            scenes.addAll(FAUST_1_SCENES.values());
            break;
        case 2:
            for (SortedMap<Integer, VerseInterval> act : FAUST_2_SCENES.values()) {
                scenes.addAll(act.values());
            }
            break;
        default:
            throw new IllegalArgumentException("Part " + part);
        }
        return scenes;
    }

    public static VerseInterval fromRequestAttibutes(Map<String, Object> requestAttributes)
            throws ResourceException {
        try {
            final int part = Integer.parseInt((String) requestAttributes.get("part"));
            final int actOrScene = Integer
                    .parseInt(Objects.firstNonNull((String) requestAttributes.get("act_scene"), "0"));
            final int scene = Integer.parseInt(Objects.firstNonNull((String) requestAttributes.get("scene"), "0"));

            switch (part) {
            case 0:
            case 1:
                if (actOrScene == 0) {
                    return ofPart(part);
                }
                return ofScene(part, actOrScene);
            case 2:
                if (actOrScene == 0) {
                    return ofPart(part);
                }
                if (scene == 0) {
                    return ofAct(part, actOrScene);
                } else {
                    return ofScene(part, actOrScene, scene);
                }
            default:
                throw new IllegalArgumentException("Part " + part);
            }
        } catch (IllegalArgumentException e) {
            throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, e.getMessage());
        }
    }

    public Iterable<LayerNode<JsonNode>> fulltextQuery(String queryString) {
        Preconditions.checkState(this.faustGraph.getDb().index().existsForNodes(INDEX_VERSE_FULLTEXT));
        Index<Node> verseFulltextIndex = this.faustGraph.getDb().index().forNodes(INDEX_VERSE_FULLTEXT);

        // construct a fuzzy query
        String normalizedQueryString = Normalization.normalize(queryString);
        ArrayList<String> tokens = Lists.newArrayList(normalizedQueryString.split(" "));
        Function<String, SpanMultiTermQueryWrapper> stringToQuery = new Function<String, SpanMultiTermQueryWrapper>() {
            @Override
            public SpanMultiTermQueryWrapper apply(@Nullable String input) {
                Term term = new Term("fulltext", input.toLowerCase());
                FuzzyQuery fuzzyQuery = new FuzzyQuery(term);
                SpanMultiTermQueryWrapper spanMultiTermQueryWrapper = new SpanMultiTermQueryWrapper(fuzzyQuery);
                return spanMultiTermQueryWrapper;
            }
        };

        SpanMultiTermQueryWrapper[] clauses = (Lists.newArrayList(Iterables.transform(tokens, stringToQuery))
                .toArray(new SpanMultiTermQueryWrapper[tokens.size()]));

        SpanNearQuery query = new SpanNearQuery(clauses, 5, false);
        IndexHits<Node> verseResults = verseFulltextIndex.query(query);

        Function<Node, LayerNode<JsonNode>> wrapLayerNodes = new Function<Node, LayerNode<JsonNode>>() {
            @Override
            public LayerNode apply(@Nullable Node input) {
                return new LayerNode<JsonNode>(textRepository, input);
            }
        };

        return Iterables.transform(verseResults, wrapLayerNodes);
    }

    public static VerseInterval ofPart(int part) {
        switch (part) {
        case 0:
        case 1:
            final SortedMap<Integer, VerseInterval> scenes = (part == 0 ? PROLOGUE_SCENES : FAUST_1_SCENES);
            return new SimpleVerseInterval(Integer.toString(part), scenes.get(scenes.firstKey()).getStart(),
                    scenes.get(scenes.lastKey()).getEnd());
        case 2:
            final SortedMap<Integer, VerseInterval> firstAct = FAUST_2_SCENES.get(FAUST_2_SCENES.firstKey());
            final SortedMap<Integer, VerseInterval> lastAct = FAUST_2_SCENES.get(FAUST_2_SCENES.lastKey());
            return new SimpleVerseInterval(Integer.toString(part), firstAct.get(firstAct.firstKey()).getStart(),
                    lastAct.get(lastAct.lastKey()).getEnd());
        default:
            throw new IllegalArgumentException("Part " + part);
        }
    }

    public static VerseInterval ofScene(int part, int act, int scene) {
        Preconditions.checkArgument(part == 2, "Part " + part);
        Preconditions.checkArgument(FAUST_2_SCENES.containsKey(act), "Act " + act);
        final SortedMap<Integer, VerseInterval> scenes = FAUST_2_SCENES.get(act);
        Preconditions.checkArgument(scenes.containsKey(scene), "Scene " + scene);
        return scenes.get(scene);
    }

    public static VerseInterval ofScene(int part, int scene) {
        Preconditions.checkArgument(part == 0 || part == 1, "Part " + part);
        final SortedMap<Integer, VerseInterval> scenes = (part == 0 ? PROLOGUE_SCENES : FAUST_1_SCENES);
        Preconditions.checkArgument(scenes.containsKey(scene), "Scene " + scene);
        return scenes.get(scene);
    }

    public static VerseInterval ofAct(int part, int act) {
        Preconditions.checkArgument(part == 2, "Part " + part);
        Preconditions.checkArgument(FAUST_2_SCENES.containsKey(act), "Act " + act);

        final SortedMap<Integer, VerseInterval> scenes = FAUST_2_SCENES.get(act);
        return new SimpleVerseInterval(String.format("%d.%d", part, act), scenes.get(scenes.firstKey()).getStart(),
                scenes.get(scenes.lastKey()).getEnd());
    }

    public static final Comparator<VerseInterval> INTERVAL_COMPARATOR = new Comparator<VerseInterval>() {

        @Override
        public int compare(VerseInterval o1, VerseInterval o2) {
            final int startDiff = o1.getStart() - o2.getStart();
            return (startDiff == 0 ? (o2.getEnd() - o1.getEnd()) : startDiff);
        }
    };

    public static final SortedMap<Integer, VerseInterval> PROLOGUE_SCENES = Maps.newTreeMap();
    public static final SortedMap<Integer, VerseInterval> FAUST_1_SCENES = Maps.newTreeMap();
    public static final SortedMap<Integer, SortedMap<Integer, VerseInterval>> FAUST_2_SCENES = Maps.newTreeMap();

    static {
        PROLOGUE_SCENES.put(1, new SimpleVerseInterval("Zueignung", 1, 33));
        PROLOGUE_SCENES.put(2, new SimpleVerseInterval("Vorspiel auf dem Theater", 33, 243));
        PROLOGUE_SCENES.put(3, new SimpleVerseInterval("Prolog im Himmel", 243, 354));

        FAUST_1_SCENES.put(1, new SimpleVerseInterval("Nacht", 354, 808));
        FAUST_1_SCENES.put(2, new SimpleVerseInterval("Vor dem Tor", 808, 1178));
        FAUST_1_SCENES.put(3, new SimpleVerseInterval("Studierzimmer I", 1178, 1530));
        FAUST_1_SCENES.put(4, new SimpleVerseInterval("Studierzimmer II", 1530, 2073));
        FAUST_1_SCENES.put(5, new SimpleVerseInterval("Auerbachs Keller in Leipzig", 2073, 2337));
        FAUST_1_SCENES.put(6, new SimpleVerseInterval("Hexenk\u00fcche", 2337, 2605));
        FAUST_1_SCENES.put(7, new SimpleVerseInterval("Strae. Faust Margarete", 2605, 2678));
        FAUST_1_SCENES.put(8, new SimpleVerseInterval("Abend. Ein kleines reinliches Zimmer", 2678, 2805));
        FAUST_1_SCENES.put(9, new SimpleVerseInterval("Spaziergang", 2805, 2865));
        FAUST_1_SCENES.put(10, new SimpleVerseInterval("Der Nachbarin Haus", 2865, 3025));
        FAUST_1_SCENES.put(11, new SimpleVerseInterval("Stra\u00dfe. Faust Mephistopheles", 3025, 3073));
        FAUST_1_SCENES.put(12, new SimpleVerseInterval("Garten", 3073, 3205));
        FAUST_1_SCENES.put(13, new SimpleVerseInterval("Ein Gartenh\u00e4uschen", 3205, 3217));
        FAUST_1_SCENES.put(14, new SimpleVerseInterval("Wald und H\u00f6hle", 3217, 3374));
        FAUST_1_SCENES.put(15, new SimpleVerseInterval("Gretchens Stube", 3374, 3414));
        FAUST_1_SCENES.put(16, new SimpleVerseInterval("Marthens Garten", 3414, 3544));
        FAUST_1_SCENES.put(17, new SimpleVerseInterval("Am Brunnen", 3544, 3587));
        FAUST_1_SCENES.put(18, new SimpleVerseInterval("Zwinger", 3587, 3620));
        FAUST_1_SCENES.put(19, new SimpleVerseInterval("Nacht. Stra\u00dfe vor Gretchens T\u00fcre", 3620, 3776));
        FAUST_1_SCENES.put(20, new SimpleVerseInterval("Dom", 3776, 3835));
        FAUST_1_SCENES.put(21, new SimpleVerseInterval("Walpurgisnacht", 3835, 4223));
        FAUST_1_SCENES.put(22, new SimpleVerseInterval("Walpurgisnachtstraum", 4223, 4399));
        // 23: Tr&uuml;ber Tag Feld, EA, S. 291-294
        FAUST_1_SCENES.put(24, new SimpleVerseInterval("Nacht, offen Feld", 4399, 4405));
        FAUST_1_SCENES.put(25, new SimpleVerseInterval("Kerker", 4405, 4613));

        final SortedMap<Integer, VerseInterval> faust2FirstActScenes = Maps.newTreeMap();
        faust2FirstActScenes.put(1, new SimpleVerseInterval("Anmutige Gegend", 4613, 4728));
        faust2FirstActScenes.put(2, new SimpleVerseInterval("Kaiserliche Pfalz", 4728, 6566));
        FAUST_2_SCENES.put(1, faust2FirstActScenes);

        final SortedMap<Integer, VerseInterval> faust2SecondActScenes = Maps.newTreeMap();
        faust2SecondActScenes.put(1,
                new SimpleVerseInterval("Hochgew\u00f6lbtes, enges, gothisches Zimmer", 6566, 6819));
        faust2SecondActScenes.put(2, new SimpleVerseInterval("Laboratorium", 6819, 7005));
        faust2SecondActScenes.put(3, new SimpleVerseInterval("Klassische Walpurgisnacht", 7005, 8488));
        FAUST_2_SCENES.put(2, faust2SecondActScenes);

        final SortedMap<Integer, VerseInterval> faust2ThirdActScenes = Maps.newTreeMap();
        faust2ThirdActScenes.put(1, new SimpleVerseInterval("Vor dem Pallaste des Menelas zu Sparta", 8488, 9127));
        faust2ThirdActScenes.put(2, new SimpleVerseInterval("Innerer Burghof", 9127, 9574));
        faust2ThirdActScenes.put(3, new SimpleVerseInterval("Schattiger Hain", 9574, 10039));
        FAUST_2_SCENES.put(3, faust2ThirdActScenes);

        final SortedMap<Integer, VerseInterval> faust2FourthActScenes = Maps.newTreeMap();
        faust2FourthActScenes.put(1, new SimpleVerseInterval("Hochgebirg", 10039, 10345));
        faust2FourthActScenes.put(2, new SimpleVerseInterval("Auf dem Vorgebirg", 10345, 10783));
        faust2FourthActScenes.put(3, new SimpleVerseInterval("Des Gegenkaisers Zelt", 10783, 11043));
        FAUST_2_SCENES.put(4, faust2FourthActScenes);

        final SortedMap<Integer, VerseInterval> faust2FifthActScenes = Maps.newTreeMap();
        faust2FifthActScenes.put(1, new SimpleVerseInterval("Offene Gegend", 11043, 11143));
        faust2FifthActScenes.put(2, new SimpleVerseInterval("Pallast", 11143, 11288));
        faust2FifthActScenes.put(3, new SimpleVerseInterval("Tiefe Nacht", 11288, 11511));
        faust2FifthActScenes.put(4, new SimpleVerseInterval("Gro\u00dfer Vorhof des Pallasts", 11511, 11844));
        faust2FifthActScenes.put(5, new SimpleVerseInterval("Bergschluchten", 11844, 12112));
        FAUST_2_SCENES.put(5, faust2FifthActScenes);
    }

}