com.mothsoft.alexis.engine.textual.TopicDocumentMatcherImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.mothsoft.alexis.engine.textual.TopicDocumentMatcherImpl.java

Source

/*   Copyright 2012 Tim Garrett, Mothsoft LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package com.mothsoft.alexis.engine.textual;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.time.StopWatch;
import org.apache.log4j.Logger;
import org.hibernate.ScrollableResults;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.TransactionCallbackWithoutResult;
import org.springframework.transaction.support.TransactionTemplate;

import com.mothsoft.alexis.dao.DocumentDao;
import com.mothsoft.alexis.dao.TopicDao;
import com.mothsoft.alexis.domain.Document;
import com.mothsoft.alexis.domain.DocumentScore;
import com.mothsoft.alexis.domain.DocumentState;
import com.mothsoft.alexis.domain.SortOrder;
import com.mothsoft.alexis.domain.Topic;
import com.mothsoft.alexis.domain.TopicDocument;
import com.mothsoft.alexis.engine.Task;
import com.mothsoft.alexis.security.CurrentUserUtil;

/**
 * {@link Task} that associates documents with the topics whose search
 * expression they satisfy.
 * <p>
 * One run of {@link #execute()} performs three steps, each submitted through
 * its own {@link TransactionTemplate} call:
 * <ol>
 * <li>move every {@link DocumentState#PARSED} document to
 * {@link DocumentState#PENDING_TOPIC_MATCHING};</li>
 * <li>run every topic's search expression against the pending documents and
 * persist one {@link TopicDocument} per hit, marking each matched document
 * {@link DocumentState#MATCHED_TO_TOPICS};</li>
 * <li>move every document still pending (i.e. one that matched no topic) to
 * {@link DocumentState#MATCHED_TO_TOPICS}.</li>
 * </ol>
 * The collaborators are supplied through the setters and must be set before
 * {@link #execute()} is called.
 *
 * @author tgarrett
 */
public class TopicDocumentMatcherImpl implements Task {

    private static final Logger logger = Logger.getLogger(TopicDocumentMatcherImpl.class);

    private TopicDao topicDao;
    private DocumentDao documentDao;
    private TransactionTemplate transactionTemplate;

    /**
     * Creates a matcher with no collaborators configured.
     *
     * @throws IOException
     *             never thrown by this body; the clause is retained so that
     *             existing callers that handle it keep compiling
     */
    public TopicDocumentMatcherImpl() throws IOException {
    }

    /**
     * @param documentDao
     *            DAO used to search, load and update documents
     */
    public void setDocumentDao(final DocumentDao documentDao) {
        this.documentDao = documentDao;
    }

    /**
     * @param topicDao
     *            DAO used to list topics and to store topic/document matches
     */
    public void setTopicDao(final TopicDao topicDao) {
        this.topicDao = topicDao;
    }

    /**
     * Wraps the given transaction manager in the {@link TransactionTemplate}
     * used by every step of {@link #execute()}.
     *
     * @param transactionManager
     *            transaction manager to run the matching steps with
     */
    public void setTransactionManager(final PlatformTransactionManager transactionManager) {
        this.transactionTemplate = new TransactionTemplate(transactionManager);
    }

    /**
     * Runs one complete matching pass (see the class description for the
     * three steps) and logs how long each step took.
     */
    public void execute() {

        logger.info("Starting Topic<=>Document Matching");

        final StopWatch stopWatch = new StopWatch();

        // a unique state for documents pending matching so we can transition
        // items that had no topics (prevent a big future spike when a topic is
        // added that now matches really old documents)
        stopWatch.start();
        bulkUpdateDocumentState(DocumentState.PARSED, DocumentState.PENDING_TOPIC_MATCHING);
        stopWatch.stop();
        logger.info("Marking PARSED documents as PENDING_TOPIC_MATCHING took: " + stopWatch.toString());
        stopWatch.reset();

        stopWatch.start();
        match();
        stopWatch.stop();
        logger.info("Matching documents and topics took: " + stopWatch.toString());
        stopWatch.reset();

        // update any documents that had no assignments
        stopWatch.start();
        bulkUpdateDocumentState(DocumentState.PENDING_TOPIC_MATCHING, DocumentState.MATCHED_TO_TOPICS);
        stopWatch.stop();
        logger.info("Marking PENDING_TOPIC_MATCHING documents as MATCHED_TO_TOPICS took: " + stopWatch.toString());
    }

    /**
     * Delegates to {@link DocumentDao#bulkUpdateDocumentState} inside a
     * transaction template call.
     *
     * @param queryState
     *            state of the documents to transition
     * @param nextState
     *            state those documents are moved to
     */
    private void bulkUpdateDocumentState(final DocumentState queryState, final DocumentState nextState) {
        this.transactionTemplate.execute(new TransactionCallbackWithoutResult() {

            @Override
            protected void doInTransactionWithoutResult(TransactionStatus txStatus) {
                TopicDocumentMatcherImpl.this.documentDao.bulkUpdateDocumentState(queryState, nextState);
            }
        });
    }

    /**
     * Collects the matches of every topic and persists them, all within one
     * transaction template call and under system-user authentication. The
     * authentication is cleared again even when matching fails.
     */
    private void match() {
        this.transactionTemplate.execute(new TransactionCallbackWithoutResult() {

            @Override
            protected void doInTransactionWithoutResult(TransactionStatus txStatus) {
                try {
                    CurrentUserUtil.setSystemUserAuthentication();

                    // document ID -> every topic (with score) that matched it
                    final Map<Long, List<TopicScore>> documentTopicMap = new HashMap<Long, List<TopicScore>>();

                    final List<Topic> topics = TopicDocumentMatcherImpl.this.topicDao.list();
                    for (final Topic topic : topics) {
                        mapMatches(topic, documentTopicMap);
                    }

                    saveMatches(documentTopicMap);

                    // one map entry per matched document, so this counts
                    // documents rather than individual topic/document pairs
                    final long rowsAffected = documentTopicMap.size();
                    logger.info("Topic<=>Document matching associated " + rowsAffected + " items");
                } finally {
                    CurrentUserUtil.clearAuthentication();
                }
            }

        });
    }

    /**
     * Searches the topic owner's pending documents with the topic's search
     * expression and records every hit in {@code documentTopicMap}.
     *
     * @param topic
     *            topic whose search expression is evaluated
     * @param documentTopicMap
     *            accumulator keyed by document ID; modified in place
     */
    private void mapMatches(final Topic topic, final Map<Long, List<TopicScore>> documentTopicMap) {
        final String query = topic.getSearchExpression();

        final ScrollableResults scrollableResults = this.documentDao.scrollableSearch(topic.getUserId(),
                DocumentState.PENDING_TOPIC_MATCHING, query, SortOrder.DATE_ASC, null, null);

        try {
            while (scrollableResults.next()) {
                // row layout as consumed below: [0] = Document, [1] = Float score
                final Object[] row = scrollableResults.get();

                // skip hits that carry no document
                // NOTE(review): presumably guards against a stale search
                // index entry (the original comment read "state document
                // index") - confirm
                if (row[0] == null) {
                    continue;
                }

                final DocumentScore documentScore = new DocumentScore((Document) row[0], (Float) row[1]);
                mapMatches(topic, documentScore, documentTopicMap);
            }
        } finally {
            // always release the scroll, even if processing a row fails
            scrollableResults.close();
        }
    }

    /**
     * Records a single topic/document hit in {@code documentTopicMap},
     * creating the document's list on its first hit.
     *
     * @param topic
     *            topic that matched
     * @param documentScore
     *            matched document together with its score
     * @param documentTopicMap
     *            accumulator keyed by document ID; modified in place
     */
    private void mapMatches(final Topic topic, final DocumentScore documentScore,
            final Map<Long, List<TopicScore>> documentTopicMap) {
        final Long documentId = documentScore.getDocument().getId();

        // the map never stores null values, so a null result means "absent";
        // this needs one lookup per hit instead of containsKey + put + get
        List<TopicScore> topicScores = documentTopicMap.get(documentId);
        if (topicScores == null) {
            topicScores = new ArrayList<TopicScore>(16);
            documentTopicMap.put(documentId, topicScores);
        }

        topicScores.add(new TopicScore(topic, documentScore.getScore()));
    }

    /**
     * Persists one {@link TopicDocument} per collected hit, stamps each
     * matched topic with the time of this run and marks each matched document
     * {@link DocumentState#MATCHED_TO_TOPICS}.
     *
     * @param documentTopicMap
     *            hits collected by {@code mapMatches}, keyed by document ID
     */
    private void saveMatches(final Map<Long, List<TopicScore>> documentTopicMap) {
        // a single timestamp shared by every topic touched in this run
        final Date now = new Date();

        // try to reduce contention by always starting with the oldest doc ID
        // and processing in the same order as other bulk operations
        final List<Long> sortedDocumentIds = new ArrayList<Long>(documentTopicMap.keySet());
        Collections.sort(sortedDocumentIds);

        for (final Long documentId : sortedDocumentIds) {
            final Document document = this.documentDao.get(documentId);

            for (final TopicScore topicScore : documentTopicMap.get(documentId)) {
                final Topic topic = topicScore.getTopic();

                this.topicDao.add(new TopicDocument(topic, document, topicScore.getScore()));

                // NOTE(review): this runs once per hit, so a topic matching
                // several documents is updated repeatedly with the same date
                topic.setLastDocumentMatchDate(now);
                this.topicDao.update(topic);
            }

            document.setState(DocumentState.MATCHED_TO_TOPICS);
            this.documentDao.update(document);
        }
    }

    /**
     * Immutable pairing of a topic with the score of one document hit.
     * Declared static so that instances held in the match map do not each
     * retain a reference to the enclosing matcher.
     */
    private static final class TopicScore {
        private final Topic topic;
        private final Float score;

        TopicScore(final Topic topic, final float score) {
            this.topic = topic;
            this.score = score;
        }

        Topic getTopic() {
            return this.topic;
        }

        Float getScore() {
            return this.score;
        }
    }

}