Java tutorial
/*   Copyright 2012 Tim Garrett, Mothsoft LLC
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package com.mothsoft.alexis.dao;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.persistence.EntityManager;
import javax.persistence.LockModeType;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;

import org.apache.commons.lang.time.StopWatch;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.Version;
import org.hibernate.CacheMode;
import org.hibernate.QueryException;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.Session;
import org.hibernate.search.FullTextQuery;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.indexes.IndexReaderAccessor;
import org.springframework.stereotype.Repository;

import com.mothsoft.alexis.domain.DataRange;
import com.mothsoft.alexis.domain.Document;
import com.mothsoft.alexis.domain.DocumentContent;
import com.mothsoft.alexis.domain.DocumentScore;
import com.mothsoft.alexis.domain.DocumentState;
import com.mothsoft.alexis.domain.DocumentTerm;
import com.mothsoft.alexis.domain.Edge;
import com.mothsoft.alexis.domain.Graph;
import com.mothsoft.alexis.domain.ImportantNamedEntity;
import com.mothsoft.alexis.domain.ImportantTerm;
import com.mothsoft.alexis.domain.Node;
import com.mothsoft.alexis.domain.SortOrder;
import com.mothsoft.alexis.domain.StopWords;
import com.mothsoft.alexis.domain.TFIDF;
import com.mothsoft.alexis.domain.TopicDocument;
import com.mothsoft.alexis.security.CurrentUserUtil;

/**
 * JPA / Hibernate Search backed implementation of {@link DocumentDao}.
 *
 * <p>
 * Plain lookups go through the injected {@link EntityManager}; the search and
 * term-statistics methods build Lucene queries and run them through a
 * Hibernate Search {@link FullTextSession} obtained from the entity manager's
 * delegate.
 */
@Repository
public class DocumentDaoImpl implements DocumentDao {

    private static final Logger logger = Logger.getLogger(DocumentDaoImpl.class);

    /** Passed to {@code buildFullTextQuery} to skip the document-state clause. */
    private static final DocumentState ANY_DOCUMENT_STATE = null;

    /** Passed to {@code buildFullTextQuery} to skip the date-range clause. */
    private static final Date NO_DATE = null;

    /** Index field holding the document body text. */
    private static final String CONTENT_TEXT_FIELD_NAME = "content.text";

    @PersistenceContext
    private EntityManager em;

    // The (unused) checked exception is part of the existing constructor
    // signature and is kept so that callers catching it still compile.
    public DocumentDaoImpl() throws IOException {
    }

    /**
     * Replaces the entity manager used by this DAO.
     *
     * @param em
     *            the entity manager to use for all subsequent operations
     */
    public void setEm(final EntityManager em) {
        this.em = em;
    }

    /**
     * Persists a new document.
     *
     * @param document
     *            the document to persist
     */
    public void add(final Document document) {
        this.em.persist(document);
    }

    /**
     * Persists new document content.
     *
     * @param content
     *            the content to persist
     */
    public void add(final DocumentContent content) {
        this.em.persist(content);
    }

    /**
     * Loads every document whose {@code intState} equals {@code queryState}
     * (ordered by id) and sets each one to {@code nextState}.
     *
     * @param queryState
     *            the state to select on; must not be {@code null}
     * @param nextState
     *            the state to assign to every selected document
     */
    public void bulkUpdateDocumentState(DocumentState queryState, DocumentState nextState) {
        final Query query = this.em
                .createQuery("SELECT d FROM Document d WHERE d.intState = :queryState ORDER BY d.id ASC");
        query.setParameter("queryState", queryState.getValue());

        @SuppressWarnings("unchecked")
        final List<Document> documents = query.getResultList();

        for (final Document document : documents) {
            document.setState(nextState);
        }
    }

    /**
     * Looks up a document by URL.
     *
     * @param url
     *            the URL to match
     * @return the document when exactly one row matches; {@code null} when
     *         there are zero or several matches
     */
    public Document findByUrl(final String url) {
        final Query query = this.em.createQuery("FROM Document WHERE url = :url");
        query.setParameter("url", url);

        @SuppressWarnings("unchecked")
        final List<Document> results = query.getResultList();

        if (results.size() == 1) {
            return results.get(0);
        }
        return null;
    }

    /**
     * Fetches a document by id.
     *
     * <p>
     * When {@link CurrentUserUtil#isSystem()} is true the document is loaded
     * directly. Otherwise the lookup is joined through {@code documentUsers}
     * and restricted to the current user id; that path uses
     * {@link Query#getSingleResult()}, which throws when no row (or more than
     * one row) matches.
     *
     * @param id
     *            the document id
     * @return the matching document
     */
    public Document get(final Long id) {
        if (CurrentUserUtil.isSystem()) {
            return this.em.find(Document.class, id);
        }

        final Long userId = CurrentUserUtil.getCurrentUserId();
        final Query query = this.em
                .createQuery("select d from Document d inner join d.documentUsers du inner join du.user user "
                        + "where user.id = :userId and d.id = :docId");
        query.setParameter("userId", userId);
        query.setParameter("docId", id);
        return (Document) query.getSingleResult();
    }

    /**
     * Merges the given document into the persistence context.
     *
     * @param document
     *            the document to merge
     */
    public void update(final Document document) {
        this.em.merge(document);
    }

    /**
     * Lists a page of the user's documents, sorted by
     * {@link SortOrder#DATE_DESC}, with no state, text or date restriction.
     *
     * @param userId
     *            the owning user
     * @param first
     *            index of the first row to return
     * @param count
     *            maximum number of rows to return
     * @return the requested page plus the total number of available rows
     */
    public DataRange<Document> listDocumentsByOwner(final Long userId, final int first, final int count) {
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        final SortOrder sortOrder = SortOrder.DATE_DESC;
        final DataRange<DocumentScore> scoredRange = this.searchWithAllOptions(userId, false, null, null, sortOrder,
                null /* ignore start date */, null /* ignore end date */, first, count);
        final DataRange<Document> dataRange = toDocumentRange(scoredRange);

        stopWatch.stop();
        logger.debug(stopWatch.toString());
        return dataRange;
    }

    /**
     * Lists a page of the user's documents that are in state
     * {@link DocumentState#MATCHED_TO_TOPICS} and carry a topic for that user,
     * sorted by {@link SortOrder#DATE_DESC}.
     *
     * @param userId
     *            the owning user
     * @param first
     *            index of the first row to return
     * @param count
     *            maximum number of rows to return
     * @return the requested page plus the total number of available rows
     */
    public DataRange<Document> listDocumentsInTopicsByOwner(final Long userId, final int first, final int count) {
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        final SortOrder sortOrder = SortOrder.DATE_DESC;
        final DataRange<DocumentScore> scoredRange = this.searchWithAllOptions(userId, true,
                DocumentState.MATCHED_TO_TOPICS, null, sortOrder, null, null, first, count);
        final DataRange<Document> dataRange = toDocumentRange(scoredRange);

        stopWatch.stop();
        logger.debug(stopWatch.toString());
        return dataRange;
    }

    /** Unwraps a range of scored documents into a range of plain documents. */
    private static DataRange<Document> toDocumentRange(final DataRange<DocumentScore> scoredRange) {
        final List<Document> range = new ArrayList<Document>(scoredRange.getRange().size());
        for (final DocumentScore scoredDoc : scoredRange.getRange()) {
            range.add(scoredDoc.getDocument());
        }
        return new DataRange<Document>(range, scoredRange.getFirstRow(), scoredRange.getTotalRowsAvailable());
    }

    /**
     * Picks the lowest-id document in the given state, calls
     * {@code Document.lock()} on it and merges it.
     *
     * @param state
     *            the state to select on; must not be {@code null}
     * @return the locked document, or {@code null} when no document is in
     *         that state
     */
    public Document findAndLockOneDocument(final DocumentState state) {
        final Query query = this.em.createQuery("from Document where intState = :state order by id asc");
        query.setParameter("state", state.getValue());
        query.setMaxResults(1);

        @SuppressWarnings("unchecked")
        final List<Document> results = query.getResultList();

        if (results.isEmpty()) {
            return null;
        }

        final Document document = results.get(0);
        document.lock();
        this.em.merge(document);
        return document;
    }

    /**
     * Aggregates term counts and TF-IDF scores over all of a user's documents
     * in a date range and returns the highest-scoring terms.
     *
     * @param userId
     *            the owning user
     * @param startDate
     *            lower creation-date bound, or {@code null} for none
     * @param endDate
     *            upper creation-date bound, or {@code null} for none
     * @param count
     *            maximum number of terms to return
     * @param filterStopWords
     *            when {@code true}, terms in {@link StopWords#ENGLISH} are
     *            left out
     * @return at most {@code count} terms, highest TF-IDF first
     */
    public List<ImportantTerm> getImportantTerms(Long userId, Date startDate, Date endDate, int count,
            boolean filterStopWords) {
        final FullTextQuery fullTextQuery = this.buildFullTextQuery(null, userId, startDate, endDate, false,
                ANY_DOCUMENT_STATE, FullTextQuery.DOCUMENT_ID);
        return getImportantTerms(fullTextQuery, count, filterStopWords);
    }

    /**
     * Runs the query (which must project the Lucene document id as its first
     * column), walks the term-frequency vector of every hit and sums up, per
     * term, the occurrence count and the TF-IDF score.
     *
     * @param fullTextQuery
     *            query projecting {@link FullTextQuery#DOCUMENT_ID}
     * @param count
     *            maximum number of terms to return
     * @param filterStopWords
     *            when {@code true}, terms in {@link StopWords#ENGLISH} are
     *            skipped
     * @return a new list holding at most {@code count} terms, highest TF-IDF
     *         first
     * @throws RuntimeException
     *             wrapping any {@link IOException} raised by the index reader
     */
    @SuppressWarnings("unchecked")
    private List<ImportantTerm> getImportantTerms(FullTextQuery fullTextQuery, int count, boolean filterStopWords) {
        final long start = System.currentTimeMillis();
        final List<Object[]> results = fullTextQuery.list();
        final LinkedHashMap<String, Tuple<Integer, Float>> termCountMap = new LinkedHashMap<String, Tuple<Integer, Float>>();

        final FullTextSession fullTextSession = Search.getFullTextSession((Session) this.em.getDelegate());
        final SearchFactory searchFactory = fullTextSession.getSearchFactory();
        final IndexReaderAccessor ira = searchFactory.getIndexReaderAccessor();
        final IndexReader reader = ira.open(com.mothsoft.alexis.domain.Document.class);
        final IndexSearcher searcher = new IndexSearcher(reader);

        final List<ImportantTerm> importantTerms;
        final int numDocs;
        try {
            numDocs = reader.numDocs();
            Term luceneTerm = new Term(CONTENT_TEXT_FIELD_NAME);

            if (logger.isDebugEnabled()) {
                logger.debug(String.format("Found %d matching Lucene documents of %d in reader", results.size(),
                        numDocs));
            }

            // loop over all the matching documents
            for (final Object[] ith : results) {
                int docId = ((Number) ith[0]).intValue();
                final TermFreqVector tfv = reader.getTermFreqVector(docId, CONTENT_TEXT_FIELD_NAME);

                if (tfv == null) {
                    continue;
                }

                final String[] terms = tfv.getTerms();
                final int[] freqs = tfv.getTermFrequencies();

                // total document size
                int size = 0;
                for (int freq : freqs) {
                    size += freq;
                }

                if (logger.isDebugEnabled()) {
                    logger.debug(
                            String.format("Lucene document %d has %d terms, to be merged with running count %d",
                                    docId, size, termCountMap.size()));
                }

                // loop over the terms and aggregate the counts and tf-idf.
                // terms[] and freqs[] are parallel arrays, so the index must
                // advance for EVERY term, including skipped stop words;
                // otherwise later terms pick up the wrong frequency.
                for (int i = 0; i < terms.length; i++) {
                    final String term = terms[i];

                    if (filterStopWords && StopWords.ENGLISH.contains(term)) {
                        continue;
                    }

                    final int termCount = freqs[i];
                    luceneTerm = luceneTerm.createTerm(term);
                    final float score = TFIDF.score(term, termCount, size, numDocs,
                            searcher.docFreq(luceneTerm));

                    final Tuple<Integer, Float> countScore = termCountMap.get(term);
                    if (countScore != null) {
                        countScore.t1 += termCount;
                        countScore.t2 += score;
                    } else {
                        final Tuple<Integer, Float> created = new Tuple<Integer, Float>();
                        created.t1 = termCount;
                        created.t2 = score;
                        termCountMap.put(term, created);
                    }
                }
            }

            if (logger.isDebugEnabled()) {
                logger.debug("Completed Lucene document processing.");
            }

            importantTerms = new ArrayList<ImportantTerm>(termCountMap.size());

            // find max TF-IDF
            float maxTfIdf = 0.0f;
            for (final Tuple<Integer, Float> ith : termCountMap.values()) {
                if (ith.t2 > maxTfIdf) {
                    maxTfIdf = ith.t2;
                }
            }

            for (final Map.Entry<String, Tuple<Integer, Float>> entry : termCountMap.entrySet()) {
                final int ithCount = entry.getValue().t1;
                final float ithTfIdf = entry.getValue().t2;
                importantTerms.add(new ImportantTerm(entry.getKey(), ithCount, ithTfIdf, maxTfIdf));
            }

            if (logger.isDebugEnabled()) {
                logger.debug("Completed term aggregation, will clear term map");
            }

            termCountMap.clear();

        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            try {
                searcher.close();
            } catch (IOException e) {
                logger.warn("Failed to close searcher: " + e, e);
            }
            ira.close(reader);
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Sorting terms");
        }

        // descending by TF-IDF
        Collections.sort(importantTerms, new Comparator<ImportantTerm>() {
            @Override
            public int compare(ImportantTerm term1, ImportantTerm term2) {
                return term2.getTfIdf().compareTo(term1.getTfIdf());
            }
        });

        if (logger.isDebugEnabled()) {
            logger.debug("Term sort complete");
        }

        if (importantTerms.isEmpty() || importantTerms.size() < count) {
            if (logger.isDebugEnabled()) {
                logger.debug("Will return full list.");
            }
            logger.debug("Timer: " + (System.currentTimeMillis() - start));
            return importantTerms;
        } else {
            if (logger.isDebugEnabled()) {
                logger.debug(
                        "Will return sublist containing " + count + " of " + importantTerms.size() + " terms.");
            }

            logger.debug("Timer: " + (System.currentTimeMillis() - start));
            // Copy rather than return the subList view: the view keeps the
            // whole backing list alive and is not serializable.
            return new ArrayList<ImportantTerm>(importantTerms.subList(0, count));
        }
    }

    /**
     * Returns the highest TF-IDF terms stored for a single document.
     *
     * @param documentId
     *            the document id
     * @param howMany
     *            maximum number of terms to return
     * @param filterStopWords
     *            when {@code true}, terms whose lowercase value is in
     *            {@link StopWords#ENGLISH} are excluded by the query
     * @return the terms, highest TF-IDF first; rows with a null TF-IDF are
     *         never returned
     */
    @SuppressWarnings("unchecked")
    public List<ImportantTerm> getImportantTerms(Long documentId, int howMany, boolean filterStopWords) {
        final Query query;
        if (filterStopWords) {
            query = this.em.createQuery("select dt from DocumentTerm dt join dt.document d join dt.term t "
                    + " where d.id = :documentId and t.valueLowercase NOT IN (:stopWords) "
                    + " and dt.tfIdf is not null order by dt.tfIdf DESC");
            query.setParameter("stopWords", StopWords.ENGLISH);
        } else {
            query = this.em.createQuery("select dt from DocumentTerm dt join dt.document d join dt.term t "
                    + " where d.id = :documentId and dt.tfIdf is not null order by dt.tfIdf DESC");
        }
        query.setParameter("documentId", documentId);
        query.setMaxResults(howMany);

        final List<DocumentTerm> documentTerms = query.getResultList();

        float maxTfIdf = -1.0f;
        for (final DocumentTerm documentTerm : documentTerms) {
            if (documentTerm.getTfIdf() > maxTfIdf) {
                maxTfIdf = documentTerm.getTfIdf();
            }
        }

        final List<ImportantTerm> importantTerms = new ArrayList<ImportantTerm>(documentTerms.size());
        for (final DocumentTerm documentTerm : documentTerms) {
            importantTerms.add(new ImportantTerm(documentTerm.getTerm().getValueLowercase(),
                    documentTerm.getCount(), documentTerm.getTfIdf(), maxTfIdf));
        }
        return importantTerms;
    }

    /**
     * Lists the documents most strongly matched to the user's topics within a
     * (strictly exclusive) creation-date window. Only topic matches with a
     * score above 0.2 are considered.
     *
     * @param userId
     *            the topic owner
     * @param startDate
     *            exclusive lower bound on the topic-document creation date
     * @param endDate
     *            exclusive upper bound on the topic-document creation date
     * @param count
     *            maximum number of documents to return
     * @return the documents, highest topic score first
     */
    public List<Document> listTopDocuments(Long userId, Date startDate, Date endDate, int count) {
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        final Query query = this.em
                .createQuery("select d from Topic topic join topic.topicDocuments td join td.document d "
                        + " where topic.userId = :userId "
                        + " and td.creationDate > :startDate and td.creationDate < :endDate "
                        + " and td.score > 0.2 " + " order by td.score desc");
        query.setParameter("userId", userId);
        query.setParameter("startDate", startDate);
        query.setParameter("endDate", endDate);
        query.setFirstResult(0);
        query.setMaxResults(count);
        query.setLockMode(LockModeType.NONE);

        @SuppressWarnings("unchecked")
        final List<Document> range = query.getResultList();

        stopWatch.stop();
        logger.debug(stopWatch.toString());
        return range;
    }

    /**
     * Runs a full-text search and returns a forward-only, read-only cursor
     * over (document, score) rows, fetched 50 at a time and bypassing the
     * query and second-level caches.
     *
     * @param userId
     *            the owning user
     * @param state
     *            required document state, or {@code null} for any
     * @param queryString
     *            Lucene query text, or {@code null} for no text clause
     * @param sortOrder
     *            requested ordering; must not be {@code null}
     * @param startDate
     *            lower creation-date bound, or {@code null} for none
     * @param endDate
     *            upper creation-date bound, or {@code null} for none
     * @return the open cursor; the caller is responsible for closing it
     * @throws IllegalArgumentException
     *             if {@code sortOrder} is not one of the handled values
     */
    @Override
    public ScrollableResults scrollableSearch(Long userId, DocumentState state, String queryString,
            SortOrder sortOrder, Date startDate, Date endDate) {
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        final FullTextQuery fullTextQuery = this.buildFullTextQuery(queryString, userId, startDate, endDate, false,
                state, FullTextQuery.THIS, FullTextQuery.SCORE);

        // NOTE(review): the date orderings here sort on "id", whereas
        // searchWithAllOptions sorts on "creationDate" - confirm this
        // difference is intentional.
        final Sort sort;
        switch (sortOrder) {
        case DATE_ASC:
            sort = new Sort(new SortField("id", SortField.LONG));
            break;
        case DATE_DESC:
            sort = new Sort(new SortField("id", SortField.LONG, true));
            break;
        case RELEVANCE:
            sort = new Sort(SortField.FIELD_SCORE, new SortField("id", SortField.LONG, true));
            break;
        default:
            throw new IllegalArgumentException("Unexpected SortOrder: " + sortOrder.name());
        }
        fullTextQuery.setSort(sort);

        fullTextQuery.setFetchSize(50);
        fullTextQuery.setReadOnly(true);
        fullTextQuery.setCacheable(false);
        fullTextQuery.setCacheMode(CacheMode.IGNORE);

        final ScrollableResults result = fullTextQuery.scroll(ScrollMode.FORWARD_ONLY);

        stopWatch.stop();
        logger.debug(stopWatch.toString());
        return result;
    }

    /**
     * Searches the user's documents in any state.
     *
     * @param userId
     *            the owning user
     * @param queryString
     *            Lucene query text, or {@code null} for no text clause
     * @param sortOrder
     *            requested ordering; {@code null} behaves like
     *            {@link SortOrder#RELEVANCE}
     * @param startDate
     *            lower creation-date bound, or {@code null} for none
     * @param endDate
     *            upper creation-date bound, or {@code null} for none
     * @param first
     *            index of the first row to return
     * @param count
     *            maximum number of rows to return
     * @return the requested page of scored documents
     */
    public DataRange<DocumentScore> searchByOwnerAndExpression(Long userId, String queryString, SortOrder sortOrder,
            Date startDate, Date endDate, int first, int count) {
        final boolean requireTopicsForUser = false;
        return searchWithAllOptions(userId, requireTopicsForUser, null, queryString, sortOrder, startDate, endDate,
                first, count);
    }

    /**
     * Counts the documents matching a search by running it for a single row
     * and reading the total from the returned range.
     *
     * @return the total number of matching rows
     */
    public int searchResultCount(Long userId, DocumentState state, String queryString, Date startDate,
            Date endDate) {
        final DataRange<DocumentScore> range = searchByOwnerAndStateAndExpression(userId, state, queryString,
                startDate, endDate, 0, 1);
        return range.getTotalRowsAvailable();
    }

    /**
     * Searches the user's documents in a given state, using the default
     * (relevance) ordering.
     *
     * @param state
     *            required document state, or {@code null} for any
     * @return the requested page of scored documents
     */
    public DataRange<DocumentScore> searchByOwnerAndStateAndExpression(Long userId, DocumentState state,
            String queryString, Date startDate, Date endDate, int first, int count) {
        final boolean requireTopicsForUser = false;
        return searchWithAllOptions(userId, requireTopicsForUser, state, queryString, null /* default */,
                startDate, endDate, first, count);
    }

    /**
     * Shared implementation behind the paged search methods: builds the
     * composite query, applies paging and ordering, and wraps each
     * (document, score) row in a {@link DocumentScore}.
     *
     * @param sortOrder
     *            {@code null} or {@link SortOrder#RELEVANCE} sorts by score
     *            then descending id; the date orders sort on
     *            {@code creationDate}
     */
    private DataRange<DocumentScore> searchWithAllOptions(final Long userId, final boolean requireTopicsForUser,
            final DocumentState state, final String queryString, final SortOrder sortOrder, final Date startDate,
            final Date endDate, final int first, final int count) {
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        final FullTextQuery fullTextQuery = this.buildFullTextQuery(queryString, userId, startDate, endDate,
                requireTopicsForUser, state, FullTextQuery.THIS, FullTextQuery.SCORE);

        fullTextQuery.setFirstResult(first);
        fullTextQuery.setMaxResults(count);

        // optional sort order
        if (sortOrder == null || sortOrder == SortOrder.RELEVANCE) {
            final Sort defaultSort = new Sort(SortField.FIELD_SCORE, new SortField("id", SortField.LONG, true));
            fullTextQuery.setSort(defaultSort);
        } else if (sortOrder == SortOrder.DATE_DESC) {
            final Sort sort = new Sort(new SortField("creationDate", SortField.LONG, true));
            fullTextQuery.setSort(sort);
        } else if (sortOrder == SortOrder.DATE_ASC) {
            final Sort sort = new Sort(new SortField("creationDate", SortField.LONG));
            fullTextQuery.setSort(sort);
        }

        @SuppressWarnings("unchecked")
        final List<Object[]> results = fullTextQuery.list();
        final List<DocumentScore> range = new ArrayList<DocumentScore>(results.size());

        // copy to DocumentScore holder objects
        for (final Object[] ith : results) {
            final Document ithDoc = (Document) ith[0];
            final Float ithScore = (Float) ith[1];
            range.add(new DocumentScore(ithDoc, ithScore));
        }

        final int totalRows = fullTextQuery.getResultSize();
        final DataRange<DocumentScore> result = new DataRange<DocumentScore>(range, first, totalRows);

        stopWatch.stop();
        logger.debug(stopWatch.toString());
        return result;
    }

    /**
     * Assembles the composite Lucene query used by every search in this DAO.
     * All clauses are combined with {@link Occur#MUST}.
     *
     * @param queryString
     *            free text parsed against title, description, content text
     *            and author; {@code null} adds no text clause
     * @param userId
     *            value the {@code user} field must equal
     * @param startDate
     *            lower creation-date bound (inclusive); {@code null} means 0
     *            when {@code endDate} is set
     * @param endDate
     *            upper creation-date bound (inclusive); {@code null} means
     *            {@link Long#MAX_VALUE} when {@code startDate} is set
     * @param requireTopicsForUser
     *            when {@code true}, the {@code topicUser} field must also
     *            equal {@code userId}
     * @param state
     *            required value of the {@code state} field, or {@code null}
     *            for any
     * @param projectionConstants
     *            Hibernate Search projection columns
     * @return the configured, not yet executed, full-text query
     * @throws QueryException
     *             if {@code queryString} cannot be parsed
     */
    private FullTextQuery buildFullTextQuery(final String queryString, final Long userId, final Date startDate,
            final Date endDate, final boolean requireTopicsForUser, final DocumentState state,
            final String... projectionConstants) {
        final String[] fields = new String[] { "title", "description", CONTENT_TEXT_FIELD_NAME, "author" };
        final MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields,
                new StandardAnalyzer(Version.LUCENE_35));
        org.apache.lucene.search.BooleanQuery compositeQuery = new org.apache.lucene.search.BooleanQuery();

        if (queryString != null) {
            org.apache.lucene.search.Query luceneTextQuery;
            try {
                luceneTextQuery = parser.parse(queryString);
                compositeQuery.add(luceneTextQuery, Occur.MUST);
            } catch (ParseException e) {
                throw new QueryException(e);
            }
        }

        // NOTE(review): per the Lucene NumericRangeQuery docs a null bound
        // makes the range open-ended, so a null userId would presumably not
        // restrict results to one user - confirm callers never pass null.
        org.apache.lucene.search.Query luceneSecurityQuery = NumericRangeQuery.newLongRange("user", userId, userId,
                true, true);
        compositeQuery.add(luceneSecurityQuery, Occur.MUST);

        if (startDate != null || endDate != null) {
            final Long startMillis = startDate == null ? 0 : startDate.getTime();
            final Long endMillis = endDate == null ? Long.MAX_VALUE : endDate.getTime();
            org.apache.lucene.search.Query dateRangeQuery = NumericRangeQuery.newLongRange("creationDate",
                    startMillis, endMillis, true, true);
            compositeQuery.add(dateRangeQuery, Occur.MUST);
        }

        if (requireTopicsForUser) {
            org.apache.lucene.search.Query topicUserQuery = NumericRangeQuery.newLongRange("topicUser", userId,
                    userId, true, true);
            compositeQuery.add(topicUserQuery, Occur.MUST);
        }

        if (state != null) {
            final int stateInt = state.getValue();
            org.apache.lucene.search.Query stateQuery = NumericRangeQuery.newIntRange("state", stateInt, stateInt,
                    true, true);
            compositeQuery.add(stateQuery, Occur.MUST);
        }

        final Session session = (Session) this.em.getDelegate();
        final FullTextSession fullTextSession = Search.getFullTextSession(session);
        final FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(compositeQuery)
                .setProjection(projectionConstants);
        return fullTextQuery;
    }

    // FIXME - this arose after making documents shared by multiple users while
    // topics are still private. Users were seeing the names of other users'
    // topics. Tried filters, formulas, left joins, and about everything else I
    // could think of.
    // Left joins were especially troublesome as it seemed impossible to write
    // joins that would handle all 3 of the following scenarios:
    // 1.) No topics assigned to a document, 2.) No topics *for the current
    // user* assigned to a document, 3.) Topics assigned to current user.
    // Invariably, one of these 3 would be broken.
    // This is more performant than a lot of other options I thought of but
    // it still requires circumventing what it seems a framework like Hibernate
    // or JPA should be able to provide. It is also not lazy-loadable and should
    // be used with great care on large collections or objects where collection
    // may not be read.
    /**
     * Loads the topic matches of a document that belong to the current user
     * (as reported by {@link CurrentUserUtil#getCurrentUserId()}).
     *
     * @param documentId
     *            the document id
     * @return the current user's topic matches, highest score first
     */
    public List<TopicDocument> getTopicDocuments(final Long documentId) {
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        final Long userId = CurrentUserUtil.getCurrentUserId();
        final Query query = this.em.createQuery("select td " + "from TopicDocument td join td.topic topic "
                + "where td.document.id = :documentId and topic.userId = :userId " + "order by td.score desc");
        query.setParameter("userId", userId);
        query.setParameter("documentId", documentId);

        @SuppressWarnings("unchecked")
        final List<TopicDocument> filteredTopicDocuments = (List<TopicDocument>) query.getResultList();

        stopWatch.stop();
        logger.debug(stopWatch.toString());
        return filteredTopicDocuments;
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * com.mothsoft.alexis.dao.DocumentDao#getRelatedTerms(java.lang.String,
     * java.lang.Long, int)
     *
     * Builds a term graph rooted at the query string: takes the 100 most
     * recent matching documents, picks their 100 terms with the highest
     * summed tf_idf, then turns the strongest term associations touching
     * those terms into nodes and edges.
     */
    @SuppressWarnings("unchecked")
    public Graph getRelatedTerms(final String queryString, final Long userId, final int howMany) {
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        final FullTextQuery fullTextQuery = this.buildFullTextQuery(queryString, userId, NO_DATE, NO_DATE, false,
                DocumentState.MATCHED_TO_TOPICS, FullTextQuery.ID);

        // find the specified number of terms from the most recent 100 documents
        // that match the query
        final Sort sort = new Sort(new SortField("creationDate", SortField.LONG, true));
        fullTextQuery.setSort(sort);
        fullTextQuery.setFirstResult(0);
        fullTextQuery.setMaxResults(100);

        final List<Long> documentIds = new ArrayList<Long>(100);
        final List<Long> termIds = new ArrayList<Long>(100);

        final List<Object[]> results = fullTextQuery.list();

        for (final Object[] ith : results) {
            final Long id = (Long) ith[0];
            documentIds.add(id);
        }

        final Map<String, Node> nodes = new LinkedHashMap<String, Node>();
        final Node root = new Node(queryString, Boolean.TRUE);
        nodes.put(queryString, root);

        final Map<String, Edge> edges = new HashMap<String, Edge>();

        if (!documentIds.isEmpty()) {
            final Session session = (Session) this.em.getDelegate();
            final org.hibernate.SQLQuery termsQuery = session.createSQLQuery("SELECT term.id "
                    + " FROM document_term dt INNER JOIN term on term.id = dt.term_id "
                    + " WHERE dt.document_id IN (:documentIds) GROUP BY term.id ORDER BY SUM(dt.tf_idf) DESC");
            termsQuery.setParameterList("documentIds", documentIds);
            termsQuery.setMaxResults(100);
            termIds.addAll((List<Long>) termsQuery.list());
        }

        if (!documentIds.isEmpty() && !termIds.isEmpty()) {

            final Session session = (Session) this.em.getDelegate();
            final org.hibernate.SQLQuery associationsQuery = session.createSQLQuery(
                    "SELECT CONCAT(a.term_value) term_a_value, CONCAT(b.term_value) term_b_value, SUM(da.association_weight) sum_weight "
                            + " FROM document_association da "
                            + " INNER JOIN term a ON da.term_a_id = a.id "
                            + " AND a.part_of_speech NOT IN (1, 3, 18, 19, 25, 39, 40) "
                            + " AND length(a.term_value) > 2 "
                            + " INNER JOIN term b ON da.term_b_id = b.id "
                            + " AND b.part_of_speech NOT IN (1, 3, 18, 19, 25, 39, 40) "
                            + " AND length(b.term_value) > 2 "
                            + " WHERE da.document_id IN (:documentIds) AND (da.term_a_id IN (:termIds) OR da.term_b_id IN (:termIds)) "
                            + " GROUP BY a.id, b.id ORDER BY sum_weight DESC");
            associationsQuery.setParameterList("documentIds", documentIds);
            associationsQuery.setParameterList("termIds", termIds);
            associationsQuery.setMaxResults(howMany);

            final List<Object[]> relatedTermsResults = associationsQuery.list();

            final Set<String> aNodeKeys = new HashSet<String>();
            final Set<String> bNodeKeys = new HashSet<String>();

            for (final Object[] ith : relatedTermsResults) {
                final String a = (String) ith[0];
                final String b = (String) ith[1];

                if (!nodes.containsKey(a)) {
                    final Node node = new Node(a);
                    nodes.put(a, node);
                }

                if (!nodes.containsKey(b)) {
                    final Node node = new Node(b);
                    nodes.put(b, node);
                }

                // no self-loops
                if (a.equals(b)) {
                    continue;
                }

                // edges are undirected: a||b and b||a count as the same edge
                final String edgeKey = a + "||" + b;
                final String edgeKeyInverse = b + "||" + a;
                if (!edges.containsKey(edgeKey) && !edges.containsKey(edgeKeyInverse)) {
                    final Node nodeA = nodes.get(a);
                    final Node nodeB = nodes.get(b);

                    aNodeKeys.add(a);
                    bNodeKeys.add(b);

                    final Edge edge = new Edge(nodeA, nodeB);
                    edges.put(edgeKey, edge);
                }
            }

            // "orphan" handling, any b that is not also an a needs an edge from
            // root
            final Set<String> orphanKeys = new HashSet<String>();
            orphanKeys.addAll(bNodeKeys);
            orphanKeys.removeAll(aNodeKeys);

            for (final String orphanKey : orphanKeys) {
                final Node orphan = nodes.get(orphanKey);
                final Edge orphanToParent = new Edge(root, orphan);
                edges.put(root.getName() + "||" + orphan.getName(), orphanToParent);
            }
        }

        final List<Node> nodeList = new ArrayList<Node>(nodes.size());
        // keep root as first element
        nodes.remove(root.getName());
        nodeList.add(root);
        nodeList.addAll(nodes.values());

        final Graph graph = new Graph(nodeList, new ArrayList<Edge>(edges.values()));

        stopWatch.stop();
        logger.info("Related terms search took: " + stopWatch.toString());

        return graph;
    }

    /**
     * Returns the named entities with the highest summed count across a
     * user's documents created within the (inclusive) date range.
     *
     * @param userId
     *            the document user
     * @param startDate
     *            inclusive lower bound on document creation date
     * @param endDate
     *            inclusive upper bound on document creation date
     * @param howMany
     *            maximum number of entities to return
     * @return the entities, highest summed count first
     */
    @SuppressWarnings("unchecked")
    @Override
    public List<ImportantNamedEntity> getImportantNamedEntities(Long userId, Date startDate, Date endDate,
            int howMany) {
        final Query query = this.em
                .createQuery("SELECT NEW com.mothsoft.alexis.domain.ImportantNamedEntity(ne.name, sum(ne.count)) "
                        + "FROM DocumentNamedEntity ne JOIN ne.document document JOIN document.documentUsers documentUser "
                        + "WHERE document.creationDate >= :startDate AND document.creationDate <= :endDate AND documentUser.user.id = :userId "
                        + "GROUP BY ne.name ORDER BY sum(ne.count) DESC");
        query.setParameter("startDate", startDate);
        query.setParameter("endDate", endDate);
        query.setParameter("userId", userId);
        query.setMaxResults(howMany);
        return query.getResultList();
    }

    /**
     * Returns the named entities with the highest summed count in a single
     * document.
     *
     * @param documentId
     *            the document id
     * @param howMany
     *            maximum number of entities to return
     * @return the entities, highest summed count first
     */
    @SuppressWarnings("unchecked")
    @Override
    public List<ImportantNamedEntity> getImportantNamedEntitiesForDocument(Long documentId, int howMany) {
        final Query query = this.em
                .createQuery("SELECT NEW com.mothsoft.alexis.domain.ImportantNamedEntity(ne.name, sum(ne.count)) "
                        + "FROM DocumentNamedEntity ne JOIN ne.document document WHERE document.id = :documentId "
                        + "GROUP BY ne.name ORDER BY sum(ne.count) DESC");
        query.setParameter("documentId", documentId);
        query.setMaxResults(howMany);
        return query.getResultList();
    }

    /**
     * Minimal mutable pair used as the per-term (count, score) accumulator.
     * Static so that instances do not hold a reference to the enclosing DAO.
     */
    private static final class Tuple<T1, T2> {
        public T1 t1;
        public T2 t2;
    }

}