List of usage examples for `java.util.LinkedList.addAll`.
Method signature: `public boolean addAll(Collection<? extends E> c)` — appends all elements of the given collection to the end of this list.
From source file:org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java
@SuppressWarnings("unchecked") protected Query getFieldQueryImpl(String field, String queryText, AnalysisMode analysisMode, LuceneFunction luceneFunction) throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or noth // TODO: Untokenised columns with functions require special handling if (luceneFunction != LuceneFunction.FIELD) { throw new UnsupportedOperationException( "Field queries are not supported on lucene functions (UPPER, LOWER, etc)"); }// www .j a v a 2 s . co m // if the incoming string already has a language identifier we strip it iff and addit back on again String localePrefix = ""; String toTokenise = queryText; if (queryText.startsWith("{")) { int position = queryText.indexOf("}"); String language = queryText.substring(0, position + 1); Locale locale = new Locale(queryText.substring(1, position)); String token = queryText.substring(position + 1); boolean found = false; if (!locale.toString().isEmpty()) { for (Locale current : Locale.getAvailableLocales()) { if (current.toString().equalsIgnoreCase(locale.toString())) { found = true; break; } } } if (found) { localePrefix = language; toTokenise = token; } else { toTokenise = token; } } String testText = toTokenise; boolean requiresMLTokenDuplication = false; String localeString = null; if (field.startsWith(PROPERTY_FIELD_PREFIX) && (localePrefix.length() == 0)) { if ((queryText.length() > 0) && (queryText.charAt(0) == '\u0000')) { int position = queryText.indexOf("\u0000", 1); testText = queryText.substring(position + 1); requiresMLTokenDuplication = true; localeString = queryText.substring(1, position); } } // find the positions of any escaped * and ? 
and ignore them Set<Integer> wildcardPoistions = getWildcardPositions(testText); TokenStream source; if ((localePrefix.length() == 0) || (wildcardPoistions.size() > 0) || (analysisMode == AnalysisMode.IDENTIFIER)) { source = getAnalyzer().tokenStream(field, new StringReader(toTokenise), analysisMode); } else { source = getAnalyzer().tokenStream(field, new StringReader( "\u0000" + localePrefix.substring(1, localePrefix.length() - 1) + "\u0000" + toTokenise), analysisMode); localePrefix = ""; } ArrayList<org.apache.lucene.analysis.Token> list = new ArrayList<org.apache.lucene.analysis.Token>(); org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token(); org.apache.lucene.analysis.Token nextToken; int positionCount = 0; boolean severalTokensAtSamePosition = false; while (true) { try { nextToken = source.next(reusableToken); } catch (IOException e) { nextToken = null; } if (nextToken == null) break; list.add((org.apache.lucene.analysis.Token) nextToken.clone()); if (nextToken.getPositionIncrement() != 0) positionCount += nextToken.getPositionIncrement(); else severalTokensAtSamePosition = true; } try { source.close(); } catch (IOException e) { // ignore } // add any alpha numeric wildcards that have been missed // Fixes most stop word and wild card issues for (int index = 0; index < testText.length(); index++) { char current = testText.charAt(index); if (((current == '*') || (current == '?')) && wildcardPoistions.contains(index)) { StringBuilder pre = new StringBuilder(10); if (index == 0) { // "*" and "?" 
at the start boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= 0) && (0 < test.endOffset())) { found = true; break; } } if (!found && (testText.length() == 1)) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(0, 0); newToken.setTermBuffer(""); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null ? defaultSearchMLAnalysisMode : searchParameters.getMlAnalaysisMode(); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } else if (index > 0) { // Add * and ? 
back into any tokens from which it has been removed boolean tokenFound = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= index) && (index < test.endOffset())) { if (requiresMLTokenDuplication) { String termText = new String(test.termBuffer(), 0, test.termLength()); int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); if (index >= test.startOffset() + token.length()) { test.setTermBuffer(language + token + current); } } else { if (index >= test.startOffset() + test.termLength()) { test.setTermBuffer(test.term() + current); } } tokenFound = true; break; } } if (!tokenFound) { for (int i = index - 1; i >= 0; i--) { char c = testText.charAt(i); if (Character.isLetterOrDigit(c)) { boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= i) && (i < test.endOffset())) { found = true; break; } } if (found) { break; } else { pre.insert(0, c); } } else { break; } } if (pre.length() > 0) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token( index - pre.length(), index); newToken.setTermBuffer(pre.toString()); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null ? 
defaultSearchMLAnalysisMode : searchParameters.getMlAnalaysisMode(); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } } StringBuilder post = new StringBuilder(10); if (index > 0) { for (int i = index + 1; i < testText.length(); i++) { char c = testText.charAt(i); if (Character.isLetterOrDigit(c)) { boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= i) && (i < test.endOffset())) { found = true; break; } } if (found) { break; } else { post.append(c); } } else { break; } } if (post.length() > 0) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(index + 1, index + 1 + post.length()); newToken.setTermBuffer(post.toString()); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null ? 
defaultSearchMLAnalysisMode : searchParameters.getMlAnalaysisMode(); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } } } Collections.sort(list, new Comparator<org.apache.lucene.analysis.Token>() { public int compare(Token o1, Token o2) { int dif = o1.startOffset() - o2.startOffset(); if (dif != 0) { return dif; } else { return o2.getPositionIncrement() - o1.getPositionIncrement(); } } }); // Combined * and ? based strings - should redo the tokeniser // Build tokens by position LinkedList<LinkedList<org.apache.lucene.analysis.Token>> tokensByPosition = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); LinkedList<org.apache.lucene.analysis.Token> currentList = null; for (org.apache.lucene.analysis.Token c : list) { if (c.getPositionIncrement() == 0) { if (currentList == null) { currentList = new LinkedList<org.apache.lucene.analysis.Token>(); tokensByPosition.add(currentList); } currentList.add(c); } else { currentList = new LinkedList<org.apache.lucene.analysis.Token>(); tokensByPosition.add(currentList); currentList.add(c); } } // Build all the token sequences and see which ones get strung together LinkedList<LinkedList<org.apache.lucene.analysis.Token>> allTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); for (LinkedList<org.apache.lucene.analysis.Token> tokensAtPosition : tokensByPosition) { if (allTokenSequences.size() == 0) { for (org.apache.lucene.analysis.Token t : tokensAtPosition) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.add(t); allTokenSequences.add(newEntry); } } else { LinkedList<LinkedList<org.apache.lucene.analysis.Token>> 
newAllTokeSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); FOR_FIRST_TOKEN_AT_POSITION_ONLY: for (org.apache.lucene.analysis.Token t : tokensAtPosition) { boolean tokenFoundSequence = false; for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.addAll(tokenSequence); if (newEntry.getLast().endOffset() <= t.startOffset()) { newEntry.add(t); tokenFoundSequence = true; } newAllTokeSequences.add(newEntry); } if (false == tokenFoundSequence) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.add(t); newAllTokeSequences.add(newEntry); } // Limit the max number of permutations we consider if (newAllTokeSequences.size() > 64) { break FOR_FIRST_TOKEN_AT_POSITION_ONLY; } } allTokenSequences = newAllTokeSequences; } } // build the uniquie LinkedList<LinkedList<org.apache.lucene.analysis.Token>> fixedTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) { LinkedList<org.apache.lucene.analysis.Token> fixedTokenSequence = new LinkedList<org.apache.lucene.analysis.Token>(); fixedTokenSequences.add(fixedTokenSequence); org.apache.lucene.analysis.Token replace = null; for (org.apache.lucene.analysis.Token c : tokenSequence) { if (replace == null) { StringBuilder prefix = new StringBuilder(); for (int i = c.startOffset() - 1; i >= 0; i--) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { prefix.insert(0, test); } else { break; } } String pre = prefix.toString(); if (requiresMLTokenDuplication) { String termText = new String(c.termBuffer(), 0, c.termLength()); int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = 
termText.substring(position + 1); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(language + pre + token); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } else { String termText = new String(c.termBuffer(), 0, c.termLength()); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(pre + termText); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } } else { StringBuilder prefix = new StringBuilder(); StringBuilder postfix = new StringBuilder(); StringBuilder builder = prefix; for (int i = c.startOffset() - 1; i >= replace.endOffset(); i--) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { builder.insert(0, test); } else { builder = postfix; postfix.setLength(0); } } String pre = prefix.toString(); String post = postfix.toString(); // Does it bridge? 
if ((pre.length() > 0) && (replace.endOffset() + pre.length()) == c.startOffset()) { String termText = new String(c.termBuffer(), 0, c.termLength()); if (requiresMLTokenDuplication) { int position = termText.indexOf("}"); @SuppressWarnings("unused") String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); replace = new org.apache.lucene.analysis.Token(replace.startOffset(), c.endOffset()); replace.setTermBuffer(replaceTermText + pre + token); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); } else { int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); replace = new org.apache.lucene.analysis.Token(replace.startOffset(), c.endOffset()); replace.setTermBuffer(replaceTermText + pre + termText); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); } } else { String termText = new String(c.termBuffer(), 0, c.termLength()); if (requiresMLTokenDuplication) { int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token( replace.startOffset(), replace.endOffset() + post.length()); last.setTermBuffer(replaceTermText + post); last.setType(replace.type()); last.setPositionIncrement(replace.getPositionIncrement()); fixedTokenSequence.add(last); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(language + pre + token); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } else { String replaceTermText = 
new String(replace.termBuffer(), 0, replace.termLength()); org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token( replace.startOffset(), replace.endOffset() + post.length()); last.setTermBuffer(replaceTermText + post); last.setType(replace.type()); last.setPositionIncrement(replace.getPositionIncrement()); fixedTokenSequence.add(last); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(pre + termText); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } } } } // finish last if (replace != null) { StringBuilder postfix = new StringBuilder(); if ((replace.endOffset() >= 0) && (replace.endOffset() < testText.length())) { for (int i = replace.endOffset(); i < testText.length(); i++) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { postfix.append(test); } else { break; } } } String post = postfix.toString(); int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); replace = new org.apache.lucene.analysis.Token(replace.startOffset(), replace.endOffset() + post.length()); replace.setTermBuffer(replaceTermText + post); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); fixedTokenSequence.add(replace); } } // rebuild fixed list ArrayList<org.apache.lucene.analysis.Token> fixed = new ArrayList<org.apache.lucene.analysis.Token>(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) { for (org.apache.lucene.analysis.Token token : tokenSequence) { fixed.add(token); } } // reorder by start position and increment Collections.sort(fixed, new Comparator<org.apache.lucene.analysis.Token>() { public int compare(Token o1, Token o2) { int dif = o1.startOffset() - o2.startOffset(); if (dif != 0) { return dif; } else { return o1.getPositionIncrement() - 
o2.getPositionIncrement(); } } }); // make sure we remove any tokens we have duplicated @SuppressWarnings("rawtypes") OrderedHashSet unique = new OrderedHashSet(); unique.addAll(fixed); fixed = new ArrayList<org.apache.lucene.analysis.Token>(unique); list = fixed; // add any missing locales back to the tokens if (localePrefix.length() > 0) { for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token currentToken = list.get(j); String termText = new String(currentToken.termBuffer(), 0, currentToken.termLength()); currentToken.setTermBuffer(localePrefix + termText); } } if (list.size() == 0) return null; else if (list.size() == 1) { nextToken = list.get(0); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); if (termText.contains("*") || termText.contains("?")) { return newWildcardQuery( new Term(field, getLowercaseExpandedTerms() ? termText.toLowerCase() : termText)); } else { return newTermQuery(new Term(field, termText)); } } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = newBooleanQuery(true); for (int i = 0; i < list.size(); i++) { Query currentQuery; nextToken = list.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); if (termText.contains("*") || termText.contains("?")) { currentQuery = newWildcardQuery(new Term(field, getLowercaseExpandedTerms() ? 
termText.toLowerCase() : termText)); } else { currentQuery = newTermQuery(new Term(field, termText)); } q.add(currentQuery, BooleanClause.Occur.SHOULD); } return q; } // Consider if we can use a multi-phrase query (e.g for synonym use rather then WordDelimiterFilterFactory) else if (canUseMultiPhraseQuery(fixedTokenSequences)) { // phrase query: MultiPhraseQuery mpq = newMultiPhraseQuery(); mpq.setSlop(internalSlop); ArrayList<Term> multiTerms = new ArrayList<Term>(); int position = 0; for (int i = 0; i < list.size(); i++) { nextToken = list.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); Term term = new Term(field, termText); if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { addWildcardTerms(multiTerms, term); } else { multiTerms.add(term); } if (nextToken.getPositionIncrement() > 0 && multiTerms.size() > 0) { if (getEnablePositionIncrements()) { mpq.add(multiTerms.toArray(new Term[0]), position); } else { mpq.add(multiTerms.toArray(new Term[0])); } checkTermCount(field, queryText, mpq); multiTerms.clear(); } position += nextToken.getPositionIncrement(); } if (getEnablePositionIncrements()) { if (multiTerms.size() > 0) { mpq.add(multiTerms.toArray(new Term[0]), position); } // else // { // mpq.add(new Term[] { new Term(field, "\u0000") }, position); // } } else { if (multiTerms.size() > 0) { mpq.add(multiTerms.toArray(new Term[0])); } // else // { // mpq.add(new Term[] { new Term(field, "\u0000") }); // } } checkTermCount(field, queryText, mpq); return mpq; } // Word delimiter factory and other odd things generate complex token patterns // Smart skip token sequences with small tokens that generate toomany wildcards // Fall back to the larger pattern // e.g Site1* will not do (S ite 1*) or (Site 1*) if 1* matches too much (S ite1*) and (Site1*) will still be OK // If we skip all (for just 1* in the input) this is still an issue. 
else { boolean skippedTokens = false; BooleanQuery q = newBooleanQuery(true); TOKEN_SEQUENCE: for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) { // phrase query: MultiPhraseQuery mpq = newMultiPhraseQuery(); mpq.setSlop(internalSlop); int position = 0; for (int i = 0; i < tokenSequence.size(); i++) { nextToken = (org.apache.lucene.analysis.Token) tokenSequence.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); Term term = new Term(field, termText); if (getEnablePositionIncrements()) { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { mpq.add(getMatchingTerms(field, term), position); } else { mpq.add(new Term[] { term }, position); } if (exceedsTermCount(mpq)) { // We could duplicate the token sequence without the failing wildcard expansion and try again ?? skippedTokens = true; continue TOKEN_SEQUENCE; } if (nextToken.getPositionIncrement() > 0) { position += nextToken.getPositionIncrement(); } else { position++; } } else { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { mpq.add(getMatchingTerms(field, term)); } else { mpq.add(term); } if (exceedsTermCount(mpq)) { skippedTokens = true; continue TOKEN_SEQUENCE; } } } q.add(mpq, BooleanClause.Occur.SHOULD); } if (skippedTokens && (q.clauses().size() == 0)) { throw new LuceneQueryParserException( "Query skipped all token sequences as wildcards generated too many clauses: " + field + " " + queryText); } return q; } } else { MultiPhraseQuery q = new MultiPhraseQuery(); q.setSlop(internalSlop); int position = 0; for (int i = 0; i < list.size(); i++) { nextToken = list.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); Term term = new Term(field, termText); if (getEnablePositionIncrements()) { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { q.add(getMatchingTerms(field, term), position); } else { q.add(new Term[] { 
term }, position); } checkTermCount(field, queryText, q); if (nextToken.getPositionIncrement() > 0) { position += nextToken.getPositionIncrement(); } else { position++; } } else { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { q.add(getMatchingTerms(field, term)); } else { q.add(term); } checkTermCount(field, queryText, q); } } return q; } } }
From source file:gedi.riboseq.inference.orf.OrfFinder.java
/**
 * Finds open reading frames (ORFs) supported by the observed codon activities.
 * All coordinates are in {@code codonsRegion} space!
 * <p>
 * Outline: filter low-activity codons; inject stop codons (TAG/TGA/TAA, both
 * unspliced and across splice junctions) to delimit ORFs; optionally assemble
 * ORFs greedily along annotated coding transcripts first; then build a directed
 * codon graph of in-frame successors and repeatedly extract the highest-activity
 * path per weakly connected component. Overlapping ORFs within a connected
 * group are deconvolved (EM or unique-coverage) and weak ones dropped.
 *
 * @param index        identifier recorded on every produced {@link OrfWithCodons}
 *                     (presumably identifies the region/chunk — confirm with callers)
 * @param sequence     nucleotide sequence of {@code codonsRegion}
 * @param sg           splice graph providing the introns considered for spliced codons
 * @param codonsRegion region with its observed codons; NOTE: its codon set is
 *                     mutated here (low-activity codons removed, stop codons added)
 * @return all accepted ORFs with their supporting codons
 */
public ArrayList<OrfWithCodons> findOrfs(int index, String sequence, SpliceGraph sg,
        ImmutableReferenceGenomicRegion<IntervalTreeSet<Codon>> codonsRegion) {
    SimpleDirectedGraph<Codon> fg = new SimpleDirectedGraph<Codon>("Codongraph");

    LeftMostInFrameAndClearList buff = new LeftMostInFrameAndClearList();

    // Drop codons below the activity threshold; nothing left ⇒ no ORFs.
    IntervalTreeSet<Codon> codons = codonsRegion.getData();
    codons.removeIf(c -> c.getTotalActivity() < minCodonActivity);
    if (codons.size() == 0)
        return new ArrayList<OrfWithCodons>();

    // Add stop codons for easy orf inference: scan the plain sequence for
    // TAG/TGA/TAA via Aho-Corasick...
    HashSet<Codon> stopCodons = new HashSet<Codon>();
    Trie<String> stop = new Trie<String>();
    stop.put("TAG", "TAG");
    stop.put("TGA", "TGA");
    stop.put("TAA", "TAA");
    stop.iterateAhoCorasick(sequence)
            .map(r -> new Codon(new ArrayGenomicRegion(r.getStart(), r.getEnd()), r.getValue()))
            .toCollection(stopCodons);

    // ...and also check stops straddling each splice junction (2+1 and 1+2 splits).
    for (Intron intr : sg.iterateIntrons().loop()) {
        ArrayGenomicRegion reg = new ArrayGenomicRegion(intr.getStart() - 2, intr.getStart(), intr.getEnd(),
                intr.getEnd() + 1);
        String cod = stop.get(SequenceUtils.extractSequence(reg, sequence));
        if (cod != null)
            stopCodons.add(new Codon(reg, cod));

        reg = new ArrayGenomicRegion(intr.getStart() - 1, intr.getStart(), intr.getEnd(), intr.getEnd() + 2);
        cod = stop.get(SequenceUtils.extractSequence(reg, sequence));
        if (cod != null)
            stopCodons.add(new Codon(reg, cod));
    }
    stopCodons.removeAll(codons);
    codons.addAll(stopCodons);

    ArrayList<OrfWithCodons> re = new ArrayList<OrfWithCodons>();
    HashSet<Codon> usedForAnno = new HashSet<Codon>();

    if (assembleAnnotationFirst) {
        // New: first use annotated transcripts in a greedy fashion — repeatedly
        // pick the frame interval (stop-to-stop) with the highest summed
        // not-yet-claimed activity across all coding transcripts.
        ArrayList<ImmutableReferenceGenomicRegion<Transcript>> transcripts = annotation.ei(codonsRegion)
                .filter(t -> t.getData().isCoding()).map(t -> codonsRegion.induce(t, "T")).list();

        int acount = 0;
        LinkedList<OrfWithCodons> orfs = new LinkedList<OrfWithCodons>();
        GenomicRegion best;
        HashSet<Codon> aremoved = new HashSet<Codon>();

        do {
            best = null;
            double bestSum = 0;
            for (ImmutableReferenceGenomicRegion<Transcript> tr : transcripts) {
                // Project codon activities onto transcript coordinates; stops
                // are marked with -1 as frame delimiters.
                double[] a = new double[tr.getRegion().getTotalLength()];
                for (Codon c : codons) {
                    if (tr.getRegion().containsUnspliced(c)) {
                        int p = tr.induce(c.getStart());
                        assert a[p] == 0;
                        if (!aremoved.contains(c))
                            a[p] = c.totalActivity;
                        if (c.isStop())
                            a[p] = -1;
                    }
                }
                // Scan all three frames for the best stop-delimited stretch.
                for (int f = 0; f < 3; f++) {
                    int s = -1;
                    double sum = 0;
                    for (int p = f; p < a.length; p += 3) {
                        if (a[p] == -1) { // stop
                            if (sum > bestSum) {
                                bestSum = sum;
                                best = tr.getRegion().map(new ArrayGenomicRegion(s, p + 3));
                            }
                            s = -1;
                            sum = 0;
                        } else
                            sum += a[p];
                        if (a[p] > 0 && s == -1)
                            s = p;
                    }
                }
            }
            if (best != null) {
                // Claim the in-frame codons of the winning interval and accept
                // it as an ORF if it passes the uniqueness/activity thresholds.
                ArrayList<Codon> cods = new ArrayList<>();
                int uniqueCodons = 0;
                double uniqueActivity = 0;
                double totalActivity = 0;

                for (Codon c : codons) {
                    if (best.containsUnspliced(c) && best.induce(c.getStart()) % 3 == 0) {
                        if (aremoved.add(c)) {
                            uniqueActivity += c.totalActivity;
                            uniqueCodons++;
                        }
                        totalActivity += c.totalActivity;
                        if (c.totalActivity > 0)
                            cods.add(c);
                    }
                }
                if ((uniqueCodons >= minUniqueCodons || uniqueCodons == cods.size())
                        && uniqueActivity > minUniqueActivity && totalActivity > minOrfTotalActivity) {
                    Collections.sort(cods);
                    usedForAnno.addAll(cods);
                    OrfWithCodons orf = new OrfWithCodons(index, 0, acount++, best.toArrayGenomicRegion(),
                            cods, true);
                    orfs.add(orf);
                }
            }
        } while (best != null);

        if (orfs.size() > 1) {
            // They are not necessarily connected! Deconvolve activities within
            // each connected group and drop ORFs that fall below threshold.
            LinkedList<OrfWithCodons>[] connected = findConnectedOrfs(orfs);
            orfs.clear();
            for (LinkedList<OrfWithCodons> corfs : connected) {
                for (boolean changed = true; changed && corfs.size() > 1;) {
                    changed = false;
                    if (useEM)
                        inferOverlappingOrfActivitiesEM(corfs);
                    else
                        overlapUniqueCoverage(corfs);

                    Iterator<OrfWithCodons> it = corfs.iterator();
                    while (it.hasNext()) {
                        OrfWithCodons orf = it.next();
                        if (orf.getEstimatedTotalActivity() < minOrfTotalActivity) {
                            it.remove();
                            changed = true;
                        }
                    }
                }
                if (corfs.size() > 1)
                    distributeCodons(corfs);
                orfs.addAll(corfs);
            }
        }
        re.addAll(orfs);
    }

    // Build the codon successor graph. As edges only are represented in the
    // splice graph, singleton codons are discarded (which does make sense anyway).
    for (Codon c : codons) {
        if (!c.isStop()) {
            // find unspliced successors (can be more than one, when the
            // successor codon itself is spliced! all of them have the same start!)
            int max = c.getEnd() + maxAminoDist * 3;
            for (Codon n : codons
                    .getIntervalsIntersecting(c.getEnd(), c.getEnd() + maxAminoDist * 3,
                            buff.startAndClear(c))
                    .get()) {
                if (!containsInframeStop(sequence.substring(c.getEnd(), n.getStart())))
                    fg.addInteraction(c, n);
                max = n.getStart() + 2;
            }

            // find all spliced successors for each splice junction that comes
            // before n or maxAminoDist
            sg.forEachIntronStartingBetween(c.getEnd(), max + 1, intron -> {
                for (Codon n : codons.getIntervalsIntersecting(intron.getEnd(),
                        intron.getEnd() + maxAminoDist * 3 - (intron.getStart() - c.getEnd()),
                        buff.startAndClear(c, intron)).get())
                    if (!containsInframeStop(SequenceUtils.extractSequence(new ArrayGenomicRegion(
                            c.getStart(), intron.getStart(), intron.getEnd(), n.getStart()), sequence)))
                        fg.addInteraction(c, n, intron);
            });
        }
    }

    int cc = 1;
    for (SimpleDirectedGraph<Codon> g : fg.getWeaklyConnectedComponents()) {
        if (EI.wrap(g.getSources()).mapToDouble(c -> c.getTotalActivity()).sum() == 0)
            continue;

        // Iterate longest (highest-activity) paths in g. Codons already marked
        // as stops or consumed by the annotation pass start out "removed",
        // i.e. they contribute zero weight.
        LinkedList<Codon> topo = g.getTopologicalOrder();
        HashSet<Codon> remInTopo = new HashSet<Codon>(topo);
        remInTopo.removeIf(c -> !stopCodons.contains(c) && !usedForAnno.contains(c));
        HashSet<Codon> removed = new HashSet<Codon>(remInTopo);

        LinkedList<OrfWithCodons> orfs = new LinkedList<OrfWithCodons>();
        int count = 0;

        while (removed.size() < topo.size()) {
            // Dynamic program over the topological order: longestPrefixes maps
            // each codon to (best path region ending at it, its total weight).
            HashMap<Codon, MutablePair<GenomicRegion, Double>> longestPrefixes = new HashMap<Codon, MutablePair<GenomicRegion, Double>>();
            for (Codon c : topo)
                longestPrefixes.put(c, new MutablePair<GenomicRegion, Double>(c,
                        removed.contains(c) ? 0 : (c.getTotalActivity())));

            Codon longestEnd = null;
            HashMap<Codon, Codon> backtracking = new HashMap<Codon, Codon>();
            for (Codon c : topo) {
                double len = longestPrefixes.get(c).Item2;
                for (AdjacencyNode<Codon> n = g.getTargets(c); n != null; n = n.next) {
                    MutablePair<GenomicRegion, Double> pref = longestPrefixes.get(n.node);

                    double nnact = removed.contains(n.node) ? 0 : (n.node.getTotalActivity());
                    if (pref.Item2 <= len + nnact) {
                        pref.set(extendFullPath(longestPrefixes.get(c).Item1, c, n.node, n.getLabel()),
                                len + nnact);
                        backtracking.put(n.node, c);
                    }
                }
                if (longestEnd == null || longestPrefixes.get(longestEnd).Item2 <= len)
                    longestEnd = c;
            }

            // determine longest path by backtracking and mark all codons on the
            // path as removed
            ArrayList<Codon> orfCodons = new ArrayList<Codon>();
            double totalActivity = 0;
            double uniqueActivity = 0;
            int uniqueCodons = 0;
            for (Codon c = longestEnd; c != null; c = backtracking.get(c)) {
                if (removed.add(c) && c.getTotalActivity() > 0) {
                    uniqueCodons++;
                    uniqueActivity += c.getTotalActivity();
                }

                if (c.getTotalActivity() > 0) // to remove dummy stop codons
                    orfCodons.add(c);
                totalActivity += c.getTotalActivity();
            }

            if ((uniqueCodons >= minUniqueCodons || uniqueCodons == orfCodons.size())
                    && uniqueActivity > minUniqueActivity && totalActivity > minOrfTotalActivity) {

                Collections.reverse(orfCodons);

                MutablePair<GenomicRegion, Double> triple = longestPrefixes.get(longestEnd);
                ArrayGenomicRegion region = triple.Item1.toArrayGenomicRegion();
                // Whether the last codon is a genuine stop determines the
                // hasStop flag on the ORF.
                String lastCodon = SequenceUtils.extractSequence(
                        region.map(new ArrayGenomicRegion(region.getTotalLength() - 3,
                                region.getTotalLength())),
                        sequence);

                OrfWithCodons orf = new OrfWithCodons(index, cc, count++, region, orfCodons,
                        stop.containsKey(lastCodon));
                orfs.add(orf);
            }
        }

        if (orfs.size() > 1) {
            // They are not necessarily connected! Same deconvolution as in the
            // annotation-first pass above.
            LinkedList<OrfWithCodons>[] connected = findConnectedOrfs(orfs);
            orfs.clear();
            for (LinkedList<OrfWithCodons> corfs : connected) {
                for (boolean changed = true; changed && corfs.size() > 1;) {
                    changed = false;
                    if (useEM)
                        inferOverlappingOrfActivitiesEM(corfs);
                    else
                        overlapUniqueCoverage(corfs);

                    Iterator<OrfWithCodons> it = corfs.iterator();
                    while (it.hasNext()) {
                        OrfWithCodons orf = it.next();
                        if (orf.getEstimatedTotalActivity() < minOrfTotalActivity) {
                            it.remove();
                            changed = true;
                        }
                    }
                }
                if (corfs.size() > 1)
                    distributeCodons(corfs);
                orfs.addAll(corfs);
            }
        }
        re.addAll(orfs);

        cc++;
    }

    return re;
}
From source file:org.nuxeo.ecm.core.storage.sql.NXQLQueryMaker.java
/**
 * Builds the SQL query (and its parameters) for an NXQL query string.
 * <p>
 * Pipeline: apply security-policy query transformers, analyze the NXQL to find
 * the types/properties involved, compute the concrete document types to match,
 * then build one SELECT per document kind (direct and/or proxy) and UNION them
 * if both are needed.
 *
 * @param sqlInfo     SQL generation info (database, dialect)
 * @param model       the repository model (tables, keys, type hierarchy)
 * @param session     the current session
 * @param query       the NXQL query string
 * @param queryFilter security principals/permissions, facet filter and transformers
 * @param params      extra parameters (unused here)
 * @return the built {@code Query}, or {@code null} when the query can never match
 * @throws StorageException on unknown types/fields or query parse problems
 */
public Query buildQuery(SQLInfo sqlInfo, Model model, Session session, String query, QueryFilter queryFilter,
        Object... params) throws StorageException {
    this.sqlInfo = sqlInfo;
    database = sqlInfo.database;
    dialect = sqlInfo.dialect;
    this.model = model;
    this.session = session;
    // transform the query according to the transformers defined by the
    // security policies
    SQLQuery sqlQuery = SQLQueryParser.parse(query);
    for (SQLQuery.Transformer transformer : queryFilter.getQueryTransformers()) {
        sqlQuery = transformer.transform(queryFilter.getPrincipal(), sqlQuery);
    }
    /*
     * Find all relevant types and keys for the criteria.
     */
    QueryAnalyzer info = new QueryAnalyzer();
    try {
        info.visitQuery(sqlQuery);
    } catch (QueryCannotMatchException e) {
        // query cannot match
        return null;
    } catch (QueryMakerException e) {
        throw new StorageException(e.getMessage(), e);
    }
    /*
     * Find all the types to take into account (all concrete types being a
     * subtype of the passed types) based on the FROM list.
     */
    Set<String> types = new HashSet<String>();
    for (String typeName : info.fromTypes) {
        // "document" is an NXQL alias for the root Document type
        if ("document".equals(typeName)) {
            typeName = "Document";
        }
        Set<String> subTypes = model.getDocumentSubTypes(typeName);
        if (subTypes == null) {
            throw new StorageException("Unknown type: " + typeName);
        }
        types.addAll(subTypes);
    }
    types.remove(model.ROOT_TYPE);
    /*
     * Restrict types based on toplevel ecm:primaryType and ecm:mixinType
     * predicates.
     */
    types.removeAll(info.typesExcluded);
    if (!info.typesAnyRequired.isEmpty()) {
        types.retainAll(info.typesAnyRequired);
    }
    if (types.isEmpty()) {
        // conflicting types requirement, query cannot match
        return null;
    }
    /*
     * Merge facet filter into mixin clauses and immutable flag.
     * IMMUTABLE is not a real mixin: it is folded into immutableClause.
     */
    FacetFilter facetFilter = queryFilter.getFacetFilter();
    if (facetFilter == null) {
        facetFilter = FacetFilter.ALLOW;
    }
    info.mixinsExcluded.addAll(facetFilter.excluded);
    if (info.mixinsExcluded.remove(FacetNames.IMMUTABLE)) {
        if (info.immutableClause == Boolean.TRUE) {
            // conflict on immutable condition, query cannot match
            return null;
        }
        info.immutableClause = Boolean.FALSE;
    }
    info.mixinsAllRequired.addAll(facetFilter.required);
    if (info.mixinsAllRequired.remove(FacetNames.IMMUTABLE)) {
        if (info.immutableClause == Boolean.FALSE) {
            // conflict on immutable condition, query cannot match
            return null;
        }
        info.immutableClause = Boolean.TRUE;
    }
    /*
     * Find the relevant tables to join with.
     */
    Set<String> fragmentNames = new HashSet<String>();
    for (String prop : info.props) {
        PropertyInfo propertyInfo = model.getPropertyInfo(prop);
        if (propertyInfo == null) {
            throw new StorageException("Unknown field: " + prop);
        }
        fragmentNames.add(propertyInfo.fragmentName);
    }
    // the hierarchy table is always there; don't LEFT JOIN it on itself
    fragmentNames.remove(model.hierTableName);
    // Do we need to add the versions table too?
    if (info.needsVersionsTable || info.immutableClause != null) {
        fragmentNames.add(model.VERSION_TABLE_NAME);
    }
    /*
     * Build the FROM / JOIN criteria for each select.
     */
    DocKind[] docKinds;
    if (info.proxyClause == Boolean.TRUE) {
        if (info.immutableClause == Boolean.FALSE) {
            // proxy but not immutable: query cannot match
            return null;
        }
        docKinds = new DocKind[] { DocKind.PROXY };
    } else if (info.proxyClause == Boolean.FALSE || info.immutableClause == Boolean.FALSE) {
        docKinds = new DocKind[] { DocKind.DIRECT };
    } else {
        // no restriction: query both direct documents and proxies, UNIONed below
        docKinds = new DocKind[] { DocKind.DIRECT, DocKind.PROXY };
    }
    Table hier = database.getTable(model.hierTableName);
    // two selects => UNION => orderable columns must be aliased
    boolean aliasColumns = docKinds.length > 1;
    Select select = null;
    String orderBy = null;
    List<String> statements = new ArrayList<String>(2);
    List<Serializable> selectParams = new LinkedList<Serializable>();
    for (DocKind docKind : docKinds) {
        // The hierarchy table, which may be an alias table.
        Table hierTable;
        // Quoted id in the hierarchy. This is the id returned by the query.
        String hierId;
        // Quoted name in the hierarchy. This is the id returned by the query.
        String hierName;
        // The hierarchy table of the data.
        Table dataHierTable;
        // Quoted id attached to the data that matches.
        String dataHierId;
        List<String> joins = new LinkedList<String>();
        LinkedList<String> leftJoins = new LinkedList<String>();
        List<Serializable> leftJoinsParams = new LinkedList<Serializable>();
        LinkedList<String> implicitJoins = new LinkedList<String>();
        List<Serializable> implicitJoinsParams = new LinkedList<Serializable>();
        List<String> whereClauses = new LinkedList<String>();
        List<Serializable> whereParams = new LinkedList<Serializable>();
        switch (docKind) {
        case DIRECT:
            // hierarchy and data are the same table
            hierTable = hier;
            hierId = hierTable.getColumn(model.MAIN_KEY).getFullQuotedName();
            hierName = hierTable.getColumn(model.HIER_CHILD_NAME_KEY).getFullQuotedName();
            dataHierTable = hierTable;
            dataHierId = hierId;
            joins.add(hierTable.getQuotedName());
            break;
        case PROXY:
            // alias the hierarchy so the proxy row and its target row can coexist
            hierTable = new TableAlias(hier, TABLE_HIER_ALIAS);
            String hierFrom = hier.getQuotedName() + " " + hierTable.getQuotedName();
            // TODO use dialect
            hierId = hierTable.getColumn(model.MAIN_KEY).getFullQuotedName();
            hierName = hierTable.getColumn(model.HIER_CHILD_NAME_KEY).getFullQuotedName();
            // joined (data)
            dataHierTable = hier;
            dataHierId = hier.getColumn(model.MAIN_KEY).getFullQuotedName();
            // proxies
            Table proxies = database.getTable(model.PROXY_TABLE_NAME);
            String proxiesid = proxies.getColumn(model.MAIN_KEY).getFullQuotedName();
            String proxiestargetid = proxies.getColumn(model.PROXY_TARGET_KEY).getFullQuotedName();
            // join all that: hier(alias) -> proxies -> hier(data) via the target id
            joins.add(hierFrom);
            joins.add(String.format(JOIN_ON, proxies.getQuotedName(), hierId, proxiesid));
            joins.add(String.format(JOIN_ON, dataHierTable.getQuotedName(), dataHierId, proxiestargetid));
            break;
        default:
            throw new AssertionError(docKind);
        }
        // main data joins
        for (String fragmentName : fragmentNames) {
            Table table = database.getTable(fragmentName);
            // the versions table joins on the real hier table
            boolean useHier = model.VERSION_TABLE_NAME.equals(fragmentName);
            leftJoins.add(String.format(JOIN_ON, table.getQuotedName(), useHier ? hierId : dataHierId,
                    table.getColumn(model.MAIN_KEY).getFullQuotedName()));
        }
        /*
         * Filter on facets and mixin types, and create the structural WHERE
         * clauses for the type.
         */
        List<String> typeStrings = new ArrayList<String>(types.size());
        NEXT_TYPE: for (String type : types) {
            Set<String> facets = model.getDocumentTypeFacets(type);
            for (String facet : info.mixinsExcluded) {
                if (facets.contains(facet)) {
                    continue NEXT_TYPE;
                }
            }
            for (String facet : info.mixinsAllRequired) {
                if (!facets.contains(facet)) {
                    continue NEXT_TYPE;
                }
            }
            if (!info.mixinsAnyRequired.isEmpty()) {
                Set<String> intersection = new HashSet<String>(info.mixinsAnyRequired);
                intersection.retainAll(facets);
                if (intersection.isEmpty()) {
                    continue NEXT_TYPE;
                }
            }
            // this type is good; bind it as a parameter of the IN clause
            typeStrings.add("?");
            whereParams.add(type);
        }
        if (typeStrings.isEmpty()) {
            return null; // mixins excluded all types, no match possible
        }
        whereClauses.add(String.format("%s IN (%s)",
                dataHierTable.getColumn(model.MAIN_PRIMARY_TYPE_KEY).getFullQuotedName(),
                StringUtils.join(typeStrings, ", ")));
        /*
         * Add clause for immutable match: a row in the versions table means
         * the document is a version, hence immutable.
         */
        if (docKind == DocKind.DIRECT && info.immutableClause != null) {
            String where = String.format("%s IS %s",
                    database.getTable(model.VERSION_TABLE_NAME).getColumn(model.MAIN_KEY).getFullQuotedName(),
                    info.immutableClause.booleanValue() ? "NOT NULL" : "NULL");
            whereClauses.add(where);
        }
        /*
         * Parse the WHERE clause from the original query, and deduce from
         * it actual WHERE clauses and potential JOINs.
         */
        WhereBuilder whereBuilder;
        try {
            whereBuilder = new WhereBuilder(database, session, hierTable, hierId, dataHierTable, dataHierId,
                    docKind == DocKind.PROXY, aliasColumns);
        } catch (QueryMakerException e) {
            throw new StorageException(e.getMessage(), e);
        }
        if (info.wherePredicate != null) {
            info.wherePredicate.accept(whereBuilder);
            // JOINs added by fulltext queries
            leftJoins.addAll(whereBuilder.leftJoins);
            leftJoinsParams.addAll(whereBuilder.leftJoinsParams);
            implicitJoins.addAll(whereBuilder.implicitJoins);
            implicitJoinsParams.addAll(whereBuilder.implicitJoinsParams);
            // WHERE clause
            String where = whereBuilder.buf.toString();
            if (where.length() != 0) {
                whereClauses.add(where);
                whereParams.addAll(whereBuilder.whereParams);
            }
        }
        /*
         * Security check.
         */
        if (queryFilter.getPrincipals() != null) {
            Serializable principals = queryFilter.getPrincipals();
            Serializable permissions = queryFilter.getPermissions();
            if (!dialect.supportsArrays()) {
                // flatten to a '|'-separated string for dialects without array support
                principals = StringUtils.join((String[]) principals, '|');
                permissions = StringUtils.join((String[]) permissions, '|');
            }
            if (dialect.supportsReadAcl()) {
                /* optimized read acl */
                whereClauses.add(dialect.getReadAclsCheckSql("r.acl_id"));
                whereParams.add(principals);
                joins.add(String.format("%s AS r ON %s = r.id", model.HIER_READ_ACL_TABLE_NAME, hierId));
            } else {
                whereClauses.add(dialect.getSecurityCheckSql(hierId));
                whereParams.add(principals);
                whereParams.add(permissions);
            }
        }
        /*
         * Columns on which to do ordering.
         */
        String selectWhat = hierId;
        // always add the name, it will be used for intalio crm
        selectWhat += ", " + hierName;
        if (aliasColumns) {
            // UNION, so we need all orderable columns, aliased
            int n = 0;
            for (String key : info.orderKeys) {
                Column column = whereBuilder.findColumn(key, false, true);
                String qname = column.getFullQuotedName();
                selectWhat += ", " + qname + " AS " + dialect.openQuote() + COL_ORDER_ALIAS_PREFIX + ++n
                        + dialect.closeQuote();
            }
        }
        /*
         * Order by. Compute it just once. May use just aliases.
         */
        if (orderBy == null && sqlQuery.orderBy != null) {
            whereBuilder.buf.setLength(0);
            sqlQuery.orderBy.accept(whereBuilder);
            orderBy = whereBuilder.buf.toString();
        }
        /*
         * Resulting select.
         */
        select = new Select(null);
        select.setWhat(selectWhat);
        // the plain JOINs become the first element of the LEFT JOIN chain
        leftJoins.addFirst(StringUtils.join(joins, " JOIN "));
        String from = StringUtils.join(leftJoins, " LEFT JOIN ");
        if (!implicitJoins.isEmpty()) {
            implicitJoins.addFirst(from);
            from = StringUtils.join(implicitJoins, ", ");
        }
        select.setFrom(from);
        select.setWhere(StringUtils.join(whereClauses, " AND "));
        // parameter order must mirror clause order: left joins, implicit joins, where
        selectParams.addAll(leftJoinsParams);
        selectParams.addAll(implicitJoinsParams);
        selectParams.addAll(whereParams);
        statements.add(select.getStatement());
    }
    /*
     * Create the whole select: UNION of the per-kind selects if there are two.
     */
    if (statements.size() > 1) {
        select = new Select(null);
        String selectWhat = hier.getColumn(model.MAIN_KEY).getQuotedName();
        selectWhat = selectWhat + ", " + hier.getColumn(model.HIER_CHILD_NAME_KEY).getQuotedName();
        select.setWhat(selectWhat);
        // note that Derby has bizarre restrictions on parentheses placement
        // around UNION, see http://issues.apache.org/jira/browse/DERBY-2374
        String from = '(' + StringUtils.join(statements, " UNION ALL ") + ')';
        if (dialect.needsAliasForDerivedTable()) {
            from += " AS " + dialect.openQuote() + UNION_ALIAS + dialect.closeQuote();
        }
        select.setFrom(from);
    }
    select.setOrderBy(orderBy);
    List<Column> whatColumns = Collections.singletonList(hier.getColumn(model.MAIN_KEY));
    Query q = new Query();
    q.selectInfo = new SQLInfoSelect(select.getStatement(), whatColumns, null, null);
    q.selectParams = selectParams;
    return q;
}
From source file:canreg.client.analysis.CasesByAgeGroupChartTableBuilder.java
@Override public LinkedList<String> buildTable(String tableHeader, String reportFileName, int startYear, int endYear, Object[][] incidenceData, PopulationDataset[] populations, // can be null PopulationDataset[] standardPopulations, LinkedList<ConfigFields> configList, String[] engineParameters, FileTypes fileType) throws NotCompatibleDataException { // String footerString = java.util.ResourceBundle.getBundle("canreg/client/analysis/resources/AgeSpecificCasesPerHundredThousandTableBuilder").getString("TABLE BUILT ") + new Date() + java.util.ResourceBundle.getBundle("canreg/client/analysis/resources/AgeSpecificCasesPerHundredThousandTableBuilder").getString(" BY CANREG5."); LinkedList<String> generatedFiles = new LinkedList<String>(); if (Arrays.asList(engineParameters).contains("barchart")) { chartType = ChartType.BAR;//from www . ja v a2s. com } else { chartType = ChartType.PIE; } if (Arrays.asList(engineParameters).contains("legend")) { legendOn = true; } if (Arrays.asList(engineParameters).contains("r")) { useR = true; } localSettings = CanRegClientApp.getApplication().getLocalSettings(); rpath = localSettings.getProperty(LocalSettings.R_PATH); // does R exist? 
if (rpath == null || rpath.isEmpty() || !new File(rpath).exists()) { useR = false; // force false if R is not installed } icd10GroupDescriptions = ConfigFieldsReader.findConfig("ICD10_groups", configList); cancerGroupsLocal = EditorialTableTools.generateICD10Groups(icd10GroupDescriptions); // indexes keyGroupsMap = new EnumMap<KeyCancerGroupsEnum, Integer>(KeyCancerGroupsEnum.class); keyGroupsMap.put(KeyCancerGroupsEnum.allCancerGroupsIndex, EditorialTableTools.getICD10index("ALL", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.skinCancerGroupIndex, EditorialTableTools.getICD10index("C44", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.otherCancerGroupsIndex, EditorialTableTools.getICD10index("O&U", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.allCancerGroupsButSkinIndex, EditorialTableTools.getICD10index("ALLbC44", icd10GroupDescriptions)); skinCancerGroupIndex = keyGroupsMap.get(KeyCancerGroupsEnum.skinCancerGroupIndex); allCancerGroupsIndex = keyGroupsMap.get(KeyCancerGroupsEnum.allCancerGroupsIndex); allCancerGroupsButSkinIndex = keyGroupsMap.get(KeyCancerGroupsEnum.allCancerGroupsButSkinIndex); otherCancerGroupsIndex = keyGroupsMap.get(KeyCancerGroupsEnum.otherCancerGroupsIndex); numberOfCancerGroups = cancerGroupsLocal.length; int columnToCount = allCancerGroupsIndex; List<AgeGroup> ageGroups = new LinkedList<AgeGroup>(); // TODO: Make these dynamic? 
ageGroups.add(new AgeGroup(0, 14)); ageGroups.add(new AgeGroup(15, 29)); ageGroups.add(new AgeGroup(30, 49)); ageGroups.add(new AgeGroup(50, 69)); ageGroups.add(new AgeGroup(70, null)); double[] casesLine; if (incidenceData != null) { String sexString, icdString; String morphologyString; double casesArray[][][] = new double[numberOfSexes][ageGroups.size()][numberOfCancerGroups]; double cum64Array[][][] = new double[numberOfSexes][ageGroups.size()][numberOfCancerGroups]; double cum74Array[][][] = new double[numberOfSexes][ageGroups.size()][numberOfCancerGroups]; double asrArray[][][] = new double[numberOfSexes][ageGroups.size()][numberOfCancerGroups]; int sex, icdIndex, cases, age; List<Integer> dontCount = new LinkedList<Integer>(); // all sites but skin? if (Arrays.asList(engineParameters).contains("noC44")) { dontCount.add(skinCancerGroupIndex); tableHeader += ", excluding C44"; columnToCount = allCancerGroupsButSkinIndex; } for (Object[] dataLine : incidenceData) { // Set default icdIndex = -1; cases = 0; age = 0; // Extract data sexString = (String) dataLine[SEX_COLUMN]; sex = Integer.parseInt(sexString.trim()); // sex = 3 is unknown sex if (sex > 2) { sex = 3; } else { sex -= 1; // sex 1 male maps to column 0... 
} morphologyString = (String) dataLine[MORPHOLOGY_COLUMN]; icdString = (String) dataLine[ICD10_COLUMN]; icdIndex = Tools.assignICDGroupIndex(keyGroupsMap, icdString, morphologyString, cancerGroupsLocal); if (!dontCount.contains(icdIndex) && icdIndex != DONT_COUNT) { // Extract cases cases = (Integer) dataLine[CASES_COLUMN]; age = (Integer) dataLine[AGE_COLUMN]; for (int group = 0; group < ageGroups.size(); group++) { if (ageGroups.get(group).fitsInAgeGroup(age)) { if (sex <= numberOfSexes && icdIndex >= 0) { casesArray[sex][group][icdIndex] += cases; } else { if (otherCancerGroupsIndex >= 0) { casesArray[sex][group][otherCancerGroupsIndex] += cases; } } if (allCancerGroupsIndex >= 0) { casesArray[sex][group][allCancerGroupsIndex] += cases; } if (allCancerGroupsButSkinIndex >= 0 && skinCancerGroupIndex >= 0 && icdIndex != skinCancerGroupIndex) { casesArray[sex][group][allCancerGroupsButSkinIndex] += cases; } } } } else { // System.out.println("Not counted: " + icdString + "/" + morphologyString); } } //if (populations != null && populations.length > 0) { // // calculate pops // for (PopulationDataset pop : populations) { // for (AgeGroup ag : ageGroups) { // try { // addPopulationDataSetToAgeGroup(pop, ag); // } catch (IncompatiblePopulationDataSetException ex) { // Logger.getLogger(CasesByAgeGroupChartTableBuilder.class.getName()).log(Level.SEVERE, null, ex); // } // } // } // } format = NumberFormat.getInstance(); format.setMaximumFractionDigits(1); for (int sexNumber : new int[] { 0, 1 }) { String fileName = reportFileName + "-" + sexLabel[sexNumber] + "." 
+ fileType.toString(); File file = new File(fileName); List<CancerCasesCount> casesCounts = new LinkedList<CancerCasesCount>(); Double total = 0.0; for (int group = 0; group < ageGroups.size(); group++) { CancerCasesCount thisElement = new CancerCasesCount(null, ageGroups.get(group).toString(), 0.0, group); casesLine = casesArray[sexNumber][group]; thisElement.setCount(thisElement.getCount() + casesLine[columnToCount]); total += casesLine[columnToCount]; casesCounts.add(thisElement); } if (useR && !fileType.equals(FileTypes.jchart) && !fileType.equals(FileTypes.csv)) { String header = tableHeader + ", \n" + TableBuilderInterface.sexLabel[sexNumber]; generatedFiles.addAll(Tools.generateRChart(casesCounts, fileName, header, fileType, chartType, false, 0.0, rpath, false, "Age Group")); } else { Color color; if (sexNumber == 0) { color = Color.BLUE; } else { color = Color.RED; } String header = tableHeader + ", " + TableBuilderInterface.sexLabel[sexNumber]; charts[sexNumber] = Tools.generateJChart(casesCounts, fileName, header, fileType, chartType, false, legendOn, 0.0, total, color, "Age Group"); try { generatedFiles.add(Tools.writeJChartToFile(charts[sexNumber], file, fileType)); } catch (IOException ex) { Logger.getLogger(TopNChartTableBuilder.class.getName()).log(Level.SEVERE, null, ex); } catch (DocumentException ex) { Logger.getLogger(TopNChartTableBuilder.class.getName()).log(Level.SEVERE, null, ex); } } } } return generatedFiles; }
From source file:org.pdfsam.plugin.coverfooter.listeners.RunButtonActionListener.java
/**
 * Handles the Run button: validates the selection, builds the pdfsam "concat"
 * command-line argument list (cover + selected documents + footer, with a
 * per-file page-selection string) and submits one WorkThread per selected
 * document.
 * <p>
 * Argument lists: {@code args} holds the shared prefix (flags, version, cover
 * files), {@code argsFooter} the footer files, {@code args1} the per-document
 * command actually executed.
 */
public void actionPerformed(ActionEvent e) {
    // refuse to start while another task runs or documents are still loading
    if (WorkExecutor.getInstance().getRunningThreads() > 0 || panel.getSelectionPanel().isAdding()) {
        DialogUtility.showWarningAddingDocument(panel);
        return;
    }
    PdfSelectionTableItem[] items = panel.getSelectionPanel().getTableRows();
    if (ArrayUtils.isEmpty(items)) {
        DialogUtility.showWarningNoDocsSelected(panel, DialogUtility.AT_LEAST_ONE_DOC);
        return;
    }
    LinkedList<String> args = new LinkedList<String>();       // shared prefix + cover
    LinkedList<String> args1 = new LinkedList<String>();      // per-document command
    LinkedList<String> argsFooter = new LinkedList<String>(); // footer file args
    // validation and permission check are demanded
    try {
        if (panel.getOutputCompressedCheck().isSelected()) {
            args.add("-" + ConcatParsedCommand.COMPRESSED_ARG);
        }
        if (panel.getMergeTypeCheck().isSelected()) {
            args.add("-" + ConcatParsedCommand.COPYFIELDS_ARG);
        }
        args.add("-" + ConcatParsedCommand.PDFVERSION_ARG);
        args.add(((StringItem) panel.getVersionCombo().getSelectedItem()).getId());
        PdfSelectionTableItem[] coveritems = panel.getCoverSelectionPanel().getTableRows();
        PdfSelectionTableItem[] footeritems = panel.getFooterSelectionPanel().getTableRows();
        String coverSelectionString = "";
        // manage cover: at least one of cover/footer must be a single selected file
        if ((coveritems == null || coveritems.length != 1) && (footeritems == null || footeritems.length != 1)) {
            JOptionPane.showMessageDialog(panel,
                    GettextResource.gettext(Configuration.getInstance().getI18nResourceBundle(),
                            "Select at least one cover or one footer"),
                    GettextResource.gettext(Configuration.getInstance().getI18nResourceBundle(), "Warning"),
                    JOptionPane.WARNING_MESSAGE);
        } else {
            // overwrite confirmation
            if (panel.getOverwriteCheckbox().isSelected()
                    && Configuration.getInstance().isAskOverwriteConfirmation()) {
                int dialogRet = DialogUtility.askForOverwriteConfirmation(panel);
                if (JOptionPane.NO_OPTION == dialogRet) {
                    panel.getOverwriteCheckbox().setSelected(false);
                } else if (JOptionPane.CANCEL_OPTION == dialogRet) {
                    return;
                }
            }
            if (panel.getOverwriteCheckbox().isSelected()) {
                args.add("-" + ConcatParsedCommand.OVERWRITE_ARG);
            }
            if ((coveritems != null && coveritems.length == 1)) {
                PdfSelectionTableItem coveritem = coveritems[0];
                // empty page selection means "all pages"
                String coverSelection = (coveritem.getPageSelection() != null
                        && coveritem.getPageSelection().length() > 0) ? coveritem.getPageSelection()
                                : CoverFooterMainGUI.ALL_STRING;
                if (coverSelection.trim().length() > 0 && coverSelection.indexOf(",") != 0) {
                    // comma-separated list: add one -f arg per selection chunk
                    String[] selectionsArray = coverSelection.split(",");
                    for (int j = 0; j < selectionsArray.length; j++) {
                        String tmpString = selectionsArray[j].trim();
                        if ((tmpString != null) && (!tmpString.equals(""))) {
                            args.add("-" + ConcatParsedCommand.F_ARG);
                            String f = coveritem.getInputFile().getAbsolutePath();
                            if ((coveritem.getPassword()) != null && (coveritem.getPassword()).length() > 0) {
                                log.debug(GettextResource.gettext(
                                        Configuration.getInstance().getI18nResourceBundle(),
                                        "Found a password for input file."));
                                // password is appended to the path, colon-separated
                                f += ":" + coveritem.getPassword();
                            }
                            args.add(f);
                            // a bare page number N becomes the range "N-N:"
                            coverSelectionString += (tmpString.matches("[\\d]+"))
                                    ? tmpString + "-" + tmpString + ":" : tmpString + ":";
                        }
                    }
                } else {
                    args.add("-" + ConcatParsedCommand.F_ARG);
                    String f = coveritem.getInputFile().getAbsolutePath();
                    if ((coveritem.getPassword()) != null && (coveritem.getPassword()).length() > 0) {
                        log.debug(GettextResource.gettext(Configuration.getInstance().getI18nResourceBundle(),
                                "Found a password for input file."));
                        f += ":" + coveritem.getPassword();
                    }
                    args.add(f);
                    coverSelectionString += (coverSelection.matches("[\\d]+"))
                            ? coverSelection + "-" + coverSelection + ":" : coverSelection + ":";
                }
            }
            String footerSelectionString = "";
            // manage footer (same structure as the cover handling above)
            if ((footeritems != null && footeritems.length == 1)) {
                PdfSelectionTableItem footeritem = footeritems[0];
                String footerSelection = (footeritem.getPageSelection() != null
                        && footeritem.getPageSelection().length() > 0) ? footeritem.getPageSelection()
                                : CoverFooterMainGUI.ALL_STRING;
                if (footerSelection.trim().length() > 0 && footerSelection.indexOf(",") != 0) {
                    String[] selectionsArray = footerSelection.split(",");
                    for (int j = 0; j < selectionsArray.length; j++) {
                        String tmpString = selectionsArray[j].trim();
                        if ((tmpString != null) && (!tmpString.equals(""))) {
                            argsFooter.add("-" + ConcatParsedCommand.F_ARG);
                            String footerItem = footeritem.getInputFile().getAbsolutePath();
                            if ((footeritem.getPassword()) != null && (footeritem.getPassword()).length() > 0) {
                                log.debug(GettextResource.gettext(
                                        Configuration.getInstance().getI18nResourceBundle(),
                                        "Found a password for input file."));
                                footerItem += ":" + footeritem.getPassword();
                            }
                            argsFooter.add(footerItem);
                            footerSelectionString += (tmpString.matches("[\\d]+"))
                                    ? tmpString + "-" + tmpString + ":" : tmpString + ":";
                        }
                    }
                } else {
                    argsFooter.add("-" + ConcatParsedCommand.F_ARG);
                    String footerItem = footeritem.getInputFile().getAbsolutePath();
                    if ((footeritem.getPassword()) != null && (footeritem.getPassword()).length() > 0) {
                        log.debug(GettextResource.gettext(Configuration.getInstance().getI18nResourceBundle(),
                                "Found a password for input file."));
                        footerItem += ":" + footeritem.getPassword();
                    }
                    argsFooter.add(footerItem);
                    footerSelectionString += (footerSelection.matches("[\\d]+"))
                            ? footerSelection + "-" + footerSelection + ":" : footerSelection + ":";
                }
            }
            // selection page: build and run one concat command per selected document
            PdfSelectionTableItem item = null;
            for (int i = 0; i < items.length; i++) {
                String pageSelectionString = coverSelectionString;
                try {
                    // reset to the shared prefix for this document
                    args1.clear();
                    args1.addAll(args);
                    item = items[i];
                    String pageSelection = (item.getPageSelection() != null
                            && item.getPageSelection().length() > 0) ? item.getPageSelection()
                                    : CoverFooterMainGUI.ALL_STRING;
                    if (pageSelection.trim().length() > 0 && pageSelection.indexOf(",") != 0) {
                        String[] selectionsArray = pageSelection.split(",");
                        for (int j = 0; j < selectionsArray.length; j++) {
                            String tmpString = selectionsArray[j].trim();
                            if ((tmpString != null) && (!tmpString.equals(""))) {
                                args1.add("-" + ConcatParsedCommand.F_ARG);
                                String f = item.getInputFile().getAbsolutePath();
                                if ((item.getPassword()) != null && (item.getPassword()).length() > 0) {
                                    log.debug(GettextResource.gettext(
                                            Configuration.getInstance().getI18nResourceBundle(),
                                            "Found a password for input file."));
                                    f += ":" + item.getPassword();
                                }
                                args1.add(f);
                                pageSelectionString += (tmpString.matches("[\\d]+"))
                                        ? tmpString + "-" + tmpString + ":" : tmpString + ":";
                            }
                        }
                    } else {
                        args1.add("-" + ConcatParsedCommand.F_ARG);
                        String f = item.getInputFile().getAbsolutePath();
                        if ((item.getPassword()) != null && (item.getPassword()).length() > 0) {
                            log.debug(GettextResource.gettext(
                                    Configuration.getInstance().getI18nResourceBundle(),
                                    "Found a password for input file."));
                            f += ":" + item.getPassword();
                        }
                        args1.add(f);
                        pageSelectionString += (pageSelection.matches("[\\d]+"))
                                ? pageSelection + "-" + pageSelection + ":" : pageSelection + ":";
                    }
                    args1.addAll(argsFooter);
                    args1.add("-" + ConcatParsedCommand.U_ARG);
                    args1.add(pageSelectionString + footerSelectionString);
                    // manage output destination option
                    args1.add("-" + ConcatParsedCommand.O_ARG);
                    if (StringUtils.isEmpty(panel.getDestinationTextField().getText())) {
                        // no destination chosen: suggest one based on the last input file
                        String suggestedDir = getSuggestedDestinationDirectory(items[items.length - 1]);
                        int chosenOpt = DialogUtility.showConfirmOuputLocationDialog(panel, suggestedDir);
                        if (JOptionPane.YES_OPTION == chosenOpt) {
                            panel.getDestinationTextField().setText(suggestedDir);
                        } else if (JOptionPane.CANCEL_OPTION == chosenOpt) {
                            return;
                        }
                    }
                    if (panel.getDestinationTextField().getText().length() > 0) {
                        args1.add(panel.getDestinationTextField().getText() + File.separator
                                + item.getInputFile().getName());
                    }
                    args1.add(AbstractParsedCommand.COMMAND_CONCAT);
                    WorkExecutor.getInstance().execute(new WorkThread(args1.toArray(new String[args1.size()])));
                } catch (Exception ex) {
                    // per-document failure: log and continue with the next document
                    log.error(GettextResource.gettext(Configuration.getInstance().getI18nResourceBundle(),
                            "Error: "), ex);
                }
            }
        }
    } catch (Exception ex) {
        log.error(GettextResource.gettext(Configuration.getInstance().getI18nResourceBundle(), "Error: "), ex);
        SoundPlayer.getInstance().playErrorSound();
    }
}
From source file:canreg.client.analysis.TopNChartTableBuilder.java
@Override public LinkedList<String> buildTable(String tableHeader, String reportFileName, int startYear, int endYear, Object[][] incidenceData, PopulationDataset[] populations, // can be null PopulationDataset[] standardPopulations, LinkedList<ConfigFields> configList, String[] engineParameters, FileTypes fileType) throws NotCompatibleDataException { String footerString = java.util.ResourceBundle .getBundle("canreg/client/analysis/resources/AgeSpecificCasesPerHundredThousandTableBuilder") .getString("TABLE BUILT ") + new Date() + java.util.ResourceBundle .getBundle(//from ww w .ja v a2 s. c o m "canreg/client/analysis/resources/AgeSpecificCasesPerHundredThousandTableBuilder") .getString(" BY CANREG5."); LinkedList<String> generatedFiles = new LinkedList<String>(); if (Arrays.asList(engineParameters).contains("barchart")) { chartType = ChartType.BAR; } else { chartType = ChartType.PIE; includeOther = true; } if (Arrays.asList(engineParameters).contains("legend")) { legendOn = true; } if (Arrays.asList(engineParameters).contains("r")) { useR = true; } if (Arrays.asList(engineParameters).contains("asr")) { countType = CountType.ASR; } else if (Arrays.asList(engineParameters).contains("cum64")) { countType = CountType.CUM64; } else if (Arrays.asList(engineParameters).contains("cum74")) { countType = CountType.CUM74; } else if (Arrays.asList(engineParameters).contains("per100000")) { countType = CountType.PER_HUNDRED_THOUSAND; } else { // default to cases countType = CountType.CASES; } localSettings = CanRegClientApp.getApplication().getLocalSettings(); rpath = localSettings.getProperty(LocalSettings.R_PATH); // does R exist? 
if (rpath == null || rpath.isEmpty() || !new File(rpath).exists()) { useR = false; // force false if R is not installed } icdLabel = ConfigFieldsReader.findConfig("ICD_groups_labels", configList); icd10GroupDescriptions = ConfigFieldsReader.findConfig("ICD10_groups", configList); cancerGroupsLocal = EditorialTableTools.generateICD10Groups(icd10GroupDescriptions); // indexes keyGroupsMap = new EnumMap<KeyCancerGroupsEnum, Integer>(KeyCancerGroupsEnum.class); keyGroupsMap.put(KeyCancerGroupsEnum.allCancerGroupsIndex, EditorialTableTools.getICD10index("ALL", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.leukemiaNOSCancerGroupIndex, EditorialTableTools.getICD10index(950, cancerGroupsLocal)); keyGroupsMap.put(KeyCancerGroupsEnum.skinCancerGroupIndex, EditorialTableTools.getICD10index("C44", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.bladderCancerGroupIndex, EditorialTableTools.getICD10index("C67", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.mesotheliomaCancerGroupIndex, EditorialTableTools.getICD10index("C45", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.kaposiSarkomaCancerGroupIndex, EditorialTableTools.getICD10index("C46", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.myeloproliferativeDisordersCancerGroupIndex, EditorialTableTools.getICD10index("MPD", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.myelodysplasticSyndromesCancerGroupIndex, EditorialTableTools.getICD10index("MDS", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.allCancerGroupsButSkinIndex, EditorialTableTools.getICD10index("ALLbC44", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.brainAndCentralNervousSystemCancerGroupIndex, EditorialTableTools.getICD10index("C70-72", icd10GroupDescriptions)); keyGroupsMap.put(KeyCancerGroupsEnum.ovaryCancerGroupIndex, EditorialTableTools.getICD10index(569, cancerGroupsLocal)); 
keyGroupsMap.put(KeyCancerGroupsEnum.otherCancerGroupsIndex, EditorialTableTools.getICD10index("O&U", icd10GroupDescriptions)); otherCancerGroupsIndex = keyGroupsMap.get(KeyCancerGroupsEnum.otherCancerGroupsIndex); skinCancerGroupIndex = keyGroupsMap.get(KeyCancerGroupsEnum.skinCancerGroupIndex); allCancerGroupsIndex = keyGroupsMap.get(KeyCancerGroupsEnum.allCancerGroupsIndex); allCancerGroupsButSkinIndex = keyGroupsMap.get(KeyCancerGroupsEnum.allCancerGroupsButSkinIndex); numberOfCancerGroups = cancerGroupsLocal.length; double[] countsRow; if (populations != null && populations.length > 0) { if (populations[0].getPopulationDatasetID() < 0) { countType = CountType.CASES; } else { // calculate period pop periodPop = new PopulationDataset(); periodPop.setAgeGroupStructure(populations[0].getAgeGroupStructure()); periodPop.setReferencePopulation(populations[0].getReferencePopulation()); for (PopulationDatasetsEntry pde : populations[0].getAgeGroups()) { int count = 0; for (PopulationDataset pds : populations) { count += pds.getAgeGroupCount(pde.getSex(), pde.getAgeGroup()); } periodPop.addAgeGroup(new PopulationDatasetsEntry(pde.getAgeGroup(), pde.getSex(), count)); } } } if (incidenceData != null) { String sexString, icdString, morphologyString; double countArray[][] = new double[numberOfCancerGroups][numberOfSexes]; int sex, icdIndex, numberOfCases, age; double adjustedCases; List<Integer> dontCount = new LinkedList<Integer>(); // all sites but skin? 
if (Arrays.asList(engineParameters).contains("noC44")) { dontCount.add(skinCancerGroupIndex); tableHeader += ", excluding C44"; } for (Object[] dataLine : incidenceData) { // Set default adjustedCases = 0.0; // Extract data sexString = (String) dataLine[SEX_COLUMN]; sex = Integer.parseInt(sexString.trim()); // sex = 3 is unknown sex if (sex > 2) { sex = 3; } morphologyString = (String) dataLine[MORPHOLOGY_COLUMN]; icdString = (String) dataLine[ICD10_COLUMN]; icdIndex = Tools.assignICDGroupIndex(keyGroupsMap, icdString, morphologyString, cancerGroupsLocal); age = (Integer) dataLine[AGE_COLUMN]; if (!dontCount.contains(icdIndex) && icdIndex != DONT_COUNT) { // Extract cases numberOfCases = (Integer) dataLine[CASES_COLUMN]; if (countType == CountType.PER_HUNDRED_THOUSAND) { adjustedCases = (100000.0 * numberOfCases) / periodPop.getAgeGroupCount(sex, periodPop.getAgeGroupIndex(age)); } else if (countType == CountType.ASR) { try { adjustedCases = 100.0 * (periodPop.getReferencePopulationForAgeGroupIndex(sex, periodPop.getAgeGroupIndex(age)) * numberOfCases) / periodPop.getAgeGroupCount(sex, periodPop.getAgeGroupIndex(age)); } catch (IncompatiblePopulationDataSetException ex) { Logger.getLogger(TopNChartTableBuilder.class.getName()).log(Level.SEVERE, null, ex); } } else if (countType == CountType.CUM64) { if (age < 65) { adjustedCases = (100000.0 * numberOfCases) / periodPop.getAgeGroupCount(sex, periodPop.getAgeGroupIndex(age)) * 5.0 / 1000.0; } } else if (countType == CountType.CUM74) { if (age < 75) { adjustedCases = (100000.0 * numberOfCases) / periodPop.getAgeGroupCount(sex, periodPop.getAgeGroupIndex(age)) * 5.0 / 1000.0; } } else { adjustedCases = numberOfCases; } if (sex <= numberOfSexes && icdIndex >= 0 && icdIndex <= cancerGroupsLocal.length) { countArray[icdIndex][sex - 1] += adjustedCases; } else { if (otherCancerGroupsIndex >= 0) { countArray[otherCancerGroupsIndex][sex - 1] += adjustedCases; } } if (allCancerGroupsIndex >= 0) { 
countArray[allCancerGroupsIndex][sex - 1] += adjustedCases; } if (allCancerGroupsButSkinIndex >= 0 && skinCancerGroupIndex >= 0 && icdIndex != skinCancerGroupIndex) { countArray[allCancerGroupsButSkinIndex][sex - 1] += adjustedCases; } } } // separate top 10 and the rest TreeSet<CancerCasesCount> topNMale = new TreeSet<CancerCasesCount>(new Comparator<CancerCasesCount>() { @Override public int compare(CancerCasesCount o1, CancerCasesCount o2) { if (o1.getCount().equals(o2.getCount())) { return -o1.toString().compareTo(o2.toString()); } else { return -(o1.getCount().compareTo(o2.getCount())); } } }); LinkedList<CancerCasesCount> theRestMale = new LinkedList<CancerCasesCount>(); TreeSet<CancerCasesCount> topNFemale = new TreeSet<CancerCasesCount>( new Comparator<CancerCasesCount>() { @Override public int compare(CancerCasesCount o1, CancerCasesCount o2) { if (o1.getCount().equals(o2.getCount())) { return -o1.toString().compareTo(o2.toString()); } else { return -(o1.getCount().compareTo(o2.getCount())); } } }); LinkedList<CancerCasesCount> theRestFemale = new LinkedList<CancerCasesCount>(); CancerCasesCount otherElement; CancerCasesCount thisElement; TreeSet<CancerCasesCount> topN; LinkedList<CancerCasesCount> theRest; for (int icdGroupNumber = 0; icdGroupNumber < countArray.length; icdGroupNumber++) { countsRow = countArray[icdGroupNumber]; for (int sexNumber = 0; sexNumber < 2; sexNumber++) { if (sexNumber == 0) { topN = topNMale; theRest = theRestMale; } else { topN = topNFemale; theRest = theRestFemale; } if (countsRow[sexNumber] > 0) { thisElement = new CancerCasesCount(icd10GroupDescriptions[icdGroupNumber], icdLabel[icdGroupNumber].substring(3), countsRow[sexNumber], icdGroupNumber); // if this is the "other" group - add it immediately to "the rest" if (icdGroupNumber == otherCancerGroupsIndex) { theRest.add(thisElement); // if not we check if this is one of the collection groups } else if (icdGroupNumber != allCancerGroupsButSkinIndex && icdGroupNumber != 
allCancerGroupsIndex) { // if it is less than N cancers in top N - add it if (topN.size() < topNLimit) { topN.add(thisElement); } else { // otherwise we need to compare it to the last element in the top 10 otherElement = topN.last(); if (thisElement.compareTo(otherElement) < 0) { topN.remove(otherElement); theRest.add(otherElement); topN.add(thisElement); } else { theRest.add(thisElement); } } } } } } for (int sexNumber : new int[] { 0, 1 }) { String fileName = reportFileName + "-" + sexLabel[sexNumber] + "." + fileType.toString(); File file = new File(fileName); TreeSet<CancerCasesCount> casesCounts; Double restCount = Tools.sumUpTheRest(theRestMale, dontCount); if (sexNumber == 0) { casesCounts = topNMale; } else { casesCounts = topNFemale; } if (useR && !fileType.equals(FileTypes.jchart) && !fileType.equals(FileTypes.csv)) { String header = "Top 10 by " + countType + ", \n" + tableHeader + ", " + TableBuilderInterface.sexLabel[sexNumber]; generatedFiles.addAll(Tools.generateRChart(casesCounts, fileName, header, fileType, chartType, includeOther, restCount, rpath, true, "Site")); } else { double allCount = countArray[allCancerGroupsIndex][sexNumber]; Color color; if (sexNumber == 0) { color = Color.BLUE; } else { color = Color.RED; } String header = "Top 10 by " + countType + ", " + tableHeader + ", " + TableBuilderInterface.sexLabel[sexNumber]; charts[sexNumber] = Tools.generateJChart(casesCounts, fileName, header, fileType, chartType, includeOther, legendOn, restCount, allCount, color, "Site"); try { generatedFiles.add(Tools.writeJChartToFile(charts[sexNumber], file, fileType)); } catch (IOException ex) { Logger.getLogger(TopNChartTableBuilder.class.getName()).log(Level.SEVERE, null, ex); } catch (DocumentException ex) { Logger.getLogger(TopNChartTableBuilder.class.getName()).log(Level.SEVERE, null, ex); } } } } return generatedFiles; }