List of usage examples for java.util TreeSet add
public boolean add(E e)
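TreeSet.add(E) inserts the element into the set's sorted order and returns false when an equal element (by the set's comparator, or natural ordering) is already present, leaving the set unchanged. A minimal self-contained example of that contract, before the real-world usages below:

import java.util.TreeSet;

public class TreeSetAddDemo {
    public static void main(String[] args) {
        TreeSet<String> set = new TreeSet<String>();
        System.out.println(set.add("banana")); // true: newly inserted
        System.out.println(set.add("apple"));  // true: newly inserted
        System.out.println(set.add("banana")); // false: duplicate, set unchanged
        System.out.println(set);               // [apple, banana] -- iteration is sorted
    }
}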
From source file:de.julielab.jcore.ae.lingpipegazetteer.chunking.ChunkerProviderImpl.java
private void readDictionary(InputStream dictFile) throws IOException, AnalysisEngineProcessException {
    long time = System.currentTimeMillis();
    if (useApproximateMatching) {
        dict = new TrieDictionary<String>();
    } else {
        dict = new MapDictionary<String>();
    }
    // now read from file and add entries
    LOGGER.info("readDictionary() - adding entries from " + dictFile + " to dictionary...");
    try (InputStreamReader isr = new InputStreamReader(dictFile)) {
        BufferedReader bf = new BufferedReader(isr);
        String line = "";
        String variant = "";
        TreeSet<String> termVariants;
        TreeSet<String> dictionary = new TreeSet<String>();
        while ((line = bf.readLine()) != null) {
            String[] values = line.split("\t");
            if (values.length != 2) {
                LOGGER.error("readDictionary() - wrong format of line: " + line);
                throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION,
                        null);
            }
            String term = values[0].trim();
            String label = values[1].trim();
            if (term.length() < MIN_TERM_LENGTH)
                continue;
            if (useApproximateMatching && !caseSensitive)
                term = term.toLowerCase();
            if (generateVariants) {
                LOGGER.debug("readDictionary() - make term variants of (" + term + ", " + label
                        + ") and add them to dictionary (NOTE: this may take a while if dictionary is big!)");
                termVariants = makeTermVariants(term);
                Iterator<String> it = termVariants.iterator();
                while (it.hasNext()) {
                    variant = it.next();
                    if (!stopWords.contains(variant.toLowerCase()) && !variant.equals("")) {
                        // System.err.println("ADDING VARIANT: " + variant + "=" + label);
                        dictionary.add(variant + SEPARATOR + label);
                    }
                    // dict.addEntry(new DictionaryEntry(it.next(), label, CHUNK_SCORE));
                }
                it = null;
            } else {
                if (!stopWords.contains(term.toLowerCase()))
                    dictionary.add(term + SEPARATOR + label);
                // dict.addEntry(new DictionaryEntry(term, label, CHUNK_SCORE));
            }
            if (dictionary.size() >= 10000) {
                LOGGER.debug("readDictionary() - flushing dictionarySet to map dictionary");
                dictionary = flushDictionary(dictionary, dict);
            }
        }
        dictionary = flushDictionary(dictionary, dict);
        dictionary = null;
        time = System.currentTimeMillis() - time;
        LOGGER.info("Reading dictionary took {}ms ({}s)", time, time / 1000);
    }
}
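The method above buffers entries in a TreeSet (which de-duplicates and sorts them for free) and flushes to the backing dictionary every 10,000 additions. A minimal sketch of that buffer-and-flush pattern, with a hypothetical list sink standing in for the LingPipe dictionary and flushDictionary():

import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

public class BatchedTreeSetBuffer {
    // Hypothetical sink standing in for dict/flushDictionary() in the example above.
    static List<String> sink = new ArrayList<String>();

    static TreeSet<String> flush(TreeSet<String> buffer) {
        sink.addAll(buffer);          // hand the sorted, de-duplicated batch downstream
        return new TreeSet<String>(); // start a fresh buffer
    }

    public static void main(String[] args) {
        TreeSet<String> buffer = new TreeSet<String>();
        for (int i = 0; i < 25000; i++) {
            buffer.add("term" + (i % 12000) + "|label"); // duplicates within a batch are ignored
            if (buffer.size() >= 10000) {
                buffer = flush(buffer);
            }
        }
        buffer = flush(buffer); // final flush for the partial batch
        System.out.println(sink.size() + " entries flushed");
    }
}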
From source file:net.semanticmetadata.lire.solr.LireRequestHandler.java
/**
 * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
 *
 * @param rsp
 * @param searcher
 * @param hashFieldName the hash field name
 * @param maximumHits
 * @param terms
 * @param queryFeature
 * @throws IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher,
        String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature)
        throws IOException, IllegalAccessException, InstantiationException {
    // temp feature instance
    LireFeature tmpFeature = queryFeature.getClass().newInstance();
    // Taking the time of search for statistical purposes.
    time = System.currentTimeMillis();
    Filter filter = null;
    // if the request contains a filter:
    if (req.getParams().get("fq") != null) {
        // only filters with [<field>:<value> ]+ are supported
        StringTokenizer st = new StringTokenizer(req.getParams().get("fq"), " ");
        LinkedList<Term> filterTerms = new LinkedList<Term>();
        while (st.hasMoreElements()) {
            String[] tmpToken = st.nextToken().split(":");
            if (tmpToken.length > 1) {
                filterTerms.add(new Term(tmpToken[0], tmpToken[1]));
            }
        }
        if (filterTerms.size() > 0)
            filter = new TermsFilter(filterTerms);
    }
    TopDocs docs;
    // with query only.
    if (filter == null) {
        docs = searcher.search(query, numberOfCandidateResults);
    } else {
        docs = searcher.search(query, filter, numberOfCandidateResults);
    }
    // TopDocs docs = searcher.search(query, new TermsFilter(terms), numberOfCandidateResults); // with TermsFilter and boosting by simple query
    // TopDocs docs = searcher.search(new ConstantScoreQuery(new TermsFilter(terms)), numberOfCandidateResults); // just with TermsFilter
    time = System.currentTimeMillis() - time;
    rsp.add("RawDocsCount", docs.scoreDocs.length + "");
    rsp.add("RawDocsSearchTime", time + "");
    // re-rank
    time = System.currentTimeMillis();
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    float maxDistance = -1f;
    float tmpScore;
    String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
    // iterating and re-ranking the documents.
    BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName);
    BytesRef bytesRef; // = new BytesRef();
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // using DocValues to retrieve the field values ...
        bytesRef = binaryValues.get(docs.scoreDocs[i].doc);
        tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        // Getting the document from the index.
        // This is the slow step based on the field compression of stored fields.
        // tmpFeature.setByteArrayRepresentation(d.getBinaryValue(name).bytes, d.getBinaryValue(name).offset, d.getBinaryValue(name).length);
        tmpScore = queryFeature.getDistance(tmpFeature);
        if (resultScoreDocs.size() < maximumHits) {
            // todo: There's potential here for a memory saver, think of a clever data structure
            // that can do the trick without creating a new SimpleResult for each result.
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            maxDistance = resultScoreDocs.last().getDistance();
        } else if (tmpScore < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            resultScoreDocs.remove(resultScoreDocs.last());
            // add the new one ...
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            // and set our new distance border ...
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    // System.out.println("** Creating response.");
    time = System.currentTimeMillis() - time;
    rsp.add("ReRankSearchTime", time + "");
    LinkedList list = new LinkedList();
    for (Iterator<SimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) {
        SimpleResult result = it.next();
        HashMap m = new HashMap(2);
        m.put("d", result.getDistance());
        // add fields as requested:
        if (req.getParams().get("fl") == null) {
            m.put("id", result.getDocument().get("id"));
            if (result.getDocument().get("title") != null)
                m.put("title", result.getDocument().get("title"));
        } else {
            String fieldsRequested = req.getParams().get("fl");
            if (fieldsRequested.contains("score")) {
                m.put("score", result.getDistance());
            }
            if (fieldsRequested.contains("*")) {
                // all fields
                for (IndexableField field : result.getDocument().getFields()) {
                    String tmpField = field.name();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            } else {
                StringTokenizer st;
                if (fieldsRequested.contains(","))
                    st = new StringTokenizer(fieldsRequested, ",");
                else
                    st = new StringTokenizer(fieldsRequested, " ");
                while (st.hasMoreElements()) {
                    String tmpField = st.nextToken();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            }
        }
        // m.put(field, result.getDocument().get(field));
        // m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
        list.add(m);
    }
    rsp.add("docs", list);
    // rsp.add("Test-name", "Test-val");
}
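The re-ranking loop above is a bounded top-k idiom: a TreeSet capped at maximumHits, where a new candidate replaces last() only if it beats the current worst distance. A generic, self-contained sketch of the same pattern; the Result class is a stand-in for Lire's SimpleResult and must order consistently with equals (hence the docId tie-breaker), since TreeSet.add drops elements that compare equal:

import java.util.TreeSet;

public class BoundedTopK {
    static class Result implements Comparable<Result> {
        final float distance;
        final int docId;
        Result(float distance, int docId) {
            this.distance = distance;
            this.docId = docId;
        }
        public int compareTo(Result o) {
            int c = Float.compare(distance, o.distance);
            return c != 0 ? c : Integer.compare(docId, o.docId); // tie-break so distinct docs are kept
        }
    }

    public static void main(String[] args) {
        int k = 3;
        TreeSet<Result> topK = new TreeSet<Result>();
        float[] distances = { 0.9f, 0.2f, 0.7f, 0.4f, 0.8f, 0.1f };
        for (int doc = 0; doc < distances.length; doc++) {
            if (topK.size() < k) {
                topK.add(new Result(distances[doc], doc));
            } else if (distances[doc] < topK.last().distance) {
                topK.remove(topK.last());                  // drop the current worst ...
                topK.add(new Result(distances[doc], doc)); // ... and keep the better candidate
            }
        }
        for (Result r : topK)
            System.out.println("doc " + r.docId + " d=" + r.distance);
        // prints docs 5, 1, 3 (distances 0.1, 0.2, 0.4) in ascending distance order
    }
}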
From source file:edu.duke.cs.osprey.ematrix.epic.SeriesFitter.java
static double[] fitSeriesIterative(DoubleMatrix1D[] samp, double trueVals[], double weights[], double lambda,
        boolean includeConst, int order, double bCutoffs[], double bCutoffs2[], int PCOrder, boolean isPC[]) {

    long startTime = System.currentTimeMillis();
    System.out.println("Starting fitSeriesIterative...");

    int numSamples = samp.length;
    int nd = samp[0].size();

    if (bCutoffs.length == 1) { //single bCutoff to be used
        double bCutoff = bCutoffs[0];
        bCutoffs = new double[numSamples];
        Arrays.fill(bCutoffs, bCutoff);
    }
    //now bCutoffs has a cutoff for each sample

    int numParams = getNumParams(nd, includeConst, order);

    if (PCOrder > order) { //add in parameters for PC orders
        int numPCs = countTrue(isPC);
        for (int n = order + 1; n <= PCOrder; n++)
            numParams += getNumParamsForOrder(numPCs, n);
    }

    //now set up the data for the iterative fits
    //samples can be turned on and off using fitWeights
    //we will need to create two entries for samples with trueVals between bCutoff and bCutoff2
    //since they may be turned on either to penalize deviation from bCutoff or from trueVal
    //first entry for each entry will penalize deviation from bCutoff if trueVal>=bCutoff
    //secondEntries are for trueVals between bCutoff and bCutoff2
    ArrayList<Integer> secondEntries = new ArrayList<>(); //list of samples needing second entry
    HashMap<Integer, Integer> revSecondEntries = new HashMap<>(); //reverse lookup
    for (int s = 0; s < numSamples; s++) {
        if ((trueVals[s] >= bCutoffs[s]) && (trueVals[s] < bCutoffs2[s])) { //as in isRestraintActive
            revSecondEntries.put(s, secondEntries.size());
            secondEntries.add(s);
        }
    }
    int numRestraints = numSamples + secondEntries.size();

    //data for basic least-squares fits
    DoubleMatrix1D[] fitSamp = new DoubleMatrix1D[numRestraints];
    double fitTrueVals[] = new double[numRestraints];
    double fitWeights[] = new double[numRestraints];
    for (int s = 0; s < numSamples; s++) { //"normal" entries
        fitSamp[s] = samp[s];
        if (trueVals[s] >= bCutoffs[s]) {
            fitWeights[s] = 0;
            fitTrueVals[s] = bCutoffs[s];
        } else {
            fitWeights[s] = weights[s];
            fitTrueVals[s] = trueVals[s];
        }
    }
    for (int s2 = 0; s2 < secondEntries.size(); s2++) {
        fitSamp[numSamples + s2] = samp[secondEntries.get(s2)];
        fitWeights[numSamples + s2] = 0;
        fitTrueVals[numSamples + s2] = trueVals[secondEntries.get(s2)];
    }

    //Initial guess of set P is all points with trueVals[s] >= bCutoff
    //that is, all points that have possible series values that make the restraint inactive
    boolean done = false;
    double coeffs[] = null;
    double meanResidual = 0, weightSum = 0;
    double prevResid = Double.POSITIVE_INFINITY;
    double oldCoeffs[] = null;

    double oldSerVals[] = new double[numSamples]; //values of series at each sample, for previous iteration
    //preallocating to all infinity because all trueVals[s]>=bCutoff points start outside P
    Arrays.fill(oldSerVals, Double.POSITIVE_INFINITY);

    //for updating
    boolean firstFit = true; //first fit is not an update
    DoubleMatrix1D c = DoubleFactory1D.dense.make(numParams); //matrices we update (used in fit)
    DoubleMatrix2D M = DoubleFactory2D.dense.make(numParams, numParams);
    double oldFitWeights[] = null;

    //double fitWeightsCheck[] = fitWeights.clone(); //DEBUG!!!

    while (!done) {

        if (firstFit) {
            coeffs = fitSeries(fitSamp, fitTrueVals, fitWeights, lambda, includeConst, order, PCOrder, isPC,
                    false, c, M);
            firstFit = false;
        } else {
            double weightDiffs[] = fitWeights.clone();
            for (int s = 0; s < numRestraints; s++)
                weightDiffs[s] -= oldFitWeights[s];
            coeffs = fitSeries(fitSamp, fitTrueVals, weightDiffs, lambda, includeConst, order, PCOrder, isPC,
                    true, c, M);
            //DEBUG!!! (commented-out consistency checks of the updated fit against a full refit)
        }
        oldFitWeights = fitWeights.clone();

        done = true;
        ArrayList<SampleCutoffCrossing> scc = new ArrayList<SampleCutoffCrossing>();

        meanResidual = 0;
        weightSum = 0;

        //boolean doneNoTol = true, done2 = true; //DEBUG!!!

        //values of series at each sample, based on coeffs
        double serVals[] = new double[numSamples];
        for (int s = 0; s < numSamples; s++) {
            serVals[s] = evalSeries(coeffs, samp[s], nd, includeConst, order, PCOrder, isPC);

            //If each series value is below or above bCutoff according to whether the
            //coeffs were generated by fitting with or without that value included
            //(respectively), then we have found a local and thus the global minimum
            //in this quadratic piece of the objective function, so we're done

            //check for doneness first, using tolerance
            //i.e. are coeffs (derived from fitWeights) consistent with fitWeights,
            //within numerical error? If so we have a global minimum
            if (trueVals[s] >= bCutoffs[s]) {
                if (fitWeights[s] > 0) {
                    if (!isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], false, -1e-6))
                        done = false; //fitWeights penalizing deviation from bCutoff, and this isn't right at coeffs
                } else {
                    boolean secondRestraintOn = revSecondEntries.containsKey(s);
                    if (secondRestraintOn)
                        secondRestraintOn = (fitWeights[numSamples + revSecondEntries.get(s)] > 0);
                    if (secondRestraintOn) {
                        if (!isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], true,
                                -1e-6))
                            done = false; //fitWeights penalizing deviation from trueVal, and this isn't right at coeffs
                    } else { //restraints currently off
                        if (isRestraintActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], 1e-6))
                            done = false; //a restraint should be on at coeffs
                    }
                }
            }
            //for trueVals below bCutoff, restraints don't turn on and off

            //DEBUG!!! (commented-out tolerance-free doneness computation, plus an equivalent
            //recalculation "done2" matching the weight updates below)

            //Now calculate mean residual and crossing points, and update fitWeights
            double residTerm = 0;
            if (trueVals[s] >= bCutoffs[s]) {
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], false)) {
                    //activate penalty for deviating from bCutoff
                    fitWeights[s] = weights[s];
                    if (revSecondEntries.containsKey(s))
                        fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                    residTerm = (serVals[s] - bCutoffs[s]) * (serVals[s] - bCutoffs[s]);
                } else if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], true)) {
                    //activate penalty for deviating from trueVal
                    fitWeights[s] = 0;
                    if (revSecondEntries.containsKey(s))
                        fitWeights[numSamples + revSecondEntries.get(s)] = weights[s];
                    else
                        throw new RuntimeException("ERROR: should have second entry for restraint but don't!!");
                    residTerm = (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);
                } else {
                    //deactivate all penalties
                    fitWeights[s] = 0;
                    if (revSecondEntries.containsKey(s))
                        fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                    //no contribution to residual
                }
            } else //normal least-squares penalty. fitWeights[s] will stay at weights[s]
                residTerm = (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);

            meanResidual += weights[s] * residTerm;
            //If want sample-by-sample output...
            //System.out.println("TRAININGSET TRUE: "+trueVals[s]+" SER: "+serVals[s]);
            weightSum += weights[s];
        }

        meanResidual /= weightSum;
        if (meanResidual == prevResid)
            System.out.println();

        //DEBUG!!! (commented-out comparison of the tolerance and tolerance-free doneness flags,
        //and a refit consistency check when done)

        if ((!done) && (meanResidual >= prevResid)) {
            //Did not obtain a decrease using the Newton step
            //Let's do an exact line search to rectify the situation
            if (!useLineSearch) {
                System.out.println("Skipping line search, returning with residual " + prevResid);
                return oldCoeffs;
            }
            System.out.println("LINE SEARCH");

            for (int s = 0; s < numSamples; s++) {
                //If we go in or out of either type of restraint between serVals and oldSerVals, we create
                //a SampleCutoffCrossing of the appropriate type (upper or lower (ordinary) restraint)
                if ((isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s], false))
                        != (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], false))) {
                    //If the restraint disappears at one end we know trueVal>=bCutoff here
                    //create lower restraint SampleCutoffCrossing
                    double crossingPoint = (bCutoffs[s] - oldSerVals[s]) / (serVals[s] - oldSerVals[s]);
                    scc.add(new SampleCutoffCrossing(s, crossingPoint, false));
                }
                if ((isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s], true))
                        != (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], true))) {
                    //upper
                    double crossingPoint2 = (trueVals[s] - oldSerVals[s]) / (serVals[s] - oldSerVals[s]);
                    scc.add(new SampleCutoffCrossing(s, crossingPoint2, true));
                }
            }

            int changeCount = scc.size();
            Collections.sort(scc);

            TreeSet<Integer> crossingIndices = new TreeSet<Integer>();
            for (SampleCutoffCrossing cr : scc) {
                int s = cr.sampleIndex;
                crossingIndices.add(s);
                if (cr.upperRestraint) { //penalizing difference from trueVal
                    cr.quadTerm = weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                    cr.linTerm = 2 * weights[s] * (serVals[s] - oldSerVals[s]) * (oldSerVals[s] - trueVals[s]);
                    cr.constTerm = weights[s] * (oldSerVals[s] - trueVals[s]) * (oldSerVals[s] - trueVals[s]);
                } else { //penalizing difference from lesser of bCutoff, trueVal
                    cr.quadTerm = weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                    double baseVal = Math.min(trueVals[s], bCutoffs[s]);
                    cr.linTerm = 2 * weights[s] * (serVals[s] - oldSerVals[s]) * (oldSerVals[s] - baseVal);
                    cr.constTerm = weights[s] * (oldSerVals[s] - baseVal) * (oldSerVals[s] - baseVal);
                    //cr.linTerm = 2*weights[s]*(serVals[s]-oldSerVals[s])*(oldSerVals[s]-trueVals[s]);
                    //cr.constTerm = weights[s]*(oldSerVals[s]-trueVals[s])*(oldSerVals[s]-trueVals[s]);
                }
            }

            //Set up quadratic function
            double quadTerm = 0;
            double linTerm = 0;
            double constTerm = 0;
            //Add in contributions from all non-cutoff-crossing points
            for (int s = 0; s < numSamples; s++) {
                if (!crossingIndices.contains(s)) {
                    if (isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s], false)) {
                        //if(trueVals[s]<bCutoffs[s]||serVals[s]<bCutoffs[s]){ //penalizing difference from lesser of bCutoff, trueVal
                        quadTerm += weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                        double baseVal = Math.min(trueVals[s], bCutoffs[s]);
                        linTerm += 2 * weights[s] * (serVals[s] - oldSerVals[s]) * (oldSerVals[s] - baseVal);
                        constTerm += weights[s] * (oldSerVals[s] - baseVal) * (oldSerVals[s] - baseVal);
                        //linTerm += 2*weights[s]*(serVals[s]-oldSerVals[s])*(oldSerVals[s]-trueVals[s]);
                        //constTerm += weights[s]*(oldSerVals[s]-trueVals[s])*(oldSerVals[s]-trueVals[s]);
                    } else if (isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s],
                            true)) {
                        //else if(isRestraintActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],0)){ //penalizing difference from trueVal
                        quadTerm += weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                        linTerm += 2 * weights[s] * (serVals[s] - oldSerVals[s]) * (oldSerVals[s] - trueVals[s]);
                        constTerm += weights[s] * (oldSerVals[s] - trueVals[s]) * (oldSerVals[s] - trueVals[s]);
                    }
                }
            }
            //contributions from cutoff-crossing points at the beginning of the interval
            //(i.e. at coeffs)
            for (SampleCutoffCrossing cr : scc) {
                int s = cr.sampleIndex;
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                        cr.upperRestraint)) {
                    //check if this particular restraint (rather than either restraint for this s) is active
                    //if(serVals[s]<bCutoffs[s]){
                    quadTerm += cr.quadTerm;
                    linTerm += cr.linTerm;
                    constTerm += cr.constTerm;
                }
            }

            //double checkMeanResid = (quadTerm+linTerm+constTerm)/weightSum; //evaluate objective function at a=1
            //should match meanResidual!

            double prevNodeResid = Double.POSITIVE_INFINITY;
            //The first increase we may consider is from node 0 to 1
            //Now walk back until we get an increase
            //then the minimum will be in one of the last two quadratic pieces
            int lowestNodeIndex = 0;
            for (int curChangeIndex = changeCount - 1; curChangeIndex >= 0; curChangeIndex--) {
                SampleCutoffCrossing cr = scc.get(curChangeIndex);
                double a = cr.crossingPoint;
                double curNodeResid = (a * a * quadTerm + a * linTerm + constTerm) / weightSum;
                int s = cr.sampleIndex;
                if (curNodeResid > prevNodeResid) {
                    lowestNodeIndex = curChangeIndex + 1;
                    break;
                }
                //if the restraint is being removed going from serVals to oldSerVals, remove its coefficients from the quadratic function
                //if it's being added, add them
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                        cr.upperRestraint)) {
                    quadTerm -= cr.quadTerm;
                    linTerm -= cr.linTerm;
                    constTerm -= cr.constTerm;
                } else {
                    quadTerm += cr.quadTerm;
                    linTerm += cr.linTerm;
                    constTerm += cr.constTerm;
                }
                prevNodeResid = curNodeResid;
            }

            //At this point, we know our minimum is in either the piece with the
            //current quad, lin, constTerms or the one we looked at right before
            //(where lowestNodeIndex is the node separating them)
            double a_min = -linTerm / (2 * quadTerm);
            SampleCutoffCrossing cr = scc.get(lowestNodeIndex);
            if (a_min > cr.crossingPoint) { //minimum must be in previous piece
                //revert quad and linTerms
                int s = cr.sampleIndex;
                //change back quadratic-function coefficients
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                        cr.upperRestraint)) {
                    quadTerm += cr.quadTerm;
                    linTerm += cr.linTerm;
                    constTerm += cr.constTerm;
                } else {
                    quadTerm -= cr.quadTerm;
                    linTerm -= cr.linTerm;
                    constTerm -= cr.constTerm;
                }
                a_min = -linTerm / (2 * quadTerm);
            }

            //double minResid = (a_min*a_min*quadTerm + a_min*linTerm + constTerm)/weightSum;

            for (int p = 0; p < numParams; p++)
                coeffs[p] = coeffs[p] * a_min + (1 - a_min) * oldCoeffs[p];

            double minResid = 0;
            for (int s = 0; s < numSamples; s++) {
                serVals[s] = evalSeries(coeffs, samp[s], nd, includeConst, order, PCOrder, isPC);
                if (trueVals[s] >= bCutoffs[s]) {
                    if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], false)) {
                        //activate penalty for deviating from bCutoff
                        fitWeights[s] = weights[s];
                        if (revSecondEntries.containsKey(s))
                            fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                        minResid += weights[s] * (serVals[s] - bCutoffs[s]) * (serVals[s] - bCutoffs[s]);
                    } else if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], true)) {
                        minResid += weights[s] * (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);
                        //activate penalty for deviating from trueVal
                        fitWeights[s] = 0;
                        fitWeights[numSamples + revSecondEntries.get(s)] = weights[s];
                    } else {
                        //deactivate all penalties
                        fitWeights[s] = 0;
                        if (revSecondEntries.containsKey(s))
                            fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                    }
                } else
                    minResid += weights[s] * (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);
            }
            minResid /= weightSum;

            if (minResid >= prevResid) {
                //consider to have converged
                //NOTE THIS CAN HAPPEN IF THE QUADRATIC APPROXIMATION AT OLDCOEFFS HAS SOLUTION
                //FAR FROM THE EXACT VALUE (ASSUMING EXACT FITSERIES) OF 1
                //THIS CAN HAPPEN IF WE'RE GETTING BELOW THE NUMERICAL PRECISION OF FITSERIES
                System.out.println("TRAINING SET MEAN RESIDUAL:" + prevResid);
                System.out.println("CONVERGED IN LINE SEARCH, line search min: " + minResid);
                System.out.println("fitSeriesIterative time (ms): " + (System.currentTimeMillis() - startTime));
                return oldCoeffs;
            }

            meanResidual = minResid;
        }
        //(commented-out alternative: a backtracking line search, kept in the original
        //source in the hope of better numerical stability)

        oldCoeffs = coeffs;
        System.out.println("STEP RESIDUAL: " + meanResidual);
        prevResid = meanResidual;
        oldSerVals = serVals;
    }

    System.out.println("TRAINING SET MEAN RESIDUAL:" + meanResidual);
    System.out.println("fitSeriesIterative time (ms): " + (System.currentTimeMillis() - startTime));

    //DEBUG!!! (commented-out residual/gradient consistency checks against a full refit;
    //note these assume weights are all 1)

    return coeffs;
}
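In the exact line search above, TreeSet<Integer> crossingIndices serves purely as a fast sorted membership set: samples whose restraints cross a cutoff are collected once (duplicates from the two restraint types collapse), and every other sample contributes to the fixed quadratic terms. A small sketch of that role, with made-up sample indices:

import java.util.TreeSet;

public class CrossingIndexDemo {
    public static void main(String[] args) {
        TreeSet<Integer> crossingIndices = new TreeSet<Integer>();
        int[] crossingSamples = { 7, 2, 7, 11 }; // a sample may cross for both restraint types; add() collapses it
        for (int s : crossingSamples)
            crossingIndices.add(s);
        int numSamples = 15;
        int fixedContributions = 0;
        for (int s = 0; s < numSamples; s++) {
            if (!crossingIndices.contains(s)) { // O(log n) membership test
                fixedContributions++; // here the fit accumulates this sample's quadratic terms
            }
        }
        System.out.println(crossingIndices);      // [2, 7, 11]
        System.out.println(fixedContributions);   // 12
    }
}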
From source file:net.semanticmetadata.lire.solr.FastLireRequestHandler.java
/**
 * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
 *
 * @param rsp
 * @param searcher
 * @param hashFieldName the hash field name
 * @param maximumHits
 * @param terms
 * @param queryFeature
 * @throws java.io.IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher,
        String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature)
        throws IOException, IllegalAccessException, InstantiationException {
    // temp feature instance
    LireFeature tmpFeature = queryFeature.getClass().newInstance();
    // Taking the time of search for statistical purposes.
    time = System.currentTimeMillis();
    Filter filter = null;
    // if the request contains a filter:
    if (req.getParams().get("fq") != null) {
        // only filters with [<field>:<value> ]+ are supported
        StringTokenizer st = new StringTokenizer(req.getParams().get("fq"), " ");
        LinkedList<Term> filterTerms = new LinkedList<Term>();
        while (st.hasMoreElements()) {
            String[] tmpToken = st.nextToken().split(":");
            if (tmpToken.length > 1) {
                filterTerms.add(new Term(tmpToken[0], tmpToken[1]));
            }
        }
        if (filterTerms.size() > 0)
            filter = new TermsFilter(filterTerms);
    }
    TopDocs docs;
    // with query only.
    if (filter == null) {
        docs = searcher.search(query, numberOfCandidateResults);
    } else {
        docs = searcher.search(query, filter, numberOfCandidateResults);
    }
    // TopDocs docs = searcher.search(query, new TermsFilter(terms), numberOfCandidateResults); // with TermsFilter and boosting by simple query
    // TopDocs docs = searcher.search(new ConstantScoreQuery(new TermsFilter(terms)), numberOfCandidateResults); // just with TermsFilter
    time = System.currentTimeMillis() - time;
    rsp.add("RawDocsCount", docs.scoreDocs.length + "");
    rsp.add("RawDocsSearchTime", time + "");
    // re-rank
    time = System.currentTimeMillis();
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    float maxDistance = -1f;
    float tmpScore;
    String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
    // iterating and re-ranking the documents.
    BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName);
    BytesRef bytesRef = new BytesRef();
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // using DocValues to retrieve the field values ...
        binaryValues.get(docs.scoreDocs[i].doc, bytesRef);
        tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        // Getting the document from the index.
        // This is the slow step based on the field compression of stored fields.
        // tmpFeature.setByteArrayRepresentation(d.getBinaryValue(name).bytes, d.getBinaryValue(name).offset, d.getBinaryValue(name).length);
        tmpScore = queryFeature.getDistance(tmpFeature);
        if (resultScoreDocs.size() < maximumHits) {
            // todo: There's potential here for a memory saver, think of a clever data structure
            // that can do the trick without creating a new SimpleResult for each result.
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            maxDistance = resultScoreDocs.last().getDistance();
        } else if (tmpScore < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            resultScoreDocs.remove(resultScoreDocs.last());
            // add the new one ...
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            // and set our new distance border ...
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    // System.out.println("** Creating response.");
    time = System.currentTimeMillis() - time;
    rsp.add("ReRankSearchTime", time + "");
    LinkedList list = new LinkedList();
    for (Iterator<SimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) {
        SimpleResult result = it.next();
        HashMap m = new HashMap(2);
        m.put("d", result.getDistance());
        // add fields as requested:
        if (req.getParams().get("fl") == null) {
            m.put("id", result.getDocument().get("id"));
            if (result.getDocument().get("title") != null)
                m.put("title", result.getDocument().get("title"));
        } else {
            String fieldsRequested = req.getParams().get("fl");
            if (fieldsRequested.contains("score")) {
                m.put("score", result.getDistance());
            }
            if (fieldsRequested.contains("*")) {
                // all fields
                for (IndexableField field : result.getDocument().getFields()) {
                    String tmpField = field.name();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            } else {
                StringTokenizer st;
                if (fieldsRequested.contains(","))
                    st = new StringTokenizer(fieldsRequested, ",");
                else
                    st = new StringTokenizer(fieldsRequested, " ");
                while (st.hasMoreElements()) {
                    String tmpField = st.nextToken();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            }
        }
        // m.put(field, result.getDocument().get(field));
        // m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
        list.add(m);
    }
    rsp.add("docs", list);
    // rsp.add("Test-name", "Test-val");
}
From source file:guineu.modules.filter.Alignment.RANSAC.RansacAlignerTask.java
/**
 * @param peakList
 * @return
 */
private HashMap<PeakListRow, PeakListRow> getAlignmentMap(Dataset peakList) {
    // Create a table of mappings for best scores
    HashMap<PeakListRow, PeakListRow> alignmentMapping = new HashMap<PeakListRow, PeakListRow>();
    if (alignedPeakList.getNumberRows() < 1) {
        return alignmentMapping;
    }
    // Create a sorted set of scores matching
    TreeSet<RowVsRowScore> scoreSet = new TreeSet<RowVsRowScore>();
    // RANSAC algorithm
    List<AlignStructMol> list = ransacPeakLists(alignedPeakList, peakList);
    PolynomialFunction function = this.getPolynomialFunction(list,
            ((SimpleLCMSDataset) alignedPeakList).getRowsRTRange());
    PeakListRow allRows[] = peakList.getRows().toArray(new PeakListRow[0]);
    for (PeakListRow row : allRows) {
        double rt = 0.0;
        try {
            rt = function.value(((SimplePeakListRowLCMS) row).getRT());
        } catch (NullPointerException e) {
            rt = ((SimplePeakListRowLCMS) row).getRT();
        }
        if (Double.isNaN(rt) || rt == -1) {
            rt = ((SimplePeakListRowLCMS) row).getRT();
        }
        Range mzRange = this.mzTolerance.getToleranceRange(((SimplePeakListRowLCMS) row).getMZ());
        Range rtRange = this.rtToleranceAfterRTcorrection.getToleranceRange(rt);
        // Get all rows of the aligned peaklist within parameter limits
        PeakListRow candidateRows[] = ((SimpleLCMSDataset) alignedPeakList).getRowsInsideRTAndMZRange(rtRange,
                mzRange);
        for (PeakListRow candidate : candidateRows) {
            RowVsRowScore score;
            try {
                score = new RowVsRowScore(row, candidate, mzTolerance.getTolerance(),
                        rtToleranceAfterRTcorrection.getTolerance(), rt);
                scoreSet.add(score);
                errorMessage = score.getErrorMessage();
            } catch (Exception e) {
                e.printStackTrace();
                setStatus(TaskStatus.ERROR);
                return null;
            }
        }
        progress = (double) processedRows++ / (double) totalRows;
    }
    // Iterate scores by descending order
    Iterator<RowVsRowScore> scoreIterator = scoreSet.iterator();
    while (scoreIterator.hasNext()) {
        RowVsRowScore score = scoreIterator.next();
        // Check if the row is already mapped
        if (alignmentMapping.containsKey(score.getPeakListRow())) {
            continue;
        }
        // Check if the aligned row is already filled
        if (alignmentMapping.containsValue(score.getAlignedRow())) {
            continue;
        }
        alignmentMapping.put(score.getPeakListRow(), score.getAlignedRow());
    }
    return alignmentMapping;
}
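The greedy matching above depends on RowVsRowScore implementing Comparable so the TreeSet iterates best score first. Note that TreeSet.add silently drops an element whose compareTo returns 0 against an existing one, so score classes usually need a tie-breaker. A hedged sketch of such a score type (field names are illustrative, not Guineu's):

import java.util.TreeSet;

public class ScoreOrderingDemo {
    static class Score implements Comparable<Score> {
        final double value;
        final int rowId; // tie-breaker: without it, equal-valued scores would be dropped by add()
        Score(double value, int rowId) {
            this.value = value;
            this.rowId = rowId;
        }
        public int compareTo(Score o) {
            int c = Double.compare(o.value, value); // descending: best score iterates first
            return c != 0 ? c : Integer.compare(rowId, o.rowId);
        }
        public String toString() {
            return "row" + rowId + ":" + value;
        }
    }

    public static void main(String[] args) {
        TreeSet<Score> scoreSet = new TreeSet<Score>();
        scoreSet.add(new Score(0.75, 1));
        scoreSet.add(new Score(0.90, 2));
        scoreSet.add(new Score(0.75, 3)); // kept only because of the rowId tie-breaker
        System.out.println(scoreSet);     // [row2:0.9, row1:0.75, row3:0.75]
    }
}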
From source file:com.cloudera.recordbreaker.schemadict.SchemaSuggest.java
/**
 * This method infers new schema labels for each element in the input. It returns a Schema object that
 * has the identical format as the input file's Schema object, but the labels may be changed.
 */
public List<DictionaryMapping> inferSchemaMapping(File avroFile, int k) throws IOException {
    SchemaStatisticalSummary srcSummary = new SchemaStatisticalSummary("input");
    Schema srcSchema = srcSummary.createSummaryFromData(avroFile);
    srcSummary.setUseAttributeLabels(useAttributeLabels);

    //
    // Compare the statistics to the database of schema statistics. Find the closest matches, both
    // on a per-attribute basis and structurally.
    //
    int schemaSize = srcSchema.getFields().size();

    //
    // We start testing the input database against known schemas that have an identical
    // number of attributes, which should allow for the best matches. This gives us an
    // initial set of distances. We then expand the search to schemas of greater or fewer
    // attributes, as long as a given bucket of size-k schemas has a min-distance of less
    // than the current top-k matches.
    //
    TreeSet<DictionaryMapping> sorter = new TreeSet<DictionaryMapping>();
    int numMatches = 0;
    List<Integer> seenIndexes = new ArrayList<Integer>();
    int searchRadius = 0;
    boolean seenAllCandidates = false;
    int srcSchemaSize = srcSchema.getFields().size();
    int totalSchemasExamined = 0;

    while (!seenAllCandidates) {
        // Examine the relevant schema buckets, compute all matches to those schemas
        for (int j = Math.max(1, srcSchemaSize - searchRadius); j <= Math.min(NUM_BUCKETS,
                srcSchemaSize + searchRadius); j++) {
            if (seenIndexes.contains(j - 1)) {
                continue;
            }
            for (SchemaDictionaryEntry elt : dictBySize.get(j - 1)) {
                /////////////////////////////
                // This is where we instrument the mapping stuff.
                // If the pair is an interesting one, then emit the mapping that
                // we discover. Why are good matches going undiscovered?
                /////////////////////////////
                SchemaMapping mapping = srcSummary.getBestMapping(elt.getSummary());
                if (srcSchema.getName().equals(elt.getSchema().getName())) {
                    System.err
                            .println("Comparing " + srcSchema.getName() + " with " + elt.getSchema().getName());
                    System.err.println("Obtained mapping: " + mapping.toString());
                }
                totalSchemasExamined++;
                sorter.add(new DictionaryMapping(mapping, elt));
                numMatches++;
            }
            seenIndexes.add(j - 1);
        }

        // Have we examined the entire corpus of known schemas?
        if ((srcSchemaSize - searchRadius) <= 1 && (srcSchemaSize + searchRadius) >= NUM_BUCKETS) {
            seenAllCandidates = true;
        } else {
            // Test to see if the best matches are good enough that we can stop looking.
            // We compare the lowest known match distance to the minimum distance for matches
            // in the closest non-examined buckets.
            int lowestSize = srcSchemaSize - searchRadius - 1;
            int highestSize = srcSchemaSize + searchRadius + 1;
            double minNearbyDistance = Double.MAX_VALUE;
            if (lowestSize >= 1) {
                minNearbyDistance = Math.min(minNearbyDistance,
                        SchemaStatisticalSummary.getMinimumMappingCost(srcSchemaSize, lowestSize));
            }
            if (highestSize <= NUM_BUCKETS) {
                minNearbyDistance = Math.min(minNearbyDistance,
                        SchemaStatisticalSummary.getMinimumMappingCost(srcSchemaSize, highestSize));
            }
            // Grab from the sorter the elt that is MIN_ELTS_SUGGESTED into the sorted list
            if (sorter.size() >= k) {
                DictionaryMapping testDictMapping = null;
                int idx = 0;
                for (DictionaryMapping cur : sorter) {
                    idx++;
                    if (idx == k) {
                        testDictMapping = cur;
                        break;
                    }
                }
                if (testDictMapping.getMapping().getDist() < minNearbyDistance) {
                    seenAllCandidates = true;
                }
            }
        }
        searchRadius++;
    }

    // Return the k best schema mappings
    double smallestDistance = sorter.first().getMapping().getDist();
    List<DictionaryMapping> dsts = new ArrayList<DictionaryMapping>();
    for (DictionaryMapping dp : sorter) {
        if (dsts.size() > k && dp.getMapping().getDist() > smallestDistance) {
            break;
        }
        dsts.add(dp);
    }
    double pct = totalSchemasExamined / (1.0 * dict.contents().size());
    System.err.println("Final search radius of " + searchRadius + " yielded a search over " + pct
            + " of all known databases.");
    return dsts;
}
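The early-termination test above walks the TreeSet to the k-th mapping by counting during iteration, because TreeSet offers sorted traversal but no index access. A compact sketch of that k-th-element probe, with made-up distances:

import java.util.TreeSet;

public class KthElementProbe {
    public static void main(String[] args) {
        TreeSet<Double> distances = new TreeSet<Double>();
        double[] observed = { 3.2, 0.5, 4.8, 1.1, 2.7 };
        for (double d : observed)
            distances.add(d);
        int k = 3;
        Double kth = null;
        int idx = 0;
        for (Double d : distances) { // ascending order, which the stopping test relies on
            if (++idx == k) {
                kth = d;
                break;
            }
        }
        System.out.println("k-th best distance: " + kth); // 2.7
    }
}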
From source file:net.spfbl.data.Block.java
public static TreeSet<String> get() throws ProcessException {
    TreeSet<String> blockSet = new TreeSet<String>();
    for (String token : getAll()) {
        if (!token.contains(":")) {
            blockSet.add(token);
        }
    }
    return blockSet;
}
From source file:net.spfbl.core.Analise.java
public TreeSet<String> getResultFullSet() throws InterruptedException {
    TreeMap<String, String> map = new TreeMap<String, String>();
    whiteFullSet(map);
    File resultFile = getResultFile();
    if (resultFile.exists()) {
        try {
            FileReader fileReader = new FileReader(resultFile);
            BufferedReader bufferedReader = new BufferedReader(fileReader);
            try {
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    int index = line.indexOf(' ');
                    if (index > 0) {
                        String ip = line.substring(0, index);
                        try {
                            if (containsResultSet(ip)) {
                                String result = line.substring(index + 1);
                                map.put(ip, result);
                            }
                        } catch (InterruptedException ex) {
                            Server.logError(ex);
                        }
                    }
                }
            } finally {
                bufferedReader.close();
            }
        } catch (Exception ex) {
            Server.logError(ex);
        }
    }
    TreeSet<String> set = new TreeSet<String>();
    for (String ip : map.keySet()) {
        String result = map.get(ip);
        set.add(ip + " " + result);
    }
    return set;
}
From source file:org.dasein.cloud.openstack.nova.os.ext.rackspace.lb.RackspaceLoadBalancers.java
private @Nonnull Collection<String> mapNodes(@Nonnull ProviderContext ctx, @Nonnull String loadBalancerId,
        @Nullable String[] serverIds) throws CloudException, InternalException {
    TreeSet<String> nodeIds = new TreeSet<String>();
    if (serverIds != null && serverIds.length > 0) {
        Collection<Node> nodes = getNodes(loadBalancerId);
        for (String serverId : serverIds) {
            VirtualMachine vm = provider.getComputeServices().getVirtualMachineSupport()
                    .getVirtualMachine(serverId);
            if (vm != null) {
                boolean there = false;
                if (vm.getProviderRegionId().equals(ctx.getRegionId())) {
                    RawAddress[] addrs = vm.getPrivateAddresses();
                    for (RawAddress addr : addrs) {
                        for (Node n : nodes) {
                            if (n.address.equals(addr.getIpAddress())) {
                                nodeIds.add(n.nodeId);
                                there = true;
                                break;
                            }
                        }
                        if (there) {
                            break;
                        }
                    }
                }
                if (!there) {
                    RawAddress[] addrs = vm.getPublicAddresses();
                    for (RawAddress addr : addrs) {
                        for (Node n : nodes) {
                            if (n.address.equals(addr.getIpAddress())) {
                                nodeIds.add(n.nodeId);
                                there = true;
                                break;
                            }
                        }
                        if (there) {
                            break;
                        }
                    }
                }
            }
        }
    }
    return nodeIds;
}
From source file:massbank.BatchSearchWorker.java
/**
 * Creates the summary file (HTML format). (Javadoc translated from garbled Japanese.)
 * @param resultFile the result file
 * @param htmlFile the HTML file to write
 */
private void createSummary(File resultFile, File htmlFile) {
    LineNumberReader in = null;
    PrintWriter out = null;
    try {
        // (1) read the result file
        String line;
        int cnt = 0;
        ArrayList<String> nameList = new ArrayList<String>();
        ArrayList<String> top1LineList = new ArrayList<String>();
        TreeSet<String> top1IdList = new TreeSet<String>();
        in = new LineNumberReader(new FileReader(resultFile));
        while ((line = in.readLine()) != null) {
            line = line.trim();
            if (line.equals("")) {
                cnt = 0;
            } else {
                cnt++;
                if (cnt == 1) {
                    nameList.add(line);
                } else if (cnt == 2) {
                    if (line.equals("-1")) {
                        top1LineList.add("Invalid");
                    }
                    if (line.equals("0")) {
                        top1LineList.add("0");
                    }
                } else if (cnt == 4) {
                    String[] vals = line.split("\t");
                    String id = vals[0];
                    top1IdList.add(id);
                    top1LineList.add(line);
                }
            }
        }

        // KEGG pathway processing runs only against the http://www.massbank.jp/ server
        HashMap<String, ArrayList> massbank2mapList = new HashMap<String, ArrayList>(); // used in (2)
        HashMap<String, String> massbank2keggList = new HashMap<String, String>();      // used in (2)
        HashMap<String, ArrayList> map2keggList = new HashMap<String, ArrayList>();     // used in (3)
        ArrayList<String> mapNameList = new ArrayList<String>();                        // used in (4)
        boolean isKeggReturn = false;
        // if (serverUrl.indexOf("www.massbank.jp") == -1) {
        //     isKeggReturn = false;
        // }
        if (isKeggReturn) {
            // (2) look up KEGG IDs and pathway map IDs in the DB
            String where = "where MASSBANK in(";
            Iterator it = top1IdList.iterator();
            while (it.hasNext()) {
                String id = (String) it.next();
                where += "'" + id + "',";
            }
            where = where.substring(0, where.length() - 1);
            where += ")";
            String sql = "select MASSBANK, t1.KEGG, MAP from " + "(SELECT MASSBANK,KEGG FROM OTHER_DB_IDS "
                    + where + ") t1, PATHWAY_CPDS t2" + " where t1.KEGG=t2.KEGG order by MAP,MASSBANK";
            ArrayList<String> mapList = null;
            try {
                Class.forName("com.mysql.jdbc.Driver");
                String connectUrl = "jdbc:mysql://localhost/MassBank_General";
                Connection con = DriverManager.getConnection(connectUrl, "bird", "bird2006");
                Statement stmt = con.createStatement();
                ResultSet rs = stmt.executeQuery(sql);
                String prevId = "";
                while (rs.next()) {
                    String id = rs.getString(1);
                    String kegg = rs.getString(2);
                    String map = rs.getString(3);
                    if (!id.equals(prevId)) {
                        if (!prevId.equals("")) {
                            massbank2mapList.put(prevId, mapList);
                        }
                        mapList = new ArrayList<String>();
                        massbank2keggList.put(id, kegg);
                    }
                    mapList.add(map);
                    prevId = id;
                }
                massbank2mapList.put(prevId, mapList);
                rs.close();
                stmt.close();
                con.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            if (mapList != null) {
                // (3) build the list of KEGG compounds per pathway map
                it = massbank2mapList.keySet().iterator();
                while (it.hasNext()) {
                    String id = (String) it.next();
                    String kegg = (String) massbank2keggList.get(id);
                    ArrayList<String> list1 = massbank2mapList.get(id);
                    for (int i = 0; i < list1.size(); i++) {
                        String map = list1.get(i);
                        ArrayList<String> list2 = null;
                        if (map2keggList.containsKey(map)) {
                            list2 = map2keggList.get(map);
                            list2.add(kegg);
                        } else {
                            list2 = new ArrayList<String>();
                            list2.add(kegg);
                            map2keggList.put(map, list2);
                        }
                    }
                }
                // (4) invoke the pathway-map coloring method via SOAP
                it = map2keggList.keySet().iterator();
                List<Callable<HashMap<String, String>>> tasks = new ArrayList();
                while (it.hasNext()) {
                    String map = (String) it.next();
                    mapNameList.add(map);
                    ArrayList<String> list = map2keggList.get(map);
                    String[] cpds = list.toArray(new String[] {});
                    Callable<HashMap<String, String>> task = new ColorPathway(map, cpds);
                    tasks.add(task);
                }
                Collections.sort(mapNameList);
                // thread pool of size 10
                ExecutorService exsv = Executors.newFixedThreadPool(10);
                List<Future<HashMap<String, String>>> results = exsv.invokeAll(tasks);
                // directory for storing the pathway maps
                String saveRootPath = MassBankEnv.get(MassBankEnv.KEY_TOMCAT_APPTEMP_PATH) + "pathway";
                File rootDir = new File(saveRootPath);
                if (!rootDir.exists()) {
                    rootDir.mkdir();
                }
                // String savePath = saveRootPath + File.separator + this.jobId;
                // File newDir = new File(savePath);
                // if ( !newDir.exists() ) {
                //     newDir.mkdir();
                // }
                // (6) download the colored pathway maps from the returned URLs
                for (Future<HashMap<String, String>> future : results) {
                    HashMap<String, String> res = future.get();
                    it = res.keySet().iterator();
                    String map = (String) it.next();
                    String mapUrl = res.get(map);
                    String filePath = saveRootPath + File.separator + this.jobId + "_" + map + ".png";
                    FileUtil.downloadFile(mapUrl, filePath);
                }
            }
        }

        // (7) write the output
        out = new PrintWriter(new BufferedWriter(new FileWriter(htmlFile)));
        // write the header
        String reqIonStr = "Both";
        try {
            if (Integer.parseInt(this.ion) > 0) {
                reqIonStr = "Positive";
            } else if (Integer.parseInt(this.ion) < 0) {
                reqIonStr = "Negative";
            }
        } catch (NumberFormatException nfe) {
            nfe.printStackTrace();
        }
        String title = "Summary of Batch Service Results";
        out.println("<html>");
        out.println("<head>");
        out.println("<title>" + title + "</title>");
        out.println("</head>");
        out.println("<body>");
        out.println("<h1>" + title + "</h1>");
        out.println("<hr>");
        out.println("<h3>Request Date : " + this.time + "</h3>");
        out.println("Instrument Type : " + this.inst + "<br>");
        out.println("MS Type : " + this.ms + "<br>");
        out.println("Ion Mode : " + reqIonStr + "<br>");
        out.println("<br>");
        out.println("<hr>");
        out.println("<table border=\"1\" cellspacing=\"0\" cellpadding=\"2\">");
        String cols = String.valueOf(mapNameList.size());
        out.println("<tr>");
        out.println("<th bgcolor=\"LavenderBlush\" rowspan=\"1\">No.</th>");
        out.println("<th bgcolor=\"LavenderBlush\" rowspan=\"1\">Query Name</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Score</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Hit</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">MassBank ID</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Record Title</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Formula</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Exact Mass</th>");
        if (isKeggReturn) {
            out.println("<th bgcolor=\"LightYellow\" rowspan=\"2\">KEGG ID</th>");
            out.println("<th bgcolor=\"LightYellow\" colspan=\"" + cols + "\">Colored Pathway Maps</th>");
        }
        out.println("</tr>");
        out.print("<tr bgcolor=\"moccasin\">");
        for (int i = 0; i < mapNameList.size(); i++) {
            out.print("<th>MAP" + String.valueOf(i + 1) + "</th>");
        }
        out.println("</tr>");
        for (int i = 0; i < nameList.size(); i++) {
            out.println("<tr>");
            String no = String.format("%5d", i + 1);
            no = no.replace(" ", "&nbsp;");
            out.println("<td>" + no + "</td>");
            // Query Name
            String queryName = nameList.get(i);
            out.println("<td nowrap>" + queryName + "</td>");
            line = top1LineList.get(i);
            if (line.equals("0")) {
                if (isKeggReturn) {
                    cols = String.valueOf(mapNameList.size() + 5);
                } else {
                    cols = String.valueOf(6);
                }
                out.println("<td colspan=\"" + cols + "\">No Hit Record</td>");
            } else if (line.equals("Invalid")) {
                if (isKeggReturn) {
                    cols = String.valueOf(mapNameList.size() + 5);
                } else {
                    cols = String.valueOf(4);
                }
                out.println("<td colspan=\"" + cols + "\">Invalid Query</td>");
            } else {
                String[] data = formatLine(line);
                String id = data[0];
                String recTitle = data[1];
                String formula = data[2];
                String emass = data[3];
                String score = data[4];
                String hit = data[5];
                boolean isHiScore = false;
                if (Integer.parseInt(hit) >= 3 && Double.parseDouble(score) >= 0.8) {
                    isHiScore = true;
                }
                // Score
                if (isHiScore) {
                    out.println("<td><b>" + score + "</b></td>");
                } else {
                    out.println("<td>" + score + "</td>");
                }
                // hit peak
                if (isHiScore) {
                    out.println("<td align=\"right\"><b>" + hit + "</b></td>");
                } else {
                    out.println("<td align=\"right\">" + hit + "</td>");
                }
                // MassBank ID & Link
                out.println("<td><a href=\"" + serverUrl + "jsp/FwdRecord.jsp?id=" + id
                        + "\" target=\"_blank\">" + id + "</td>");
                // Record Title
                out.println("<td>" + recTitle + "</td>");
                // Formula
                out.println("<td nowrap>" + formula + "</td>");
                // Exact Mass
                out.println("<td nowrap>" + emass + "</td>");
                // KEGG ID & Link
                if (isKeggReturn) {
                    String keggLink = "&nbsp;-";
                    if (massbank2keggList.containsKey(id)) {
                        String keggUrl = "http://www.genome.jp/dbget-bin/www_bget?";
                        String kegg = massbank2keggList.get(id);
                        switch (kegg.charAt(0)) {
                        case 'C':
                            keggUrl += "cpd:" + kegg;
                            break;
                        case 'D':
                            keggUrl += "dr:" + kegg;
                            break;
                        case 'G':
                            keggUrl += "gl:" + kegg;
                            break;
                        }
                        keggLink = "<a href=\"" + keggUrl + "\" target=\"_blank\">" + kegg + "</a>";
                    }
                    out.println("<td>" + keggLink + "</td>");
                    // Pathway Map Link
                    if (massbank2mapList.containsKey(id)) {
                        ArrayList<String> list = massbank2mapList.get(id);
                        for (int l1 = mapNameList.size() - 1; l1 >= 0; l1--) {
                            boolean isFound = false;
                            String map = "";
                            for (int l2 = list.size() - 1; l2 >= 0; l2--) {
                                map = list.get(l2);
                                if (map.equals(mapNameList.get(l1))) {
                                    isFound = true;
                                    break;
                                }
                            }
                            if (isFound) {
                                ArrayList<String> list2 = map2keggList.get(map);
                                String mapUrl = serverUrl + "temp/pathway/" + this.jobId + "_" + map + ".png";
                                out.println("<td nowrap><a href=\"" + mapUrl + "\" target=\"_blank\">map:"
                                        + map + "(" + list2.size() + ")</a></td>");
                            } else {
                                out.println("<td>&nbsp;-</td>");
                            }
                        }
                    } else {
                        for (int l1 = mapNameList.size() - 1; l1 >= 0; l1--) {
                            out.println("<td>&nbsp;-</td>");
                        }
                    }
                }
            }
            out.println("</tr>");
        }
        out.println("</table>");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (in != null) {
                in.close();
            }
        } catch (IOException e) {
        }
        if (out != null) {
            out.flush();
            out.close();
        }
    }
}
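The summary code above relies on TreeSet to keep top1IdList sorted and free of duplicate record IDs before concatenating it into a SQL IN clause by hand (as written, it interpolates the IDs directly into the SQL string; a PreparedStatement would be safer). A brief sketch of the same join over a TreeSet, with made-up IDs:

import java.util.TreeSet;

public class InClauseFromTreeSet {
    public static void main(String[] args) {
        TreeSet<String> top1IdList = new TreeSet<String>();
        top1IdList.add("KO000123"); // hypothetical record IDs
        top1IdList.add("AU100001"); // TreeSet keeps them sorted and unique
        top1IdList.add("KO000123"); // duplicate top-1 hit; ignored by add()

        StringBuilder where = new StringBuilder("where MASSBANK in(");
        boolean first = true;
        for (String id : top1IdList) {
            if (!first)
                where.append(',');
            where.append('\'').append(id).append('\'');
            first = false;
        }
        where.append(')');
        System.out.println(where); // where MASSBANK in('AU100001','KO000123')
    }
}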