Example usage for java.util TreeSet add

List of usage examples for java.util TreeSet add

Introduction

On this page you can find example usages of java.util.TreeSet.add.

Prototype

public boolean add(E e) 

Source Link

Document

Adds the specified element to this set if it is not already present.

Usage

From source file:de.julielab.jcore.ae.lingpipegazetteer.chunking.ChunkerProviderImpl.java

/**
 * Reads a tab-separated dictionary from {@code dictFile} and loads its entries into {@code dict}.
 *
 * <p>{@code dict} is re-created first: a {@code TrieDictionary} when {@code useApproximateMatching}
 * is set, a {@code MapDictionary} otherwise. Every input line must consist of exactly two
 * tab-separated columns, term and label. Terms shorter than {@code MIN_TERM_LENGTH} are skipped,
 * as are terms (or generated variants) whose lower-cased form is contained in {@code stopWords}.
 * Accepted entries are buffered as {@code term + SEPARATOR + label} in a sorted set and handed to
 * {@code flushDictionary} in batches.
 *
 * @param dictFile stream to read the dictionary from; it is closed when this method returns
 * @throws IOException if reading from the stream fails
 * @throws AnalysisEngineProcessException if a line does not have exactly two tab-separated columns
 */
private void readDictionary(InputStream dictFile) throws IOException, AnalysisEngineProcessException {
    // Number of buffered entries after which the buffer is handed to flushDictionary.
    final int flushThreshold = 10000;

    long time = System.currentTimeMillis();
    if (useApproximateMatching) {
        dict = new TrieDictionary<String>();
    } else {
        dict = new MapDictionary<String>();
    }

    LOGGER.info("readDictionary() - adding entries from {} to dictionary...", dictFile);
    // NOTE(review): the reader decodes with the platform default charset; if dictionaries are
    // always UTF-8 an explicit charset should be passed here - confirm the expected encoding.
    try (BufferedReader bf = new BufferedReader(new InputStreamReader(dictFile))) {
        TreeSet<String> dictionary = new TreeSet<String>();

        String line;
        while ((line = bf.readLine()) != null) {
            String[] values = line.split("\t");
            if (values.length != 2) {
                LOGGER.error("readDictionary() - wrong format of line: {}", line);
                throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION,
                        null);
            }

            String term = values[0].trim();
            String label = values[1].trim();
            if (term.length() < MIN_TERM_LENGTH) {
                continue;
            }

            if (useApproximateMatching && !caseSensitive) {
                term = term.toLowerCase();
            }

            if (generateVariants) {
                // Placeholders avoid building this long message for every term when debug is off.
                LOGGER.debug("readDictionary() - make term variants of ({}, {}) and add them to dictionary"
                        + " (NOTE: this may take a while if dictionary is big!)", term, label);
                for (String variant : makeTermVariants(term)) {
                    if (!variant.isEmpty() && !stopWords.contains(variant.toLowerCase())) {
                        dictionary.add(variant + SEPARATOR + label);
                    }
                }
            } else if (!stopWords.contains(term.toLowerCase())) {
                dictionary.add(term + SEPARATOR + label);
            }

            if (dictionary.size() >= flushThreshold) {
                LOGGER.debug("readDictionary() - flushing dictionarySet to map dictionary");
                // The returned set replaces the buffer; presumably flushDictionary hands back an
                // emptied or fresh set - verify against its implementation.
                dictionary = flushDictionary(dictionary, dict);
            }
        }

        // Hand over whatever is still buffered after the last line.
        flushDictionary(dictionary, dict);
        time = System.currentTimeMillis() - time;
        LOGGER.info("Reading dictionary took {}ms ({}s)", time, time / 1000);
    }
}

From source file:net.semanticmetadata.lire.solr.LireRequestHandler.java

/**
 * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
 *
 * @param rsp/*  ww  w. j  a v a 2s  . c o  m*/
 * @param searcher
 * @param hashFieldName the hash field name
 * @param maximumHits
 * @param terms
 * @param queryFeature
 * @throws IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher,
        String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature)
        throws IOException, IllegalAccessException, InstantiationException {
    // temp feature instance
    LireFeature tmpFeature = queryFeature.getClass().newInstance();
    // Taking the time of search for statistical purposes.
    time = System.currentTimeMillis();

    Filter filter = null;
    // if the request contains a filter:
    if (req.getParams().get("fq") != null) {
        // only filters with [<field>:<value> ]+ are supported
        StringTokenizer st = new StringTokenizer(req.getParams().get("fq"), " ");
        LinkedList<Term> filterTerms = new LinkedList<Term>();
        while (st.hasMoreElements()) {
            String[] tmpToken = st.nextToken().split(":");
            if (tmpToken.length > 1) {
                filterTerms.add(new Term(tmpToken[0], tmpToken[1]));
            }
        }
        if (filterTerms.size() > 0)
            filter = new TermsFilter(filterTerms);
    }

    TopDocs docs; // with query only.
    if (filter == null) {
        docs = searcher.search(query, numberOfCandidateResults);
    } else {
        docs = searcher.search(query, filter, numberOfCandidateResults);
    }
    //        TopDocs docs = searcher.search(query, new TermsFilter(terms), numberOfCandidateResults);   // with TermsFilter and boosting by simple query
    //        TopDocs docs = searcher.search(new ConstantScoreQuery(new TermsFilter(terms)), numberOfCandidateResults); // just with TermsFilter
    time = System.currentTimeMillis() - time;
    rsp.add("RawDocsCount", docs.scoreDocs.length + "");
    rsp.add("RawDocsSearchTime", time + "");
    // re-rank
    time = System.currentTimeMillis();
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    float maxDistance = -1f;
    float tmpScore;

    String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
    // iterating and re-ranking the documents.
    BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName); // ***  #
    BytesRef bytesRef;// = new BytesRef();
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // using DocValues to retrieve the field values ...
        bytesRef = binaryValues.get(docs.scoreDocs[i].doc);
        tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        // Getting the document from the index.
        // This is the slow step based on the field compression of stored fields.
        //            tmpFeature.setByteArrayRepresentation(d.getBinaryValue(name).bytes, d.getBinaryValue(name).offset, d.getBinaryValue(name).length);
        tmpScore = queryFeature.getDistance(tmpFeature);
        if (resultScoreDocs.size() < maximumHits) { // todo: There's potential here for a memory saver, think of a clever data structure that can do the trick without creating a new SimpleResult for each result.
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            maxDistance = resultScoreDocs.last().getDistance();
        } else if (tmpScore < maxDistance) {
            //                if it is nearer to the sample than at least one of the current set:
            //                remove the last one ...
            resultScoreDocs.remove(resultScoreDocs.last());
            //                add the new one ...
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            //                and set our new distance border ...
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    //        System.out.println("** Creating response.");
    time = System.currentTimeMillis() - time;
    rsp.add("ReRankSearchTime", time + "");
    LinkedList list = new LinkedList();
    for (Iterator<SimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) {
        SimpleResult result = it.next();
        HashMap m = new HashMap(2);
        m.put("d", result.getDistance());
        // add fields as requested:
        if (req.getParams().get("fl") == null) {
            m.put("id", result.getDocument().get("id"));
            if (result.getDocument().get("title") != null)
                m.put("title", result.getDocument().get("title"));
        } else {
            String fieldsRequested = req.getParams().get("fl");
            if (fieldsRequested.contains("score")) {
                m.put("score", result.getDistance());
            }
            if (fieldsRequested.contains("*")) {
                // all fields
                for (IndexableField field : result.getDocument().getFields()) {
                    String tmpField = field.name();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            } else {
                StringTokenizer st;
                if (fieldsRequested.contains(","))
                    st = new StringTokenizer(fieldsRequested, ",");
                else
                    st = new StringTokenizer(fieldsRequested, " ");
                while (st.hasMoreElements()) {
                    String tmpField = st.nextToken();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            }
        }
        //            m.put(field, result.getDocument().get(field));
        //            m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
        list.add(m);
    }
    rsp.add("docs", list);
    // rsp.add("Test-name", "Test-val");
}

From source file:edu.duke.cs.osprey.ematrix.epic.SeriesFitter.java

static double[] fitSeriesIterative(DoubleMatrix1D[] samp, double trueVals[], double weights[], double lambda,
        boolean includeConst, int order, double bCutoffs[], double bCutoffs2[], int PCOrder, boolean isPC[]) {

    long startTime = System.currentTimeMillis();
    System.out.println("Starting fitSeriesIterative...");

    int numSamples = samp.length;
    int nd = samp[0].size();

    if (bCutoffs.length == 1) {//single bCutoff to be used
        double bCutoff = bCutoffs[0];
        bCutoffs = new double[numSamples];
        Arrays.fill(bCutoffs, bCutoff);
    }/*from   ww w . j  a va 2  s .  com*/
    //now bCutoffs has a cutoff for each sample

    int numParams = getNumParams(nd, includeConst, order);

    if (PCOrder > order) {//add in parameters for PC orders
        int numPCs = countTrue(isPC);

        for (int n = order + 1; n <= PCOrder; n++)
            numParams += getNumParamsForOrder(numPCs, n);
    }

    //now set up the data for the iterative fits
    //samples can be turned on and off using fitWeights
    //we will need to create two entries for samples with trueVals between bCutoff and bCutoff2
    //since they may be turned on either to penalize deviation from bCutoff or from trueVal

    //first entry for each entry will penalize deviation from bCutoff if trueVal>=bCutoff
    //secondEntries are for trueVals between bCutoff and bCutoff2
    ArrayList<Integer> secondEntries = new ArrayList<>();//list of samples needing second entry
    HashMap<Integer, Integer> revSecondEntries = new HashMap<>();//reverse lookup
    for (int s = 0; s < numSamples; s++) {
        if ((trueVals[s] >= bCutoffs[s]) && (trueVals[s] < bCutoffs2[s])) {//as in isRestraintActive
            revSecondEntries.put(s, secondEntries.size());
            secondEntries.add(s);
        }
    }

    int numRestraints = numSamples + secondEntries.size();

    //data for basic least-squares fits
    DoubleMatrix1D[] fitSamp = new DoubleMatrix1D[numRestraints];
    double fitTrueVals[] = new double[numRestraints];
    double fitWeights[] = new double[numRestraints];

    for (int s = 0; s < numSamples; s++) {//"normal" entries
        fitSamp[s] = samp[s];

        if (trueVals[s] >= bCutoffs[s]) {
            fitWeights[s] = 0;
            fitTrueVals[s] = bCutoffs[s];
        } else {
            fitWeights[s] = weights[s];
            fitTrueVals[s] = trueVals[s];
        }
    }
    for (int s2 = 0; s2 < secondEntries.size(); s2++) {
        fitSamp[numSamples + s2] = samp[secondEntries.get(s2)];
        fitWeights[numSamples + s2] = 0;
        fitTrueVals[numSamples + s2] = trueVals[secondEntries.get(s2)];
    }

    //Initial guess of set P is all points with trueVals[s] >= bCutoff
    //that is, all points that have possible series values that make the restraint inactive

    boolean done = false;
    double coeffs[] = null;
    double meanResidual = 0, weightSum = 0;
    double prevResid = Double.POSITIVE_INFINITY;
    double oldCoeffs[] = null;

    double oldSerVals[] = new double[numSamples];//values of series at each sample, for previous iteration
    //preallocating to all infinity because all trueVals[s]>=bCutoff points start outside P
    Arrays.fill(oldSerVals, Double.POSITIVE_INFINITY);

    //for updating
    boolean firstFit = true;//first fit is not an update
    DoubleMatrix1D c = DoubleFactory1D.dense.make(numParams);//matrices we update (used in fit)
    DoubleMatrix2D M = DoubleFactory2D.dense.make(numParams, numParams);
    double oldFitWeights[] = null;

    //double fitWeightsCheck[] = fitWeights.clone();//DEBUG!!!

    while (!done) {

        if (firstFit) {
            coeffs = fitSeries(fitSamp, fitTrueVals, fitWeights, lambda, includeConst, order, PCOrder, isPC,
                    false, c, M);

            firstFit = false;
        } else {
            double weightDiffs[] = fitWeights.clone();
            for (int s = 0; s < numRestraints; s++)
                weightDiffs[s] -= oldFitWeights[s];

            coeffs = fitSeries(fitSamp, fitTrueVals, weightDiffs, lambda, includeConst, order, PCOrder, isPC,
                    true, c, M);

            //DEBUG!!!
            /*
            for(int s=0; s<numRestraints; s++){
            fitWeightsCheck[s] += weightDiffs[s];
            if(fitWeightsCheck[s] != fitWeights[s]){
                int cefAO = 1111;
            }
            }
                    
            double checkCoeffs[] = fitSeries(fitSamp, fitTrueVals, fitWeights, lambda,
            includeConst, order, PCOrder, isPC, false, null, null);
                    
            for(int a=0; a<coeffs.length; a++){
            if(Math.abs(checkCoeffs[a]-coeffs[a])>1e-10){
                int abc=123;
            }
            }
                    
            DoubleMatrix1D c2 = DoubleFactory1D.dense.make(numParams);
            DoubleMatrix2D M2 = DoubleFactory2D.dense.make(numParams,numParams);
                    
            double checkCoeffs2[] = fitSeries(fitSamp, fitTrueVals, fitWeights, lambda,
            includeConst, order, PCOrder, isPC, false, c2, M2);
                    
            for(int a=0; a<coeffs.length; a++){
            if(Math.abs(checkCoeffs2[a]-coeffs[a])>1e-10){
                int abc=123;
            }
            }*/
            //DEBUG!!!

        }

        oldFitWeights = fitWeights.clone();

        done = true;
        ArrayList<SampleCutoffCrossing> scc = new ArrayList<SampleCutoffCrossing>();

        meanResidual = 0;
        weightSum = 0;

        //boolean doneNoTol = true, done2 = true;//DEBUG!!!

        //values of series at each sample, based on coeffs
        double serVals[] = new double[numSamples];
        for (int s = 0; s < numSamples; s++) {
            serVals[s] = evalSeries(coeffs, samp[s], nd, includeConst, order, PCOrder, isPC);

            //If each series value is below or above bcutoff according to whether the
            //coeffs were generated by fitting with or without that value included
            //(respectively), then we have found a local and thus the global minimum
            //in this quadratic piece of the objective function, so we're done

            //check for doneness first, using tolerance
            //i.e. are coeffs (derived from fitWeights) consistent with fitWeights,
            //within numerical error?  If so we have a global minimum

            if (trueVals[s] >= bCutoffs[s]) {
                if (fitWeights[s] > 0) {
                    if (!isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], false,
                            -1e-6))
                        done = false;//fitWeights penalizing deviation from bCutoff, and this isn't right at coeffs
                } else {
                    boolean secondRestraintOn = revSecondEntries.containsKey(s);
                    if (secondRestraintOn)
                        secondRestraintOn = (fitWeights[numSamples + revSecondEntries.get(s)] > 0);

                    if (secondRestraintOn) {
                        if (!isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], true,
                                -1e-6))
                            done = false;//fitWeights penalizing deviation from trueVal, and this isn't right at coeffs
                    } else {//restraints currently off
                        if (isRestraintActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], 1e-6))
                            done = false;//a restraint should be on at coeffs
                    }
                }
            }
            //for trueVals below bCutoff, restraints don't turn on and off

            //DEBUG!!!!!
            //trying to calculate done w/o tol
            /*
            if( trueVals[s]>=bCutoffs[s] ){
            if(fitWeights[s]>0){
                if(!isRestraintTypeActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],false,0))
                    doneNoTol = false;//fitWeights penalizing deviation from bCutoff, and this isn't right at coeffs
            }
            else {
                boolean secondRestraintOn = revSecondEntries.containsKey(s);
                if(secondRestraintOn)
                    secondRestraintOn = (fitWeights[numSamples+revSecondEntries.get(s)]>0);
                        
                if(secondRestraintOn){
                    if(!isRestraintTypeActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],true,0))
                        doneNoTol = false;//fitWeights penalizing deviation from trueVal, and this isn't right at coeffs
                }
                else {//restraints currently off
                    if(isRestraintActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],0))
                        doneNoTol = false;//a restraint should be on at coeffs
                }
            }
            }
                    
            //OK now done2 will be calculated and should be the same but will be calculated like 
            //the weight changes below
            if( trueVals[s]>=bCutoffs[s] ){
            if(isRestraintTypeActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],false)){
                //activate penalty for deviating from bCutoff
                if(fitWeights[s]!=weights[s])
                    done2 = false;
                if(revSecondEntries.containsKey(s)){
                    if(fitWeights[numSamples+revSecondEntries.get(s)]!=0)
                        done2 = false;
                }
            }
            else if(isRestraintTypeActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],true)){
                //activate penalty for deviating from trueVal
                if(fitWeights[s] != 0)
                    done2 = false;
                if(revSecondEntries.containsKey(s)){
                    if(fitWeights[numSamples+revSecondEntries.get(s)] != weights[s])
                        done2 = false;
                }
                else
                    throw new RuntimeException("ERROR: should have second entry for restraint but don't!!");
            }
            else {
                //deactivate all penalties
                if(fitWeights[s] != 0)
                    done2 = false;
                if(revSecondEntries.containsKey(s)){
                    if(fitWeights[numSamples+revSecondEntries.get(s)] != 0)
                        done2 = false;
                }
                //no contribution to residual
            }
            }*/

            //DEBUG!!!

            //Now calculate mean residual and crossing points, and update fitWeights
            double residTerm = 0;

            if (trueVals[s] >= bCutoffs[s]) {
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], false)) {
                    //activate penalty for deviating from bCutoff
                    fitWeights[s] = weights[s];
                    if (revSecondEntries.containsKey(s))
                        fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                    residTerm = (serVals[s] - bCutoffs[s]) * (serVals[s] - bCutoffs[s]);
                } else if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], true)) {
                    //activate penalty for deviating from trueVal
                    fitWeights[s] = 0;
                    if (revSecondEntries.containsKey(s))
                        fitWeights[numSamples + revSecondEntries.get(s)] = weights[s];
                    else
                        throw new RuntimeException("ERROR: should have second entry for restraint but don't!!");
                    residTerm = (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);
                } else {
                    //deactivate all penalties
                    fitWeights[s] = 0;
                    if (revSecondEntries.containsKey(s))
                        fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                    //no contribution to residual
                }
            } else //normal least-squares penalty.  fitWeights[s] will stay at weights[s] 
                residTerm = (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);

            meanResidual += weights[s] * residTerm;

            //If want sample-by-sample output...
            //System.out.println("TRAININGSET TRUE: "+trueVals[s]+" SER: "+serVals[s]);

            weightSum += weights[s];
        }

        meanResidual /= weightSum;

        if (meanResidual == prevResid)
            System.out.println();

        //DEBUG!!!
        /*
        if(done!=doneNoTol || done2!=done){
        //Let's see what happens if we remove the tolerance...
        done = doneNoTol;
        }
                
                
                
        if(done){
        double checkCoeffs[] = fitSeries(fitSamp, fitTrueVals, fitWeights, lambda,
            includeConst, order, PCOrder, isPC, false, null, null);
                
        for(int a=0; a<coeffs.length; a++){
            if(Math.abs(checkCoeffs[a]-coeffs[a])>1e-10){
                int abc=123;
            }
        }
                
        }*/
        //DEBUG!!!

        if ((!done) && (meanResidual >= prevResid)) {
            //Did not obtain a decrease using the Newton step
            //Let's do an exact line search to rectify the situation

            if (!useLineSearch) {
                System.out.println("Skipping line search, returning with residual " + prevResid);
                return oldCoeffs;
            }

            System.out.println("LINE SEARCH");

            for (int s = 0; s < numSamples; s++) {

                //If we go in or out of either type of restraint between serVals and oldSerVals, we create
                //a SampleCutoffCrossing of the appropriate type (upper or lower (ordinary) restraint)
                if ((isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s],
                        false)) != (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                                false))) {
                    //If the restraint disappears at one end we know trueVal>=bCutoff here
                    //create lower restraint SampleCutoffCrossing
                    double crossingPoint = (bCutoffs[s] - oldSerVals[s]) / (serVals[s] - oldSerVals[s]);
                    scc.add(new SampleCutoffCrossing(s, crossingPoint, false));
                }

                if ((isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s],
                        true)) != (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                                true))) {

                    //upper
                    double crossingPoint2 = (trueVals[s] - oldSerVals[s]) / (serVals[s] - oldSerVals[s]);
                    scc.add(new SampleCutoffCrossing(s, crossingPoint2, true));
                }

            }

            int changeCount = scc.size();
            Collections.sort(scc);

            TreeSet<Integer> crossingIndices = new TreeSet<Integer>();
            for (SampleCutoffCrossing cr : scc) {
                int s = cr.sampleIndex;
                crossingIndices.add(s);
                if (cr.upperRestraint) {//penalizing difference from trueVal
                    cr.quadTerm = weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                    cr.linTerm = 2 * weights[s] * (serVals[s] - oldSerVals[s]) * (oldSerVals[s] - trueVals[s]);
                    cr.constTerm = weights[s] * (oldSerVals[s] - trueVals[s]) * (oldSerVals[s] - trueVals[s]);
                } else {//penalizing difference from lesser of bCutoff, trueVal
                    cr.quadTerm = weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                    double baseVal = Math.min(trueVals[s], bCutoffs[s]);
                    cr.linTerm = 2 * weights[s] * (serVals[s] - oldSerVals[s]) * (oldSerVals[s] - baseVal);
                    cr.constTerm = weights[s] * (oldSerVals[s] - baseVal) * (oldSerVals[s] - baseVal);
                    //cr.linTerm = 2*weights[s]*(serVals[s]-oldSerVals[s])*(oldSerVals[s]-trueVals[s]);
                    //cr.constTerm =  weights[s]*(oldSerVals[s]-trueVals[s])*(oldSerVals[s]-trueVals[s]);
                }
            }

            //Set up quadratic function
            double quadTerm = 0;
            double linTerm = 0;
            double constTerm = 0;
            //Add in contributions from all non-cutoff-crossing points
            for (int s = 0; s < numSamples; s++) {
                if (!crossingIndices.contains(s)) {
                    if (isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s], false)) {
                        //if(trueVals[s]<bCutoffs[s]||serVals[s]<bCutoffs[s]){//penalizing difference from lesser of bCutoff, trueVal
                        quadTerm += weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                        double baseVal = Math.min(trueVals[s], bCutoffs[s]);
                        linTerm += 2 * weights[s] * (serVals[s] - oldSerVals[s]) * (oldSerVals[s] - baseVal);
                        constTerm += weights[s] * (oldSerVals[s] - baseVal) * (oldSerVals[s] - baseVal);
                        //linTerm += 2*weights[s]*(serVals[s]-oldSerVals[s])*(oldSerVals[s]-trueVals[s]);
                        //constTerm +=  weights[s]*(oldSerVals[s]-trueVals[s])*(oldSerVals[s]-trueVals[s]);
                    } else if (isRestraintTypeActive(trueVals[s], oldSerVals[s], bCutoffs[s], bCutoffs2[s],
                            true)) {
                        //else if(isRestraintActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],0)){//penalizing difference from trueVal
                        quadTerm += weights[s] * (serVals[s] - oldSerVals[s]) * (serVals[s] - oldSerVals[s]);
                        linTerm += 2 * weights[s] * (serVals[s] - oldSerVals[s])
                                * (oldSerVals[s] - trueVals[s]);
                        constTerm += weights[s] * (oldSerVals[s] - trueVals[s]) * (oldSerVals[s] - trueVals[s]);
                    }
                }
            }

            //contributions from cutoff-crossing points at the beginning of the interval
            //(i.e. at coeffs)
            for (SampleCutoffCrossing cr : scc) {
                int s = cr.sampleIndex;
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                        cr.upperRestraint)) {
                    //check if this particular restraint (rather than either restraint for this s) is active
                    //if(serVals[s]<bCutoffs[s]){
                    quadTerm += cr.quadTerm;
                    linTerm += cr.linTerm;
                    constTerm += cr.constTerm;
                }
            }

            //double checkMeanResid = (quadTerm+linTerm+constTerm)/weightSum;//evaluate objective function at a=1
            //should match meanResidual!

            double prevNodeResid = Double.POSITIVE_INFINITY;
            //The first increase we may consider is from node 0 to 1

            //Now walk back until we get an increase
            //then the minimum will be in one of the last two quadratic pieces
            int lowestNodeIndex = 0;
            for (int curChangeIndex = changeCount - 1; curChangeIndex >= 0; curChangeIndex--) {

                SampleCutoffCrossing cr = scc.get(curChangeIndex);

                double a = cr.crossingPoint;
                double curNodeResid = (a * a * quadTerm + a * linTerm + constTerm) / weightSum;

                int s = cr.sampleIndex;

                if (curNodeResid > prevNodeResid) {
                    lowestNodeIndex = curChangeIndex + 1;
                    break;
                }

                //if the restraint is being removed going from serVals to oldSerVals, remove its coefficients from the quadratic function
                //if it's being added, add them
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                        cr.upperRestraint)) {
                    quadTerm -= cr.quadTerm;
                    linTerm -= cr.linTerm;
                    constTerm -= cr.constTerm;
                } else {
                    quadTerm += cr.quadTerm;
                    linTerm += cr.linTerm;
                    constTerm += cr.constTerm;
                }

                prevNodeResid = curNodeResid;
            }

            //At this point, we know our minimum is in either the piece with the
            //current quad, lin, constTerms or the one we looked at right before
            //(where lowestNodeIndex is the node separating them)
            double a_min = -linTerm / (2 * quadTerm);
            SampleCutoffCrossing cr = scc.get(lowestNodeIndex);
            if (a_min > cr.crossingPoint) {//minimum must be in previous piece
                //revert quad and linTerms
                int s = cr.sampleIndex;
                //change back quadratic-function coefficients
                if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                        cr.upperRestraint)) {
                    quadTerm += cr.quadTerm;
                    linTerm += cr.linTerm;
                    constTerm += cr.constTerm;
                } else {
                    quadTerm -= cr.quadTerm;
                    linTerm -= cr.linTerm;
                    constTerm -= cr.constTerm;
                }
                a_min = -linTerm / (2 * quadTerm);
            }

            //double minResid = (a_min*a_min*quadTerm + a_min*linTerm + constTerm)/weightSum;

            for (int p = 0; p < numParams; p++)
                coeffs[p] = coeffs[p] * a_min + (1 - a_min) * oldCoeffs[p];

            double minResid = 0;
            for (int s = 0; s < numSamples; s++) {
                serVals[s] = evalSeries(coeffs, samp[s], nd, includeConst, order, PCOrder, isPC);

                if (trueVals[s] >= bCutoffs[s]) {
                    if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s], false)) {
                        //activate penalty for deviating from bCutoff
                        fitWeights[s] = weights[s];
                        if (revSecondEntries.containsKey(s))
                            fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                        minResid += weights[s] * (serVals[s] - bCutoffs[s]) * (serVals[s] - bCutoffs[s]);
                    } else if (isRestraintTypeActive(trueVals[s], serVals[s], bCutoffs[s], bCutoffs2[s],
                            true)) {
                        minResid += weights[s] * (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);
                        //activate penalty for deviating from trueVal
                        fitWeights[s] = 0;
                        fitWeights[numSamples + revSecondEntries.get(s)] = weights[s];
                    } else {
                        //deactivate all penalties
                        fitWeights[s] = 0;
                        if (revSecondEntries.containsKey(s))
                            fitWeights[numSamples + revSecondEntries.get(s)] = 0;
                    }
                } else
                    minResid += weights[s] * (serVals[s] - trueVals[s]) * (serVals[s] - trueVals[s]);
            }

            minResid /= weightSum;

            if (minResid >= prevResid) {
                //consider to have converged
                //NOTE THIS CAN HAPPEN IF THE QUADRATIC APPROXIMATION AT OLDCOEFFS HAS SOLUTION
                //FAR FROM THE EXACT VALUE (ASSUMING EXACT FITSERIES) OF 1
                //THIS CAN HAPPEN IF WE'RE GETTING BELOW THE NUMERICAL PRECISION OF FITSERIES
                System.out.println("TRAINING SET MEAN RESIDUAL:" + prevResid);
                System.out.println("CONVERGED IN LINE SEARCH, line search min: " + minResid);
                System.out.println("fitSeriesIterative time (ms): " + (System.currentTimeMillis() - startTime));

                return oldCoeffs;
            }

            meanResidual = minResid;
        }

        /*                
        //trying backtracking line search, hoping it'll be more numerically stable
        double backtrackRate = 0.5;
        double a = 1;//how far we want to be on the line from oldCoeffs (which should allow some descent
        //along the line) to coeffs (which overshoots)
        double minResid;
        double aCoeffs[] = new double[coeffs.length];//coefficients at our point on the line
                
        do {
                    
            a *= backtrackRate;
                    
            for(int p=0; p<numParams; p++)
                aCoeffs[p] = coeffs[p]*a + (1-a)*oldCoeffs[p];
                
                
            minResid = 0;
            for(int s=0; s<numSamples; s++){
                serVals[s] = evalSeries(aCoeffs,samp[s],nd,includeConst,order,PCOrder,isPC);
                
                if( trueVals[s]>=bCutoffs[s] ){
                    if(isRestraintTypeActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],false)){
                        minResid += weights[s]*(serVals[s]-bCutoffs[s])*(serVals[s]-bCutoffs[s]);
                        weights2[s] = weights[s];
                    }
                    else if(isRestraintTypeActive(trueVals[s],serVals[s],bCutoffs[s],bCutoffs2[s],true)){
                        minResid += weights[s]*(serVals[s]-trueVals[s])*(serVals[s]-trueVals[s]);
                        weights2[s] = weights[s];
                    }
                    else
                        weights2[s] = 0;
                }
                else
                    minResid += weights[s]*(serVals[s]-trueVals[s])*(serVals[s]-trueVals[s]);
                
            }
                
            minResid /= weightSum;
                    
        } while(minResid>prevResid);
                
        if(a<1e-4)
            System.out.println("Warning: line search a got down to "+a);
                
                
        meanResidual = minResid;
                    }
        */

        oldCoeffs = coeffs;
        System.out.println("STEP RESIDUAL: " + meanResidual);
        prevResid = meanResidual;
        oldSerVals = serVals;
    }

    System.out.println("TRAINING SET MEAN RESIDUAL:" + meanResidual);
    System.out.println("fitSeriesIterative time (ms): " + (System.currentTimeMillis() - startTime));

    //DEBUG!!!
    //Note this assumes weights are all 1!
    /*
    int numCoeffs = coeffs.length;
    DoubleMatrix1D residGrad = DoubleFactory1D.dense.make(numCoeffs);
    double checkResid = 0;
    for(int s=0; s<samp.length; s++){
    double diff = 0;
    double fitVal = evalSeries(coeffs,samp[s],nd,includeConst,order,PCOrder,isPC);
    if(trueVals[s]>=bCutoffs[s] 
            && fitVal<bCutoffs[s]){
        diff = fitVal-bCutoffs[s];
                
        if(fitWeights[s]==0){
            int allosaurus = 15;
        }
        if(revSecondEntries.containsKey(s)){
            if(fitWeights[numSamples+revSecondEntries.get(s)]>0){
                int allosaurus=15;
            }
        }
                    
    }
    else if( trueVals[s]<bCutoffs[s] 
            || (trueVals[s]<bCutoffs2[s] && fitVal>trueVals[s]) ){
             
        diff = fitVal-trueVals[s];
                
        if(trueVals[s]<bCutoffs[s]){
            if(fitWeights[s]==0){
                int allosaurus = 15;
            }
        }
        else if(fitWeights[numSamples+revSecondEntries.get(s)]==0 || fitWeights[s]>0){
            int allosaurus=15;
        }
    }
    else if(fitWeights[s]>0){
        int allosaurus=15;
    }
    else if(revSecondEntries.containsKey(s)){
        if(fitWeights[numSamples+revSecondEntries.get(s)]>0){
            int allosaurus=15;
        }
    }
            
    checkResid += diff*diff;
            
    DoubleMatrix1D termMonomials = DoubleFactory1D.dense.make(numCoeffs);
    SeriesFitter.calcSampParamCoeffs( termMonomials, samp[s],
            nd, includeConst, order, PCOrder, isPC );
    residGrad.assign( termMonomials, Functions.plusMult(2*diff) );
    }
            
    checkResid /= samp.length;
    residGrad.assign( Functions.mult(1./samp.length) );
            
            
            
    double checkCoeffs[] = fitSeries(fitSamp, fitTrueVals, fitWeights, lambda,
            includeConst, order, PCOrder, isPC, false, null, null);
            
    double checkResid2 = 0;
    DoubleMatrix1D residGrad2 = DoubleFactory1D.dense.make(numCoeffs);
            
    for(int s=0; s<samp.length; s++){
            
    double diff = 0;
    double fitVal = evalSeries(checkCoeffs,samp[s],nd,includeConst,order,PCOrder,isPC);
    if(trueVals[s]>=bCutoffs[s] 
            && fitVal<bCutoffs[s]){
        diff = fitVal-bCutoffs[s];
                
        if(fitWeights[s]==0){
            int allosaurus = 15;
        }
        if(revSecondEntries.containsKey(s)){
            if(fitWeights[numSamples+revSecondEntries.get(s)]>0){
                int allosaurus=15;
            }
        }
                    
    }
    else if( trueVals[s]<bCutoffs[s] 
            || (trueVals[s]<bCutoffs2[s] && fitVal>trueVals[s]) ){
             
        diff = fitVal-trueVals[s];
                
        if(trueVals[s]<bCutoffs[s]){
            if(fitWeights[s]==0){
                int allosaurus = 15;
            }
        }
        else if(fitWeights[numSamples+revSecondEntries.get(s)]==0 || fitWeights[s]>0){
            int allosaurus=15;
        }
    }
    else if(fitWeights[s]>0){
        int allosaurus=15;
    }
    else if(revSecondEntries.containsKey(s)){
        if(fitWeights[numSamples+revSecondEntries.get(s)]>0){
            int allosaurus=15;
        }
    }
            
    checkResid2 += diff*diff;
            
    DoubleMatrix1D termMonomials = DoubleFactory1D.dense.make(numCoeffs);
    SeriesFitter.calcSampParamCoeffs( termMonomials, samp[s],
            nd, includeConst, order, PCOrder, isPC );
    residGrad2.assign( termMonomials, Functions.plusMult(2*diff) );
    }
            
    checkResid2 /= samp.length;
    residGrad2.assign( Functions.mult(1./samp.length) );
            
            
            
            
    if(residGrad.zDotProduct(residGrad)>1e-10){
    int struthiomimus = 23;
    }
    int cubist = -1;
    */
    //DEBUG!!

    return coeffs;
}

From source file:net.semanticmetadata.lire.solr.FastLireRequestHandler.java

/**
 * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
 * <p>
 * Runs {@code query} (restricted by a terms filter when the request carries an {@code fq}
 * parameter), re-ranks the candidates by their distance to {@code queryFeature}, and adds the
 * closest {@code maximumHits} results to the response under {@code "docs"}. The statistics
 * {@code "RawDocsCount"}, {@code "RawDocsSearchTime"} and {@code "ReRankSearchTime"} are added
 * to the response as strings.
 *
 * @param req           the request; its {@code fq} (filter) and {@code fl} (field list) parameters are read
 * @param rsp           the response the statistics and the result list are added to
 * @param searcher      the searcher used for candidate retrieval and for loading documents
 * @param hashFieldName the hash field name; the feature field name is derived from it
 * @param maximumHits   upper bound on the number of re-ranked results kept
 * @param terms         not read by this method
 * @param query         the query that retrieves the candidate documents
 * @param queryFeature  the feature every candidate is compared against
 * @throws java.io.IOException
 * @throws IllegalAccessException if the temporary feature instance cannot be created
 * @throws InstantiationException if the temporary feature instance cannot be created
 */
private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher,
        String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature)
        throws IOException, IllegalAccessException, InstantiationException {
    // temp feature instance, re-filled for every candidate document
    LireFeature tmpFeature = queryFeature.getClass().newInstance();
    // Taking the time of search for statistical purposes.
    time = System.currentTimeMillis();

    Filter filter = null;
    // if the request contains a filter:
    String filterQuery = req.getParams().get("fq");
    if (filterQuery != null) {
        // only filters with [<field>:<value> ]+ are supported
        StringTokenizer st = new StringTokenizer(filterQuery, " ");
        LinkedList<Term> filterTerms = new LinkedList<Term>();
        while (st.hasMoreElements()) {
            String[] tmpToken = st.nextToken().split(":");
            if (tmpToken.length > 1) {
                filterTerms.add(new Term(tmpToken[0], tmpToken[1]));
            }
        }
        if (filterTerms.size() > 0)
            filter = new TermsFilter(filterTerms);
    }

    TopDocs docs; // with query only.
    if (filter == null) {
        docs = searcher.search(query, numberOfCandidateResults);
    } else {
        docs = searcher.search(query, filter, numberOfCandidateResults);
    }
    time = System.currentTimeMillis() - time;
    rsp.add("RawDocsCount", docs.scoreDocs.length + "");
    rsp.add("RawDocsSearchTime", time + "");

    // re-rank
    time = System.currentTimeMillis();
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    float maxDistance = -1f;
    float tmpScore;

    String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
    // iterating and re-ranking the documents.
    BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName);
    BytesRef bytesRef = new BytesRef();
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        int docId = docs.scoreDocs[i].doc;
        // using DocValues to retrieve the field values (stored fields would be the slow path) ...
        binaryValues.get(docId, bytesRef);
        tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        tmpScore = queryFeature.getDistance(tmpFeature);
        if (resultScoreDocs.size() < maximumHits) {
            // todo: There's potential here for a memory saver, think of a clever data structure
            // that can do the trick without creating a new SimpleResult for each result.
            resultScoreDocs.add(new SimpleResult(tmpScore, searcher.doc(docId), docId));
            maxDistance = resultScoreDocs.last().getDistance();
        } else if (tmpScore < maxDistance) {
            // nearer to the sample than at least one of the current set:
            // remove the last one, add the new one and set the new distance border.
            resultScoreDocs.remove(resultScoreDocs.last());
            resultScoreDocs.add(new SimpleResult(tmpScore, searcher.doc(docId), docId));
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    time = System.currentTimeMillis() - time;
    rsp.add("ReRankSearchTime", time + "");

    // The field list is the same for every result, so it is read from the request only once.
    String fieldsRequested = req.getParams().get("fl");
    LinkedList<HashMap<String, Object>> list = new LinkedList<HashMap<String, Object>>();
    for (SimpleResult result : resultScoreDocs) {
        HashMap<String, Object> m = new HashMap<String, Object>(2);
        m.put("d", result.getDistance());
        // add fields as requested:
        if (fieldsRequested == null) {
            m.put("id", result.getDocument().get("id"));
            if (result.getDocument().get("title") != null)
                m.put("title", result.getDocument().get("title"));
        } else {
            if (fieldsRequested.contains("score")) {
                m.put("score", result.getDistance());
            }
            if (fieldsRequested.contains("*")) {
                // all fields
                for (IndexableField field : result.getDocument().getFields()) {
                    putRequestedField(m, result, field.name());
                }
            } else {
                // Split on commas and blanks alike so that "id, title" yields "id" and "title"
                // instead of "id" and " title" (which never matches a field).
                StringTokenizer st = new StringTokenizer(fieldsRequested, ", ");
                while (st.hasMoreTokens()) {
                    putRequestedField(m, result, st.nextToken());
                }
            }
        }
        list.add(m);
    }
    rsp.add("docs", list);
}

/**
 * Copies one field of the result's document into the response map: all values when the field
 * occurs more than once, its single string value when it occurs once, nothing when it is absent.
 */
private void putRequestedField(HashMap<String, Object> target, SimpleResult result, String fieldName) {
    IndexableField[] matches = result.getDocument().getFields(fieldName);
    if (matches.length > 1) {
        target.put(matches[0].name(), result.getDocument().getValues(fieldName));
    } else if (matches.length > 0) {
        target.put(matches[0].name(), matches[0].stringValue());
    }
}

From source file:guineu.modules.filter.Alignment.RANSAC.RansacAlignerTask.java

/**
 * Builds a one-to-one mapping from rows of the given peak list to rows of the aligned peak list.
 * <p>
 * Each row is paired with every aligned-list row inside the m/z tolerance window and the
 * retention-time tolerance window around its corrected RT. The scored pairs are then assigned
 * greedily in the iteration order of the score set, skipping any pair whose row or aligned row
 * has already been used.
 *
 * @param peakList the peak list whose rows are to be mapped onto the aligned peak list
 * @return the row-to-aligned-row mapping; empty when the aligned peak list has no rows;
 *         {@code null} when scoring a candidate pair threw an exception (the task status is
 *         set to {@code TaskStatus.ERROR} in that case)
 */
private HashMap<PeakListRow, PeakListRow> getAlignmentMap(Dataset peakList) {

    // Create a table of mappings for best scores
    HashMap<PeakListRow, PeakListRow> alignmentMapping = new HashMap<PeakListRow, PeakListRow>();

    if (alignedPeakList.getNumberRows() < 1) {
        return alignmentMapping;
    }

    // Create a sorted set of scores matching
    TreeSet<RowVsRowScore> scoreSet = new TreeSet<RowVsRowScore>();

    // RANSAC algorithm
    List<AlignStructMol> list = ransacPeakLists(alignedPeakList, peakList);
    PolynomialFunction function = this.getPolynomialFunction(list,
            ((SimpleLCMSDataset) alignedPeakList).getRowsRTRange());

    PeakListRow allRows[] = peakList.getRows().toArray(new PeakListRow[0]);

    for (PeakListRow row : allRows) {
        SimplePeakListRowLCMS lcmsRow = (SimplePeakListRowLCMS) row;
        double rt = 0.0;
        try {
            rt = function.value(lcmsRow.getRT());
        } catch (NullPointerException e) {
            // NOTE(review): presumably covers a null 'function' (no fit available) - confirm and
            // replace with an explicit null check. Falls back to the uncorrected RT.
            rt = lcmsRow.getRT();
        }

        // An unusable corrected value also falls back to the uncorrected RT.
        if (Double.isNaN(rt) || rt == -1) {
            rt = lcmsRow.getRT();
        }

        Range mzRange = this.mzTolerance.getToleranceRange(lcmsRow.getMZ());
        Range rtRange = this.rtToleranceAfterRTcorrection.getToleranceRange(rt);
        // Get all rows of the aligned peaklist within parameter limits
        PeakListRow candidateRows[] = ((SimpleLCMSDataset) alignedPeakList).getRowsInsideRTAndMZRange(rtRange,
                mzRange);

        for (PeakListRow candidate : candidateRows) {
            RowVsRowScore score;
            try {
                score = new RowVsRowScore(row, candidate, mzTolerance.getTolerance(),
                        rtToleranceAfterRTcorrection.getTolerance(), rt);

                scoreSet.add(score);
                errorMessage = score.getErrorMessage();

            } catch (Exception e) {
                e.printStackTrace();
                setStatus(TaskStatus.ERROR);
                return null;
            }
        }
        progress = (double) processedRows++ / (double) totalRows;
    }

    // Aligned rows that already have a partner. Tracked in a hash set so the check below is a
    // constant-time lookup instead of a HashMap.containsValue scan over the whole mapping.
    java.util.Set<PeakListRow> usedAlignedRows = new java.util.HashSet<PeakListRow>();

    // Iterate scores in the order of the sorted set (described as descending order originally)
    for (RowVsRowScore score : scoreSet) {

        // Check if the row is already mapped
        if (alignmentMapping.containsKey(score.getPeakListRow())) {
            continue;
        }

        // Check if the aligned row is already filled
        if (usedAlignedRows.contains(score.getAlignedRow())) {
            continue;
        }

        alignmentMapping.put(score.getPeakListRow(), score.getAlignedRow());
        usedAlignedRows.add(score.getAlignedRow());
    }

    return alignmentMapping;
}

From source file:com.cloudera.recordbreaker.schemadict.SchemaSuggest.java

/**
 * Finds the known schemas in the dictionary that best match the data in the given Avro file.
 * <p>
 * A statistical summary of the file is compared against dictionary entries bucketed by schema
 * size. The search starts with the bucket of the same size as the input schema and widens one
 * size step at a time, stopping once every bucket has been examined or once the k-th best
 * mapping found so far is closer than the minimum possible cost of the nearest unexamined buckets.
 *
 * @param avroFile the Avro data file to summarize and match
 * @param k        the number of best mappings wanted; for {@code k <= 0} every bucket is examined
 * @return the best mappings in the sorter's order; empty when no dictionary entry was examined
 * @throws IOException declared by this method; presumably raised while reading {@code avroFile}
 */
public List<DictionaryMapping> inferSchemaMapping(File avroFile, int k) throws IOException {
    SchemaStatisticalSummary srcSummary = new SchemaStatisticalSummary("input");
    Schema srcSchema = srcSummary.createSummaryFromData(avroFile);
    srcSummary.setUseAttributeLabels(useAttributeLabels);

    //
    // Compare the statistics to the database of schema statistics.  Find the closest matches, both
    // on a per-attribute basis and structurally.
    //
    // We start testing the input database against known schemas that have an identical
    // number of attributes, which should allow for the best matches.  This gives us an
    // initial set of distances.  We then expand the search to schemas of greater or fewer
    // attributes, as long as a given bucket of size-k schemas has a min-distance of less
    // than the current top-k matches.
    //
    TreeSet<DictionaryMapping> sorter = new TreeSet<DictionaryMapping>();
    List<Integer> seenIndexes = new ArrayList<Integer>();
    int searchRadius = 0;
    boolean seenAllCandidates = false;
    int srcSchemaSize = srcSchema.getFields().size();
    int totalSchemasExamined = 0;

    while (!seenAllCandidates) {
        // Examine the relevant schema buckets, compute all matches to those schemas
        for (int j = Math.max(1, srcSchemaSize - searchRadius); j <= Math.min(NUM_BUCKETS,
                srcSchemaSize + searchRadius); j++) {

            if (seenIndexes.contains(j - 1)) {
                continue;
            }
            for (SchemaDictionaryEntry elt : dictBySize.get(j - 1)) {
                /////////////////////////////
                // This is where we instrument the mapping stuff.
                // If the pair is an interesting one, then emit the mapping that
                // we discover.  Why are good matches going undiscovered?
                /////////////////////////////
                SchemaMapping mapping = srcSummary.getBestMapping(elt.getSummary());
                if (srcSchema.getName().equals(elt.getSchema().getName())) {
                    System.err
                            .println("Comparing " + srcSchema.getName() + " with " + elt.getSchema().getName());
                    System.err.println("Obtained mapping: " + mapping.toString());
                }

                totalSchemasExamined++;
                sorter.add(new DictionaryMapping(mapping, elt));
            }
            seenIndexes.add(j - 1);
        }

        // Have we examined the entire corpus of known schemas?
        if ((srcSchemaSize - searchRadius) <= 1 && (srcSchemaSize + searchRadius) >= NUM_BUCKETS) {
            seenAllCandidates = true;
        } else {
            // Test to see if the best matches are good enough that we can stop looking.
            // We compare the lowest known match distance to the minimum distance for matches
            // in the closest non-examined buckets.
            int lowestSize = srcSchemaSize - searchRadius - 1;
            int highestSize = srcSchemaSize + searchRadius + 1;
            double minNearbyDistance = Double.MAX_VALUE;
            if (lowestSize >= 1) {
                minNearbyDistance = Math.min(minNearbyDistance,
                        SchemaStatisticalSummary.getMinimumMappingCost(srcSchemaSize, lowestSize));
            }
            if (highestSize <= NUM_BUCKETS) {
                minNearbyDistance = Math.min(minNearbyDistance,
                        SchemaStatisticalSummary.getMinimumMappingCost(srcSchemaSize, highestSize));
            }
            // Grab from the sorter the k-th element of the sorted list. The k > 0 guard matters:
            // without it no element is ever selected for k <= 0 and the dereference below fails.
            if (k > 0 && sorter.size() >= k) {
                DictionaryMapping testDictMapping = null;
                int idx = 0;
                for (DictionaryMapping cur : sorter) {
                    idx++;
                    if (idx == k) {
                        testDictMapping = cur;
                        break;
                    }
                }
                if (testDictMapping.getMapping().getDist() < minNearbyDistance) {
                    seenAllCandidates = true;
                }
            }
        }
        searchRadius++;
    }

    // Return the best schema mappings. An empty sorter (nothing examined) yields an empty list
    // instead of failing on sorter.first().
    List<DictionaryMapping> dsts = new ArrayList<DictionaryMapping>();
    if (!sorter.isEmpty()) {
        double smallestDistance = sorter.first().getMapping().getDist();
        for (DictionaryMapping dp : sorter) {
            // NOTE(review): '> k' lets k + 1 mappings through before the cut-off applies;
            // confirm whether '>= k' was intended. Left unchanged for existing callers.
            if (dsts.size() > k && dp.getMapping().getDist() > smallestDistance) {
                break;
            }
            dsts.add(dp);
        }
    }
    double pct = totalSchemasExamined / (1.0 * dict.contents().size());
    System.err.println("Final search radius of " + searchRadius + " yielded a search over " + pct
            + " of all known databases.");
    return dsts;
}

From source file:net.spfbl.data.Block.java

/**
 * Returns, in sorted order, every token from {@code getAll()} that does not contain a ':'.
 *
 * @return a new sorted set holding the colon-free tokens
 * @throws ProcessException declared by this method; presumably propagated from {@code getAll()}
 */
public static TreeSet<String> get() throws ProcessException {
    TreeSet<String> colonFree = new TreeSet<String>();
    for (String candidate : getAll()) {
        if (candidate.indexOf(':') >= 0) {
            // tokens carrying a ':' are left out of this view
            continue;
        }
        colonFree.add(candidate);
    }
    return colonFree;
}

From source file:net.spfbl.core.Analise.java

/**
 * Builds the sorted set of {@code "<ip> <result>"} lines for this analysis.
 * <p>
 * The map is first filled by {@code whiteFullSet}. If the result file exists, each of its lines
 * is then split at the first space, and the entry is stored (replacing any earlier value for the
 * same key) when {@code containsResultSet} accepts the part before the space. Failures while
 * reading are logged through {@code Server.logError} and do not abort the method.
 *
 * @return a new sorted set with one {@code key + " " + value} string per map entry
 * @throws InterruptedException declared by this method; presumably raised by {@code whiteFullSet}
 */
public TreeSet<String> getResultFullSet() throws InterruptedException {
    TreeMap<String, String> resultByIp = new TreeMap<String, String>();
    whiteFullSet(resultByIp);
    File source = getResultFile();
    if (source.exists()) {
        try {
            FileReader rawReader = new FileReader(source);
            BufferedReader reader = new BufferedReader(rawReader);
            try {
                for (String row = reader.readLine(); row != null; row = reader.readLine()) {
                    int separator = row.indexOf(' ');
                    if (separator <= 0) {
                        // no key in front of a space: nothing to record for this line
                        continue;
                    }
                    String address = row.substring(0, separator);
                    try {
                        if (containsResultSet(address)) {
                            resultByIp.put(address, row.substring(separator + 1));
                        }
                    } catch (InterruptedException ex) {
                        Server.logError(ex);
                    }
                }
            } finally {
                reader.close();
            }
        } catch (Exception ex) {
            Server.logError(ex);
        }
    }
    TreeSet<String> lines = new TreeSet<String>();
    for (java.util.Map.Entry<String, String> entry : resultByIp.entrySet()) {
        lines.add(entry.getKey() + " " + entry.getValue());
    }
    return lines;
}

From source file:org.dasein.cloud.openstack.nova.os.ext.rackspace.lb.RackspaceLoadBalancers.java

/**
 * Resolves the IDs of the load balancer nodes that correspond to the given servers.
 * <p>
 * For every server that can be looked up, at most one node ID is collected: the first node whose
 * address equals one of the server's private addresses (only tried when the server is in the
 * context's region), otherwise the first node whose address equals one of its public addresses.
 *
 * @param ctx            the provider context supplying the region ID to compare against
 * @param loadBalancerId the load balancer whose nodes are searched
 * @param serverIds      the servers to resolve; {@code null} or empty yields an empty result
 * @return the sorted set of matching node IDs
 * @throws CloudException    declared by this method
 * @throws InternalException declared by this method
 */
private @Nonnull Collection<String> mapNodes(@Nonnull ProviderContext ctx, @Nonnull String loadBalancerId,
        @Nullable String[] serverIds) throws CloudException, InternalException {
    TreeSet<String> matchedIds = new TreeSet<String>();

    if (serverIds == null || serverIds.length == 0) {
        return matchedIds;
    }

    Collection<Node> nodes = getNodes(loadBalancerId);

    for (String serverId : serverIds) {
        VirtualMachine vm = provider.getComputeServices().getVirtualMachineSupport()
                .getVirtualMachine(serverId);

        if (vm == null) {
            continue;
        }
        // Private addresses are only consulted for servers in the context's region.
        boolean matched = vm.getProviderRegionId().equals(ctx.getRegionId())
                && addFirstNodeMatching(vm.getPrivateAddresses(), nodes, matchedIds);

        if (!matched) {
            addFirstNodeMatching(vm.getPublicAddresses(), nodes, matchedIds);
        }
    }
    return matchedIds;
}

/**
 * Adds the ID of the first node whose address equals one of the given addresses (addresses are
 * tried in array order, nodes in collection order) and reports whether such a node was found.
 */
private boolean addFirstNodeMatching(RawAddress[] addresses, Collection<Node> nodes,
        TreeSet<String> target) {
    for (RawAddress address : addresses) {
        for (Node node : nodes) {
            if (node.address.equals(address.getIpAddress())) {
                target.add(node.nodeId);
                return true;
            }
        }
    }
    return false;
}

From source file:massbank.BatchSearchWorker.java

/**
 * T}t@C???iHTML`?j/*from w ww  .j a v a 2  s. c o  m*/
 * @param resultFile t@C
 * @param htmlFile YtpHTMLt@C
 */
private void createSummary(File resultFile, File htmlFile) {
    LineNumberReader in = null;
    PrintWriter out = null;
    try {
        //(1) t@C?
        String line;
        int cnt = 0;
        ArrayList<String> nameList = new ArrayList<String>();
        ArrayList<String> top1LineList = new ArrayList<String>();
        TreeSet<String> top1IdList = new TreeSet<String>();
        in = new LineNumberReader(new FileReader(resultFile));
        while ((line = in.readLine()) != null) {
            line = line.trim();
            if (line.equals("")) {
                cnt = 0;
            } else {
                cnt++;
                if (cnt == 1) {
                    nameList.add(line);
                } else if (cnt == 2) {
                    if (line.equals("-1")) {
                        top1LineList.add("Invalid");
                    }
                    if (line.equals("0")) {
                        top1LineList.add("0");
                    }
                } else if (cnt == 4) {
                    String[] vals = line.split("\t");
                    String id = vals[0];
                    top1IdList.add(id);
                    top1LineList.add(line);
                }
            }
        }

        //? http://www.massbank.jp/ T?[o??KEGG???s
        HashMap<String, ArrayList> massbank2mapList = new HashMap<String, ArrayList>(); //(2)p
        HashMap<String, String> massbank2keggList = new HashMap<String, String>(); //(2)p
        HashMap<String, ArrayList> map2keggList = new HashMap<String, ArrayList>(); //(3)p
        ArrayList<String> mapNameList = new ArrayList<String>(); //(4)p
        boolean isKeggReturn = false;
        //         if (serverUrl.indexOf("www.massbank.jp") == -1) {
        //            isKeggReturn = false;
        //         }
        if (isKeggReturn) {

            //(2) KEGG ID, Map IDDB
            String where = "where MASSBANK in(";
            Iterator it = top1IdList.iterator();
            while (it.hasNext()) {
                String id = (String) it.next();
                where += "'" + id + "',";
            }
            where = where.substring(0, where.length() - 1);
            where += ")";
            String sql = "select MASSBANK, t1.KEGG, MAP from " + "(SELECT MASSBANK,KEGG FROM OTHER_DB_IDS "
                    + where + ") t1, PATHWAY_CPDS t2" + " where t1.KEGG=t2.KEGG order by MAP,MASSBANK";

            ArrayList<String> mapList = null;
            try {
                Class.forName("com.mysql.jdbc.Driver");
                String connectUrl = "jdbc:mysql://localhost/MassBank_General";
                Connection con = DriverManager.getConnection(connectUrl, "bird", "bird2006");
                Statement stmt = con.createStatement();
                ResultSet rs = stmt.executeQuery(sql);
                String prevId = "";
                while (rs.next()) {
                    String id = rs.getString(1);
                    String kegg = rs.getString(2);
                    String map = rs.getString(3);
                    if (!id.equals(prevId)) {
                        if (!prevId.equals("")) {
                            massbank2mapList.put(prevId, mapList);
                        }
                        mapList = new ArrayList<String>();
                        massbank2keggList.put(id, kegg);
                    }
                    mapList.add(map);
                    prevId = id;
                }
                massbank2mapList.put(prevId, mapList);

                rs.close();
                stmt.close();
                con.close();
            } catch (Exception e) {
                e.printStackTrace();
            }

            if (mapList != null) {

                //(3) Pathway Map?FtXg??
                it = massbank2mapList.keySet().iterator();
                while (it.hasNext()) {
                    String id = (String) it.next();
                    String kegg = (String) massbank2keggList.get(id);

                    ArrayList<String> list1 = massbank2mapList.get(id);
                    for (int i = 0; i < list1.size(); i++) {
                        String map = list1.get(i);
                        ArrayList<String> list2 = null;
                        if (map2keggList.containsKey(map)) {
                            list2 = map2keggList.get(map);
                            list2.add(kegg);
                        } else {
                            list2 = new ArrayList<String>();
                            list2.add(kegg);
                            map2keggList.put(map, list2);
                        }
                    }
                }

                //(4) SOAPPathway Map?Ft?\bh?s
                it = map2keggList.keySet().iterator();
                List<Callable<HashMap<String, String>>> tasks = new ArrayList();
                while (it.hasNext()) {
                    String map = (String) it.next();
                    mapNameList.add(map);
                    ArrayList<String> list = map2keggList.get(map);
                    String[] cpds = list.toArray(new String[] {});
                    Callable<HashMap<String, String>> task = new ColorPathway(map, cpds);
                    tasks.add(task);
                }
                Collections.sort(mapNameList);

                // Xbhv?[10
                ExecutorService exsv = Executors.newFixedThreadPool(10);
                List<Future<HashMap<String, String>>> results = exsv.invokeAll(tasks);

                // Pathway mapi[??
                String saveRootPath = MassBankEnv.get(MassBankEnv.KEY_TOMCAT_APPTEMP_PATH) + "pathway";
                File rootDir = new File(saveRootPath);
                if (!rootDir.exists()) {
                    rootDir.mkdir();
                }
                //               String savePath = saveRootPath + File.separator + this.jobId;
                //               File newDir = new File(savePath);
                //               if ( !newDir.exists() ) {
                //                  newDir.mkdir();
                //               }

                //(6) Pathway mapURL
                for (Future<HashMap<String, String>> future : results) {
                    HashMap<String, String> res = future.get();
                    it = res.keySet().iterator();
                    String map = (String) it.next();
                    String mapUrl = res.get(map);
                    String filePath = saveRootPath + File.separator + this.jobId + "_" + map + ".png";
                    FileUtil.downloadFile(mapUrl, filePath);
                }
            }
        }

        //(7) ?o
        out = new PrintWriter(new BufferedWriter(new FileWriter(htmlFile)));
        // wb_?[?o
        String reqIonStr = "Both";
        try {
            if (Integer.parseInt(this.ion) > 0) {
                reqIonStr = "Positive";
            } else if (Integer.parseInt(this.ion) < 0) {
                reqIonStr = "Negative";
            }
        } catch (NumberFormatException nfe) {
            nfe.printStackTrace();
        }
        String title = "Summary of Batch Service Results";
        out.println("<html>");
        out.println("<head>");
        out.println("<title>" + title + "</title>");
        out.println("</head>");
        out.println("<body>");
        out.println("<h1>" + title + "</h1>");
        out.println("<hr>");
        out.println("<h3>Request Date : " + this.time + "</h3>");
        out.println("Instrument Type : " + this.inst + "<br>");
        out.println("MS Type : " + this.ms + "<br>");
        out.println("Ion Mode : " + reqIonStr + "<br>");
        out.println("<br>");
        out.println("<hr>");
        out.println("<table border=\"1\" cellspacing=\"0\" cellpadding=\"2\">");
        String cols = String.valueOf(mapNameList.size());
        out.println("<tr>");
        out.println("<th bgcolor=\"LavenderBlush\" rowspan=\"1\">No.</th>");
        out.println("<th bgcolor=\"LavenderBlush\" rowspan=\"1\">Query&nbsp;Name</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Score</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Hit</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">MassBank&nbsp;ID</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Record&nbsp;Title</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Formula</th>");
        out.println("<th bgcolor=\"LightCyan\" rowspan=\"1\">Exact Mass</th>");
        if (isKeggReturn) {
            out.println("<th bgcolor=\"LightYellow\" rowspan=\"2\">KEGG&nbsp;ID</th>");
            out.println(
                    "<th bgcolor=\"LightYellow\" colspan=\"" + cols + "\">Colored&nbsp;Pathway&nbsp;Maps</th>");
        }
        out.println("</tr>");
        out.print("<tr bgcolor=\"moccasin\">");
        for (int i = 0; i < mapNameList.size(); i++) {
            out.print("<th>MAP" + String.valueOf(i + 1) + "</th>");
        }
        out.println("</tr>");

        for (int i = 0; i < nameList.size(); i++) {
            out.println("<tr>");
            String no = String.format("%5d", i + 1);
            no = no.replace(" ", "&nbsp;");
            out.println("<td>" + no + "</td>");
            // Query Name
            String queryName = nameList.get(i);
            out.println("<td nowrap>" + queryName + "</td>");

            line = top1LineList.get(i);
            if (line.equals("0")) {
                if (isKeggReturn) {
                    cols = String.valueOf(mapNameList.size() + 5);
                } else {
                    cols = String.valueOf(6);
                }
                out.println("<td colspan=\"" + cols + "\">No Hit Record</td>");
            } else if (line.equals("Invalid")) {
                if (isKeggReturn) {
                    cols = String.valueOf(mapNameList.size() + 5);
                } else {
                    cols = String.valueOf(4);
                }
                out.println("<td colspan=\"" + cols + "\">Invalid Query</td>");
            } else {
                String[] data = formatLine(line);
                String id = data[0];
                String recTitle = data[1];
                String formula = data[2];
                String emass = data[3];
                String score = data[4];
                String hit = data[5];

                boolean isHiScore = false;
                if (Integer.parseInt(hit) >= 3 && Double.parseDouble(score) >= 0.8) {
                    isHiScore = true;
                }

                // Score
                if (isHiScore) {
                    out.println("<td><b>" + score + "</b></td>");
                } else {
                    out.println("<td>" + score + "</td>");
                }

                // hit peak
                if (isHiScore) {
                    out.println("<td align=\"right\"><b>" + hit + "</b></td>");
                } else {
                    out.println("<td align=\"right\">" + hit + "</td>");
                }

                // MassBank ID & Link
                out.println("<td><a href=\"" + serverUrl + "jsp/FwdRecord.jsp?id=" + id
                        + "\" target=\"_blank\">" + id + "</td>");
                // Record Title
                out.println("<td>" + recTitle + "</td>");

                // Formula
                out.println("<td nowrap>" + formula + "</td>");

                // Exact Mass
                out.println("<td nowrap>" + emass + "</td>");

                // KEGG ID & Link
                if (isKeggReturn) {
                    String keggLink = "&nbsp;&nbsp;-";
                    if (massbank2keggList.containsKey(id)) {
                        String keggUrl = "http://www.genome.jp/dbget-bin/www_bget?";
                        String kegg = massbank2keggList.get(id);
                        switch (kegg.charAt(0)) {
                        case 'C':
                            keggUrl += "cpd:" + kegg;
                            break;
                        case 'D':
                            keggUrl += "dr:" + kegg;
                            break;
                        case 'G':
                            keggUrl += "gl:" + kegg;
                            break;
                        }
                        keggLink = "<a href=\"" + keggUrl + "\" target=\"_blank\">" + kegg + "</a>";
                    }
                    out.println("<td>" + keggLink + "</td>");
                    // Pathway Map Link
                    if (massbank2mapList.containsKey(id)) {
                        ArrayList<String> list = massbank2mapList.get(id);
                        for (int l1 = mapNameList.size() - 1; l1 >= 0; l1--) {
                            boolean isFound = false;
                            String map = "";
                            for (int l2 = list.size() - 1; l2 >= 0; l2--) {
                                map = list.get(l2);
                                if (map.equals(mapNameList.get(l1))) {
                                    isFound = true;
                                    break;
                                }
                            }
                            if (isFound) {
                                ArrayList<String> list2 = map2keggList.get(map);
                                String mapUrl = serverUrl + "temp/pathway/" + this.jobId + "_" + map + ".png";
                                out.println("<td nowrap><a href=\"" + mapUrl + "\" target=\"_blank\">map:" + map
                                        + "(" + list2.size() + ")</a></td>");
                            } else {
                                out.println("<td>&nbsp;&nbsp;-</td>");
                            }
                        }
                    } else {
                        for (int l1 = mapNameList.size() - 1; l1 >= 0; l1--) {
                            out.println("<td>&nbsp;&nbsp;-</td>");
                        }
                    }
                }
            }
            out.println("</tr>");
        }
        out.println("</table>");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (in != null) {
                in.close();
            }
        } catch (IOException e) {
        }
        if (out != null) {
            out.flush();
            out.close();
        }
    }
}