rndvecgen.RandomQueryGen.java Source code

Java tutorial

Introduction

Here is the source code for rndvecgen.RandomQueryGen.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package rndvecgen;

import indexer.DocVector;
import indexer.QueryVector;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.List;
import java.util.Properties;
import sift.VecQueries;
import org.apache.commons.io.FileUtils;

/**
 *
 * @author Debasis
 */
public class RandomQueryGen extends RandomVecGen {
    VecQueries queries;
    int numQueries;
    String vecFileName;
    List<String> vecLines;
    int numDocs;

    public RandomQueryGen(Properties prop) throws Exception {
        super(prop);
        queries = new VecQueries();
        numQueries = Integer.parseInt(prop.getProperty("syntheticdata.numqueries"));
        vecFileName = this.randomSamplesFileName();
        vecLines = FileUtils.readLines(new File(vecFileName));
        numDocs = vecLines.size();
    }

    public RandomQueryGen(String propFile) throws Exception {
        super(propFile);
        queries = new VecQueries();
        numQueries = Integer.parseInt(prop.getProperty("syntheticdata.numqueries"));
        vecFileName = this.randomSamplesFileName();
        vecLines = FileUtils.readLines(new File(vecFileName));
        numDocs = vecLines.size();
    }

    void sampleQueries() {
        boolean usingMixtureDist = prop.getProperty("syntheticdata.query.genmode", "prior").equals("prior");
        QueryVector qvec;
        for (int i = 0; i < numQueries; i++) {
            qvec = usingMixtureDist ? new QueryVector(mixtureDist, i) : new QueryVector(i, numDimensions);
            queries.add(qvec);
        }
    }

    public int getNumQueries() {
        return numQueries;
    }

    void setNNInfos() throws Exception {
        for (DocVector qv : queries.getQueries()) {
            setNN((QueryVector) qv);
        }
    }

    void setNN(QueryVector qvec) throws Exception {
        int numDimensions = qvec.getNumberofDimensions();
        int numIntervals = qvec.getNumberofIntervals();

        float minDist = Float.MAX_VALUE;
        int minDocId = 0;
        DocVector minDocVector = null;
        float dist;

        System.out.println("Computing NN info between vecs: ");

        for (int i = 0; i < numDocs; i++) {
            DocVector dvec = new DocVector(vecLines.get(i), numDimensions, numIntervals);

            dist = dvec.getDist(qvec);
            if (dist < minDist) {
                minDist = dist;
                minDocId = i;
                minDocVector = dvec;
            }
        }

        qvec.setNNInfo(minDocId, minDist);

        System.out.println("Query: " + qvec);
        System.out.println("NN-vec wrt query: " + minDocVector);
    }

    public void generateAndSaveQueries() throws Exception {
        sampleQueries();
        setNNInfos();
        save();
    }

    public void save() throws Exception {

        String qvecFileName = this.randomSamplesFileName() + "queries.txt";
        String distInfoFile = this.randomSamplesFileName() + "queries.nninfo.txt";

        FileWriter fw = new FileWriter(qvecFileName);
        BufferedWriter bw = new BufferedWriter(fw);

        for (DocVector vec : this.queries.getQueries()) {
            bw.write(this.vecToStr(vec.getId(), vec.getVec()));
        }
        bw.close();
        fw.close();

        fw = new FileWriter(distInfoFile);
        bw = new BufferedWriter(fw);

        for (DocVector vec : this.queries.getQueries()) {
            QueryVector qvec = (QueryVector) vec;
            bw.write(qvec.getNN() + "\t" + qvec.getNNDist() + "\n");
        }

        bw.close();
        fw.close();
    }

    public void load() throws Exception {
        String qvecFileName = this.randomSamplesFileName() + "queries.txt";
        String distInfoFile = this.randomSamplesFileName() + "queries.nninfo.txt";

        List<String> nnInfoLines = FileUtils.readLines(new File(distInfoFile));
        List<String> qvecLines = FileUtils.readLines(new File(qvecFileName));

        assert (nnInfoLines.size() == qvecLines.size());

        int i = 0;
        for (String qvecLine : qvecLines) {
            String nnInfo = nnInfoLines.get(i);
            String[] tokens = nnInfo.split("\\s+");
            int nnId = Integer.parseInt(tokens[0]);
            float dist = Float.parseFloat(tokens[1]);
            this.queries.add(new QueryVector(qvecLine, numDimensions, DocVector.numIntervals, nnId, dist));
            i++;
        }
    }

    public List<DocVector> getQueries() throws Exception {
        return queries.getQueries();
    }

    public static void main(String[] args) {
        if (args.length == 0) {
            args = new String[1];
            System.out.println("Usage: java RealValuedVecIndexer <prop-file>");
            args[0] = "init_synthetic.properties";
        }

        try {
            RandomQueryGen rqg = new RandomQueryGen(args[0]);
            rqg.generateAndSaveQueries();
        } catch (Exception ex) {
            ex.printStackTrace();
        }

    }
}