Example usage for java.util.HashMap.put

List of usage examples for java.util.HashMap.put

Introduction

On this page you can find example usage of java.util.HashMap.put.

Prototype

public V put(K key, V value) 

Document

Associates the specified value with the specified key in this map. If the map previously contained a mapping for the key, the old value is replaced. The method returns the previous value associated with the key, or null if there was no mapping.
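
A minimal, self-contained sketch of the call and its return value (the keys and values here are made up for illustration):

import java.util.HashMap;

public class PutDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> ages = new HashMap<>();
        Integer previous = ages.put("alice", 30); // no mapping yet, so put returns null
        System.out.println(previous);             // null
        previous = ages.put("alice", 31);         // replaces the old value
        System.out.println(previous);             // 30
        System.out.println(ages.get("alice"));    // 31
    }
}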

Usage

From source file:discovery.compression.kdd2011.ratio.RatioCompressionReport.java

public static void main(String[] args) throws GraphReadingException, IOException, java.text.ParseException {
    opts.addOption("r", true, "Goal compression ratio");

    //      opts.addOption( "a",
    //       true,
    //       "Algorithm used for compression. The default and only currently available option is \"greedy\"");
    //opts.addOption("cost-output",true,"Output file for costs, default is costs.txt");
    //opts.addOption("cost-format",true,"Output format for ");

    opts.addOption("ctype", true, "Connectivity type: global or local, default is global.");
    opts.addOption("connectivity", false,
            "enables output for connectivity. Connectivity info will be written to connectivity.txt");
    opts.addOption("output_bmg", true, "Write bmg file with groups to given file.");
    opts.addOption("algorithm", true, "Algorithm to use, one of: greedy random1 random2 bruteforce slowgreedy");
    opts.addOption("hop2", false, "Only try to merge nodes that have common neighbors");
    opts.addOption("kmedoids", false, "Enables output for kmedoids clustering");
    opts.addOption("kmedoids_k", true, "Number of clusters to be used in kmedoids. Default is 3");
    opts.addOption("kmedoids_output", true,
            "Output file for kmedoid clusters. Default is clusters.txt. This file will be overwritten.");
    opts.addOption("norefresh", false,
            "Use old style merging: all connectivities are not refreshed when merging");
    opts.addOption("edge_attribute", true, "Attribute from bmgraph used as edge weight");
    opts.addOption("only_times", false, "Only write times.txt");
    //opts.addOption("no_metrics",false,"Exit after compression, don't calculate any metrics or produce output bmg for the compression.");
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(opts, args);
    } catch (ParseException e) {
        e.printStackTrace();
        System.exit(0);
    }

    boolean connectivity = false;
    double ratio = 0;

    boolean hop2 = cmd.hasOption("hop2");

    RatioCompression compression = new GreedyRatioCompression(hop2);

    if (cmd.hasOption("connectivity"))
        connectivity = true;

    ConnectivityType ctype = ConnectivityType.GLOBAL;
    CompressionMergeModel mergeModel = new PathAverageMergeModel();
    if (cmd.hasOption("ctype")) {
        String ctypeStr = cmd.getOptionValue("ctype");
        if (ctypeStr.equals("local")) {
            ctype = ConnectivityType.LOCAL;
            mergeModel = new EdgeAverageMergeModel();
        } else if (ctypeStr.equals("global")) {
            ctype = ConnectivityType.GLOBAL;
            mergeModel = new PathAverageMergeModel();
        } else {
            System.out.println(PROGRAM_NAME + ": unknown connectivity type " + ctypeStr);
            printHelp();
        }
    }

    if (cmd.hasOption("norefresh"))
        mergeModel = new PathAverageMergeModelNorefresh();
    if (cmd.hasOption("algorithm")) {
        String alg = cmd.getOptionValue("algorithm");
        if (alg.equals("greedy")) {
            compression = new GreedyRatioCompression(hop2);
        } else if (alg.equals("random1")) {
            compression = new RandomRatioCompression(hop2);
        } else if (alg.equals("random2")) {
            compression = new SmartRandomRatioCompression(hop2);
        } else if (alg.equals("bruteforce")) {
            compression = new BruteForceCompression(hop2, ctype == ConnectivityType.LOCAL);
        } else if (alg.equals("slowgreedy")) {
            compression = new SlowGreedyRatioCompression(hop2);
        } else {
            System.out.println("algorithm must be one of: greedy random1 random2 bruteforce slowgreedy");
            printHelp();
        }
    }

    compression.setMergeModel(mergeModel);

    if (cmd.hasOption("r")) {
        ratio = Double.parseDouble(cmd.getOptionValue("r"));
    } else {
        System.out.println(PROGRAM_NAME + ": compression ratio not defined");
        printHelp();
    }

    if (cmd.hasOption("help")) {
        printHelp();
    }

    String infile = null;
    if (cmd.getArgs().length != 0) {
        infile = cmd.getArgs()[0];
    } else {
        printHelp();
    }

    boolean kmedoids = false;
    int kmedoidsK = 3;
    String kmedoidsOutput = "clusters.txt";
    if (cmd.hasOption("kmedoids"))
        kmedoids = true;
    if (cmd.hasOption("kmedoids_k"))
        kmedoidsK = Integer.parseInt(cmd.getOptionValue("kmedoids_k"));
    if (cmd.hasOption("kmedoids_output"))
        kmedoidsOutput = cmd.getOptionValue("kmedoids_output");

    String edgeAttrib = "goodness";
    if (cmd.hasOption("edge_attribute"))
        edgeAttrib = cmd.getOptionValue("edge_attribute");

    // This program should directly use bmgraph-java to read and
    // DefaultGraph should have a constructor that takes a BMGraph as an
    // argument.

    //VisualGraph vg = new VisualGraph(infile, edgeAttrib, false);
    //System.out.println("vg read");
    //SimpleVisualGraph origSG = new SimpleVisualGraph(vg);
    BMGraph bmg = BMGraphUtils.readBMGraph(infile);

    int origN = bmg.getNodes().size();

    //for(int i=0;i<origN;i++)
    //System.out.println(i+"="+origSG.getVisualNode(i));
    System.out.println("bmgraph read");

    BMNode[] i2n = new BMNode[origN];
    HashMap<BMNode, Integer> n2i = new HashMap<BMNode, Integer>();
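    // n2i assigns each node a dense integer index; every put here sees a fresh key,
    // so the ignored return value is always null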
    {
        int pi = 0;
        for (BMNode nod : bmg.getNodes()) {
            n2i.put(nod, pi);
            i2n[pi++] = nod;
        }
    }

    DefaultGraph dg = new DefaultGraph();
    for (BMEdge e : bmg.getEdges()) {
        dg.addEdge(n2i.get(e.getSource()), n2i.get(e.getTarget()), Double.parseDouble(e.get(edgeAttrib)));
    }

    DefaultGraph origDG = dg.copy();

    System.out.println("inputs read");
    RatioCompression nopCompressor = new RatioCompression.DefaultRatioCompression();
    ResultGraph nopResult = nopCompressor.compressGraph(dg, 1);

    long start = System.currentTimeMillis();
    ResultGraph result = compression.compressGraph(dg, ratio);
    long timeSpent = System.currentTimeMillis() - start;
    double seconds = timeSpent * 0.001;

    BufferedWriter timesWriter = new BufferedWriter(new FileWriter("times.txt", true));
    timesWriter.append("" + seconds + "\n");
    timesWriter.close();

    if (cmd.hasOption("only_times")) {
        System.out.println("Compression done, exiting.");
        System.exit(0);
    }

    BufferedWriter costsWriter = new BufferedWriter(new FileWriter("costs.txt", true));
    costsWriter.append("" + nopResult.getCompressorCosts() + " " + result.getCompressorCosts() + "\n");
    costsWriter.close();

    double[][] origProb;
    double[][] compProb;
    int[] group = new int[origN];

    for (int i = 0; i < result.partition.size(); i++)
        for (int x : result.partition.get(i))
            group[x] = i;

    if (ctype == ConnectivityType.LOCAL) {
        origProb = new double[origN][origN];
        compProb = new double[origN][origN];
        DefaultGraph g = result.uncompressedGraph();
        for (int i = 0; i < origN; i++) {
            for (int j = 0; j < origN; j++) {
                origProb[i][j] = dg.getEdgeWeight(i, j);
                compProb[i][j] = g.getEdgeWeight(i, j);
            }
        }
        System.out.println("Writing edge-dissimilarity");
    } else {

        origProb = ProbDijkstra.getProbMatrix(origDG);

        compProb = new double[origN][origN];

        System.out.println("nodeCount = " + result.graph.getNodeCount());
        double[][] ccProb = ProbDijkstra.getProbMatrix(result.graph);
        System.out.println("ccProb.length = " + ccProb.length);

        System.out.println("ccProb[0].length = " + ccProb[0].length);

        for (int i = 0; i < origN; i++) {
            for (int j = 0; j < origN; j++) {
                if (group[i] == group[j])
                    compProb[i][j] = result.graph.getEdgeWeight(group[i], group[j]);
                else
                    compProb[i][j] = ccProb[group[i]][group[j]];
            }
        }

        System.out.println("Writing best-path-dissimilarity");
        //compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph());

    }

    {
        BufferedWriter connWr = null;

        if (connectivity) {
            connWr = new BufferedWriter(new FileWriter("connectivity.txt", true));
        }
        double totalDiff = 0;

        for (int i = 0; i < origN; i++) {
            for (int j = i + 1; j < origN; j++) {

                double diff = Math.abs(origProb[i][j] - compProb[i][j]);
                //VisualNode ni = origSG.getVisualNode(i);
                //VisualNode nj = origSG.getVisualNode(j);
                BMNode ni = i2n[i];
                BMNode nj = i2n[j];
                if (connectivity)
                    connWr.append(ni + "\t" + nj + "\t" + origProb[i][j] + "\t" + compProb[i][j] + "\t" + diff
                            + "\n");
                totalDiff += diff * diff;
            }
        }

        if (connectivity) {
            connWr.append("\n");
            connWr.close();
        }

        totalDiff = Math.sqrt(totalDiff);
        BufferedWriter dissWr = new BufferedWriter(new FileWriter("dissimilarity.txt", true));
        dissWr.append("" + totalDiff + "\n");
        dissWr.close();
    }

    if (cmd.hasOption("output_bmg")) {
        BMGraph outgraph = new BMGraph();

        String outputfile = cmd.getOptionValue("output_bmg");
        HashMap<Integer, BMNode> nodes = new HashMap<Integer, BMNode>();

        for (int i = 0; i < result.partition.size(); i++) {
            ArrayList<Integer> g = result.partition.get(i);
            if (g.size() == 0)
                continue;
            BMNode node = new BMNode("Supernode_" + i);
            HashMap<String, String> attributes = new HashMap<String, String>();
            StringBuffer contents = new StringBuffer();
            for (int x : g)
                contents.append(i2n[x] + ",");
            contents.delete(contents.length() - 1, contents.length());
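            // record the member node list and the supernode's self-edge weight as plain string attributes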

            attributes.put("nodes", contents.toString());
            attributes.put("self-edge", "" + result.graph.getEdgeWeight(i, i));
            node.setAttributes(attributes);
            nodes.put(i, node);
            outgraph.ensureHasNode(node);
        }

        for (int i = 0; i < result.partition.size(); i++) {
            if (result.partition.get(i).size() == 0)
                continue;
            for (int x : result.graph.getNeighbors(i)) {
                if (x < i)
                    continue;
                BMNode from = nodes.get(i);
                BMNode to = nodes.get(x);
                if (from == null || to == null) {
                    System.out.println(from + "->" + to);
                    System.out.println(i + "->" + x);
                    System.out.println("");
                }
                BMEdge e = new BMEdge(nodes.get(i), nodes.get(x), "notype");

                e.setAttributes(new HashMap<String, String>());
                e.put("goodness", "" + result.graph.getEdgeWeight(i, x));
                outgraph.ensureHasEdge(e);
            }
        }
        BMGraphUtils.writeBMGraph(outgraph, outputfile);
    }

    // k medoids!
    if (kmedoids) {
        //KMedoidsResult clustersOrig=KMedoids.runKMedoids(origProb,kmedoidsK);

        if (ctype == ConnectivityType.LOCAL) {
            compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph());
        }

        //KMedoidsResult compClusters = KMedoids.runKMedoids(ProbDijkstra.getProbMatrix(result.graph),kmedoidsK);
        KMedoidsResult clustersComp = KMedoids.runKMedoids(compProb, kmedoidsK);

        BufferedWriter bw = new BufferedWriter(new FileWriter(kmedoidsOutput));

        for (int i = 0; i < origN; i++) {
            int g = group[i];
            //bw.append(origSG.getVisualNode(i).getBMNode()+" "+compClusters.clusters[g]+"\n");
            bw.append(i2n[i] + " " + clustersComp.clusters[i] + "\n");
        }
        bw.close();
    }

    System.exit(0);
}

From source file:gov.nih.nci.caintegrator.application.gpvisualizer.CaIntegratorRunVisualizer.java

/**
 * args[0] = visualizer task name
 * args[1] = command line
 * args[2] = debug flag
 * args[3] = OS required for running
 * args[4] = CPU type required for running
 * args[5] = libdir on server for this task
 * args[6] = CSV list of downloadable files for inputs
 * args[7] = CSV list of input parameter names
 * args[8] = CSV list of support file names
 * args[9] = CSV list of support file modification dates
 * args[10] = server URL
 * args[11] = LSID of task
 * args[12...n] = optional input parameter arguments
 */
public static void main(String[] args) {
    String[] wellKnownNames = { RunVisualizerConstants.NAME, RunVisualizerConstants.COMMAND_LINE,
            RunVisualizerConstants.DEBUG, RunVisualizerConstants.OS, RunVisualizerConstants.CPU_TYPE,
            RunVisualizerConstants.LIBDIR, RunVisualizerConstants.DOWNLOAD_FILES, RunVisualizerConstants.LSID };
    int PARAM_NAMES = 7;
    int SUPPORT_FILE_NAMES = PARAM_NAMES + 1;
    int SUPPORT_FILE_DATES = SUPPORT_FILE_NAMES + 1;
    int SERVER = SUPPORT_FILE_DATES + 1;
    int LSID = SERVER + 1;
    int TASK_ARGS = LSID + 1;

    try {
        HashMap params = new HashMap();
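        // raw (pre-generics) HashMap; each well-known name is keyed to its positional argument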
        for (int i = 0; i < wellKnownNames.length; i++) {
            params.put(wellKnownNames[i], args[i]);
        }

        String name = (String) params.get(RunVisualizerConstants.NAME);
        StringTokenizer stParameterNames = new StringTokenizer(args[PARAM_NAMES], ", ");
        int argNum = TASK_ARGS;
        // when pulling parameters from the command line, don't assume that
        // all were provided.
        // some could be missing!
        while (stParameterNames.hasMoreTokens()) {
            String paramName = stParameterNames.nextToken();
            if (argNum < args.length) {
                String paramValue = args[argNum++];
                params.put(paramName, paramValue);
            } else {
                System.err.println("No value specified for " + paramName);
            }
        }
        URL source = new URL(args[SERVER]);

        StringTokenizer stFileNames = new StringTokenizer(args[SUPPORT_FILE_NAMES], ",");
        StringTokenizer stFileDates = new StringTokenizer(args[SUPPORT_FILE_DATES], ",");
        String[] supportFileNames = new String[stFileNames.countTokens()];
        long[] supportFileDates = new long[supportFileNames.length];
        String filename = null;
        String fileDate = null;
        int f = 0;
        while (stFileNames.hasMoreTokens()) {
            supportFileNames[f] = stFileNames.nextToken();
            if (stFileDates.hasMoreTokens()) {
                supportFileDates[f] = Long.parseLong(stFileDates.nextToken());
            } else {
                supportFileDates[f] = -1;
            }
            f++;
        }

        CaIntegratorRunVisualizer visualizer = new CaIntegratorRunVisualizer(params, supportFileNames,
                supportFileDates, new Applet());
        visualizer.run();
    } catch (Throwable t) {
        t.printStackTrace();
    }
}

From source file:akori.AKORI.java

public static void main(String[] args) throws IOException, InterruptedException {
    System.out.println("esto es AKORI");

    URL = "http://www.mbauchile.cl";
    PATH = "E:\\NetBeansProjects\\AKORI\\";
    NAME = "mbauchile.png";
    // Extract the DOM tree

    Document doc = Jsoup.connect(URL).timeout(0).get();

    // The Firefox driver supports javascript 
    WebDriver driver = new FirefoxDriver();
    driver.manage().window().maximize();
    System.out.println(driver.manage().window().getSize().toString());
    System.out.println(driver.manage().window().getPosition().toString());
    int xmax = driver.manage().window().getSize().width;
    int ymax = driver.manage().window().getSize().height;

    // Go to the URL page
    driver.get(URL);

    File screen = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    FileUtils.copyFile(screen, new File(PATH + NAME));

    BufferedImage img = ImageIO.read(new File(PATH + NAME));
    //Graphics2D graph = img.createGraphics();

    BufferedImage img1 = new BufferedImage(xmax, ymax, BufferedImage.TYPE_INT_ARGB);
    Graphics2D graph1 = img.createGraphics();
    double[][] matrix = new double[ymax][xmax];
    BufferedReader in = new BufferedReader(new FileReader("et.txt"));
    String linea;
    double max = 0;
    graph1.drawImage(img, 0, 0, null);
    HashMap<String, Integer> lista = new HashMap<String, Integer>();
    int count = 0;
    for (int i = 0; (linea = in.readLine()) != null && i < 10000; ++i) {
        String[] datos = linea.split(",");
        int x = (int) Double.parseDouble(datos[0]);
        int y = (int) Double.parseDouble(datos[2]);
        long time = Double.valueOf(datos[4]).longValue();
        if (x >= xmax || y >= ymax)
            continue;
        if (time < 691215)
            continue;
        if (time > 705648)
            break;
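        // count-map idiom: seed with 1 on first sight, otherwise put(key, get(key) + 1)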
        if (lista.containsKey(x + "," + y))
            lista.put(x + "," + y, lista.get(x + "," + y) + 1);
        else
            lista.put(x + "," + y, 1);
        ++count;
    }
    System.out.println(count);
    in.close();
    Iterator iter = lista.entrySet().iterator();
    Map.Entry e;
    for (String key : lista.keySet()) {
        Integer i = lista.get(key);
        if (max < i)
            max = i;
    }
    System.out.println(max);
    max = 0;
    while (iter.hasNext()) {
        e = (Map.Entry) iter.next();
        String xy = (String) e.getKey();
        String[] datos = xy.split(",");
        int x = Integer.parseInt(datos[0]);
        int y = Integer.parseInt(datos[1]);
        matrix[y][x] += (int) e.getValue();
        double aux;
        if ((aux = normalMatrix(matrix, y, x, ((int) e.getValue()) * 4)) > max) {
            max = aux;
        }
        //normalMatrix(matrix,x,y,20);
        if (matrix[y][x] > max)
            max = matrix[y][x];
    }
    int A, R, G, B, n;
    for (int i = 0; i < xmax; ++i) {
        for (int j = 0; j < ymax; ++j) {
            if (matrix[j][i] != 0) {
                n = (int) Math.round(matrix[j][i] * 100 / max);
                R = Math.round((255 * n) / 100);
                G = Math.round((255 * (100 - n)) / 100);
                B = 0;
                A = Math.round((255 * n) / 100);
                if (R > 255)
                    R = 255;
                if (R < 0)
                    R = 0;
                if (G > 255)
                    G = 255;
                if (G < 0)
                    G = 0;
                if (R < 50)
                    A = 0;
                graph1.setColor(new Color(R, G, B, A));
                graph1.fillOval(i, j, 1, 1);
            }
        }
    }
    //graph1.dispose();

    ImageIO.write(img, "png", new File("example.png"));
    System.out.println(max);

    graph1.setColor(Color.RED);
    // Extract the page elements
    Elements e1 = doc.body().getAllElements();
    int i = 1;
    ArrayList<String> tags = new ArrayList<String>();
    for (Element temp : e1) {

        if (tags.indexOf(temp.tagName()) == -1) {
            tags.add(temp.tagName());

            List<WebElement> query = driver.findElements(By.tagName(temp.tagName()));
            for (WebElement temp1 : query) {
                Point po = temp1.getLocation();
                Dimension d = temp1.getSize();
                if (d.width <= 0 || d.height <= 0 || po.x < 0 || po.y < 0)
                    continue;
                System.out.println(i + " " + temp.nodeName());
                System.out.println("  x: " + po.x + " y: " + po.y);
                System.out.println("  width: " + d.width + " height: " + d.height);
                graph1.draw(new Rectangle(po.x, po.y, d.width, d.height));
                ++i;
            }
        }
    }

    graph1.dispose();
    ImageIO.write(img, "png", new File(PATH + NAME));

    driver.quit();

}
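
The containsKey branch above that seeds and increments a counter is a common HashMap.put pattern; since Java 8 the same bookkeeping can be written with Map.merge. A standalone sketch (the coordinate strings are invented, not taken from the source above):

import java.util.HashMap;
import java.util.Map;

public class FixationCounter {
    public static void main(String[] args) {
        Map<String, Integer> counts = new HashMap<>();
        String[] fixations = { "10,20", "10,20", "30,40" };
        for (String xy : fixations) {
            // merge puts 1 on the first sight of a key, otherwise combines with Integer::sum
            counts.merge(xy, 1, Integer::sum);
        }
        System.out.println(counts); // e.g. {10,20=2, 30,40=1} (iteration order not guaranteed)
    }
}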

From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
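    // seed every short dataset name with a null id; the real ids are put in below from the index file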
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}

From source file:at.tlphotography.jAbuseReport.Reporter.java

/**
 * The main method.
 *
 * @param args
 *          the arguments
 */
public static void main(String[] args) {
    parseArguments(args);

    File[] directory = new File(logDir).listFiles(); // get the files in the dir

    for (File file : directory) // iterate over the files
    {
        if (!file.isDirectory() && file.getName().contains(logNames)) // if the file is not a dir and the name contains the logName string
        {
            if (file.getName().endsWith(".gz")) // is it zipped?
            {
                content.putAll(readGZFile(file));
            } else {
                content.putAll(readLogFile(file));
            }
        }
    }

    // group the log lines by the e-mail address returned from the whois lookup
    HashMap<String, ArrayList<LogObject>> finalContent = new HashMap<>();
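    // hand-rolled multimap: each e-mail key maps to the list of its log lines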

    Iterator<Entry<String, String>> it = content.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry<String, String> pair = it.next();
        String mail = whoIsLookUp(pair.getKey());

        if (finalContent.containsKey(mail)) {
            finalContent.get(mail).add(new LogObject(pair.getValue()));
        } else {
            ArrayList<LogObject> temp = new ArrayList<LogObject>();
            temp.add(new LogObject(pair.getValue()));
            finalContent.put(mail, temp);
        }

        it.remove();
    }

    // sort them
    Iterator<Entry<String, ArrayList<LogObject>>> it2 = finalContent.entrySet().iterator();
    while (it2.hasNext()) {
        Entry<String, ArrayList<LogObject>> pair = it2.next();
        Collections.sort(pair.getValue());
        println(pair.getKey() + " =");
        for (LogObject obj : pair.getValue()) {
            println(obj.logContent);
        }

        println("\n");
        it2.remove();
    }

}
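
The containsKey/put sequence that builds finalContent above is the pre-Java-8 way to grow a list-valued map; Map.computeIfAbsent collapses it to one call. A minimal standalone sketch (the address and lines are invented for illustration):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupByMail {
    public static void main(String[] args) {
        Map<String, List<String>> byMail = new HashMap<>();
        // computeIfAbsent creates the list the first time a key is seen,
        // so no containsKey check is needed before the add
        byMail.computeIfAbsent("abuse@example.org", k -> new ArrayList<>()).add("log line 1");
        byMail.computeIfAbsent("abuse@example.org", k -> new ArrayList<>()).add("log line 2");
        System.out.println(byMail); // {abuse@example.org=[log line 1, log line 2]}
    }
}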

From source file:DIA_Umpire_To_Skyline.DIA_Umpire_To_Skyline.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println(
            "=================================================================================================");
    System.out.println("DIA-Umpire_To_Skyline (version: " + UmpireInfo.GetInstance().Version + ")");
    if (args.length < 1) {
        System.out.println(
                "command format error, it should be like: java -jar -Xmx20G DIA_Umpire_To_Skyline.jar Path NoThreads");
        System.out.println("command : java -jar -Xmx20G DIA_Umpire_To_Skyline.jar Path [Option]\n");
        System.out.println("\nOptions");
        System.out.println("\t-t\tNo. of threads, Ex: -t4 (using four threads, default value)");
        System.out.println(
                "\t-cP\tPath of msconvert.exe for mzXML conversion, Ex: -cP C:/inetpub/tpp-bin/msconvert");
        return;
    }
    try {
        ConsoleLogger.SetConsoleLogger(Level.DEBUG);
        ConsoleLogger.SetFileLogger(Level.DEBUG,
                FilenameUtils.getFullPath(args[0]) + "diaumpire_to_skyline.log");
    } catch (Exception e) {
        System.out.println("Logger initialization failed");
    }

    Logger.getRootLogger().info("Path:" + args[0]);
    String msconvertpath = "C:/inetpub/tpp-bin/msconvert";

    String WorkFolder = args[0];
    int NoCPUs = 4;

    for (int i = 1; i < args.length; i++) {
        if (args[i].startsWith("-")) {
            if (args[i].startsWith("-cP")) {
                msconvertpath = args[i].substring(3);
                Logger.getRootLogger().info("MSConvert path: " + msconvertpath);
            }
            if (args[i].startsWith("-t")) {
                NoCPUs = Integer.parseInt(args[i].substring(2));
                Logger.getRootLogger().info("No. of threads: " + NoCPUs);
            }
        }
    }

    HashMap<String, File> AssignFiles = new HashMap<>();
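    // keyed by absolute path, so a file reached twice collapses to a single entry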

    try {
        File folder = new File(WorkFolder);
        if (!folder.exists()) {
            Logger.getRootLogger().info("Path: " + folder.getAbsolutePath() + " cannot be found.");
            return; // listFiles() would return null below, so bail out early
        }
        for (final File fileEntry : folder.listFiles()) {
            if (fileEntry.isFile() && fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
            }
            if (fileEntry.isDirectory()) {
                for (final File fileEntry2 : fileEntry.listFiles()) {
                    if (fileEntry2.isFile() && fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                        AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                    }
                }
            }
        }

        Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size());
        for (File fileEntry : AssignFiles.values()) {
            Logger.getRootLogger().info(fileEntry.getAbsolutePath());
        }

        ExecutorService executorPool = null;
        executorPool = Executors.newFixedThreadPool(3);

        for (File fileEntry : AssignFiles.values()) {
            String mzXMLFile = fileEntry.getAbsolutePath();
            FileThread thread = new FileThread(mzXMLFile, NoCPUs, msconvertpath);
            executorPool.execute(thread);
        }
        executorPool.shutdown();
        try {
            executorPool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            Logger.getRootLogger().info("interrupted..");
        }
    } catch (Exception e) {
        Logger.getRootLogger().error(e.getMessage());
        throw e;
    }
    Logger.getRootLogger().info("Job done");
    Logger.getRootLogger().info(
            "=================================================================================================");

}

From source file:com.era7.bioinfo.annotation.AutomaticQualityControl.java

public static void main(String[] args) {

    if (args.length != 4) {
        System.out.println("This program expects four parameters: \n" + "1. Gene annotation XML filename \n"
                + "2. Reference protein set (.fasta)\n" + "3. Output TXT filename\n"
                + "4. Initial Blast XML results filename (the one used at the very beginning of the semiautomatic annotation process)\n");
    } else {

        BufferedWriter outBuff = null;

        try {

            File inFile = new File(args[0]);
            File fastaFile = new File(args[1]);
            File outFile = new File(args[2]);
            File blastFile = new File(args[3]);

            // First, load all the data from the blast XML file
            BufferedReader buffReader = new BufferedReader(new FileReader(blastFile));
            StringBuilder stBuilder = new StringBuilder();
            String line = null;

            while ((line = buffReader.readLine()) != null) {
                stBuilder.append(line);
            }

            buffReader.close();
            System.out.println("Creating blastoutput...");
            BlastOutput blastOutput = new BlastOutput(stBuilder.toString());
            System.out.println("BlastOutput created! :)");
            stBuilder.delete(0, stBuilder.length());

            HashMap<String, String> blastProteinsMap = new HashMap<String, String>();
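            // index each BLAST iteration by the accession in its query definition (second pipe-separated field)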
            ArrayList<Iteration> iterations = blastOutput.getBlastOutputIterations();
            for (Iteration iteration : iterations) {
                blastProteinsMap.put(iteration.getQueryDef().split("\\|")[1].trim(), iteration.toString());
            }
            //freeing some memory
            blastOutput = null;
            //------------------------------------------------------------------------

            //Initializing writer for output file
            outBuff = new BufferedWriter(new FileWriter(outFile));

            //reading gene annotation xml file.....
            buffReader = new BufferedReader(new FileReader(inFile));
            stBuilder = new StringBuilder();
            line = null;
            while ((line = buffReader.readLine()) != null) {
                stBuilder.append(line);
            }
            buffReader.close();

            XMLElement genesXML = new XMLElement(stBuilder.toString());
            //freeing some memory I don't need anymore
            stBuilder.delete(0, stBuilder.length());

            //reading file with the reference proteins set
            ArrayList<String> proteinsReferenceSet = new ArrayList<String>();
            buffReader = new BufferedReader(new FileReader(fastaFile));
            while ((line = buffReader.readLine()) != null) {
                if (line.charAt(0) == '>') {
                    proteinsReferenceSet.add(line.split("\\|")[1]);
                }
            }
            buffReader.close();

            Element pGenes = genesXML.asJDomElement().getChild(PredictedGenes.TAG_NAME);

            List<Element> contigs = pGenes.getChildren(ContigXML.TAG_NAME);

            System.out.println("There are " + contigs.size() + " contigs to be checked... ");

            outBuff.write("There are " + contigs.size() + " contigs to be checked... \n");
            outBuff.write("Proteins reference set: \n");
            for (String st : proteinsReferenceSet) {
                outBuff.write(st + ",");
            }
            outBuff.write("\n");

            for (Element elem : contigs) {
                ContigXML contig = new ContigXML(elem);

                // write the id of the contig currently being checked
                outBuff.write("Checking contig: " + contig.getId() + "\n");
                outBuff.flush();

                List<XMLElement> geneList = contig.getChildrenWith(PredictedGene.TAG_NAME);
                System.out.println("geneList.size() = " + geneList.size());

                int numeroDeGenesParaAnalizar = geneList.size() / FACTOR;
                if (numeroDeGenesParaAnalizar == 0) {
                    numeroDeGenesParaAnalizar++;
                }

                ArrayList<Integer> indicesUtilizados = new ArrayList<Integer>();

                outBuff.write("\nThe contig has " + geneList.size() + " predicted genes, let's analyze: "
                        + numeroDeGenesParaAnalizar + "\n");

                for (int j = 0; j < numeroDeGenesParaAnalizar; j++) {
                    int geneIndex;

                    boolean geneIsDismissed = false;
                    do {
                        geneIsDismissed = false;
                        geneIndex = (int) Math.round(Math.floor(Math.random() * geneList.size()));
                        PredictedGene tempGene = new PredictedGene(geneList.get(geneIndex).asJDomElement());
                        if (tempGene.getStatus().equals(PredictedGene.STATUS_DISMISSED)) {
                            geneIsDismissed = true;
                        }
                    } while (indicesUtilizados.contains(geneIndex) || geneIsDismissed); // retry while the index is already used or the gene is dismissed

                    indicesUtilizados.add(geneIndex);
                    System.out.println("geneIndex = " + geneIndex);

                    // Now fetch the gene for that index and run the quality control on it
                    PredictedGene gene = new PredictedGene(geneList.get(geneIndex).asJDomElement());

                    outBuff.write("\nAnalyzing gene with id: " + gene.getId() + " , annotation uniprot id: "
                            + gene.getAnnotationUniprotId() + "\n");
                    outBuff.write("eValue: " + gene.getEvalue() + "\n");

                    //--------------BLAST HTTP POST REQUEST----------------------
                    PostMethod post = new PostMethod(BLAST_URL);
                    post.addParameter("program", "blastx");
                    post.addParameter("sequence", gene.getSequence());
                    post.addParameter("database", "uniprotkb");
                    post.addParameter("email", "ppareja@era7.com");
                    post.addParameter("exp", "1e-10");
                    post.addParameter("stype", "dna");

                    // execute the POST
                    HttpClient client = new HttpClient();
                    int status = client.executeMethod(post);
                    System.out.println("status post = " + status);
                    InputStream inStream = post.getResponseBodyAsStream();

                    String fileName = "jobid.txt";
                    FileOutputStream outStream = new FileOutputStream(new File(fileName));
                    byte[] buffer = new byte[1024];
                    int len;

                    while ((len = inStream.read(buffer)) != -1) {
                        outStream.write(buffer, 0, len);
                    }
                    outStream.close();

                    //Once the file is created I just have to read one line in order to extract the job id
                    buffReader = new BufferedReader(new FileReader(new File(fileName)));
                    String jobId = buffReader.readLine();
                    buffReader.close();

                    System.out.println("jobId = " + jobId);

                    //--------------HTTP CHECK JOB STATUS REQUEST----------------------
                    GetMethod get = new GetMethod(CHECK_JOB_STATUS_URL + jobId);
                    String jobStatus = "";
                    do {

                        try {
                            Thread.sleep(1000); // sleep for 1000 ms
                        } catch (InterruptedException ie) {
                            // if this thread was interrupted by another thread
                        }

                        status = client.executeMethod(get);
                        //System.out.println("status get = " + status);

                        inStream = get.getResponseBodyAsStream();

                        fileName = "jobStatus.txt";
                        outStream = new FileOutputStream(new File(fileName));

                        while ((len = inStream.read(buffer)) != -1) {
                            outStream.write(buffer, 0, len);
                        }
                        outStream.close();

                        //Once the file is created I just have to read one line in order to extract the job id
                        buffReader = new BufferedReader(new FileReader(new File(fileName)));
                        jobStatus = buffReader.readLine();
                        //System.out.println("jobStatus = " + jobStatus);
                        buffReader.close();

                    } while (!jobStatus.equals(FINISHED_JOB_STATUS));

                    //Once I'm here the blast should've already finished

                    //--------------JOB RESULTS HTTP REQUEST----------------------
                    get = new GetMethod(JOB_RESULT_URL + jobId + "/out");

                    status = client.executeMethod(get);
                    System.out.println("status get = " + status);

                    inStream = get.getResponseBodyAsStream();

                    fileName = "jobResults.txt";
                    outStream = new FileOutputStream(new File(fileName));

                    while ((len = inStream.read(buffer)) != -1) {
                        outStream.write(buffer, 0, len);
                    }
                    outStream.close();

                    //--------parsing the blast results file-----

                    TreeSet<GeneEValuePair> featuresBlast = new TreeSet<GeneEValuePair>();

                    buffReader = new BufferedReader(new FileReader(new File(fileName)));
                    while ((line = buffReader.readLine()) != null) {
                        if (line.length() > 3) {
                            String prefix = line.substring(0, 3);
                            if (prefix.equals("TR:") || prefix.equals("SP:")) {
                                String[] columns = line.split(" ");
                                String id = columns[1];
                                //System.out.println("id = " + id);

                                String e = "";

                                String[] arraySt = line.split("\\.\\.\\.");
                                if (arraySt.length > 1) {
                                    arraySt = arraySt[1].trim().split(" ");
                                    int contador = 0;
                                    for (int k = 0; k < arraySt.length && contador <= 2; k++) {
                                        String string = arraySt[k];
                                        if (!string.equals("")) {
                                            contador++;
                                            if (contador == 2) {
                                                e = string;
                                            }
                                        }

                                    }
                                } else {
                                    //Number before e-
                                    String[] arr = arraySt[0].split("e-")[0].split(" ");
                                    String numeroAntesE = arr[arr.length - 1];
                                    String numeroDespuesE = arraySt[0].split("e-")[1].split(" ")[0];
                                    e = numeroAntesE + "e-" + numeroDespuesE;
                                }

                                double eValue = Double.parseDouble(e);
                                //System.out.println("eValue = " + eValue);
                                GeneEValuePair g = new GeneEValuePair(id, eValue);
                                featuresBlast.add(g);
                            }
                        }
                    }

                    GeneEValuePair currentGeneEValuePair = new GeneEValuePair(gene.getAnnotationUniprotId(),
                            gene.getEvalue());

                    System.out.println("currentGeneEValuePair.id = " + currentGeneEValuePair.id);
                    System.out.println("currentGeneEValuePair.eValue = " + currentGeneEValuePair.eValue);
                    boolean blastContainsGene = false;
                    for (GeneEValuePair geneEValuePair : featuresBlast) {
                        if (geneEValuePair.id.equals(currentGeneEValuePair.id)) {
                            blastContainsGene = true;
                            // use the eValue it has in the WU-BLAST result so the two can be compared
                            currentGeneEValuePair.eValue = geneEValuePair.eValue;
                            break;
                        }
                    }

                    if (blastContainsGene) {
                        outBuff.write("The protein was found in the WU-BLAST result.. \n");
                        // Once we know it is in the blast results, check whether it is the best hit
                        GeneEValuePair first = featuresBlast.first();
                        outBuff.write("Protein with best eValue according to the WU-BLAST result: " + first.id
                                + " , " + first.eValue + "\n");
                        if (first.id.equals(currentGeneEValuePair.id)) {
                            outBuff.write("Proteins with best eValue match up \n");
                        } else {
                            if (first.eValue == currentGeneEValuePair.eValue) {
                                outBuff.write(
                                        "The one with best eValue is not the same protein but has the same eValue \n");
                            } else if (first.eValue > currentGeneEValuePair.eValue) {
                                outBuff.write(
                                        "The one with best eValue is not the same protein but has a worse eValue :) \n");
                            } else {
                                outBuff.write(
                                        "The best protein from BLAST has an eValue smaller than ours, checking if it's part of the reference set...\n");
                                //System.exit(-1);
                                if (proteinsReferenceSet.contains(first.id)) {
                                    //The protein is in the reference set and that shouldn't happen
                                    outBuff.write(
                                            "The protein was found on the reference set, checking if it belongs to the same contig...\n");
                                    String iterationSt = blastProteinsMap.get(gene.getAnnotationUniprotId());
                                    if (iterationSt != null) {
                                        outBuff.write(
                                                "The protein was found in the BLAST used at the beginning of the annotation process.\n");
                                        Iteration iteration = new Iteration(iterationSt);
                                        ArrayList<Hit> hits = iteration.getIterationHits();
                                        boolean contigFound = false;
                                        Hit errorHit = null;
                                        for (Hit hit : hits) {
                                            if (hit.getHitDef().indexOf(contig.getId()) >= 0) {
                                                contigFound = true;
                                                errorHit = hit;
                                                break;
                                            }
                                        }
                                        if (contigFound) {
                                            outBuff.write(
                                                    "ERROR: A hit from the same contig was find in the Blast file: \n"
                                                            + errorHit.toString() + "\n");
                                        } else {
                                            outBuff.write("There is no hit with the same contig! :)\n");
                                        }
                                    } else {
                                        outBuff.write(
                                                "The protein is NOT in the BLAST used at the beginning of the annotation process.\n");
                                    }

                                } else {
                                    //The protein was not found in the reference set, so everything's ok
                                    outBuff.write(
                                            "The protein was not found in the reference set, everything's ok :)\n");
                                }
                            }
                        }

                    } else {
                        outBuff.write("The protein was NOT found on the WU-BLAST !! :( \n");

                        //System.exit(-1);
                    }

                }

            }

        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            try {
                //close the output file
                outBuff.close();
            } catch (IOException ex) {
                Logger.getLogger(AutomaticQualityControl.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

    }
}
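
A side note on the map lookups in the example above: the null check after blastProteinsMap.get(...) relies on Map.get returning null for absent keys, so a single get both fetches the value and tests membership. A minimal, self-contained sketch of that idiom (map name and contents here are hypothetical stand-ins):

import java.util.HashMap;

public class LookupSketch {
    public static void main(String[] args) {
        HashMap<String, String> blastIterations = new HashMap<String, String>(); // stand-in for blastProteinsMap
        blastIterations.put("P12345", "<Iteration>...</Iteration>");

        // get returns null when the key is absent, so one call fetches and tests membership
        String iterationSt = blastIterations.get("P99999");
        if (iterationSt != null) {
            System.out.println("Found: " + iterationSt);
        } else {
            System.out.println("Not in the map.");
        }
    }
}

Note that the idiom conflates absent keys with keys mapped to null, which is safe only as long as null values are never stored.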

From source file:io.anserini.index.UpdateIndex.java

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(UpdateIndex.class.getName(), options);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    final FieldType textOptions = new FieldType();
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    textOptions.setStoreTermVectors(true);

    LOG.info("index: " + indexPath);

    File file = new File("PittsburghUserTimeline");
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    final StatusStream stream = new JsonStatusCorpusReader(file);

    Status status;
    String s;
    HashMap<Long, String> hm = new HashMap<Long, String>();
    try {
        while ((s = stream.nextRaw()) != null) {
            try {
                status = DataObjectFactory.createStatus(s);

                if (status.getText() == null) {
                    continue;
                }

                //get returns null the first time a user is seen, so guard against a literal "null" prefix
                String existing = hm.get(status.getUser().getId());
                hm.put(status.getUser().getId(),
                        (existing == null ? "" : existing) + status.getText().replaceAll("[\\r\\n]+", " "));

            } catch (Exception e) {
                //skip statuses that fail to parse
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    } finally {

        stream.close();
    }

    ArrayList<String> userIDList = new ArrayList<String>();
    try (BufferedReader br = new BufferedReader(new FileReader(new File("userID")))) {
        String line;
        while ((line = br.readLine()) != null) {
            userIDList.add(line.replaceAll("[\\r\\n]+", ""));

        }
    }

    try {
        reader = DirectoryReader
                .open(FSDirectory.open(new File(cmdline.getOptionValue(INDEX_OPTION)).toPath()));
    } catch (IOException e) {
        e.printStackTrace();
    }

    final Directory dir = new SimpleFSDirectory(Paths.get(cmdline.getOptionValue(INDEX_OPTION)));
    final IndexWriterConfig config = new IndexWriterConfig(ANALYZER);

    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    final IndexWriter writer = new IndexWriter(dir, config);

    IndexSearcher searcher = new IndexSearcher(reader);
    System.out.println("The total number of docs indexed "
            + searcher.collectionStatistics(TweetStreamReader.StatusField.TEXT.name).docCount());

    for (int city = 0; city < cityName.length; city++) {

        // Pittsburgh's coordinate -79.976389, 40.439722

        Query q_long = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LONGITUDE.name,
                longitude[city] - 0.05, longitude[city] + 0.05, true, true);
        Query q_lat = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LATITUDE.name,
                latitude[city] - 0.05, latitude[city] + 0.05, true, true);

        BooleanQuery bqCityName = new BooleanQuery();

        Term t = new Term("place", cityName[city]);
        TermQuery query = new TermQuery(t);
        bqCityName.add(query, BooleanClause.Occur.SHOULD);
        System.out.println(query.toString());

        for (int i = 0; i < cityNameAlias[city].length; i++) {
            t = new Term("place", cityNameAlias[city][i]);
            query = new TermQuery(t);
            bqCityName.add(query, BooleanClause.Occur.SHOULD);
            System.out.println(query.toString());
        }

        BooleanQuery bq = new BooleanQuery();

        BooleanQuery finalQuery = new BooleanQuery();

        // either a coordinate match
        bq.add(q_long, BooleanClause.Occur.MUST);
        bq.add(q_lat, BooleanClause.Occur.MUST);

        finalQuery.add(bq, BooleanClause.Occur.SHOULD);
        // or a place city name match
        finalQuery.add(bqCityName, BooleanClause.Occur.SHOULD);

        TotalHitCountCollector totalHitCollector = new TotalHitCountCollector();

        searcher.search(finalQuery, totalHitCollector);

        if (totalHitCollector.getTotalHits() > 0) {
            TopScoreDocCollector collector = TopScoreDocCollector
                    .create(Math.max(0, totalHitCollector.getTotalHits()));
            searcher.search(finalQuery, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            System.out.println("City " + cityName[city] + " " + collector.getTotalHits() + " hits.");

            HashMap<String, Integer> hasHit = new HashMap<String, Integer>();
            int dupcount = 0;
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d;

                d = searcher.doc(docId);

                if (userIDList.contains(d.get(IndexTweets.StatusField.USER_ID.name))
                        && hm.containsKey(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name)))) {
                    //            System.out.println("Has timeline field?" + (d.get("timeline") != null));
                    //            System.out.println(reader.getDocCount("timeline"));
                    //            d.add(new Field("timeline", hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))),
                    //                textOptions));
                    System.out.println("Found a user hit");
                    BytesRefBuilder brb = new BytesRefBuilder();
                    NumericUtils.longToPrefixCodedBytes(Long.parseLong(d.get(IndexTweets.StatusField.ID.name)),
                            0, brb);
                    Term term = new Term(IndexTweets.StatusField.ID.name, brb.get());

                    Document d_new = new Document();
                    d_new.add(new StringField("userBackground", d.get(IndexTweets.StatusField.USER_ID.name),
                            Store.YES));
                    d_new.add(new Field("timeline",
                            hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))), textOptions));
                    writer.addDocument(d_new);
                    writer.commit();

                }

            }
        }
    }
    reader.close();
    writer.close();

}
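
The timeline loop in the example above appends each status to the user's accumulated text with a get followed by a put. On Java 8 and later, HashMap.merge expresses the same put-or-append in one call; a minimal sketch with made-up data:

import java.util.HashMap;

public class MergeSketch {
    public static void main(String[] args) {
        HashMap<Long, String> timelines = new HashMap<Long, String>();

        // merge puts the value if the key is absent, otherwise applies the remapping function
        timelines.merge(42L, "first tweet ", String::concat);
        timelines.merge(42L, "second tweet", String::concat);

        System.out.println(timelines.get(42L)); // prints: first tweet second tweet
    }
}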

From source file:eu.fbk.dkm.sectionextractor.PageClassMerger.java

public static void main(String args[]) throws IOException {

    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("WikiData ID file").isRequired().withLongOpt("wikidata-id").create("i"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("Airpedia Person file").isRequired().withLongOpt("airpedia").create("a"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Output file")
            .isRequired().withLongOpt("output").create("o"));
    CommandLine commandLine = null;
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        System.err.println("Error parsing command line: " + e.getMessage());
        System.exit(1);
    }

    String wikiIDFileName = commandLine.getOptionValue("wikidata-id");
    String airpediaFileName = commandLine.getOptionValue("airpedia");
    String outputFileName = commandLine.getOptionValue("output");

    HashMap<Integer, String> wikiIDs = new HashMap<>();
    HashSet<Integer> airpediaClasses = new HashSet<>();

    List<String> strings;

    logger.info("Loading file " + wikiIDFileName);
    strings = Files.readLines(new File(wikiIDFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }

        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }

        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        wikiIDs.put(id, parts[1]);
    }

    logger.info("Loading file " + airpediaFileName);
    strings = Files.readLines(new File(airpediaFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }

        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }

        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        airpediaClasses.add(id);
    }

    logger.info("Saving information");
    BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName));
    for (int i : wikiIDs.keySet()) {
        if (!airpediaClasses.contains(i)) {
            continue;
        }

        writer.append(wikiIDs.get(i)).append("\n");
    }
    writer.close();
}
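
The final loop in the example above iterates wikiIDs.keySet() and then calls get for each key, which costs a second hash lookup per entry. Iterating entrySet() yields the key and the value in one pass; a minimal sketch with hypothetical entries:

import java.util.HashMap;
import java.util.Map;

public class EntrySetSketch {
    public static void main(String[] args) {
        HashMap<Integer, String> wikiIDs = new HashMap<Integer, String>();
        wikiIDs.put(1, "Q42");
        wikiIDs.put(2, "Q64");

        // each entry carries both key and value, so no second lookup is needed
        for (Map.Entry<Integer, String> entry : wikiIDs.entrySet()) {
            System.out.println(entry.getKey() + "\t" + entry.getValue());
        }
    }
}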

From source file:mujava.cli.genmutes.java

public static void main(String[] args) throws Exception {
    // System.out.println("test");
    genmutesCom jct = new genmutesCom();
    String[] argv = { "-all", "-debug", "Flower" }; // development use, when release,
    // comment out this line
    JCommander jCommander = new JCommander(jct, args);

    // check session name: exactly one parameter (the session name) is expected
    if (jct.getParameters().size() != 1) {
        Util.Error("Expected exactly one parameter: the session name.");
        return;
    }

    // set session name
    String sessionName = jct.getParameters().get(0);

    muJavaHomePath = Util.loadConfig();
    // check if debug mode
    if (jct.isDebug()) {
        Util.debug = true;
    }

    // get all existing session name
    File folder = new File(muJavaHomePath);
    // check if the config file has defined the correct folder
    if (!folder.isDirectory()) {
        Util.Error("ERROR: cannot locate the folder specified in mujava.config");
        return;
    }
    File[] listOfFiles = folder.listFiles();
    // listFiles() returns null if the folder cannot be read
    if (listOfFiles == null) {
        Util.Error("ERROR: no files in the muJava home folder: " + muJavaHomePath);
        return;
    }
    List<String> fileNameList = new ArrayList<>();
    for (File file : listOfFiles) {
        fileNameList.add(file.getName());
    }

    // check if session is already created.
    if (!fileNameList.contains(sessionName)) {
        Util.Error("Session does not exist.");
        return;

    }

    // get all files in the session
    File sessionFolder = new File(muJavaHomePath + "/" + sessionName + "/src");
    File[] listOfFilesInSession = sessionFolder.listFiles();
    String[] file_list = new String[listOfFilesInSession.length];
    for (int i = 0; i < listOfFilesInSession.length; i++) {
        file_list[i] = listOfFilesInSession[i].getName();
    }

    // get all mutation operators selected
    HashMap<String, List<String>> ops = new HashMap<String, List<String>>(); // values hold the random percentage and maximum settings

    String[] paras = new String[] { "1", "0" };
    // every mutation operator supported by the tool
    String[] allOps = { "AORB", "AORS", "AOIU", "AOIS", "AODU", "AODS", "ROR", "COR", "COD", "COI",
            "SOR", "LOR", "LOI", "LOD", "ASRS", "SDL", "ODL", "VDL", "CDL" };
    if (jct.getAll()) // all is selected, add all operators
    {
        // if all is selected, all mutation operators are added
        for (String op : allOps) {
            ops.put(op, new ArrayList<String>(Arrays.asList(paras)));
        }

    } else { // if not all, add selected ops to the list
        if (jct.getAORB()) {
            ops.put("AORB", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getAORS()) {
            ops.put("AORS", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getAOIU()) {
            ops.put("AOIU", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getAOIS()) {
            ops.put("AOIS", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getAODU()) {
            ops.put("AODU", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getAODS()) {
            ops.put("AODS", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getROR()) {
            ops.put("ROR", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getCOR()) {
            ops.put("COR", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getCOD()) {
            ops.put("COD", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getCOI()) {
            ops.put("COI", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getSOR()) {
            ops.put("SOR", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getLOR()) {
            ops.put("LOR", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getLOI()) {
            ops.put("LOI", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getLOD()) {
            ops.put("LOD", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getASRS()) {
            ops.put("ASRS", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getSDL()) {
            ops.put("SDL", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getVDL()) {
            ops.put("VDL", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getODL()) {
            ops.put("ODL", new ArrayList<String>(Arrays.asList(paras)));
        }
        if (jct.getCDL()) {
            ops.put("CDL", new ArrayList<String>(Arrays.asList(paras)));
        }
    }

    // add default option "all"
    if (ops.size() == 0) {
        ops.put("AORB", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AORS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AOIU", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AOIS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AODU", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AODS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ROR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COD", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COI", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("SOR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOI", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOD", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ASRS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("SDL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ODL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("VDL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("CDL", new ArrayList<String>(Arrays.asList(paras)));
    }

    // set system
    setJMutationStructureAndSession(sessionName);
    // MutationSystem.setJMutationStructureAndSession(sessionName);
    MutationSystem.recordInheritanceRelation();
    // generate mutants
    generateMutants(file_list, ops);

    //System.exit(0);
}
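
Each put in the example above stores a fresh ArrayList rather than one shared list. That is deliberate: if every operator were mapped to the same list object, changing the settings of one operator would silently change them all. A minimal sketch of the aliasing hazard (operator names reused from the example, settings made up):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

public class AliasSketch {
    public static void main(String[] args) {
        HashMap<String, List<String>> ops = new HashMap<String, List<String>>();

        // storing the same list under two keys aliases it...
        List<String> shared = new ArrayList<String>(Arrays.asList("1", "0"));
        ops.put("AORB", shared);
        ops.put("ROR", shared);
        ops.get("AORB").set(0, "0.5");
        System.out.println(ops.get("ROR")); // prints: [0.5, 0] -- changed as well

        // ...while a fresh copy per key keeps entries independent
        ops.put("ROR", new ArrayList<String>(Arrays.asList("1", "0")));
        ops.get("AORB").set(0, "0.9");
        System.out.println(ops.get("ROR")); // prints: [1, 0]
    }
}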