List of usage examples for java.util.Iterator.next()

Method signature:

E next();

Returns the next element in the iteration and advances the cursor; throws NoSuchElementException if the iteration has no more elements.
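As a baseline for the project-specific examples that follow, here is a minimal, self-contained sketch of the canonical hasNext()/next() loop. The list contents are placeholders chosen for illustration; only java.util types are used.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

public class IteratorNextBasics {
    public static void main(String[] args) {
        List<String> names = Arrays.asList("alpha", "beta", "gamma");
        Iterator<String> it = names.iterator();
        // Standard pattern: always guard next() with hasNext().
        while (it.hasNext()) {
            String name = it.next(); // returns the next element and advances the cursor
            System.out.println(name);
        }
        // Calling next() once the iteration is exhausted throws NoSuchElementException.
        try {
            it.next();
        } catch (NoSuchElementException e) {
            System.out.println("iterator exhausted: " + e);
        }
    }
}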
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
public static void main(String[] args) throws IOException { final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();/* w ww . ja v a 2 s .c o m*/ System.exit(1); } if (paths.length > 1 && !params.checkInputOutput()) { printUsage(); System.exit(1); } final Path inputFile = params.getInputPath(); int count = params.getInt("count", 1); double closeness = params.getFloat("closeness", -1.0f); final Point[] queryPoints = closeness < 0 ? params.getShapes("point", new Point()) : new Point[count]; final FileSystem fs = inputFile.getFileSystem(params); final int k = params.getInt("k", 1); int concurrency = params.getInt("concurrency", 100); if (k == 0) { LOG.warn("k = 0"); } if (queryPoints.length == 0) { printUsage(); throw new RuntimeException("Illegal arguments"); } final Path outputPath = paths.length > 1 ? paths[1] : null; if (closeness >= 0) { // Get query points according to its closeness to grid intersections GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inputFile); long seed = params.getLong("seed", System.currentTimeMillis()); Random random = new Random(seed); for (int i = 0; i < count; i++) { int i_block = random.nextInt(gindex.size()); int direction = random.nextInt(4); // Generate a point in the given direction // Get center point (x, y) Iterator<Partition> iterator = gindex.iterator(); while (i_block-- >= 0) iterator.next(); Partition partition = iterator.next(); double cx = (partition.x1 + partition.x2) / 2; double cy = (partition.y1 + partition.y2) / 2; double cw = partition.x2 - partition.x1; double ch = partition.y2 - partition.y1; int signx = ((direction & 1) == 0) ? 1 : -1; int signy = ((direction & 2) == 1) ? 
1 : -1; double x = cx + cw * closeness / 2 * signx; double y = cy + ch * closeness / 2 * signy; queryPoints[i] = new Point(x, y); } } final BooleanWritable exceptionHappened = new BooleanWritable(); Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread th, Throwable ex) { ex.printStackTrace(); exceptionHappened.set(true); } }; // Run each query in a separate thread final Vector<Thread> threads = new Vector<Thread>(); for (int i = 0; i < queryPoints.length; i++) { Thread thread = new Thread() { @Override public void run() { try { Point query_point = queryPoints[threads.indexOf(this)]; OperationsParams newParams = new OperationsParams(params); OperationsParams.setShape(newParams, "point", query_point); Job job = knn(inputFile, outputPath, params); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } }; thread.setUncaughtExceptionHandler(h); threads.add(thread); } long t1 = System.currentTimeMillis(); do { // Ensure that there is at least MaxConcurrentThreads running int i = 0; while (i < concurrency && i < threads.size()) { Thread.State state = threads.elementAt(i).getState(); if (state == Thread.State.TERMINATED) { // Thread already terminated, remove from the queue threads.remove(i); } else if (state == Thread.State.NEW) { // Start the thread and move to next one threads.elementAt(i++).start(); } else { // Thread is still running, skip over it i++; } } if (!threads.isEmpty()) { try { // Sleep for 10 seconds or until the first thread terminates threads.firstElement().join(10000); } catch (InterruptedException e) { e.printStackTrace(); } } } while (!threads.isEmpty()); long t2 = System.currentTimeMillis(); if (exceptionHappened.get()) throw new RuntimeException("Not all jobs finished correctly"); System.out.println("Time for " + queryPoints.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Total iterations: " + TotalIterations); }
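The example above uses next() to skip ahead to a randomly chosen partition of the global index (while (i_block-- >= 0) iterator.next(); followed by one more iterator.next()). Note that this advances the iterator i_block + 2 positions in total, so it can throw NoSuchElementException when the last block index is drawn. Below is a minimal sketch of the same skip-ahead idiom with the count made explicit, using a plain list of placeholder partition names in place of GlobalIndex<Partition>.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

public class SkipToRandomElement {
    public static void main(String[] args) {
        List<String> partitions = Arrays.asList("p0", "p1", "p2", "p3");
        Random random = new Random();
        int target = random.nextInt(partitions.size()); // index of the element to fetch
        Iterator<String> it = partitions.iterator();
        // Skip the first 'target' elements ...
        for (int skipped = 0; skipped < target; skipped++) {
            it.next();
        }
        // ... so this call returns the element at position 'target'.
        String chosen = it.next();
        System.out.println("chosen partition: " + chosen);
    }
}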
From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java
public static void main(String[] args) throws IOException { args = new String[10]; args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife/yyyy-MM/2008-05"; args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result"; args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory"; args[3] = "interval:2008-05-01,2008-05-30"; args[4] = "time:month"; args[5] = "k:1"; args[6] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883"; args[7] = "-overwrite"; args[8] = "-local";// "-no-local"; args[9] = "point:39.9119983,116.606835"; final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();// w w w .ja va2 s.co m System.exit(1); } if (paths.length > 1 && !params.checkInputOutput()) { printUsage(); System.exit(1); } final Path inputFile = params.getInputPath(); int count = params.getInt("count", 1); double closeness = params.getFloat("closeness", -1.0f); final Point[] queryPoints = closeness < 0 ? params.getShapes("point", new Point()) : new Point[count]; final FileSystem fs = inputFile.getFileSystem(params); final int k = params.getInt("k", 1); int concurrency = params.getInt("concurrency", 100); if (k == 0) { LOG.warn("k = 0"); } if (queryPoints.length == 0) { printUsage(); throw new RuntimeException("Illegal arguments"); } final Path outputPath = paths.length > 1 ? paths[1] : null; if (closeness >= 0) { // Get query points according to its closeness to grid intersections GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inputFile); long seed = params.getLong("seed", System.currentTimeMillis()); Random random = new Random(seed); for (int i = 0; i < count; i++) { int i_block = random.nextInt(gindex.size()); int direction = random.nextInt(4); // Generate a point in the given direction // Get center point (x, y) Iterator<Partition> iterator = gindex.iterator(); while (i_block-- >= 0) iterator.next(); Partition partition = iterator.next(); double cx = (partition.x1 + partition.x2) / 2; double cy = (partition.y1 + partition.y2) / 2; double cw = partition.x2 - partition.x1; double ch = partition.y2 - partition.y1; int signx = ((direction & 1) == 0) ? 1 : -1; int signy = ((direction & 2) == 1) ? 
1 : -1; double x = cx + cw * closeness / 2 * signx; double y = cy + ch * closeness / 2 * signy; queryPoints[i] = new Point(x, y); } } final BooleanWritable exceptionHappened = new BooleanWritable(); Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread th, Throwable ex) { ex.printStackTrace(); exceptionHappened.set(true); } }; // Run each query in a separate thread final Vector<Thread> threads = new Vector<Thread>(); for (int i = 0; i < queryPoints.length; i++) { Thread thread = new Thread() { @Override public void run() { try { Point query_point = queryPoints[threads.indexOf(this)]; OperationsParams newParams = new OperationsParams(params); OperationsParams.setShape(newParams, "point", query_point); Job job = knn(inputFile, outputPath, params); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } }; thread.setUncaughtExceptionHandler(h); threads.add(thread); } long t1 = System.currentTimeMillis(); do { // Ensure that there is at least MaxConcurrentThreads running int i = 0; while (i < concurrency && i < threads.size()) { Thread.State state = threads.elementAt(i).getState(); if (state == Thread.State.TERMINATED) { // Thread already terminated, remove from the queue threads.remove(i); } else if (state == Thread.State.NEW) { // Start the thread and move to next one threads.elementAt(i++).start(); } else { // Thread is still running, skip over it i++; } } if (!threads.isEmpty()) { try { // Sleep for 10 seconds or until the first thread terminates threads.firstElement().join(10000); } catch (InterruptedException e) { e.printStackTrace(); } } } while (!threads.isEmpty()); long t2 = System.currentTimeMillis(); if (exceptionHappened.get()) throw new RuntimeException("Not all jobs finished correctly"); System.out.println("Time for " + queryPoints.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Total iterations: " + TotalIterations); }
From source file:org.ala.dao.CassandraPelopsHelper.java
public static void main(String[] args) throws Exception { CassandraPelopsHelper helper = new CassandraPelopsHelper(); helper.init();//from ww w .j a v a 2s. c o m Map<String, Object> map = helper.getSubColumnsByGuid("tc", "103067807"); Set<String> keys = map.keySet(); Iterator<String> it = keys.iterator(); while (it.hasNext()) { String key = it.next(); ColumnType type = ColumnType.getColumnType(key); Object o = map.get(type.getColumnName()); if (o instanceof List) { List l = (List) o; } else { Comparable c = (Comparable) o; } } /* TaxonConcept t = null; List<Comparable> l = new ArrayList<Comparable>(); for(int i=0; i< 10; i++){ t = new TaxonConcept(); t.setId(i); t.setGuid("urn:lsid:"+i); t.setNameString("Aus bus"); t.setAuthor("Smith"); t.setAuthorYear("2008"); t.setInfoSourceName("AFD"); t.setInfoSourceURL("http://afd.org.au"); helper.putSingle("taxonConcept", "tc", "taxonConcept", t.getGuid(), t); l.add(t); if(i % 1000==0){ System.out.println("id: "+i); } } helper.putList("taxonConcept", "tc", "taxonConcept", "128", l, true); CommonName c1 = new CommonName(); c1.setNameString("Dave"); CommonName c2 = new CommonName(); c2.setNameString("Frank"); helper.putSingle("taxonConcept", "tc", "taxonConcept", "123", t); helper.put("taxonConcept", "tc", "commonName", "123", c1); helper.put("taxonConcept", "tc", "commonName", "123", c2); helper.putSingle("taxonConcept", "tc", "taxonConcept", "124", t); TaxonConcept tc = (TaxonConcept) helper.get("taxonConcept", "tc", "taxonConcept", "123", TaxonConcept.class); System.out.println("Retrieved: "+tc.getNameString()); List<CommonName> cns = (List) helper.getList("taxonConcept", "tc", "commonName", "123", CommonName.class); System.out.println("Retrieved: "+cns); */ //cassandra scanning Scanner scanner = helper.getScanner("taxonConcept", "tc", "taxonConcept"); for (int i = 0; i < 10; i++) { System.out.println(new String(scanner.getNextGuid())); } System.exit(0); }
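Here next() drives a pass over the key set of the map returned by getSubColumnsByGuid. A stripped-down sketch of that key-set loop with placeholder keys and values follows; unlike the original, it looks each value up by the key that next() just returned (the original resolves the value via ColumnType), and for a HashMap the iteration order is unspecified.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class KeySetIteration {
    public static void main(String[] args) {
        Map<String, Object> map = new HashMap<String, Object>();
        map.put("scientificName", "Aus bus");
        map.put("author", "Smith");
        Iterator<String> it = map.keySet().iterator();
        while (it.hasNext()) {
            String key = it.next();       // next key in the set
            Object value = map.get(key);  // look the value up by the key just returned
            System.out.println(key + " -> " + value);
        }
    }
}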
From source file:edu.nyu.vida.data_polygamy.relationship_computation.Relationship.java
/** * @param args/*from w w w . j a v a2 s . c o m*/ * @throws ParseException */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the relationship " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option scoreOption = new Option("sc", "score", true, "set threhsold for relationship score"); scoreOption.setRequired(false); scoreOption.setArgName("SCORE THRESHOLD"); options.addOption(scoreOption); Option strengthOption = new Option("st", "strength", true, "set threhsold for relationship strength"); strengthOption.setRequired(false); strengthOption.setArgName("STRENGTH THRESHOLD"); options.addOption(strengthOption); Option completeRandomizationOption = new Option("c", "complete-randomization", false, "use complete randomization when performing significance tests"); completeRandomizationOption.setRequired(false); options.addOption(completeRandomizationOption); Option idOption = new Option("id", "ids", false, "output id instead of names for datasets and attributes"); idOption.setRequired(false); options.addOption(idOption); Option g1Option = new Option("g1", "first-group", true, "set first group of datasets"); g1Option.setRequired(true); g1Option.setArgName("FIRST GROUP"); g1Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g1Option); Option g2Option = new Option("g2", "second-group", true, "set second group of datasets"); g2Option.setRequired(false); g2Option.setArgName("SECOND GROUP"); g2Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g2Option); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); Option removeOption = new Option("r", "remove-not-significant", false, "remove relationships that are not" + "significant from the final output"); removeOption.setRequired(false); options.addOption(removeOption); HelpFormatter formatter = new 
HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } Path path = null; FileSystem fs = FileSystem.get(new Configuration()); ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> firstGroup = new ArrayList<String>(); ArrayList<String> secondGroup = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); boolean removeNotSignificant = cmd.hasOption("r"); boolean removeExistingFiles = cmd.hasOption("f"); boolean completeRandomization = cmd.hasOption("c"); boolean hasScoreThreshold = cmd.hasOption("sc"); boolean hasStrengthThreshold = cmd.hasOption("st"); boolean outputIds = cmd.hasOption("id"); String scoreThreshold = hasScoreThreshold ? cmd.getOptionValue("sc") : ""; String strengthThreshold = hasStrengthThreshold ? cmd.getOptionValue("st") : ""; // all datasets ArrayList<String> all_datasets = new ArrayList<String>(); if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { all_datasets.add(line.split("\t")[0]); line = br.readLine(); } br.close(); if (s3) fs.close(); String[] all_datasets_array = new String[all_datasets.size()]; all_datasets.toArray(all_datasets_array); String[] firstGroupCmd = cmd.getOptionValues("g1"); String[] secondGroupCmd = cmd.hasOption("g2") ? 
cmd.getOptionValues("g2") : all_datasets_array; addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } if (firstGroup.isEmpty()) { System.out.println("No indices from datasets in G1."); System.exit(0); } if (secondGroup.isEmpty()) { System.out.println("No indices from datasets in G2."); System.exit(0); } // getting dataset ids String datasetNames = ""; String datasetIds = ""; HashMap<String, String> datasetId = new HashMap<String, String>(); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { datasetId.put(it.next(), null); } if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } br = new BufferedReader(new InputStreamReader(fs.open(path))); line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); all_datasets.add(dt[0]); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } String firstGroupStr = ""; String secondGroupStr = ""; for (String dataset : firstGroup) { firstGroupStr += datasetId.get(dataset) + ","; } for (String dataset : secondGroup) { secondGroupStr += datasetId.get(dataset) + ","; } firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1); secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1); String relationshipsDir = ""; if (outputIds) { relationshipsDir = FrameworkUtils.relationshipsIdsDir; } else { relationshipsDir = FrameworkUtils.relationshipsDir; } FrameworkUtils.createDir(s3bucket + relationshipsDir, s3conf, s3); String random = completeRandomization ? 
"complete" : "restricted"; String indexInputDirs = ""; String noRelationship = ""; HashSet<String> dirs = new HashSet<String>(); String dataset1; String dataset2; String datasetId1; String datasetId2; for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } datasetId1 = datasetId.get(dataset1); datasetId2 = datasetId.get(dataset2); if (dataset1.equals(dataset2)) continue; String correlationOutputFileName = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) { dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset1); dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset2); } else { noRelationship += datasetId1 + "-" + datasetId2 + ","; } } } if (dirs.isEmpty()) { System.out.println("All the relationships were already computed."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } for (String dir : dirs) { indexInputDirs += dir + ","; } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String jobName = "relationship" + "-" + random; String relationshipOutputDir = s3bucket + relationshipsDir + "/tmp/"; FrameworkUtils.removeFile(relationshipOutputDir, s3conf, s3); for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i))); } for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size", Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length)); } conf.set("dataset-keys", datasetIds); conf.set("dataset-names", datasetNames); conf.set("first-group", firstGroupStr); conf.set("second-group", secondGroupStr); conf.set("complete-random", String.valueOf(completeRandomization)); conf.set("output-ids", String.valueOf(outputIds)); conf.set("complete-random-str", random); conf.set("main-dataset-id", datasetId.get(shortDataset.get(0))); conf.set("remove-not-significant", String.valueOf(removeNotSignificant)); if (noRelationship.length() > 0) { conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1)); } if (hasScoreThreshold) { conf.set("score-threshold", scoreThreshold); } if (hasStrengthThreshold) { conf.set("strength-threshold", strengthThreshold); } conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); conf.set("mapreduce.task.timeout", "2400000"); if (s3) { machineConf.setMachineConfiguration(conf); conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); conf.set("bucket", s3bucket); } if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); 
conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } Job job = new Job(conf); job.setJobName(jobName); job.setMapOutputKeyClass(PairAttributeWritable.class); job.setMapOutputValueClass(TopologyTimeSeriesWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CorrelationMapper.class); job.setReducerClass(CorrelationReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); job.setInputFormatClass(SequenceFileInputFormat.class); //job.setOutputFormatClass(TextOutputFormat.class); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, indexInputDirs.substring(0, indexInputDirs.length() - 1)); FileOutputFormat.setOutputPath(job, new Path(relationshipOutputDir)); job.setJarByClass(Relationship.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } if (dataset1.equals(dataset2)) continue; String from = s3bucket + relationshipsDir + "/tmp/" + dataset1 + "-" + dataset2 + "/"; String to = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } } }
From source file:com.amalto.workbench.utils.XSDParser.java
/** * Print a simple type definition for the document. * /* ww w . j a v a 2s . co m*/ * @param xsdSimpleTypeDefinition a simple type definition in the schema for schema. */ /* * public void printSimpleTypeDefinition( XSDSimpleTypeDefinition xsdSimpleTypeDefinition) { if * (xsdSimpleTypeDefinition == null) { } else if (xsdSimpleTypeDefinition.getEffectiveEnumerationFacet() != null) { * List value = xsdSimpleTypeDefinition.getEffectiveEnumerationFacet() .getValue(); if (value.size() > 1) { * System.out.print("("); } for (Iterator enumerators = value.iterator(); enumerators.hasNext();) { String * enumerator = enumerators.next().toString(); System.out.print("<em>"); System.out.print(enumerator); * System.out.print("</em>"); if (enumerators.hasNext()) { System.out.print(" | "); } } if (value.size() > * 1) { System.out.print(")"); } } else if (xsdSimpleTypeDefinition.getElement() != null && * xsdSimpleTypeDefinition.getElement().hasAttribute( XSDConstants.ID_ATTRIBUTE)) { System.out.print("<a href='#" + * xsdSimpleTypeDefinition.getName() + "-simple-type'>"); System.out.print(xsdSimpleTypeDefinition.getName()); * System.out.print("</a>"); } else if (XSDVariety.UNION_LITERAL == xsdSimpleTypeDefinition .getVariety()) { * System.out.print("("); for (Iterator members = xsdSimpleTypeDefinition .getMemberTypeDefinitions().iterator(); * members.hasNext();) { XSDSimpleTypeDefinition memberTypeDefinition = (XSDSimpleTypeDefinition) members .next(); * printSimpleTypeDefinition(memberTypeDefinition); if (members.hasNext()) { System.out.print(" | "); } } * System.out.print(")"); } else if (XSDVariety.UNION_LITERAL == xsdSimpleTypeDefinition .getVariety()) { * System.out.print("List of "); printSimpleTypeDefinition(xsdSimpleTypeDefinition * .getItemTypeDefinition()); } else if (xsdSimpleTypeDefinition.getName() != null) { if * ("public".equals(xsdSimpleTypeDefinition.getName())) { System.out.print("<a target='Part2' href='" + * XSDConstants.PART2 + "#anyURI'>anyURI</a> "); System.out.print("<a target='Errata' href='" + errata + * "#pfipublic'><em>public</em></a>"); } else { System.out.print("<b><em>"); * System.out.print(xsdSimpleTypeDefinition.getName()); System.out.print("</em></b>"); } } else if * (xsdSimpleTypeDefinition.getEffectivePatternFacet() != null) { // * System.out.print(xsdSimpleTypeDefinition.getEffectivePatternFacet().getLexicalValue()); * * System.out.print("<em>"); System.out.print("<a target='Part1' href='" + XSDConstants.PART1 + * "#coss-identity-constraint'>"); System.out.print("a restricted xpath expression"); System.out.print("</a>"); * System.out.print("</em>"); } else { System.out.print("***"); } } */ public static void main(String args[]) { try { /* * String xsd = ""; FileInputStream fis = new FileInputStream( * "/home/bgrieder/workspace/XCBL35/XCBL35.xsd"); BufferedReader br = new BufferedReader(new * InputStreamReader(fis, "utf-8")); String line; while ((line = br.readLine()) != null) xsd += line + "\n"; * * XSDParser parser = new XSDParser(); parser.loadAndPrint(xsd); */ FileWriter fw = new FileWriter("/tmp/xcb35sr.xsd"); //$NON-NLS-1$ Resource.Factory.Registry.INSTANCE.getExtensionToFactoryMap().put("xsd", new XSDResourceFactoryImpl()); //$NON-NLS-1$ String xsdFile = "/home/bgrieder/workspace/XCBL35/XCBL35.xsd"; //$NON-NLS-1$ ResourceSet resourceSet = new ResourceSetImpl(); XSDResourceImpl xsdResource = (XSDResourceImpl) resourceSet.getResource(URI.createFileURI(xsdFile), true); /* * XSDResourceImpl res = new XSDResourceImpl(URI.createFileURI(xsdFile)); */ XSDSchema xsdSchema = 
xsdResource.getSchema(); String header = "<xsd:schema " + "elementFormDefault=\"qualified\" " //$NON-NLS-1$ //$NON-NLS-2$ + "targetNamespace=\"rrn:org.xcbl:schemas/xcbl/v3_5/xcbl35.xsd\" " //$NON-NLS-1$ + "xmlns=\"rrn:org.xcbl:schemas/xcbl/v3_5/xcbl35.xsd\" " //$NON-NLS-1$ + "xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\">"; //$NON-NLS-1$ fw.write(header); Iterator it = xsdSchema.getElementDeclarations().iterator(); for (; it.hasNext();) { XSDElementDeclaration elementDeclaration = (XSDElementDeclaration) it.next(); // if ("Order".equals(elementDeclaration.getName())) { fw.write(Util.nodeToString(elementDeclaration.getElement()) .replaceAll("xmlns:xsd=\"http:\\/\\/www\\.w3\\.org\\/2001\\/XMLSchema\"", "")); //$NON-NLS-1$ //$NON-NLS-2$ // } } it = xsdSchema.getTypeDefinitions().iterator(); for (; it.hasNext();) { XSDTypeDefinition typedef = (XSDTypeDefinition) it.next(); fw.write(Util.nodeToString(typedef.getElement())); } String footer = "</xsd:schema>"; //$NON-NLS-1$ fw.write(footer); fw.close(); } catch (Exception e) { log.error(e.getMessage(), e); } }
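This example calls next() on a raw, pre-generics Iterator inside a for header and casts every element to XSDElementDeclaration or XSDTypeDefinition. The same pattern sketched with plain JDK types, so it runs without the EMF/XSD dependencies; the element names are placeholders.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class RawIteratorCast {
    public static void main(String[] args) {
        List elements = new ArrayList();          // raw type, as in the original; expect unchecked warnings
        elements.add("Order");
        elements.add("Invoice");
        for (Iterator it = elements.iterator(); it.hasNext();) {
            String name = (String) it.next();     // a raw next() returns Object, so a cast is required
            System.out.println("element declaration: " + name);
        }
    }
}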
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
public static void main(String[] args) throws IOException { //./hadoop jar /export/scratch/louai/idea-stHadoop/st-hadoop-uber.jar pknn /mntgIndex/yyyy-MM-dd/2017-08-03 /pknn k:2 point:-78.9659,35.7998 shape:edu.umn.cs.sthadoop.mntg.STPointMntg -overwrite // args = new String[8]; // args[0] = "/export/scratch/mntgData/mntgIndex"; // args[1] = "/export/scratch/mntgData/pknn"; // args[2] = "-overwrite"; // args[3] = "k:10"; // args[4] = "point:-78.9659063204100,35.7903907684998"; // args[5] = "shape:edu.umn.cs.sthadoop.trajectory.STPointTrajectory"; // args[6] = "interval:2017-08-03,2017-08-04"; // args[7] = "-overwrite"; final OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path[] paths = params.getPaths(); if (paths.length <= 1 && !params.checkInput()) { printUsage();// w w w . j av a2 s . c om System.exit(1); } if (paths.length > 1 && !params.checkInputOutput()) { printUsage(); System.exit(1); } if (params.get("interval") == null) { System.err.println("Temporal range missing"); printUsage(); System.exit(1); } TextSerializable inObj = params.getShape("shape"); if (!(inObj instanceof STPoint)) { if (!(inObj instanceof STRectangle)) { LOG.error("Shape is not instance of STPoint or instance of STRectangle"); printUsage(); System.exit(1); } } // path to the spatio-temporal index. List<Path> STPaths = new ArrayList<Path>(); try { STPaths = STRangeQuery.getIndexedSlices(params); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } for (Path input : STPaths) { final Path inputFile = input; int count = params.getInt("count", 1); double closeness = params.getFloat("closeness", -1.0f); final Point[] queryPoints = closeness < 0 ? params.getShapes("point", new Point()) : new Point[count]; final FileSystem fs = inputFile.getFileSystem(params); final int k = params.getInt("k", 1); int concurrency = params.getInt("concurrency", 100); if (k == 0) { LOG.warn("k = 0"); } if (queryPoints.length == 0) { printUsage(); throw new RuntimeException("Illegal arguments"); } final Path outputPath = paths.length > 1 ? new Path(paths[1].toUri() + "-" + input.getName()) : null; if (closeness >= 0) { // Get query points according to its closeness to grid intersections GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inputFile); long seed = params.getLong("seed", System.currentTimeMillis()); Random random = new Random(seed); for (int i = 0; i < count; i++) { int i_block = random.nextInt(gindex.size()); int direction = random.nextInt(4); // Generate a point in the given direction // Get center point (x, y) Iterator<Partition> iterator = gindex.iterator(); while (i_block-- >= 0) iterator.next(); Partition partition = iterator.next(); double cx = (partition.x1 + partition.x2) / 2; double cy = (partition.y1 + partition.y2) / 2; double cw = partition.x2 - partition.x1; double ch = partition.y2 - partition.y1; int signx = ((direction & 1) == 0) ? 1 : -1; int signy = ((direction & 2) == 1) ? 
1 : -1; double x = cx + cw * closeness / 2 * signx; double y = cy + ch * closeness / 2 * signy; queryPoints[i] = new Point(x, y); } } final BooleanWritable exceptionHappened = new BooleanWritable(); Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread th, Throwable ex) { ex.printStackTrace(); exceptionHappened.set(true); } }; // Run each query in a separate thread final Vector<Thread> threads = new Vector<Thread>(); for (int i = 0; i < queryPoints.length; i++) { Thread thread = new Thread() { @Override public void run() { try { Point query_point = queryPoints[threads.indexOf(this)]; OperationsParams newParams = new OperationsParams(params); OperationsParams.setShape(newParams, "point", query_point); Job job = knn(inputFile, outputPath, params); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } }; thread.setUncaughtExceptionHandler(h); threads.add(thread); } long t1 = System.currentTimeMillis(); do { // Ensure that there is at least MaxConcurrentThreads running int i = 0; while (i < concurrency && i < threads.size()) { Thread.State state = threads.elementAt(i).getState(); if (state == Thread.State.TERMINATED) { // Thread already terminated, remove from the queue threads.remove(i); } else if (state == Thread.State.NEW) { // Start the thread and move to next one threads.elementAt(i++).start(); } else { // Thread is still running, skip over it i++; } } if (!threads.isEmpty()) { try { // Sleep for 10 seconds or until the first thread terminates threads.firstElement().join(10000); } catch (InterruptedException e) { e.printStackTrace(); } } } while (!threads.isEmpty()); long t2 = System.currentTimeMillis(); if (exceptionHappened.get()) throw new RuntimeException("Not all jobs finished correctly"); System.out.println("Time for " + queryPoints.length + " jobs is " + (t2 - t1) + " millis"); System.out.println("Total iterations: " + TotalIterations); } }
From source file:net.iiit.siel.analysis.lang.LanguageIdentifier.java
/** * The main method./*from w w w. j a v a2 s .c om*/ * * @param args the arguments */ public static void main(String args[]) { String usage = "Usage: LanguageIdentifier " + "[-identifyrows filename maxlines] " + "[-identifyfile charset filename] " + "[-identifyfileset charset files] " + "[-identifytext text] " + "[-identifyurl url]"; int command = 0; final int IDFILE = 1; final int IDTEXT = 2; final int IDURL = 3; final int IDFILESET = 4; final int IDROWS = 5; Vector fileset = new Vector(); String filename = ""; String charset = ""; String url = ""; String text = ""; int max = 0; // TODO niket writing test args here.. /* args = new String[2]; args[0] = "-identifyurl"; args[1] = "file:/home1/niket/TamilSamplePage.html"; //args[2] = "/home1/niket/nutch-clia/input.txt"; */ // TODO niket end here if (args.length == 0) { System.err.println(usage); System.exit(-1); } for (int i = 0; i < args.length; i++) { // parse command line if (args[i].equals("-identifyfile")) { command = IDFILE; charset = args[++i]; filename = args[++i]; } if (args[i].equals("-identifyurl")) { command = IDURL; filename = args[++i]; } if (args[i].equals("-identifyrows")) { command = IDROWS; filename = args[++i]; max = Integer.parseInt(args[++i]); } if (args[i].equals("-identifytext")) { command = IDTEXT; for (i++; i < args.length - 1; i++) text += args[i] + " "; } if (args[i].equals("-identifyfileset")) { command = IDFILESET; charset = args[++i]; for (i++; i < args.length; i++) { File[] files = null; File f = new File(args[i]); if (f.isDirectory()) { files = f.listFiles(); } else { files = new File[] { f }; } for (int j = 0; j < files.length; j++) { fileset.add(files[j].getAbsolutePath()); } } } } Configuration conf = NutchConfiguration.create(); String lang = null; LanguageIdentifier idfr = new LanguageIdentifier(conf); File f; FileInputStream fis; try { switch (command) { case IDTEXT: lang = idfr.identify(text); System.out.println("Lang :" + lang); break; case IDFILE: f = new File(filename); fis = new FileInputStream(f); lang = idfr.identify(fis, charset); fis.close(); break; case IDURL: lang = LangIdentifierUtility.IdentifyLangFromURLDirectly(filename); /* * our url identifier is confused or couldn't identify lang from * URL */ if (lang == null || lang.equalsIgnoreCase("en")) { System.out.println("Ambuguity in identifying language from URL"); } else { System.out.println("Lang was identified(using URL) as: " + lang); } break; case IDROWS: f = new File(filename); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f))); String line; while (max > 0 && (line = br.readLine()) != null) { line = line.trim(); if (line.length() > 2) { max--; lang = idfr.identify(line); System.out.println("R=" + lang + ":" + line); } } br.close(); System.exit(0); break; case IDFILESET: /* * used for benchs for (int j=128; j<=524288; j*=2) { long start * = System.currentTimeMillis(); idfr.analyzeLength = j; */ System.out.println("FILESET"); Iterator i = fileset.iterator(); while (i.hasNext()) { try { filename = (String) i.next(); f = new File(filename); fis = new FileInputStream(f); lang = idfr.identify(fis, charset); fis.close(); } catch (Exception e) { System.out.println(e); } System.out.println(filename + " was identified as " + lang); } /* * used for benchs System.out.println(j + "/" + * (System.currentTimeMillis()-start)); } */ System.exit(0); break; } } catch (Exception e) { System.out.println(e); System.out.println("lang could not be identified properly"); e.printStackTrace(); } System.out.println("text was 
identified as " + lang); /* * DONOT delete the next few lines, they should be enabled, when a lang. * mapping map needs to be generated. TODO this is for printing * the hashMapRangeLangIDTable only * * idfr.langMarkerObject.printHashmapTableWithFormatting(); * * System.out * .println("\n\n\n Printing english text contents in this file:\n"); * System.out.println(idfr.langMarkerObject.getLangCharacters( * LanguageIdentifierConstants.LangShortNames.ENGLISH * .langShortName()).toString()); * * System.out * .println("\n\n\n Printing telugu text contents in this file:\n"); * System.out.println(idfr.langMarkerObject.getLangCharacters( * LanguageIdentifierConstants.LangShortNames.TELUGU * .langShortName()).toString()); * * System.out * .println("\n\n\n Printing unknown text contents in this file:\n"); * System.out.println(idfr.langMarkerObject.getLangCharacters( * LanguageIdentifierConstants.LangShortNames.UNKNOWN_LANG * .langShortName()).toString()); */ }
From source file:grnet.filter.XMLFiltering.java
public static void main(String[] args) throws IOException { // TODO Auto-generated method ssstub Enviroment enviroment = new Enviroment(args[0]); if (enviroment.envCreation) { Core core = new Core(); XMLSource source = new XMLSource(args[0]); File sourceFile = source.getSource(); if (sourceFile.exists()) { Collection<File> xmls = source.getXMLs(); System.out.println("Filtering repository:" + enviroment.dataProviderFilteredIn.getName()); System.out.println("Number of files to filter:" + xmls.size()); Iterator<File> iterator = xmls.iterator(); FilteringReport report = null; if (enviroment.getArguments().getProps().getProperty(Constants.createReport) .equalsIgnoreCase("true")) { report = new FilteringReport(enviroment.getArguments().getDestFolderLocation(), enviroment.getDataProviderFilteredIn().getName()); }//from w ww. j a v a 2 s . co m ConnectionFactory factory = new ConnectionFactory(); factory.setHost(enviroment.getArguments().getQueueHost()); factory.setUsername(enviroment.getArguments().getQueueUserName()); factory.setPassword(enviroment.getArguments().getQueuePassword()); while (iterator.hasNext()) { StringBuffer logString = new StringBuffer(); logString.append(enviroment.dataProviderFilteredIn.getName()); File xmlFile = iterator.next(); String name = xmlFile.getName(); name = name.substring(0, name.indexOf(".xml")); logString.append(" " + name); boolean xmlIsFilteredIn = core.filterXML(xmlFile, enviroment.getArguments().getQueries()); if (xmlIsFilteredIn) { logString.append(" " + "FilteredIn"); slf4jLogger.info(logString.toString()); Connection connection = factory.newConnection(); Channel channel = connection.createChannel(); channel.queueDeclare(QUEUE_NAME, false, false, false, null); channel.basicPublish("", QUEUE_NAME, null, logString.toString().getBytes()); channel.close(); connection.close(); try { if (report != null) { report.appendXMLFileNameNStatus(xmlFile.getPath(), Constants.filteredInData); report.raiseFilteredInFilesNum(); } FileUtils.copyFileToDirectory(xmlFile, enviroment.getDataProviderFilteredIn()); } catch (IOException e) { // TODO Auto-generated catch block // e.printStackTrace(); e.printStackTrace(); System.out.println("Filtering failed."); } } else { logString.append(" " + "FilteredOut"); slf4jLogger.info(logString.toString()); Connection connection = factory.newConnection(); Channel channel = connection.createChannel(); channel.queueDeclare(QUEUE_NAME, false, false, false, null); channel.basicPublish("", QUEUE_NAME, null, logString.toString().getBytes()); channel.close(); connection.close(); try { if (report != null) { report.appendXMLFileNameNStatus(xmlFile.getPath(), Constants.filteredOutData); report.raiseFilteredOutFilesNum(); } FileUtils.copyFileToDirectory(xmlFile, enviroment.getDataProviderFilteredOuT()); } catch (IOException e) { // TODO Auto-generated catch block // e.printStackTrace(); e.printStackTrace(); System.out.println("Filtering failed."); } } } if (report != null) { report.appendXPathExpression(enviroment.getArguments().getQueries()); report.appendGeneralInfo(); } System.out.println("Filtering is done."); } } }
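The filtering loop pulls one File per iteration from the Collection<File> returned by source.getXMLs() via iterator.next(). A stripped-down sketch of that file-by-file loop, keeping only the name handling from the original and replacing the filtering, reporting, and queue publishing with a print statement; the file names are placeholders.

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;

public class XmlFileLoop {
    public static void main(String[] args) {
        Collection<File> xmls = new ArrayList<File>();
        xmls.add(new File("record1.xml"));
        xmls.add(new File("record2.xml"));
        Iterator<File> iterator = xmls.iterator();
        while (iterator.hasNext()) {
            File xmlFile = iterator.next();
            String name = xmlFile.getName();
            name = name.substring(0, name.indexOf(".xml"));   // strip the extension, as in the original
            System.out.println("filtering " + name);
        }
    }
}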
From source file:Batch.java
static public void main(String[] args) {
    Connection conn = null;
    try {
        ArrayList breakable = new ArrayList();
        PreparedStatement stmt;
        Iterator users;
        ResultSet rs;
        Class.forName(args[0]).newInstance();
        conn = DriverManager.getConnection(args[1], args[2], args[3]);
        stmt = conn.prepareStatement("SELECT user_id, password " + "FROM user");
        rs = stmt.executeQuery();
        while (rs.next()) {
            String uid = rs.getString(1);
            String pw = rs.getString(2);
            // Assume PasswordCracker is some class that provides
            // a single static method called crack() that attempts
            // to run password cracking routines on the password
            // if( PasswordCracker.crack(uid, pw) ) {
            //     breakable.add(uid);
            // }
        }
        stmt.close();
        if (breakable.size() < 1) {
            return;
        }
        stmt = conn.prepareStatement("UPDATE user " + "SET bad_password = 'Y' " + "WHERE uid = ?");
        users = breakable.iterator();
        while (users.hasNext()) {
            String uid = (String) users.next();
            stmt.setString(1, uid);
            stmt.addBatch();
        }
        stmt.executeBatch();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (conn != null) {
            try {
                conn.close();
            } catch (Exception e) {
            }
        }
    }
}
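The second while loop above is the Iterator.next() usage of interest: each user id returned by next() is bound to the PreparedStatement and queued with addBatch(). A minimal sketch of that batching idiom as a standalone helper; the table and column names mirror the example, and the Connection is assumed to be opened and closed by the caller.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;

public class BatchUpdate {
    static void flagBadPasswords(Connection conn, List<String> breakable) throws SQLException {
        PreparedStatement stmt = conn.prepareStatement(
                "UPDATE user SET bad_password = 'Y' WHERE uid = ?");
        try {
            Iterator<String> users = breakable.iterator();
            while (users.hasNext()) {
                stmt.setString(1, users.next());  // bind the id returned by next()
                stmt.addBatch();                  // queue the update instead of executing it immediately
            }
            stmt.executeBatch();                  // send all queued updates in one round trip
        } finally {
            stmt.close();
        }
    }
}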
From source file:de.unileipzig.ub.indexer.App.java
public static void main(String[] args) throws IOException { // create Options object Options options = new Options(); options.addOption("h", "help", false, "display this help"); options.addOption("f", "filename", true, "name of the JSON file whose content should be indexed"); options.addOption("i", "index", true, "the name of the target index"); options.addOption("d", "doctype", true, "the name of the doctype (title, local, ...)"); options.addOption("t", "host", true, "elasticsearch hostname (default: 0.0.0.0)"); options.addOption("p", "port", true, "transport port (that's NOT the http port, default: 9300)"); options.addOption("c", "cluster", true, "cluster name (default: elasticsearch_mdma)"); options.addOption("b", "bulksize", true, "number of docs sent in one request (default: 3000)"); options.addOption("v", "verbose", false, "show processing speed while indexing"); options.addOption("s", "status", false, "only show status of index for file"); options.addOption("r", "repair", false, "attempt to repair recoverable inconsistencies on the go"); options.addOption("e", "debug", false, "set logging level to debug"); options.addOption("l", "logfile", true, "logfile - in not specified only log to stdout"); options.addOption("m", "memcached", true, "host and port of memcached (default: localhost:11211)"); options.addOption("z", "latest-flag-on", true, "enable latest flag according to field (within content, e.g. 001)"); options.addOption("a", "flat", false, "flat-mode: do not check for inconsistencies"); CommandLineParser parser = new PosixParser(); CommandLine cmd = null;/*from w w w . j a va2 s.c om*/ try { cmd = parser.parse(options, args); } catch (ParseException ex) { logger.error(ex); System.exit(1); } // setup logging Properties systemProperties = System.getProperties(); systemProperties.put("net.spy.log.LoggerImpl", "net.spy.memcached.compat.log.Log4JLogger"); System.setProperties(systemProperties); Logger.getLogger("net.spy.memcached").setLevel(Level.ERROR); Properties props = new Properties(); props.load(props.getClass().getResourceAsStream("/log4j.properties")); if (cmd.hasOption("debug")) { props.setProperty("log4j.logger.de.unileipzig", "DEBUG"); } if (cmd.hasOption("logfile")) { props.setProperty("log4j.rootLogger", "INFO, stdout, F"); props.setProperty("log4j.appender.F", "org.apache.log4j.FileAppender"); props.setProperty("log4j.appender.F.File", cmd.getOptionValue("logfile")); props.setProperty("log4j.appender.F.layout", "org.apache.log4j.PatternLayout"); props.setProperty("log4j.appender.F.layout.ConversionPattern", "%5p | %d | %F | %L | %m%n"); } PropertyConfigurator.configure(props); InetAddress addr = InetAddress.getLocalHost(); String memcachedHostAndPort = addr.getHostAddress() + ":11211"; if (cmd.hasOption("m")) { memcachedHostAndPort = cmd.getOptionValue("m"); } // setup caching try { if (memcachedClient == null) { memcachedClient = new MemcachedClient( new ConnectionFactoryBuilder().setFailureMode(FailureMode.Cancel).build(), AddrUtil.getAddresses("0.0.0.0:11211")); try { // give client and server 500ms Thread.sleep(300); } catch (InterruptedException ex) { } Collection availableServers = memcachedClient.getAvailableServers(); logger.info(availableServers); if (availableServers.size() == 0) { logger.info("no memcached servers found"); memcachedClient.shutdown(); memcachedClient = null; } else { logger.info(availableServers.size() + " memcached server(s) detected, fine."); } } } catch (IOException ex) { logger.warn("couldn't create a connection, bailing out: " + 
ex.getMessage()); } // process options if (cmd.hasOption("h")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("indexer", options, true); quit(0); } boolean verbose = false; if (cmd.hasOption("verbose")) { verbose = true; } // ES options String[] hosts = new String[] { "0.0.0.0" }; int port = 9300; String clusterName = "elasticsearch_mdma"; int bulkSize = 3000; if (cmd.hasOption("host")) { hosts = cmd.getOptionValues("host"); } if (cmd.hasOption("port")) { port = Integer.parseInt(cmd.getOptionValue("port")); } if (cmd.hasOption("cluster")) { clusterName = cmd.getOptionValue("cluster"); } if (cmd.hasOption("bulksize")) { bulkSize = Integer.parseInt(cmd.getOptionValue("bulksize")); if (bulkSize < 1 || bulkSize > 100000) { logger.error("bulksize must be between 1 and 100,000"); quit(1); } } // ES Client final Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", "elasticsearch_mdma") .build(); final TransportClient client = new TransportClient(settings); for (String host : hosts) { client.addTransportAddress(new InetSocketTransportAddress(host, port)); } if (cmd.hasOption("filename") && cmd.hasOption("index") && cmd.hasOption("doctype")) { final String filename = cmd.getOptionValue("filename"); final File _file = new File(filename); if (_file.length() == 0) { logger.info(_file.getAbsolutePath() + " is empty, skipping"); quit(0); // file is empty } // for flat mode: leave a stampfile beside the json to // indicate previous successful processing File directory = new File(filename).getParentFile(); File stampfile = new File(directory, DigestUtils.shaHex(filename) + ".indexed"); long start = System.currentTimeMillis(); long lineCount = 0; final String indexName = cmd.getOptionValue("index"); final String docType = cmd.getOptionValue("doctype"); BulkRequestBuilder bulkRequest = client.prepareBulk(); try { if (cmd.hasOption("flat")) { // flat mode // ......... 
if (stampfile.exists()) { logger.info("SKIPPING, since it seems this file has already " + "been imported (found: " + stampfile.getAbsolutePath() + ")"); quit(0); } } else { final String srcSHA1 = extractSrcSHA1(filename); logger.debug(filename + " srcsha1: " + srcSHA1); long docsInIndex = getIndexedRecordCount(client, indexName, srcSHA1); logger.debug(filename + " indexed: " + docsInIndex); long docsInFile = getLineCount(filename); logger.debug(filename + " lines: " + docsInFile); // in non-flat-mode, indexing would take care // of inconsistencies if (docsInIndex == docsInFile) { logger.info("UP-TO DATE: " + filename + " (" + docsInIndex + ", " + srcSHA1 + ")"); client.close(); quit(0); } if (docsInIndex > 0) { logger.warn("INCONSISTENCY DETECTED: " + filename + ": indexed:" + docsInIndex + " lines:" + docsInFile); if (!cmd.hasOption("r")) { logger.warn( "Please re-run indexer with --repair flag or delete residues first with: $ curl -XDELETE " + hosts[0] + ":9200/" + indexName + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'"); client.close(); quit(1); } else { logger.info("Attempting to clear residues..."); // attempt to repair once DeleteByQueryResponse dbqr = client.prepareDeleteByQuery(indexName) .setQuery(termQuery("meta.srcsha1", srcSHA1)).execute().actionGet(); Iterator<IndexDeleteByQueryResponse> it = dbqr.iterator(); long deletions = 0; while (it.hasNext()) { IndexDeleteByQueryResponse response = it.next(); deletions += 1; } logger.info("Deleted residues of " + filename); logger.info("Refreshing [" + indexName + "]"); RefreshResponse refreshResponse = client.admin().indices() .refresh(new RefreshRequest(indexName)).actionGet(); long indexedAfterDelete = getIndexedRecordCount(client, indexName, srcSHA1); logger.info(indexedAfterDelete + " docs remained"); if (indexedAfterDelete > 0) { logger.warn("Not all residues cleaned. Try to fix this manually: $ curl -XDELETE " + hosts[0] + ":9200/" + indexName + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'"); quit(1); } else { logger.info("Residues are gone. Now trying to reindex: " + filename); } } } } logger.info("INDEXING-REQUIRED: " + filename); if (cmd.hasOption("status")) { quit(0); } HashSet idsInBatch = new HashSet(); String idField = null; if (cmd.hasOption("z")) { idField = cmd.getOptionValue("z"); } final FileReader fr = new FileReader(filename); final BufferedReader br = new BufferedReader(fr); String line; // one line is one document while ((line = br.readLine()) != null) { // "Latest-Flag" machine // This gets obsolete with a "flat" index if (cmd.hasOption("z")) { // flag that indicates, whether the document // about to be indexed will be the latest boolean willBeLatest = true; // check if there is a previous (lower meta.timestamp) document with // the same identifier (whatever that may be - queried under "content") final String contentIdentifier = getContentIdentifier(line, idField); idsInBatch.add(contentIdentifier); // assumed in meta.timestamp final Long timestamp = Long.parseLong(getTimestamp(line)); logger.debug("Checking whether record is latest (line: " + lineCount + ")"); logger.debug(contentIdentifier + ", " + timestamp); // get all docs, which match the contentIdentifier // by filter, which doesn't score final TermFilterBuilder idFilter = new TermFilterBuilder("content." 
+ idField, contentIdentifier); final TermFilterBuilder kindFilter = new TermFilterBuilder("meta.kind", docType); final AndFilterBuilder afb = new AndFilterBuilder(); afb.add(idFilter).add(kindFilter); final FilteredQueryBuilder fb = filteredQuery(matchAllQuery(), afb); final SearchResponse searchResponse = client.prepareSearch(indexName) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(fb).setFrom(0) .setSize(1200) // 3 years and 105 days assuming daily updates at the most .setExplain(false).execute().actionGet(); final SearchHits searchHits = searchResponse.getHits(); logger.debug("docs with this id in the index: " + searchHits.getTotalHits()); for (final SearchHit hit : searchHits.getHits()) { final String docId = hit.id(); final Map<String, Object> source = hit.sourceAsMap(); final Map meta = (Map) source.get("meta"); final Long docTimestamp = Long.parseLong(meta.get("timestamp").toString()); // if the indexed doc timestamp is lower the the current one, // remove any latest flag if (timestamp >= docTimestamp) { source.remove("latest"); final ObjectMapper mapper = new ObjectMapper(); // put the updated doc back // IndexResponse response = client.prepareIndex(indexName, docType).setCreate(false).setId(docId) .setSource(mapper.writeValueAsBytes(source)) .execute(new ActionListener<IndexResponse>() { public void onResponse(IndexResponse rspns) { logger.debug("Removed latest flag from " + contentIdentifier + ", " + docTimestamp + ", " + hit.id() + " since (" + timestamp + " > " + docTimestamp + ")"); } public void onFailure(Throwable thrwbl) { logger.error("Could not remove flag from " + hit.id() + ", " + contentIdentifier); } }); // .execute() //.actionGet(); } else { logger.debug("Doc " + hit.id() + " is newer (" + docTimestamp + ")"); willBeLatest = false; } } if (willBeLatest) { line = setLatestFlag(line); logger.info("Setting latest flag on " + contentIdentifier + ", " + timestamp); } // end of latest-flag machine // beware - this will be correct as long as there // are no dups within one bulk! } bulkRequest.add(client.prepareIndex(indexName, docType).setSource(line)); lineCount++; logger.debug("Added line " + lineCount + " to BULK"); logger.debug(line); if (lineCount % bulkSize == 0) { if (idsInBatch.size() != bulkSize && cmd.hasOption("z")) { logger.error( "This batch has duplications in the ID. That's not bad for the index, just makes the latest flag fuzzy"); logger.error( "Bulk size was: " + bulkSize + ", but " + idsInBatch.size() + " IDs (only)"); } idsInBatch.clear(); logger.debug("Issuing BULK request"); final long actionCount = bulkRequest.numberOfActions(); final BulkResponse bulkResponse = bulkRequest.execute().actionGet(); final long tookInMillis = bulkResponse.getTookInMillis(); if (bulkResponse.hasFailures()) { logger.fatal("FAILED, bulk not indexed. 
exiting now."); Iterator<BulkItemResponse> it = bulkResponse.iterator(); while (it.hasNext()) { BulkItemResponse bir = it.next(); if (bir.isFailed()) { Failure failure = bir.getFailure(); logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage() + ", type: " + failure.getType() + ", index: " + failure.getIndex()); } } quit(1); } else { if (verbose) { final double elapsed = System.currentTimeMillis() - start; final double speed = (lineCount / elapsed * 1000); logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)"); } } bulkRequest = client.prepareBulk(); } } // handle the remaining items final long actionCount = bulkRequest.numberOfActions(); if (actionCount > 0) { final BulkResponse bulkResponse = bulkRequest.execute().actionGet(); final long tookInMillis = bulkResponse.getTookInMillis(); if (bulkResponse.hasFailures()) { logger.fatal("FAILED, bulk not indexed. exiting now."); Iterator<BulkItemResponse> it = bulkResponse.iterator(); while (it.hasNext()) { BulkItemResponse bir = it.next(); if (bir.isFailed()) { Failure failure = bir.getFailure(); logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage() + ", type: " + failure.getType() + ", index: " + failure.getIndex()); } } quit(1); } else { // trigger update now RefreshResponse refreshResponse = client.admin().indices() .refresh(new RefreshRequest(indexName)).actionGet(); if (verbose) { final double elapsed = System.currentTimeMillis() - start; final double speed = (lineCount / elapsed * 1000); logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)"); } } } br.close(); client.close(); final double elapsed = (System.currentTimeMillis() - start) / 1000; final double speed = (lineCount / elapsed); logger.info("indexing (" + filename + ") " + lineCount + " docs took " + elapsed + "s (speed: " + String.format("%.2f", speed) + "r/s)"); if (cmd.hasOption("flat")) { try { FileUtils.touch(stampfile); } catch (IOException ioe) { logger.warn(".indexed files not created. Will reindex everything everytime."); } } } catch (IOException e) { client.close(); logger.error(e); quit(1); } finally { client.close(); } } quit(0); }