Example usage for java.util HashSet iterator

List of usage examples for java.util HashSet iterator

Introduction

On this page you can find example usage of java.util.HashSet.iterator().

Prototype

public Iterator<E> iterator() 

Document

Returns an iterator over the elements in this set. The elements are returned in no particular order.
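
A minimal, self-contained sketch of typical iterator() usage (the element values are illustrative): the explicit iterator is mainly useful when elements must be removed during traversal, since Iterator.remove() is the only safe way to do that while iterating a HashSet.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;

public class Main {
    public static void main(String[] args) {
        HashSet<String> set = new HashSet<String>(Arrays.asList("A", "B", "C"));

        // Iterator.remove() is the only safe way to drop elements mid-iteration;
        // calling set.remove(...) inside the loop would risk a ConcurrentModificationException.
        Iterator<String> it = set.iterator();
        while (it.hasNext()) {
            if (it.next().equals("B")) {
                it.remove();
            }
        }
        System.out.println(set); // order is unspecified, e.g. [A, C]
    }
}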

Usage

From source file:Main.java

public static void main(String[] a) {
    String elements[] = { "A", "B", "C", "D", "E" };
    HashSet<String> set = new HashSet<String>(Arrays.asList(elements));

    Iterator<String> iter = set.iterator();
    while (iter.hasNext()) {
        System.out.println(iter.next());
    }

}
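
For comparison, the example above can also be written with the enhanced for loop, which the compiler translates into the same set.iterator() / hasNext() / next() calls; a minimal sketch using the same data:

import java.util.Arrays;
import java.util.HashSet;

public class Main {
    public static void main(String[] a) {
        HashSet<String> set = new HashSet<String>(Arrays.asList("A", "B", "C", "D", "E"));

        // The for-each loop is syntactic sugar for an explicit iterator over the set.
        for (String element : set) {
            System.out.println(element);
        }
    }
}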

From source file:Main.java

public static void main(String[] args) {

    HashSet<Integer> hSet = new HashSet<Integer>();

    hSet.add(1);
    hSet.add(2);
    hSet.add(3);

    Iterator<Integer> itr = hSet.iterator();

    while (itr.hasNext())
        System.out.println(itr.next());
}

From source file:Main.java

public static void main(String args[]) {
    HashSet<String> newset = new HashSet<String>();

    // populate hash set
    newset.add("Learning");
    newset.add("from");
    newset.add("java2s.com");

    // create an iterator
    Iterator<String> iterator = newset.iterator();

    // check values
    while (iterator.hasNext()) {
        System.out.println("Value: " + iterator.next() + " ");
    }
}

From source file:subsets.GenerateGFKMatrix.java

public static void main(String args[]) {

    CollectionTools ct = new CollectionTools();

    GenerateGFKMatrix gfk = new GenerateGFKMatrix();

    TreeMap<String, HashSet> SmartCube_SCAttribut = gfk.get_AType_Relation_BType("SmartCube", "SCAttribut",
            "MEMBER_OF", 1);
    TreeMap<String, HashSet> SCAttribut_BCAttribut_part1 = gfk.get_AType_Relation_BType("SCAttribut",
            "BCAttribut", "TRANSITION", -1);
    TreeMap<String, HashSet> SCAttribut_SCAlgo = gfk.get_AType_Relation_BType("SCAttribut", "SCAlgorithmen",
            "DERIVED_BY", -1);
    TreeMap<String, HashSet> SCAlgo_BCAttribut = gfk.get_AType_Relation_BType("SCAlgorithmen", "BCAttribut",
            "USES", -1);

    gfk.get_AType_Relation_BType("Meldekonzept", "BCAttribut", "USES", -1);

    //MATCH (c:Codeliste)<-[:MEMBER_OF]-(cd:Code) WHERE c.Name = "Geschaeftsfallkategorie_CL" RETURN c,cd

    HashSet gfks = gfk.getGFKs();

    TreeMap<String, String> Meldekonzepte_formbeschreibung = gfk.getMeldekonzepte_formbeschreibung();

    TreeMap<String, HashSet> Meldekonzept_GFK = new TreeMap();

    for (String mk : Meldekonzepte_formbeschreibung.keySet()) {
        String form_beschreibung = Meldekonzepte_formbeschreibung.get(mk);

        Iterator it = gfks.iterator();

        while (it.hasNext()) {
            String gfkategorie = (String) it.next();
            if (form_beschreibung.contains(gfkategorie)) {
                HashSet dummy = Meldekonzept_GFK.get(mk);
                if (dummy == null) {
                    dummy = new HashSet();
                    dummy.add(gfkategorie);
                    Meldekonzept_GFK.put(mk, dummy);
                }
                dummy.add(gfkategorie);
            }

        }

    }

    TreeMap<String, HashSet> SmartCube_BCAttribut = ct
            .join_string_hashset_and_string_hashset(SmartCube_SCAttribut, SCAttribut_BCAttribut_part1);
    TreeMap<String, HashSet> SmartCube_SCAlgo = ct.join_string_hashset_and_string_hashset(SmartCube_SCAttribut,
            SCAttribut_SCAlgo);
    TreeMap<String, HashSet> SCAttribut_BCAttribut_part2 = ct
            .join_string_hashset_and_string_hashset(SmartCube_SCAlgo, SCAlgo_BCAttribut);

    TreeMap<String, HashSet> SCAttribut_BCAttribut = new TreeMap();
    SCAttribut_BCAttribut.putAll(SCAttribut_BCAttribut_part1);
    SCAttribut_BCAttribut.putAll(SCAttribut_BCAttribut_part2);

    //TreeMap<String,JSONObject> SC_BC_Dependency = gfk.get_SC_BC_Dependency();

    //Note: all subsets are defined in the GenerateSCSubset class
    GenerateSCSubset gensubset = new GenerateSCSubset();
    //Build all Smart Cube matrices

    //          TreeMap<String,TreeMap<String, HashSet>> dependency_structure= new TreeMap();
    //          
    //          
    //          
    ArrayList<Cube> all_cubes = gensubset.getCubeDependency_ArrayList();
    //    
    //          //get all relevant Dimensions
    //          

    //GFK - MappingTabelle GFK  : TreeMap<String (GFK), Set of BC Attribute >

    TreeMap<String, HashSet> GFK_BCAttribut = new TreeMap();

    //The actual dependency matrix is built here
    for (int i = 0; i < all_cubes.size(); i++) {
        System.out.println(((Cube) all_cubes.get(i)).Bezeichnung);
        Cube cube = all_cubes.get(i);

        for (String mk : cube.Meldekonzept_SCDimensionen.keySet()) {
            HashSet scattribute_pro_meldekonzept = cube.Meldekonzept_SCDimensionen.get(mk);
            Iterator it = scattribute_pro_meldekonzept.iterator();

            //for each Meldekonzept, collect its BCAttribute set (MK_Basic Cube Attribut)

            HashSet geschaeftsfallkategorien = Meldekonzept_GFK.get(mk);

            HashSet Meldekonzept_BCDependency = new HashSet();

            //step through them one by one

            while (it.hasNext()) {//iterate over the attributes of each Meldekonzept
                String SCAttribut = (String) it.next();
                HashSet BasicCubeAttribute = SCAttribut_BCAttribut.get(SCAttribut);

                //
                //
                //   Bug - SCAttribut_BCAttribut: this data structure also contains cubes instead of ONLY SCAttribute entries
                //
                //

                //BasicCubeAttribute may be null (no BC mapping for this SCAttribut), so guard the addAll
                if (BasicCubeAttribute != null) {
                    Meldekonzept_BCDependency.addAll(BasicCubeAttribute);
                }
            }

            Iterator gfk_iterator = geschaeftsfallkategorien.iterator();

            while (gfk_iterator.hasNext()) {
                String gfkategorie = (String) gfk_iterator.next();
                HashSet dummy = new HashSet();
                dummy.addAll(Meldekonzept_BCDependency);
                GFK_BCAttribut.put(gfkategorie, dummy);
            }

        }

        all_cubes.get(i).showMeldekonzept_SCDimensionen();

    }

    System.out.println("Test1234");

}
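
As a side note, the null-check-then-put grouping used above to fill Meldekonzept_GFK can be written more compactly with Map.computeIfAbsent (Java 8+); a minimal, self-contained sketch of that pattern, using hypothetical sample pairs instead of the graph queries above:

import java.util.HashSet;
import java.util.Set;
import java.util.TreeMap;

public class GroupingSketch {
    public static void main(String[] args) {
        // Hypothetical (Meldekonzept, Geschaeftsfallkategorie) pairs standing in for the lookups above.
        String[][] pairs = { { "mk1", "gfkA" }, { "mk1", "gfkB" }, { "mk2", "gfkA" } };

        TreeMap<String, Set<String>> meldekonzeptGfk = new TreeMap<String, Set<String>>();
        for (String[] pair : pairs) {
            // computeIfAbsent creates the HashSet on first use, replacing the manual null check.
            meldekonzeptGfk.computeIfAbsent(pair[0], k -> new HashSet<String>()).add(pair[1]);
        }
        System.out.println(meldekonzeptGfk); // e.g. {mk1=[gfkA, gfkB], mk2=[gfkA]}
    }
}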

From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the index and events " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option thresholdOption = new Option("t", "use-custom-thresholds", false,
            "use custom thresholds for regular and rare events, defined in HDFS_HOME/"
                    + FrameworkUtils.thresholdDir + " file");
    thresholdOption.setRequired(false);
    options.addOption(thresholdOption);

    Option gOption = new Option("g", "group", true,
            "set group of datasets for which the indices and events" + " will be computed");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetIndex = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();
    HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>();
    HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>();

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());
    BufferedReader br;

    boolean removeExistingFiles = cmd.hasOption("f");
    boolean isThresholdUserDefined = cmd.hasOption("t");

    for (String dataset : cmd.getOptionValues("g")) {

        // getting aggregates
        String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3);
        if (aggregate.length == 0) {
            System.out.println("No aggregates found for " + dataset + ".");
            continue;
        }

        // getting aggregates header
        String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3);
        if (aggregatesHeaderFileName == null) {
            System.out.println("No aggregate header for " + dataset);
            continue;
        }

        String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName;

        shortDataset.add(dataset);
        datasetId.put(dataset, null);

        if (s3) {
            path = new Path(aggregatesHeader);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader);
        }

        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        datasetAgg.put(dataset, br.readLine().split("\t")[1]);
        br.close();
        if (s3)
            fs.close();
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    // getting user defined thresholds

    if (isThresholdUserDefined) {
        if (s3) {
            path = new Path(s3bucket + FrameworkUtils.thresholdDir);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        line = br.readLine();
        while (line != null) {
            // getting dataset name
            String dataset = line.trim();
            HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>();
            HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>();
            line = br.readLine();
            while ((line != null) && (line.split("\t").length > 1)) {
                // getting attribute ids and thresholds
                String[] keyVals = line.trim().split("\t");
                int att = Integer.parseInt(keyVals[0].trim());
                regThresholds.put(att, Double.parseDouble(keyVals[1].trim()));
                rareThresholds.put(att, Double.parseDouble(keyVals[2].trim()));
                line = br.readLine();
            }
            datasetRegThreshold.put(dataset, regThresholds);
            datasetRareThreshold.put(dataset, rareThresholds);
        }
        br.close();
    }
    if (s3)
        fs.close();

    // datasets that will use existing merge tree
    ArrayList<String> useMergeTree = new ArrayList<String>();

    // creating index for each spatio-temporal resolution

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3);

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/";

        if (removeExistingFiles) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3);
            FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3);
        } else if (datasetRegThreshold.containsKey(dataset)) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) {
                useMergeTree.add(dataset);
            }
        }

        if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) {
            input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset);
            shortDatasetIndex.add(dataset);
        }

    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have indices.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    String aggregateDatasets = "";
    it = input.iterator();
    while (it.hasNext()) {
        aggregateDatasets += it.next() + ",";
    }

    Job icJob = null;
    Configuration icConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "index";
    String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/";

    FrameworkUtils.removeFile(indexOutputDir, s3conf, s3);

    icConf.set("dataset-name", datasetNames);
    icConf.set("dataset-id", datasetIds);

    if (!useMergeTree.isEmpty()) {
        String useMergeTreeStr = "";
        for (String dt : useMergeTree) {
            useMergeTreeStr += dt + ",";
        }
        icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1));
    }

    for (int i = 0; i < shortDataset.size(); i++) {
        String dataset = shortDataset.get(i);
        String id = datasetId.get(dataset);
        icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset));
        if (datasetRegThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset);
            String thresholds = "";
            for (int att : regThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ",";
            }
            icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1));
        }

        if (datasetRareThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset);
            String thresholds = "";
            for (int att : rareThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ",";
            }
            icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
    }

    icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    icConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    icConf.set("mapreduce.task.io.sort.mb", "200");
    icConf.set("mapreduce.task.io.sort.factor", "100");
    //icConf.set("mapreduce.task.timeout", "1800000");
    machineConf.setMachineConfiguration(icConf);

    if (s3) {
        machineConf.setMachineConfiguration(icConf);
        icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        icConf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    icJob = new Job(icConf);
    icJob.setJobName(jobName);

    icJob.setMapOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class);
    icJob.setOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setOutputValueClass(TopologyTimeSeriesWritable.class);
    //icJob.setOutputKeyClass(Text.class);
    //icJob.setOutputValueClass(Text.class);

    icJob.setMapperClass(IndexCreationMapper.class);
    icJob.setReducerClass(IndexCreationReducer.class);
    icJob.setNumReduceTasks(machineConf.getNumberReduces());

    icJob.setInputFormatClass(SequenceFileInputFormat.class);
    //icJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(icJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(icJob, true);
    FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1));
    FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir));

    icJob.setJarByClass(IndexCreation.class);

    long start = System.currentTimeMillis();
    icJob.submit();
    icJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetIndex) {
        String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}
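
The comma-separated input list above is built by walking a HashSet with an explicit iterator and trimming the trailing comma; on Java 8+ the same result can be obtained with String.join, which iterates the set once and only puts the separator between elements. A minimal sketch with hypothetical paths:

import java.util.Arrays;
import java.util.HashSet;

public class JoinSketch {
    public static void main(String[] args) {
        // Hypothetical aggregate directories standing in for the job input built above.
        HashSet<String> input = new HashSet<String>(
                Arrays.asList("bucket/aggregates/taxi", "bucket/aggregates/weather"));

        String aggregateDatasets = String.join(",", input);
        System.out.println(aggregateDatasets); // element order is unspecified
    }
}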

From source file:org.apache.nutch.tools.proxy.TestbedProxy.java

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println(
                "TestbedProxy [-seg <segment_name> | -segdir <segments>] [-port <nnn>] [-forward] [-fake] [-delay nnn] [-debug]");
        System.err.println("-seg <segment_name>\tpath to a single segment (can be specified multiple times)");
        System.err.println("-segdir <segments>\tpath to a parent directory of multiple segments (as above)");
        System.err.println(
                "-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
        System.err.println("-forward\tif specified, requests to all unknown urls will be passed to");
        System.err.println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
        System.err.println(
                "-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
        System.err.println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
        System.exit(-1);
    }

    Configuration conf = NutchConfiguration.create();
    int port = conf.getInt("segment.proxy.port", 8181);
    boolean forward = false;
    boolean fake = false;
    boolean delay = false;
    boolean debug = false;
    int delayVal = 0;

    HashSet<Path> segs = new HashSet<Path>();
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-segdir")) {
            FileSystem fs = FileSystem.get(conf);
            FileStatus[] fstats = fs.listStatus(new Path(args[++i]));
            Path[] paths = HadoopFSUtil.getPaths(fstats);
            segs.addAll(Arrays.asList(paths));
        } else if (args[i].equals("-port")) {
            port = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-forward")) {
            forward = true;
        } else if (args[i].equals("-delay")) {
            delay = true;
            delayVal = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-fake")) {
            fake = true;
        } else if (args[i].equals("-debug")) {
            debug = true;
        } else if (args[i].equals("-seg")) {
            segs.add(new Path(args[++i]));
        } else {
            LOG.fatal("Unknown argument: " + args[i]);
            System.exit(-1);
        }
    }

    // Create the server
    Server server = new Server();
    SocketConnector connector = new SocketConnector();
    connector.setPort(port);
    connector.setResolveNames(false);
    server.addConnector(connector);

    // create a list of handlers
    HandlerList list = new HandlerList();
    server.addHandler(list);

    if (debug) {
        LOG.info("* Added debug handler.");
        list.addHandler(new LogDebugHandler());
    }

    if (delay) {
        LOG.info("* Added delay handler: "
                + (delayVal < 0 ? "random delay up to " + (-delayVal) : "constant delay of " + delayVal));
        list.addHandler(new DelayHandler(delayVal));
    }

    // XXX alternatively, we can add the DispatchHandler as the first one,
    // XXX to activate handler plugins and redirect requests to appropriate
    // XXX handlers ... Here we always load these handlers

    Iterator<Path> it = segs.iterator();
    while (it.hasNext()) {
        Path p = it.next();
        try {
            SegmentHandler segment = new SegmentHandler(conf, p);
            list.addHandler(segment);
            LOG.info("* Added segment handler for: " + p);
        } catch (Exception e) {
            LOG.warn("Skipping segment '" + p + "': " + StringUtils.stringifyException(e));
        }
    }
    if (forward) {
        LOG.info("* Adding forwarding proxy for all unknown urls ...");
        ServletHandler servlets = new ServletHandler();
        servlets.addServletWithMapping(AsyncProxyServlet.class, "/*");
        servlets.addFilterWithMapping(LogDebugHandler.class, "/*", Handler.ALL);
        list.addHandler(servlets);
    }
    if (fake) {
        LOG.info("* Added fake handler for remaining URLs.");
        list.addHandler(new FakeHandler());
    }
    list.addHandler(new NotFoundHandler());
    // Start the http server
    server.start();
    server.join();
}

From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the aggregate functions " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions"
            + " will be computed, followed by their temporal and spatial attribute indices");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    String preProcessingDatasets = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetAggregation = new ArrayList<String>();
    HashMap<String, String> datasetTempAtt = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>();
    HashMap<String, String> preProcessingDataset = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");
    String[] datasetArgs = cmd.getOptionValues("g");

    for (int i = 0; i < datasetArgs.length; i += 3) {
        String dataset = datasetArgs[i];

        // getting pre-processing
        String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3);
        if (tempPreProcessing == null) {
            System.out.println("No pre-processing available for " + dataset);
            continue;
        }
        preProcessingDataset.put(dataset, tempPreProcessing);

        shortDataset.add(dataset);
        datasetTempAtt.put(dataset, (("null".equals(datasetArgs[i + 1])) ? null : datasetArgs[i + 1]));
        datasetSpatialAtt.put(dataset, (("null".equals(datasetArgs[i + 2])) ? null : datasetArgs[i + 2]));

        datasetId.put(dataset, null);
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3);

    // getting smallest resolution

    HashMap<String, String> tempResMap = new HashMap<String, String>();
    HashMap<String, String> spatialResMap = new HashMap<String, String>();

    HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>();

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String[] datasetArray = preProcessingDataset.get(dataset).split("-");

        String datasetTemporalStr = datasetArray[datasetArray.length - 2];
        int datasetTemporal = utils.temporalResolution(datasetTemporalStr);

        String datasetSpatialStr = datasetArray[datasetArray.length - 1];
        int datasetSpatial = utils.spatialResolution(datasetSpatialStr);

        // finding all possible resolutions

        String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal);
        String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial);

        String temporalResolution = "";
        String spatialResolution = "";

        String tempRes = "";
        String spatialRes = "";

        boolean dataAdded = false;

        for (int i = 0; i < temporalResolutions.length; i++) {
            for (int j = 0; j < spatialResolutions.length; j++) {

                temporalResolution = temporalResolutions[i];
                spatialResolution = spatialResolutions[j];

                String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";

                if (removeExistingFiles) {
                    FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3);
                }

                if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) {

                    dataAdded = true;

                    tempRes += temporalResolution + "-";
                    spatialRes += spatialResolution + "-";
                }
            }
        }

        if (dataAdded) {
            input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
            shortDatasetAggregation.add(dataset);

            tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1));
            spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1));

            datasetTemporalStrMap.put(dataset, datasetTemporalStr);
            datasetSpatialStrMap.put(dataset, datasetSpatialStr);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have aggregates.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    it = input.iterator();
    while (it.hasNext()) {
        preProcessingDatasets += it.next() + ",";
    }

    Job aggJob = null;
    String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/";
    String jobName = "aggregates";

    FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3);

    Configuration aggConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    aggConf.set("dataset-name", datasetNames);
    aggConf.set("dataset-id", datasetIds);

    for (int i = 0; i < shortDatasetAggregation.size(); i++) {
        String dataset = shortDatasetAggregation.get(i);
        String id = datasetId.get(dataset);
        aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset));
        aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset));
        aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset));

        if (s3)
            aggConf.set("dataset-" + id,
                    s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
        else
            aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/"
                    + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
    }

    aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    aggConf.set("mapreduce.task.io.sort.mb", "200");
    aggConf.set("mapreduce.task.io.sort.factor", "100");
    machineConf.setMachineConfiguration(aggConf);

    if (s3) {
        machineConf.setMachineConfiguration(aggConf);
        aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    if (snappyCompression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    aggJob = new Job(aggConf);
    aggJob.setJobName(jobName);

    aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setMapOutputValueClass(AggregationArrayWritable.class);
    aggJob.setOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setOutputValueClass(FloatArrayWritable.class);
    //aggJob.setOutputKeyClass(Text.class);
    //aggJob.setOutputValueClass(Text.class);

    aggJob.setMapperClass(AggregationMapper.class);
    aggJob.setCombinerClass(AggregationCombiner.class);
    aggJob.setReducerClass(AggregationReducer.class);
    aggJob.setNumReduceTasks(machineConf.getNumberReduces());

    aggJob.setInputFormatClass(SequenceFileInputFormat.class);
    //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(aggJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(aggJob, true);
    FileInputFormat.setInputPaths(aggJob,
            preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1));
    FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir));

    aggJob.setJarByClass(Aggregation.class);

    long start = System.currentTimeMillis();
    aggJob.submit();
    aggJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetAggregation) {
        String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}

From source file:Main.java

public static HashSet<String> intersectionSet(HashSet<String> setA, HashSet<String> setB) {
    HashSet<String> intersectionSet = new HashSet<String>();
    Iterator<String> iterA = setA.iterator();
    while (iterA.hasNext()) {
        String tempInner = iterA.next();
        if (setB.contains(tempInner)) {
            intersectionSet.add(tempInner);
        }
    }
    return intersectionSet;
}
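
The same intersection can also be computed with Set.retainAll, which in the default AbstractCollection implementation walks the set with its iterator; a minimal sketch that, like the method above, copies setA first so neither input set is modified:

import java.util.Arrays;
import java.util.HashSet;

public class IntersectionSketch {
    public static void main(String[] args) {
        HashSet<String> setA = new HashSet<String>(Arrays.asList("A", "B", "C"));
        HashSet<String> setB = new HashSet<String>(Arrays.asList("B", "C", "D"));

        // Copy setA, then keep only the elements that are also contained in setB.
        HashSet<String> intersection = new HashSet<String>(setA);
        intersection.retainAll(setB);
        System.out.println(intersection); // e.g. [B, C] -- order is unspecified
    }
}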

From source file:Main.java

/**
 * Creates policy tree stub containing two <code>PolicyNode</code>s
 * for testing purposes
 *
 * @return root <code>PolicyNode</code> of the policy tree
 */
public static PolicyNode getPolicyTree() {
    return new PolicyNode() {
        final PolicyNode parent = this;

        public int getDepth() {
            // parent
            return 0;
        }

        public boolean isCritical() {
            return false;
        }

        public String getValidPolicy() {
            return null;
        }

        public PolicyNode getParent() {
            return null;
        }

        public Iterator<PolicyNode> getChildren() {
            PolicyNode child = new PolicyNode() {
                public int getDepth() {
                    // child
                    return 1;
                }

                public boolean isCritical() {
                    return false;
                }

                public String getValidPolicy() {
                    return null;
                }

                public PolicyNode getParent() {
                    return parent;
                }

                public Iterator<PolicyNode> getChildren() {
                    return null;
                }

                public Set<String> getExpectedPolicies() {
                    return null;
                }

                public Set<? extends PolicyQualifierInfo> getPolicyQualifiers() {
                    return null;
                }
            };
            HashSet<PolicyNode> s = new HashSet<PolicyNode>();
            s.add(child);
            return s.iterator();
        }

        public Set<String> getExpectedPolicies() {
            return null;
        }

        public Set<? extends PolicyQualifierInfo> getPolicyQualifiers() {
            return null;
        }
    };
}

From source file:com.jetyun.pgcd.rpc.localarg.LocalArgController.java

/**
 * Using the caller's context, resolve a given method call parameter to a
 * local argument.
 * 
 * @param context
 *            callers context. In an http servlet environment, this will
 *            contain the servlet request and response objects.
 * @param param
 *            class type parameter to resolve to a local argument.
 * 
 * @return the run time instance that is resolved, to be used when calling
 *         the method.
 * 
 * @throws UnmarshallException
 *             if there is a failure during resolution.
 */
public static Object resolveLocalArg(Object context[], Class param) throws UnmarshallException {
    HashSet resolverSet = (HashSet) localArgResolverMap.get(param);
    Iterator i = resolverSet.iterator();
    while (i.hasNext()) {
        LocalArgResolverData resolverData = (LocalArgResolverData) i.next();
        for (int j = 0; j < context.length; j++) {
            if (resolverData.understands(context[j])) {
                try {
                    return resolverData.getArgResolver().resolveArg(context[j]);
                } catch (LocalArgResolveException e) {
                    throw new UnmarshallException("error resolving local argument: " + e, e);
                }
            }
        }
    }
    throw new UnmarshallException("couldn't find local arg resolver");
}