List of usage examples for java.util HashSet iterator
public Iterator<E> iterator()
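Before the collected examples, a minimal sketch of the iterator() contract may help: the returned Iterator<E> visits the set's elements in no guaranteed order, is fail-fast, and Iterator.remove() is the only safe way to delete elements mid-iteration. Class and variable names below are illustrative, not from any of the listed projects.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

public class HashSetIteratorBasics {
    public static void main(String[] args) {
        Set<String> colors = new HashSet<>(Arrays.asList("red", "green", "blue"));

        // Explicit iteration; elements come back in no particular order.
        Iterator<String> it = colors.iterator();
        while (it.hasNext()) {
            String c = it.next();
            if (c.startsWith("g")) {
                it.remove(); // the only safe way to remove while iterating
            }
        }

        // The enhanced for loop uses the same iterator under the hood.
        for (String c : colors) {
            System.out.println(c);
        }
        // Structurally modifying the set here by any means other than it.remove()
        // would make an active iterator fail fast with ConcurrentModificationException.
    }
}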
From source file:Main.java
public static void main(String[] a) {
    String[] elements = { "A", "B", "C", "D", "E" };
    HashSet<String> set = new HashSet<String>(Arrays.asList(elements));
    Iterator<String> iter = set.iterator();
    while (iter.hasNext()) {
        System.out.println(iter.next());
    }
}
From source file:Main.java
public static void main(String[] args) {
    HashSet<Integer> hSet = new HashSet<Integer>();
    // Integer.valueOf (or plain autoboxing) avoids the deprecated new Integer(String) constructor.
    hSet.add(Integer.valueOf(1));
    hSet.add(Integer.valueOf(2));
    hSet.add(Integer.valueOf(3));
    Iterator<Integer> itr = hSet.iterator();
    while (itr.hasNext()) {
        System.out.println(itr.next());
    }
}
From source file:Main.java
public static void main(String args[]) {
    HashSet<String> newset = new HashSet<String>();

    // populate hash set
    newset.add("Learning");
    newset.add("from");
    newset.add("java2s.com");

    // create an iterator
    Iterator<String> iterator = newset.iterator();

    // check values
    while (iterator.hasNext()) {
        System.out.println("Value: " + iterator.next() + " ");
    }
}
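Note that the values above will not necessarily print in insertion order: HashSet makes no ordering promise. A small sketch (names illustrative) contrasting it with LinkedHashSet and TreeSet, which expose the same iterator() API but with insertion order and sorted order respectively:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.TreeSet;

public class IterationOrderDemo {
    public static void main(String[] args) {
        String[] words = { "Learning", "from", "java2s.com" };

        Set<String> hash = new HashSet<>(Arrays.asList(words));         // unspecified order
        Set<String> linked = new LinkedHashSet<>(Arrays.asList(words)); // insertion order
        Set<String> sorted = new TreeSet<>(Arrays.asList(words));       // natural (sorted) order

        for (Set<String> set : Arrays.asList(hash, linked, sorted)) {
            Iterator<String> it = set.iterator();
            StringBuilder sb = new StringBuilder();
            while (it.hasNext()) {
                sb.append(it.next()).append(' ');
            }
            System.out.println(sb.toString().trim());
        }
    }
}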
From source file:subsets.GenerateGFKMatrix.java
public static void main(String args[]) {
    CollectionTools ct = new CollectionTools();
    GenerateGFKMatrix gfk = new GenerateGFKMatrix();

    TreeMap<String, HashSet> SmartCube_SCAttribut = gfk.get_AType_Relation_BType("SmartCube", "SCAttribut", "MEMBER_OF", 1);
    TreeMap<String, HashSet> SCAttribut_BCAttribut_part1 = gfk.get_AType_Relation_BType("SCAttribut", "BCAttribut", "TRANSITION", -1);
    TreeMap<String, HashSet> SCAttribut_SCAlgo = gfk.get_AType_Relation_BType("SCAttribut", "SCAlgorithmen", "DERIVED_BY", -1);
    TreeMap<String, HashSet> SCAlgo_BCAttribut = gfk.get_AType_Relation_BType("SCAlgorithmen", "BCAttribut", "USES", -1);
    gfk.get_AType_Relation_BType("Meldekonzept", "BCAttribut", "USES", -1);

    // MATCH (c:Codeliste)<-[:MEMBER_OF]-(cd:Code) WHERE c.Name = "Geschaeftsfallkategorie_CL" RETURN c,cd
    HashSet gfks = gfk.getGFKs();
    TreeMap<String, String> Meldekonzepte_formbeschreibung = gfk.getMeldekonzepte_formbeschreibung();
    TreeMap<String, HashSet> Meldekonzept_GFK = new TreeMap();

    for (String mk : Meldekonzepte_formbeschreibung.keySet()) {
        String form_beschreibung = Meldekonzepte_formbeschreibung.get(mk);
        Iterator it = gfks.iterator();
        while (it.hasNext()) {
            String gfkategorie = (String) it.next();
            if (form_beschreibung.contains(gfkategorie)) {
                HashSet dummy = Meldekonzept_GFK.get(mk);
                if (dummy == null) {
                    dummy = new HashSet();
                    dummy.add(gfkategorie);
                    Meldekonzept_GFK.put(mk, dummy);
                }
                dummy.add(gfkategorie);
            }
        }
    }

    TreeMap<String, HashSet> SmartCube_BCAttribut = ct.join_string_hashset_and_string_hashset(SmartCube_SCAttribut, SCAttribut_BCAttribut_part1);
    TreeMap<String, HashSet> SmartCube_SCAlgo = ct.join_string_hashset_and_string_hashset(SmartCube_SCAttribut, SCAttribut_SCAlgo);
    TreeMap<String, HashSet> SCAttribut_BCAttribut_part2 = ct.join_string_hashset_and_string_hashset(SmartCube_SCAlgo, SCAlgo_BCAttribut);

    TreeMap<String, HashSet> SCAttribut_BCAttribut = new TreeMap();
    SCAttribut_BCAttribut.putAll(SCAttribut_BCAttribut_part1);
    SCAttribut_BCAttribut.putAll(SCAttribut_BCAttribut_part2);

    //TreeMap<String,JSONObject> SC_BC_Dependency = gfk.get_SC_BC_Dependency();

    // Note: all subsets are defined in the GenerateSCSubset class
    GenerateSCSubset gensubset = new GenerateSCSubset();

    // Build all Smart Cube matrices
    // TreeMap<String,TreeMap<String, HashSet>> dependency_structure = new TreeMap();
    ArrayList<Cube> all_cubes = gensubset.getCubeDependency_ArrayList();

    // get all relevant dimensions
    // GFK mapping table: TreeMap<String (GFK), set of BC attributes>
    TreeMap<String, HashSet> GFK_BCAttribut = new TreeMap();

    // The actual dependency matrix is built here
    for (int i = 0; i < all_cubes.size(); i++) {
        System.out.println(((Cube) all_cubes.get(i)).Bezeichnung);
        Cube cube = all_cubes.get(i);

        for (String mk : cube.Meldekonzept_SCDimensionen.keySet()) {
            HashSet scattribute_pro_meldekonzept = cube.Meldekonzept_SCDimensionen.get(mk);
            Iterator it = scattribute_pro_meldekonzept.iterator();

            // for each Meldekonzept, collect its BC (basic cube) attributes
            HashSet geschaeftsfallkategorien = Meldekonzept_GFK.get(mk);
            HashSet Meldekonzept_BCDependency = new HashSet();

            // walk through the attributes of this Meldekonzept step by step
            while (it.hasNext()) {
                String SCAttribut = (String) it.next();
                HashSet BasicCubeAttribute = SCAttribut_BCAttribut.get(SCAttribut);
                // Known issue: SCAttribut_BCAttribut also contains cubes instead of only SC attributes,
                // so BasicCubeAttribute can be null here.
                Meldekonzept_BCDependency.addAll(BasicCubeAttribute);
            }

            Iterator gfk_iterator = geschaeftsfallkategorien.iterator();
            while (gfk_iterator.hasNext()) {
                String gfkategorie = (String) gfk_iterator.next();
                HashSet dummy = new HashSet();
                dummy.addAll(Meldekonzept_BCDependency);
                GFK_BCAttribut.put(gfkategorie, dummy);
            }
        }
        all_cubes.get(i).showMeldekonzept_SCDimensionen();
    }
    System.out.println("Test1234");
}
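The example above builds a "key to set of values" map by hand: get the set, create it if it is still null, then add. On Java 8+ the same grouping step can be written with Map.computeIfAbsent. A simplified, self-contained sketch of just that step, with generic names that are not part of the project's API:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

public class GroupingSketch {
    public static void main(String[] args) {
        Map<String, String> descriptionsByConcept = new HashMap<>();
        descriptionsByConcept.put("conceptA", "uses category X and category Y");
        descriptionsByConcept.put("conceptB", "uses category Y");

        Set<String> categories = new HashSet<>(Arrays.asList("category X", "category Y"));

        // concept -> set of categories mentioned in its description
        Map<String, Set<String>> categoriesByConcept = new HashMap<>();
        for (Map.Entry<String, String> e : descriptionsByConcept.entrySet()) {
            Iterator<String> it = categories.iterator();
            while (it.hasNext()) {
                String category = it.next();
                if (e.getValue().contains(category)) {
                    // computeIfAbsent replaces the manual "if (set == null) ..." bookkeeping
                    categoriesByConcept.computeIfAbsent(e.getKey(), k -> new HashSet<>()).add(category);
                }
            }
        }
        System.out.println(categoriesByConcept);
    }
}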
From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java
/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the index and events " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option thresholdOption = new Option("t", "use-custom-thresholds", false,
            "use custom thresholds for regular and rare events, defined in HDFS_HOME/"
                    + FrameworkUtils.thresholdDir + " file");
    thresholdOption.setRequired(false);
    options.addOption(thresholdOption);

    Option gOption = new Option("g", "group", true,
            "set group of datasets for which the indices and events" + " will be computed");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;

    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetIndex = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();
    HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>();
    HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>();

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());
    BufferedReader br;

    boolean removeExistingFiles = cmd.hasOption("f");
    boolean isThresholdUserDefined = cmd.hasOption("t");

    for (String dataset : cmd.getOptionValues("g")) {

        // getting aggregates
        String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3);
        if (aggregate.length == 0) {
            System.out.println("No aggregates found for " + dataset + ".");
            continue;
        }

        // getting aggregates header
        String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3);
        if (aggregatesHeaderFileName == null) {
            System.out.println("No aggregate header for " + dataset);
            continue;
        }
        String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName;

        shortDataset.add(dataset);
        datasetId.put(dataset, null);

        if (s3) {
            path = new Path(aggregatesHeader);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader);
        }

        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        datasetAgg.put(dataset, br.readLine().split("\t")[1]);
        br.close();
        if (s3)
            fs.close();
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id
    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    // getting user defined thresholds
    if (isThresholdUserDefined) {
        if (s3) {
            path = new Path(s3bucket + FrameworkUtils.thresholdDir);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        line = br.readLine();
        while (line != null) {
            // getting dataset name
            String dataset = line.trim();
            HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>();
            HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>();
            line = br.readLine();
            while ((line != null) && (line.split("\t").length > 1)) {
                // getting attribute ids and thresholds
                String[] keyVals = line.trim().split("\t");
                int att = Integer.parseInt(keyVals[0].trim());
                regThresholds.put(att, Double.parseDouble(keyVals[1].trim()));
                rareThresholds.put(att, Double.parseDouble(keyVals[2].trim()));
                line = br.readLine();
            }
            datasetRegThreshold.put(dataset, regThresholds);
            datasetRareThreshold.put(dataset, rareThresholds);
        }
        br.close();
    }
    if (s3)
        fs.close();

    // datasets that will use existing merge tree
    ArrayList<String> useMergeTree = new ArrayList<String>();

    // creating index for each spatio-temporal resolution
    FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3);

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/";

        if (removeExistingFiles) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3);
            FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3);
        } else if (datasetRegThreshold.containsKey(dataset)) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) {
                useMergeTree.add(dataset);
            }
        }

        if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) {
            input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset);
            shortDatasetIndex.add(dataset);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have indices.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    String aggregateDatasets = "";
    it = input.iterator();
    while (it.hasNext()) {
        aggregateDatasets += it.next() + ",";
    }

    Job icJob = null;
    Configuration icConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "index";
    String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/";

    FrameworkUtils.removeFile(indexOutputDir, s3conf, s3);

    icConf.set("dataset-name", datasetNames);
    icConf.set("dataset-id", datasetIds);

    if (!useMergeTree.isEmpty()) {
        String useMergeTreeStr = "";
        for (String dt : useMergeTree) {
            useMergeTreeStr += dt + ",";
        }
        icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1));
    }

    for (int i = 0; i < shortDataset.size(); i++) {
        String dataset = shortDataset.get(i);
        String id = datasetId.get(dataset);
        icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset));

        if (datasetRegThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset);
            String thresholds = "";
            for (int att : regThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ",";
            }
            icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1));
        }

        if (datasetRareThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset);
            String thresholds = "";
            for (int att : rareThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ",";
            }
            icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
    }

    icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    icConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    icConf.set("mapreduce.task.io.sort.mb", "200");
    icConf.set("mapreduce.task.io.sort.factor", "100");
    //icConf.set("mapreduce.task.timeout", "1800000");

    machineConf.setMachineConfiguration(icConf);

    if (s3) {
        machineConf.setMachineConfiguration(icConf);
        icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        icConf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    icJob = new Job(icConf);
    icJob.setJobName(jobName);

    icJob.setMapOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class);
    icJob.setOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setOutputValueClass(TopologyTimeSeriesWritable.class);
    //icJob.setOutputKeyClass(Text.class);
    //icJob.setOutputValueClass(Text.class);

    icJob.setMapperClass(IndexCreationMapper.class);
    icJob.setReducerClass(IndexCreationReducer.class);
    icJob.setNumReduceTasks(machineConf.getNumberReduces());

    icJob.setInputFormatClass(SequenceFileInputFormat.class);
    //icJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(icJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(icJob, true);
    FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1));
    FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir));

    icJob.setJarByClass(IndexCreation.class);

    long start = System.currentTimeMillis();
    icJob.submit();
    icJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetIndex) {
        String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }
}
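Both Data Polygamy drivers in this list build comma-separated configuration values by appending "value," in a loop over a HashSet iterator and then trimming the trailing comma with substring. A small, self-contained sketch of the same idea (dataset names are illustrative), once with the iterator plus StringBuilder, and once with the Java 8 String.join shortcut:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

public class JoinSketch {
    public static void main(String[] args) {
        Set<String> inputs = new HashSet<>(Arrays.asList("datasetA", "datasetB", "datasetC"));

        // Iterator + StringBuilder: append a separator only between elements.
        StringBuilder sb = new StringBuilder();
        Iterator<String> it = inputs.iterator();
        while (it.hasNext()) {
            sb.append(it.next());
            if (it.hasNext()) {
                sb.append(',');
            }
        }
        System.out.println(sb);

        // Java 8+: String.join does the same in one call.
        System.out.println(String.join(",", inputs));
    }
}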
From source file:org.apache.nutch.tools.proxy.TestbedProxy.java
/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println(
                "TestbedProxy [-seg <segment_name> | -segdir <segments>] [-port <nnn>] [-forward] [-fake] [-delay nnn] [-debug]");
        System.err.println("-seg <segment_name>\tpath to a single segment (can be specified multiple times)");
        System.err.println("-segdir <segments>\tpath to a parent directory of multiple segments (as above)");
        System.err.println(
                "-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
        System.err.println("-forward\tif specified, requests to all unknown urls will be passed to");
        System.err.println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
        System.err.println(
                "-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
        System.err.println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
        System.exit(-1);
    }

    Configuration conf = NutchConfiguration.create();
    int port = conf.getInt("segment.proxy.port", 8181);
    boolean forward = false;
    boolean fake = false;
    boolean delay = false;
    boolean debug = false;
    int delayVal = 0;

    HashSet<Path> segs = new HashSet<Path>();
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-segdir")) {
            FileSystem fs = FileSystem.get(conf);
            FileStatus[] fstats = fs.listStatus(new Path(args[++i]));
            Path[] paths = HadoopFSUtil.getPaths(fstats);
            segs.addAll(Arrays.asList(paths));
        } else if (args[i].equals("-port")) {
            port = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-forward")) {
            forward = true;
        } else if (args[i].equals("-delay")) {
            delay = true;
            delayVal = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-fake")) {
            fake = true;
        } else if (args[i].equals("-debug")) {
            debug = true;
        } else if (args[i].equals("-seg")) {
            segs.add(new Path(args[++i]));
        } else {
            LOG.fatal("Unknown argument: " + args[i]);
            System.exit(-1);
        }
    }

    // Create the server
    Server server = new Server();
    SocketConnector connector = new SocketConnector();
    connector.setPort(port);
    connector.setResolveNames(false);
    server.addConnector(connector);

    // create a list of handlers
    HandlerList list = new HandlerList();
    server.addHandler(list);

    if (debug) {
        LOG.info("* Added debug handler.");
        list.addHandler(new LogDebugHandler());
    }

    if (delay) {
        LOG.info("* Added delay handler: "
                + (delayVal < 0 ? "random delay up to " + (-delayVal) : "constant delay of " + delayVal));
        list.addHandler(new DelayHandler(delayVal));
    }

    // XXX alternatively, we can add the DispatchHandler as the first one,
    // XXX to activate handler plugins and redirect requests to appropriate
    // XXX handlers ... Here we always load these handlers

    Iterator<Path> it = segs.iterator();
    while (it.hasNext()) {
        Path p = it.next();
        try {
            SegmentHandler segment = new SegmentHandler(conf, p);
            list.addHandler(segment);
            LOG.info("* Added segment handler for: " + p);
        } catch (Exception e) {
            LOG.warn("Skipping segment '" + p + "': " + StringUtils.stringifyException(e));
        }
    }

    if (forward) {
        LOG.info("* Adding forwarding proxy for all unknown urls ...");
        ServletHandler servlets = new ServletHandler();
        servlets.addServletWithMapping(AsyncProxyServlet.class, "/*");
        servlets.addFilterWithMapping(LogDebugHandler.class, "/*", Handler.ALL);
        list.addHandler(servlets);
    }

    if (fake) {
        LOG.info("* Added fake handler for remaining URLs.");
        list.addHandler(new FakeHandler());
    }
    list.addHandler(new NotFoundHandler());

    // Start the http server
    server.start();
    server.join();
}
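The segment loop above wraps each element in its own try/catch, so one unreadable segment is logged and skipped while iteration continues. A tiny generic sketch of that per-element isolation pattern over a HashSet (names and the simulated failure are illustrative):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

public class PerElementTryCatch {
    public static void main(String[] args) {
        Set<String> paths = new HashSet<>(Arrays.asList("seg1", "bad-seg", "seg2"));
        Iterator<String> it = paths.iterator();
        while (it.hasNext()) {
            String p = it.next();
            try {
                if (p.startsWith("bad")) {
                    throw new IllegalStateException("cannot open " + p);
                }
                System.out.println("registered handler for " + p);
            } catch (Exception e) {
                // log and continue with the remaining elements
                System.err.println("skipping '" + p + "': " + e.getMessage());
            }
        }
    }
}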
From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java
/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the aggregate functions " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option gOption = new Option("g", "group", true,
            "set group of datasets for which the aggregate functions"
                    + " will be computed, followed by their temporal and spatial attribute indices");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;

    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    String preProcessingDatasets = "";
    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetAggregation = new ArrayList<String>();
    HashMap<String, String> datasetTempAtt = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>();
    HashMap<String, String> preProcessingDataset = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] datasetArgs = cmd.getOptionValues("g");
    for (int i = 0; i < datasetArgs.length; i += 3) {
        String dataset = datasetArgs[i];

        // getting pre-processing
        String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3);
        if (tempPreProcessing == null) {
            System.out.println("No pre-processing available for " + dataset);
            continue;
        }
        preProcessingDataset.put(dataset, tempPreProcessing);

        shortDataset.add(dataset);
        // treat the literal "null" as a missing attribute
        datasetTempAtt.put(dataset, ("null".equals(datasetArgs[i + 1]) ? null : datasetArgs[i + 1]));
        datasetSpatialAtt.put(dataset, ("null".equals(datasetArgs[i + 2]) ? null : datasetArgs[i + 2]));
        datasetId.put(dataset, null);
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id
    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3);

    // getting smallest resolution
    HashMap<String, String> tempResMap = new HashMap<String, String>();
    HashMap<String, String> spatialResMap = new HashMap<String, String>();
    HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>();

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {
        String[] datasetArray = preProcessingDataset.get(dataset).split("-");

        String datasetTemporalStr = datasetArray[datasetArray.length - 2];
        int datasetTemporal = utils.temporalResolution(datasetTemporalStr);

        String datasetSpatialStr = datasetArray[datasetArray.length - 1];
        int datasetSpatial = utils.spatialResolution(datasetSpatialStr);

        // finding all possible resolutions
        String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal);
        String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial);

        String temporalResolution = "";
        String spatialResolution = "";

        String tempRes = "";
        String spatialRes = "";

        boolean dataAdded = false;

        for (int i = 0; i < temporalResolutions.length; i++) {
            for (int j = 0; j < spatialResolutions.length; j++) {
                temporalResolution = temporalResolutions[i];
                spatialResolution = spatialResolutions[j];

                String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";

                if (removeExistingFiles) {
                    FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3);
                }

                if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) {
                    dataAdded = true;
                    tempRes += temporalResolution + "-";
                    spatialRes += spatialResolution + "-";
                }
            }
        }

        if (dataAdded) {
            input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
            shortDatasetAggregation.add(dataset);

            tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1));
            spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1));

            datasetTemporalStrMap.put(dataset, datasetTemporalStr);
            datasetSpatialStrMap.put(dataset, datasetSpatialStr);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have aggregates.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    it = input.iterator();
    while (it.hasNext()) {
        preProcessingDatasets += it.next() + ",";
    }

    Job aggJob = null;
    String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/";
    String jobName = "aggregates";

    FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3);

    Configuration aggConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    aggConf.set("dataset-name", datasetNames);
    aggConf.set("dataset-id", datasetIds);

    for (int i = 0; i < shortDatasetAggregation.size(); i++) {
        String dataset = shortDatasetAggregation.get(i);
        String id = datasetId.get(dataset);

        aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset));
        aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset));
        aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset));

        if (s3)
            aggConf.set("dataset-" + id,
                    s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
        else
            aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/"
                    + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
    }

    aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    aggConf.set("mapreduce.task.io.sort.mb", "200");
    aggConf.set("mapreduce.task.io.sort.factor", "100");

    machineConf.setMachineConfiguration(aggConf);

    if (s3) {
        machineConf.setMachineConfiguration(aggConf);
        aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    if (snappyCompression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    aggJob = new Job(aggConf);
    aggJob.setJobName(jobName);

    aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setMapOutputValueClass(AggregationArrayWritable.class);
    aggJob.setOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setOutputValueClass(FloatArrayWritable.class);
    //aggJob.setOutputKeyClass(Text.class);
    //aggJob.setOutputValueClass(Text.class);

    aggJob.setMapperClass(AggregationMapper.class);
    aggJob.setCombinerClass(AggregationCombiner.class);
    aggJob.setReducerClass(AggregationReducer.class);
    aggJob.setNumReduceTasks(machineConf.getNumberReduces());

    aggJob.setInputFormatClass(SequenceFileInputFormat.class);
    //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(aggJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(aggJob, true);
    FileInputFormat.setInputPaths(aggJob, preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1));
    FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir));

    aggJob.setJarByClass(Aggregation.class);

    long start = System.currentTimeMillis();
    aggJob.submit();
    aggJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetAggregation) {
        String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }
}
From source file:Main.java
public static HashSet<String> intersectionSet(HashSet<String> setA, HashSet<String> setB) {
    HashSet<String> intersectionSet = new HashSet<String>();
    Iterator<String> iterA = setA.iterator();
    while (iterA.hasNext()) {
        String tempInner = iterA.next();
        if (setB.contains(tempInner)) {
            intersectionSet.add(tempInner);
        }
    }
    return intersectionSet;
}
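The same intersection can be computed without an explicit iterator by copying one set and calling retainAll; a minimal sketch with illustrative values:

import java.util.Arrays;
import java.util.HashSet;

public class IntersectionWithRetainAll {
    public static void main(String[] args) {
        HashSet<String> setA = new HashSet<>(Arrays.asList("a", "b", "c"));
        HashSet<String> setB = new HashSet<>(Arrays.asList("b", "c", "d"));

        // Copy setA so the originals stay untouched, then keep only elements also in setB.
        HashSet<String> intersection = new HashSet<>(setA);
        intersection.retainAll(setB);

        System.out.println(intersection); // [b, c] in some order
    }
}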
From source file:Main.java
/**
 * Creates policy tree stub containing two <code>PolicyNode</code>s
 * for testing purposes
 *
 * @return root <code>PolicyNode</code> of the policy tree
 */
public static PolicyNode getPolicyTree() {
    return new PolicyNode() {
        final PolicyNode parent = this;

        public int getDepth() {
            // parent
            return 0;
        }

        public boolean isCritical() {
            return false;
        }

        public String getValidPolicy() {
            return null;
        }

        public PolicyNode getParent() {
            return null;
        }

        public Iterator<PolicyNode> getChildren() {
            PolicyNode child = new PolicyNode() {
                public int getDepth() {
                    // child
                    return 1;
                }

                public boolean isCritical() {
                    return false;
                }

                public String getValidPolicy() {
                    return null;
                }

                public PolicyNode getParent() {
                    return parent;
                }

                public Iterator<PolicyNode> getChildren() {
                    return null;
                }

                public Set<String> getExpectedPolicies() {
                    return null;
                }

                public Set<? extends PolicyQualifierInfo> getPolicyQualifiers() {
                    return null;
                }
            };
            HashSet<PolicyNode> s = new HashSet<PolicyNode>();
            s.add(child);
            return s.iterator();
        }

        public Set<String> getExpectedPolicies() {
            return null;
        }

        public Set<? extends PolicyQualifierInfo> getPolicyQualifiers() {
            return null;
        }
    };
}
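The stub above hands callers s.iterator() from a freshly built HashSet, so the internal set is not exposed. When the set is a long-lived field, wrapping it with Collections.unmodifiableSet before returning its iterator keeps callers from removing elements through iterator.remove(). A minimal sketch with an illustrative class name, not tied to the PolicyNode API:

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

public class ReadOnlyIteratorDemo {
    private final Set<String> children = new HashSet<>(Arrays.asList("child1", "child2"));

    public Iterator<String> getChildren() {
        // Callers can iterate, but iterator.remove() now throws UnsupportedOperationException.
        return Collections.unmodifiableSet(children).iterator();
    }

    public static void main(String[] args) {
        Iterator<String> it = new ReadOnlyIteratorDemo().getChildren();
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }
}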
From source file:com.jetyun.pgcd.rpc.localarg.LocalArgController.java
/**
 * Using the caller's context, resolve a given method call parameter to a
 * local argument.
 *
 * @param context
 *            callers context. In an http servlet environment, this will
 *            contain the servlet request and response objects.
 * @param param
 *            class type parameter to resolve to a local argument.
 *
 * @return the run time instance that is resolved, to be used when calling
 *         the method.
 *
 * @throws UnmarshallException
 *             if there is a failure during resolution.
 */
public static Object resolveLocalArg(Object context[], Class param) throws UnmarshallException {
    HashSet resolverSet = (HashSet) localArgResolverMap.get(param);
    if (resolverSet == null) {
        // no resolver registered for this parameter type
        throw new UnmarshallException("couldn't find local arg resolver");
    }
    Iterator i = resolverSet.iterator();
    while (i.hasNext()) {
        LocalArgResolverData resolverData = (LocalArgResolverData) i.next();
        for (int j = 0; j < context.length; j++) {
            if (resolverData.understands(context[j])) {
                try {
                    return resolverData.getArgResolver().resolveArg(context[j]);
                } catch (LocalArgResolveException e) {
                    throw new UnmarshallException("error resolving local argument: " + e, e);
                }
            }
        }
    }
    throw new UnmarshallException("couldn't find local arg resolver");
}
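The loop above is a "return the first element that matches" search over a HashSet pulled out of a registry map. A small generic sketch of that pattern, with a null guard before calling iterator(); the registry, type names, and matching rule are all illustrative stand-ins, not the JSON-RPC library's API:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

public class FirstMatchLookup {
    public static void main(String[] args) {
        // type name -> set of registered handler names (stand-in for a resolver registry)
        Map<String, Set<String>> registry = new HashMap<>();
        registry.put("HttpServletRequest", new HashSet<>(Arrays.asList("requestResolver", "sessionResolver")));

        Set<String> candidates = registry.get("HttpServletRequest");
        if (candidates == null) {
            throw new IllegalStateException("no resolver registered"); // guard before calling iterator()
        }

        String match = null;
        Iterator<String> it = candidates.iterator();
        while (it.hasNext()) {
            String candidate = it.next();
            if (candidate.startsWith("request")) { // stand-in for resolverData.understands(...)
                match = candidate;
                break; // first match wins
            }
        }
        System.out.println(match);
    }
}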