List of usage examples for java.util.Iterator.next()
E next();
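The examples below are full main() methods collected from open-source projects, so the calls to next() are buried inside a lot of surrounding code. As a reference point, here is a minimal stand-alone sketch of the canonical pattern; it is illustrative only and not taken from any of the projects listed. next() returns the current element and advances the iterator, and throws NoSuchElementException once hasNext() would return false.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class IteratorNextBasics {
    public static void main(String[] args) {
        List<String> names = Arrays.asList("a", "b", "c");
        Iterator<String> it = names.iterator();
        while (it.hasNext()) {          // guard every call to next()
            String name = it.next();    // returns the element and advances the cursor
            System.out.println(name);
        }
        // calling it.next() here would throw NoSuchElementException
    }
}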
From source file:fr.iphc.grid.jobmonitor.CeList.java
public static void main(String[] args) throws Exception { SessionFactory.createSession(true);/* w ww .j a va2 s. co m*/ CeList command = new CeList(); CommandLine line = command.parse(args); Integer timeout = 0; String TableSql = "monce"; // MySQLAccess sql = new MySQLAccess(); if (line.getOptionValue(OPT_TIMEOUT) == null) { timeout = 15; } else { timeout = Integer.parseInt(line.getOptionValue(OPT_TIMEOUT)); } timeout = timeout * 60; // convertir en secondes Date start = new Date(); String OutDir = line.getOptionValue(OPT_OUTDIR); if (OutDir == null) { OutDir = "/tmp/thread"; } ArrayList<URL> CeList = null; if (line.getOptionValue(OPT_CEPATH) == null) { CeList = AvailableLdapCe(); // for (URL k : CeList) { // // System.out.println(k); // } } else { CeList = AvailableCe(line.getOptionValue(OPT_CEPATH)); } Boolean ret = initDirectory(new File(OutDir)); if (!ret) { System.out.println("ERROR: " + OutDir + "STOP"); System.exit(-1); } // check if we can connect to the grid // try{ // SessionFactory.createSession(true); // }catch(NoSuccessException e){ // System.err.println("Could not connect to the grid at all // ("+e.getMessage()+")"); // System.err.println("Aborting"); // System.exit(0); // // } SubmitterThread[] st = new SubmitterThread[CeList.size()]; Iterator<URL> i = CeList.iterator(); int index = 0; while (i.hasNext()) { URL serviceURL = i.next(); // Ne pas importer dans thread because options. Properties prop = new Properties(); prop.setProperty("Executable", "/bin/hostname");// // prop.setProperty("Executable", "touch /dev/null"); JobDescription desc = createJobDescription(prop); desc.setAttribute(JobDescription.INTERACTIVE, "true"); desc.setAttribute(JobDescription.EXECUTABLE, "/bin/hostname"); // proxy="/home/dsa/.globus/biomed.txt"; // Context ctx = ContextFactory.createContext(); // ctx.setAttribute(Context.TYPE, "VOMS"); // ctx.setAttribute(Context.USERVO, "biomed"); // ctx.setAttribute(Context.USERPROXY,proxy); // Session session = SessionFactory.createSession(false); // session.addContext(ctx); Session session = SessionFactory.createSession(true); st[index] = new SubmitterThread(serviceURL, session, desc, OutDir, timeout, start); st[index].setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { @Override public void uncaughtException(Thread t, Throwable e) { // System.out.println("Error! An exception occured in " // + t.getName() + ". 
Cause: " + e.getMessage()); } }); st[index].start(); // Thread.sleep(15*1000); // test si fichier exist // System.out.println("Alive // "+OutDir+"/"+serviceURL.getHost()+"_"+serviceURL.getPath().replaceAll("/cream-","")+".out"); // while ((!((new // File(OutDir+"/"+serviceURL.getHost()+"_"+serviceURL.getPath().replaceAll("/cream-","")+".out").exists()) // || // (new // File(OutDir+"/"+serviceURL.getHost()+"_"+serviceURL.getPath().replaceAll("/cream-","")+".err").exists())))) // { // Thread.sleep(500); // } // System.out.println("Alive "+serviceURL.getHost()+"-"+ index+"FILE // EXIST"); index++; } ; // System.out.println("BEGIN LOOP: Max " + index); long date_start = System.currentTimeMillis(); // System.out.println("BEGIN START: " + date_start); Integer time_out = (timeout + 180) * 1000; // unit ms value in minute // +120 // =delta par rapport thread Boolean Alive = true; // int nb = 0; long now = System.currentTimeMillis(); do { now = System.currentTimeMillis(); Alive = false; // nb = 0; for (int j = 0; j < index; j++) { if (st[j].isAlive()) { // System.out.println("Alive "+j); Alive = true; // nb++; } } // System.out.println(nb); Thread.sleep(10000); } while ((Alive) && ((now - date_start) < time_out)); for (int j = 0; j < index; j++) { if (st[j].isAlive()) { st[j].Requeststop(); } } BilanCe(OutDir, CeList, TableSql); jobManagerLdap jm = new jobManagerLdap(); jm.updateLdapCe(); System.out.println("END " + new Date()); // faire un traitement... System.exit(0); }
From source file:edu.upenn.cis.FastAlign.java
/** * Prints alignments for options specified by command line arguments. * @param argv parameters to be used by FastAlign. *///w w w. j a va 2 s. co m public static void main(String[] argv) { FastAlign align = FastAlign.initCommandLine(argv); if (align == null) { System.err.println("Usage: java " + FastAlign.class.getCanonicalName() + " -i file.fr-en\n" + " Standard options ([USE] = strongly recommended):\n" + " -i: [REQ] Input parallel corpus\n" + " -v: [USE] Use Dirichlet prior on lexical translation distributions\n" + " -d: [USE] Favor alignment points close to the monotonic diagonoal\n" + " -o: [USE] Optimize how close to the diagonal alignment points should be\n" + " -r: Run alignment in reverse (condition on target and predict source)\n" + " -c: Output conditional probability table\n" + " -e: Start with existing conditional probability table\n" + " Advanced options:\n" + " -I: number of iterations in EM training (default = 5)\n" + " -p: p_null parameter (default = 0.08)\n" + " -N: No null word\n" + " -a: alpha parameter for optional Dirichlet prior (default = 0.01)\n" + " -T: starting lambda for diagonal distance parameter (default = 4)\n"); System.exit(1); } boolean use_null = !align.no_null_word; if (align.variational_bayes && align.alpha <= 0.0) { System.err.println("--alpha must be > 0\n"); System.exit(1); } double prob_align_not_null = 1.0 - align.prob_align_null; final int kNULL = align.d.Convert("<eps>"); TTable s2t = new TTable(); if (!align.existing_probability_filename.isEmpty()) { boolean success = s2t.ImportFromFile(align.existing_probability_filename, '\t', align.d); if (!success) { System.err.println("Can't read table " + align.existing_probability_filename); System.exit(1); } } Map<Pair, Integer> size_counts = new HashMap<Pair, Integer>(); double tot_len_ratio = 0; double mean_srclen_multiplier = 0; List<Double> probs = new ArrayList<Double>(); ; // E-M Iterations Loop TODO move this into a method? for (int iter = 0; iter < align.iterations || (iter == 0 && align.iterations == 0); ++iter) { final boolean final_iteration = (iter >= (align.iterations - 1)); System.err.println("ITERATION " + (iter + 1) + (final_iteration ? " (FINAL)" : "")); Scanner in = null; try { in = new Scanner(new File(align.input)); if (!in.hasNextLine()) { System.err.println("Can't read " + align.input); System.exit(1); } } catch (FileNotFoundException e) { e.printStackTrace(); System.err.println("Can't read " + align.input); System.exit(1); } double likelihood = 0; double denom = 0.0; int lc = 0; boolean flag = false; String line; // String ssrc, strg; ArrayList<Integer> src = new ArrayList<Integer>(); ArrayList<Integer> trg = new ArrayList<Integer>(); double c0 = 0; double emp_feat = 0; double toks = 0; // Iterate over each line of the input file while (in.hasNextLine()) { line = in.nextLine(); ++lc; if (lc % 1000 == 0) { System.err.print('.'); flag = true; } if (lc % 50000 == 0) { System.err.println(" [" + lc + "]\n"); System.err.flush(); flag = false; } src.clear(); trg.clear(); // TODO this is redundant; src and tgt cleared in ParseLine // Integerize and split source and target lines. align.ParseLine(line, src, trg); if (align.is_reverse) { ArrayList<Integer> tmp = src; src = trg; trg = tmp; } // TODO Empty lines break the parser. Should this be true? 
if (src.size() == 0 || trg.size() == 0) { System.err.println("Error in line " + lc + "\n" + line); System.exit(1); } if (iter == 0) { tot_len_ratio += ((double) trg.size()) / ((double) src.size()); } denom += trg.size(); probs.clear(); // Add to pair length counts only if first iteration. if (iter == 0) { Pair pair = new Pair(trg.size(), src.size()); Integer value = size_counts.get(pair); if (value == null) value = 0; size_counts.put(pair, value + 1); } boolean first_al = true; // used when printing alignments toks += trg.size(); // Iterate through the English tokens for (int j = 0; j < trg.size(); ++j) { final int f_j = trg.get(j); double sum = 0; double prob_a_i = 1.0 / (src.size() + (use_null ? 1 : 0)); // uniform (model 1) if (use_null) { if (align.favor_diagonal) { prob_a_i = align.prob_align_null; } probs.add(0, s2t.prob(kNULL, f_j) * prob_a_i); sum += probs.get(0); } double az = 0; if (align.favor_diagonal) az = DiagonalAlignment.computeZ(j + 1, trg.size(), src.size(), align.diagonal_tension) / prob_align_not_null; for (int i = 1; i <= src.size(); ++i) { if (align.favor_diagonal) prob_a_i = DiagonalAlignment.unnormalizedProb(j + 1, i, trg.size(), src.size(), align.diagonal_tension) / az; probs.add(i, s2t.prob(src.get(i - 1), f_j) * prob_a_i); sum += probs.get(i); } if (final_iteration) { double max_p = -1; int max_index = -1; if (use_null) { max_index = 0; max_p = probs.get(0); } for (int i = 1; i <= src.size(); ++i) { if (probs.get(i) > max_p) { max_index = i; max_p = probs.get(i); } } if (max_index > 0) { if (first_al) first_al = false; else System.out.print(' '); if (align.is_reverse) System.out.print("" + j + '-' + (max_index - 1)); else System.out.print("" + (max_index - 1) + '-' + j); } } else { if (use_null) { double count = probs.get(0) / sum; c0 += count; s2t.Increment(kNULL, f_j, count); } for (int i = 1; i <= src.size(); ++i) { final double p = probs.get(i) / sum; s2t.Increment(src.get(i - 1), f_j, p); emp_feat += DiagonalAlignment.feature(j, i, trg.size(), src.size()) * p; } } likelihood += Math.log(sum); } if (final_iteration) System.out.println(); } // log(e) = 1.0 double base2_likelihood = likelihood / Math.log(2); if (flag) { System.err.println(); } if (iter == 0) { mean_srclen_multiplier = tot_len_ratio / lc; System.err.println("expected target length = source length * " + mean_srclen_multiplier); } emp_feat /= toks; System.err.println(" log_e likelihood: " + likelihood); System.err.println(" log_2 likelihood: " + base2_likelihood); System.err.println(" cross entropy: " + (-base2_likelihood / denom)); System.err.println(" perplexity: " + Math.pow(2.0, -base2_likelihood / denom)); System.err.println(" posterior p0: " + c0 / toks); System.err.println(" posterior al-feat: " + emp_feat); //System.err.println(" model tension: " + mod_feat / toks ); System.err.println(" size counts: " + size_counts.size()); if (!final_iteration) { if (align.favor_diagonal && align.optimize_tension && iter > 0) { for (int ii = 0; ii < 8; ++ii) { double mod_feat = 0; Iterator<Map.Entry<Pair, Integer>> it = size_counts.entrySet().iterator(); for (; it.hasNext();) { Map.Entry<Pair, Integer> entry = it.next(); final Pair p = entry.getKey(); for (int j = 1; j <= p.first; ++j) mod_feat += entry.getValue() * DiagonalAlignment.computeDLogZ(j, p.first, p.second, align.diagonal_tension); } mod_feat /= toks; System.err.println(" " + ii + 1 + " model al-feat: " + mod_feat + " (tension=" + align.diagonal_tension + ")"); align.diagonal_tension += (emp_feat - mod_feat) * 20.0; if (align.diagonal_tension 
<= 0.1) align.diagonal_tension = 0.1; if (align.diagonal_tension > 14) align.diagonal_tension = 14; } System.err.println(" final tension: " + align.diagonal_tension); } if (align.variational_bayes) s2t.NormalizeVB(align.alpha); else s2t.Normalize(); //prob_align_null *= 0.8; // XXX //prob_align_null += (c0 / toks) * 0.2; prob_align_not_null = 1.0 - align.prob_align_null; } } if (!align.conditional_probability_filename.isEmpty()) { System.err.println("conditional probabilities: " + align.conditional_probability_filename); s2t.ExportToFile(align.conditional_probability_filename, align.d); } System.exit(0); }
From source file:com.mch.registry.ccs.server.CcsClient.java
/** * Sends messages to registered devices/*from w w w.jav a2 s.c o m*/ */ public static void main(String[] args) { Config config = new Config(); final String projectId = config.getProjectId(); final String key = config.getKey(); final CcsClient ccsClient = CcsClient.prepareClient(projectId, key, true); try { ccsClient.connect(); } catch (XMPPException e) { logger.log(Level.WARNING, "XMPP Exception ", e); } final Runnable sendNotifications = new Runnable() { public void run() { try { logger.log(Level.INFO, "Working Q!"); if (!isOffHours()) { //Prepare downstream message String toRegId = ""; String message = ""; String messageId = ""; Map<String, String> payload = new HashMap<String, String>(); String collapseKey = null; Long timeToLive = 10000L; Boolean delayWhileIdle = true; String messagePrefix = ""; int notificationQueueID = 0; boolean sucessfullySent = false; //Read from mysql database MySqlHandler mysql = new MySqlHandler(); ArrayList<Notification> queue = new ArrayList<Notification>(); for (int i = 1; i < 3; i++) { queue = mysql.getNotificationQueue(i); if (queue.size() > 0) { switch (i) { case 1: messagePrefix = "_V: "; break; case 2: messagePrefix = "_R: "; break; default: messagePrefix = ""; logger.log(Level.WARNING, "Unknown message type!"); } Notification notification = new Notification(); Iterator<Notification> iterator = queue.iterator(); while (iterator.hasNext()) { notification = iterator.next(); toRegId = notification.getGcmRegID(); message = notification.getNotificationText(); notificationQueueID = notification.getNotificationQueueID(); messageId = "m-" + Long.toString(random.nextLong()); payload = new HashMap<String, String>(); payload.put("message", messagePrefix + message); try { // Send the downstream message to a device. ccsClient.send(createJsonMessage(toRegId, messageId, payload, collapseKey, timeToLive, delayWhileIdle)); sucessfullySent = true; logger.log(Level.INFO, "Message sent. ID: " + notificationQueueID + ", RegID: " + toRegId + ", Text: " + message); } catch (Exception e) { mysql.prepareNotificationForTheNextDay(notificationQueueID); sucessfullySent = false; logger.log(Level.WARNING, "Message could not be sent! ID: " + notificationQueueID + ", RegID: " + toRegId + ", Text: " + message); } if (sucessfullySent) { mysql.moveNotificationToHistory(notificationQueueID); } } } else { logger.log(Level.INFO, "No notifications to send. Type: " + Integer.toString(i)); } } } } catch (Exception e) { logger.log(Level.WARNING, "Exception ", e); } } }; ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor(); //Start when server starts and every 30 minutes after ScheduledFuture task = executor.scheduleAtFixedRate(sendNotifications, 0, 30, TimeUnit.MINUTES); try { task.get(); } catch (ExecutionException e) { logger.log(Level.SEVERE, "Exception ", e); } catch (InterruptedException e) { logger.log(Level.SEVERE, "Exception ", e); } task.cancel(false); try { executor.shutdown(); executor.awaitTermination(30, TimeUnit.SECONDS); } catch (InterruptedException e) { logger.log(Level.SEVERE, "Exception ", e); } }
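The CcsClient loop above calls iterator.next() once per queued Notification and wraps the actual send in a try/catch, so one failed delivery is logged and the iteration continues with the next element. Below is a reduced sketch of that per-element error-handling structure; Notification, the CCS send call, and the MySQL bookkeeping are replaced by placeholder strings and a hypothetical send() helper.

import java.util.Iterator;
import java.util.List;

class QueueDrain {
    static void drain(List<String> queue) {
        Iterator<String> iterator = queue.iterator();
        while (iterator.hasNext()) {
            String message = iterator.next();
            try {
                send(message);                       // may fail for this element only
            } catch (Exception e) {
                System.err.println("delivery failed: " + message);
                continue;                            // keep iterating over the rest
            }
            System.out.println("delivered: " + message);
        }
    }

    // Placeholder for the real downstream send.
    private static void send(String message) throws Exception { /* ... */ }
}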
From source file:de.micromata.tpsb.doc.StaticTestDocGenerator.java
public static void main(String[] args) { ParserConfig.Builder bCfg = new ParserConfig.Builder(); ParserConfig.Builder tCfg = new ParserConfig.Builder(); tCfg.generateIndividualFiles(true);//from w w w.j a v a 2 s . co m bCfg.generateIndividualFiles(true); List<String> la = Arrays.asList(args); Iterator<String> it = la.iterator(); boolean baseDirSet = false; boolean ignoreLocalSettings = false; List<String> addRepos = new ArrayList<String>(); StringResourceLoader.setRepository(StringResourceLoader.REPOSITORY_NAME_DEFAULT, new StringResourceRepositoryImpl()); try { while (it.hasNext()) { String arg = it.next(); String value = null; if ((value = getArgumentOption(it, arg, "--project-root", "-pr")) != null) { File f = new File(value); if (f.exists() == false) { System.err.print("project root doesn't exists: " + f.getAbsolutePath()); continue; } TpsbEnvironment.get().addProjectRoots(f); File ts = new File(f, "src/test"); if (ts.exists() == true) { tCfg.addSourceFileRespository(new FileSystemSourceFileRepository(ts.getAbsolutePath())); bCfg.addSourceFileRespository(new FileSystemSourceFileRepository(ts.getAbsolutePath())); } continue; } if ((value = getArgumentOption(it, arg, "--test-input", "-ti")) != null) { File f = new File(value); if (f.exists() == false) { System.err.print("test-input doesn't exists: " + f.getAbsolutePath()); } tCfg.addSourceFileRespository(new FileSystemSourceFileRepository(value)); bCfg.addSourceFileRespository(new FileSystemSourceFileRepository(value)); continue; } if ((value = getArgumentOption(it, arg, "--output-path", "-op")) != null) { if (baseDirSet == false) { tCfg.outputDir(value); bCfg.outputDir(value); TpsbEnvironment.setBaseDir(value); baseDirSet = true; } else { addRepos.add(value); } continue; } if ((value = getArgumentOption(it, arg, "--index-vmtemplate", "-ivt")) != null) { try { String content = FileUtils.readFileToString(new File(value), CharEncoding.UTF_8); StringResourceRepository repo = StringResourceLoader.getRepository(); repo.putStringResource("customIndexTemplate", content, CharEncoding.UTF_8); tCfg.indexTemplate("customIndexTemplate"); } catch (IOException ex) { throw new RuntimeException( "Cannot load file " + new File(value).getAbsolutePath() + ": " + ex.getMessage(), ex); } continue; } if ((value = getArgumentOption(it, arg, "--test-vmtemplate", "-tvt")) != null) { try { String content = FileUtils.readFileToString(new File(value), CharEncoding.UTF_8); StringResourceRepository repo = StringResourceLoader.getRepository(); repo.putStringResource("customTestTemplate", content, CharEncoding.UTF_8); tCfg.testTemplate("customTestTemplate"); } catch (IOException ex) { throw new RuntimeException( "Cannot load file " + new File(value).getAbsolutePath() + ": " + ex.getMessage(), ex); } continue; } if (arg.equals("--singlexml") == true) { tCfg.generateIndividualFiles(false); bCfg.generateIndividualFiles(false); } else if (arg.equals("--ignore-local-settings") == true) { ignoreLocalSettings = true; continue; } } } catch (RuntimeException ex) { System.err.print(ex.getMessage()); return; } if (ignoreLocalSettings == false) { readLocalSettings(bCfg, tCfg); } bCfg// .addSourceFileFilter(new MatcherSourceFileFilter("*Builder,*App,*builder")) // .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbBuilder.class)) // .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbApplication.class)) // ; tCfg// .addSourceFileFilter(new MatcherSourceFileFilter("*Test,*TestCase")) // .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbTestSuite.class)) // ; 
StaticTestDocGenerator docGenerator = new StaticTestDocGenerator(bCfg.build(), tCfg.build()); TpsbEnvironment env = TpsbEnvironment.get(); if (addRepos.isEmpty() == false) { env.setIncludeRepos(addRepos); } docGenerator.parseTestBuilders(); docGenerator.parseTestCases(); }
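The StaticTestDocGenerator example passes its Iterator<String> into getArgumentOption, which (judging from the call sites) consumes the option's value with a further next() on the same iterator. The helper itself is not shown above, so the sketch below assumes that behavior; argumentOption is a hypothetical stand-in.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

class ArgScan {
    // Assumed behavior of a helper like getArgumentOption above:
    // if the current token matches the option, consume and return the following token.
    static String argumentOption(Iterator<String> it, String arg, String longOpt, String shortOpt) {
        if ((arg.equals(longOpt) || arg.equals(shortOpt)) && it.hasNext()) {
            return it.next();   // advances the shared iterator past the option value
        }
        return null;
    }

    public static void main(String[] args) {
        List<String> la = Arrays.asList("--output-path", "/tmp/out", "--singlexml");
        Iterator<String> it = la.iterator();
        while (it.hasNext()) {
            String arg = it.next();
            String value = argumentOption(it, arg, "--output-path", "-op");
            if (value != null) {
                System.out.println("output path: " + value);
            }
        }
    }
}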
From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java
/** * @param args/* w w w .ja v a2s. c o m*/ */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the aggregate functions " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions" + " will be computed, followed by their temporal and spatial attribute indices"); gOption.setRequired(true); gOption.setArgName("GROUP"); gOption.setArgs(Option.UNLIMITED_VALUES); options.addOption(gOption); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = 
cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } String datasetNames = ""; String datasetIds = ""; String preProcessingDatasets = ""; ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> shortDatasetAggregation = new ArrayList<String>(); HashMap<String, String> datasetTempAtt = new HashMap<String, String>(); HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>(); HashMap<String, String> preProcessingDataset = new HashMap<String, String>(); HashMap<String, String> datasetId = new HashMap<String, String>(); boolean removeExistingFiles = cmd.hasOption("f"); String[] datasetArgs = cmd.getOptionValues("g"); for (int i = 0; i < datasetArgs.length; i += 3) { String dataset = datasetArgs[i]; // getting pre-processing String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3); if (tempPreProcessing == null) { System.out.println("No pre-processing available for " + dataset); continue; } preProcessingDataset.put(dataset, tempPreProcessing); shortDataset.add(dataset); datasetTempAtt.put(dataset, ((datasetArgs[i + 1] == "null") ? null : datasetArgs[i + 1])); datasetSpatialAtt.put(dataset, ((datasetArgs[i + 2] == "null") ? null : datasetArgs[i + 2])); datasetId.put(dataset, null); } if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } // getting dataset id Path path = null; FileSystem fs = null; if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { fs = FileSystem.get(new Configuration()); path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3); // getting smallest resolution HashMap<String, String> tempResMap = new HashMap<String, String>(); HashMap<String, String> spatialResMap = new HashMap<String, String>(); HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>(); HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>(); HashSet<String> input = new HashSet<String>(); for (String dataset : shortDataset) { String[] datasetArray = preProcessingDataset.get(dataset).split("-"); String datasetTemporalStr = datasetArray[datasetArray.length - 2]; int datasetTemporal = utils.temporalResolution(datasetTemporalStr); String datasetSpatialStr = datasetArray[datasetArray.length - 1]; int datasetSpatial = utils.spatialResolution(datasetSpatialStr); // finding all possible resolutions String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal); String[] spatialResolutions = 
FrameworkUtils.getAggSpatialResolutions(datasetSpatial); String temporalResolution = ""; String spatialResolution = ""; String tempRes = ""; String spatialRes = ""; boolean dataAdded = false; for (int i = 0; i < temporalResolutions.length; i++) { for (int j = 0; j < spatialResolutions.length; j++) { temporalResolution = temporalResolutions[i]; spatialResolution = spatialResolutions[j]; String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) { dataAdded = true; tempRes += temporalResolution + "-"; spatialRes += spatialResolution + "-"; } } } if (dataAdded) { input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); shortDatasetAggregation.add(dataset); tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1)); spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1)); datasetTemporalStrMap.put(dataset, datasetTemporalStr); datasetSpatialStrMap.put(dataset, datasetSpatialStr); } } if (input.isEmpty()) { System.out.println("All the input datasets have aggregates."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } it = input.iterator(); while (it.hasNext()) { preProcessingDatasets += it.next() + ","; } Job aggJob = null; String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/"; String jobName = "aggregates"; FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3); Configuration aggConf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); aggConf.set("dataset-name", datasetNames); aggConf.set("dataset-id", datasetIds); for (int i = 0; i < shortDatasetAggregation.size(); i++) { String dataset = shortDatasetAggregation.get(i); String id = datasetId.get(dataset); aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset)); aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset)); aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset)); aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset)); aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset)); aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset)); if (s3) aggConf.set("dataset-" + id, s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); else aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/" + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); } aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0"); aggConf.set("mapreduce.task.io.sort.mb", "200"); aggConf.set("mapreduce.task.io.sort.factor", "100"); machineConf.setMachineConfiguration(aggConf); if (s3) { machineConf.setMachineConfiguration(aggConf); aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); } if (snappyCompression) { aggConf.set("mapreduce.map.output.compress", "true"); 
aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { aggConf.set("mapreduce.map.output.compress", "true"); aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } aggJob = new Job(aggConf); aggJob.setJobName(jobName); aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class); aggJob.setMapOutputValueClass(AggregationArrayWritable.class); aggJob.setOutputKeyClass(SpatioTemporalWritable.class); aggJob.setOutputValueClass(FloatArrayWritable.class); //aggJob.setOutputKeyClass(Text.class); //aggJob.setOutputValueClass(Text.class); aggJob.setMapperClass(AggregationMapper.class); aggJob.setCombinerClass(AggregationCombiner.class); aggJob.setReducerClass(AggregationReducer.class); aggJob.setNumReduceTasks(machineConf.getNumberReduces()); aggJob.setInputFormatClass(SequenceFileInputFormat.class); //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class); LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class); //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(aggJob, true); SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK); FileInputFormat.setInputDirRecursive(aggJob, true); FileInputFormat.setInputPaths(aggJob, preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1)); FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir)); aggJob.setJarByClass(Aggregation.class); long start = System.currentTimeMillis(); aggJob.submit(); aggJob.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (String dataset : shortDatasetAggregation) { String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/"; String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } }
From source file:examples.mail.IMAPExportMbox.java
public static void main(String[] args) throws IOException { int connect_timeout = CONNECT_TIMEOUT; int read_timeout = READ_TIMEOUT; int argIdx = 0; String eol = EOL_DEFAULT;/* www . j a va2s .c o m*/ boolean printHash = false; boolean printMarker = false; int retryWaitSecs = 0; for (argIdx = 0; argIdx < args.length; argIdx++) { if (args[argIdx].equals("-c")) { connect_timeout = Integer.parseInt(args[++argIdx]); } else if (args[argIdx].equals("-r")) { read_timeout = Integer.parseInt(args[++argIdx]); } else if (args[argIdx].equals("-R")) { retryWaitSecs = Integer.parseInt(args[++argIdx]); } else if (args[argIdx].equals("-LF")) { eol = LF; } else if (args[argIdx].equals("-CRLF")) { eol = CRLF; } else if (args[argIdx].equals("-.")) { printHash = true; } else if (args[argIdx].equals("-X")) { printMarker = true; } else { break; } } final int argCount = args.length - argIdx; if (argCount < 2) { System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]" + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]"); System.err.println( "\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)"); System.err.println("\t-c connect timeout in seconds (default 10)"); System.err.println("\t-r read timeout in seconds (default 10)"); System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)"); System.err.println("\t-. print a . for each complete message received"); System.err.println("\t-X print the X-IMAP line for each complete message received"); System.err.println( "\tthe mboxfile is where the messages are stored; use '-' to write to standard output."); System.err.println( "\tPrefix filename with '+' to append to the file. Prefix with '-' to allow overwrite."); System.err.println( "\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*"); System.err .println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]" + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])"); System.exit(1); } final URI uri = URI.create(args[argIdx++]); final String file = args[argIdx++]; String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*"; final String itemNames; // Handle 0, 1 or multiple item names if (argCount > 3) { if (argCount > 4) { StringBuilder sb = new StringBuilder(); sb.append("("); for (int i = 4; i <= argCount; i++) { if (i > 4) { sb.append(" "); } sb.append(args[argIdx++]); } sb.append(")"); itemNames = sb.toString(); } else { itemNames = args[argIdx++]; } } else { itemNames = "(INTERNALDATE BODY.PEEK[])"; } final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence? 
final MboxListener chunkListener; if (file.equals("-")) { chunkListener = null; } else if (file.startsWith("+")) { final File mbox = new File(file.substring(1)); System.out.println("Appending to file " + mbox); chunkListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, true)), eol, printHash, printMarker, checkSequence); } else if (file.startsWith("-")) { final File mbox = new File(file.substring(1)); System.out.println("Writing to file " + mbox); chunkListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, false)), eol, printHash, printMarker, checkSequence); } else { final File mbox = new File(file); if (mbox.exists()) { throw new IOException("mailbox file: " + mbox + " already exists!"); } System.out.println("Creating file " + mbox); chunkListener = new MboxListener(new BufferedWriter(new FileWriter(mbox)), eol, printHash, printMarker, checkSequence); } String path = uri.getPath(); if (path == null || path.length() < 1) { throw new IllegalArgumentException("Invalid folderPath: '" + path + "'"); } String folder = path.substring(1); // skip the leading / // suppress login details final PrintCommandListener listener = new PrintCommandListener(System.out, true) { @Override public void protocolReplyReceived(ProtocolCommandEvent event) { if (event.getReplyCode() != IMAPReply.PARTIAL) { // This is dealt with by the chunk listener super.protocolReplyReceived(event); } } }; // Connect and login final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener); String maxIndexInFolder = null; try { imap.setSoTimeout(read_timeout * 1000); if (!imap.select(folder)) { throw new IOException("Could not select folder: " + folder); } for (String line : imap.getReplyStrings()) { maxIndexInFolder = matches(line, PATEXISTS, 1); if (maxIndexInFolder != null) { break; } } if (chunkListener != null) { imap.setChunkListener(chunkListener); } // else the command listener displays the full output without processing while (true) { boolean ok = imap.fetch(sequenceSet, itemNames); // If the fetch failed, can we retry? if (!ok && retryWaitSecs > 0 && chunkListener != null && checkSequence) { final String replyString = imap.getReplyString(); //includes EOL if (startsWith(replyString, PATTEMPFAIL)) { System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds"); sequenceSet = (chunkListener.lastSeq + 1) + ":*"; try { Thread.sleep(retryWaitSecs * 1000); } catch (InterruptedException e) { // ignored } } else { throw new IOException( "FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString); } } else { break; } } } catch (IOException ioe) { String count = chunkListener == null ? "?" 
: Integer.toString(chunkListener.total); System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages "); if (chunkListener != null) { System.err.println("Last complete response seen: " + chunkListener.lastFetched); } throw ioe; } finally { if (printHash) { System.err.println(); } if (chunkListener != null) { chunkListener.close(); final Iterator<String> missingIds = chunkListener.missingIds.iterator(); if (missingIds.hasNext()) { StringBuilder sb = new StringBuilder(); for (;;) { sb.append(missingIds.next()); if (!missingIds.hasNext()) { break; } sb.append(","); } System.err.println("*** Missing ids: " + sb.toString()); } } imap.logout(); imap.disconnect(); } if (chunkListener != null) { System.out.println("Processed " + chunkListener.total + " messages."); } if (maxIndexInFolder != null) { System.out.println("Folder contained " + maxIndexInFolder + " messages."); } }
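The finally block above shows a common joining idiom: append missingIds.next(), then test hasNext() to decide whether a comma is still needed, which avoids a trailing separator. The same idiom in isolation:

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

class JoinWithCommas {
    static String join(List<String> ids) {
        Iterator<String> missingIds = ids.iterator();
        StringBuilder sb = new StringBuilder();
        if (missingIds.hasNext()) {
            for (;;) {
                sb.append(missingIds.next());
                if (!missingIds.hasNext()) {
                    break;            // last element: no trailing comma
                }
                sb.append(",");
            }
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.println(join(Arrays.asList("3", "7", "12")));  // prints 3,7,12
    }
}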
From source file:com.github.xbn.examples.regexutil.non_xbn.BetweenLineMarkersButSkipFirstXmpl.java
public static final void main(String[] as_1RqdTxtFilePath) {
    Iterator<String> lineItr = null;
    try {
        lineItr = FileUtils.lineIterator(new File(as_1RqdTxtFilePath[0])); //Throws npx if null
    } catch (IOException iox) {
        throw new RuntimeException("Attempting to open \"" + as_1RqdTxtFilePath[0] + "\"", iox);
    } catch (RuntimeException rx) {
        throw new RuntimeException("One required parameter: The path to the text file.", rx);
    }

    String LINE_SEP = System.getProperty("line.separator", "\n");
    ArrayList<String> alsItems = new ArrayList<String>();
    boolean bStartMark = false;
    boolean bLine1Skipped = false;
    StringBuilder sdCurrentItem = new StringBuilder();
    while (lineItr.hasNext()) {
        String sLine = lineItr.next().trim();

        if (!bStartMark) {
            if (sLine.startsWith(".START_SEQUENCE")) {
                bStartMark = true;
                continue;
            }
            throw new IllegalStateException("Start mark not found.");
        }

        if (!bLine1Skipped) {
            bLine1Skipped = true;
            continue;
        } else if (!sLine.equals(".END_SEQUENCE")) {
            sdCurrentItem.append(sLine).append(LINE_SEP);
        } else {
            alsItems.add(sdCurrentItem.toString());
            sdCurrentItem.setLength(0);
            bStartMark = false;
            bLine1Skipped = false;
            continue;
        }
    }

    for (String s : alsItems) {
        System.out.println("----------");
        System.out.print(s);
    }
}
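This example depends on Apache Commons IO: FileUtils.lineIterator returns a LineIterator that reads the file lazily, so each next() pulls the following line and the iterator should be closed when iteration ends. A minimal sketch, assuming Commons IO 2.x (where LineIterator implements Iterator<String>) and a placeholder input path:

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

class LazyLineRead {
    public static void main(String[] args) throws IOException {
        LineIterator lineItr = FileUtils.lineIterator(new File("input.txt"), "UTF-8");
        try {
            while (lineItr.hasNext()) {
                String line = lineItr.next().trim();  // next() yields the following line of the file
                System.out.println(line);
            }
        } finally {
            lineItr.close();  // release the underlying reader
        }
    }
}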
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
/** * @param args// ww w. j a va2 s . co m * @throws ParseException */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the relationship " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option g1Option = new Option("g1", "first-group", true, "set first group of datasets"); g1Option.setRequired(true); g1Option.setArgName("FIRST GROUP"); g1Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g1Option); Option g2Option = new Option("g2", "second-group", true, "set second group of datasets"); g2Option.setRequired(false); g2Option.setArgName("SECOND GROUP"); g2Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g2Option); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = 
cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } Path path = null; FileSystem fs = FileSystem.get(new Configuration()); ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> firstGroup = new ArrayList<String>(); ArrayList<String> secondGroup = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); boolean removeExistingFiles = cmd.hasOption("f"); String[] firstGroupCmd = cmd.getOptionValues("g1"); String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0]; addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } if (firstGroup.isEmpty()) { System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2."); firstGroup.addAll(secondGroup); } if (secondGroup.isEmpty()) { System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1."); secondGroup.addAll(firstGroup); } // getting dataset ids String datasetNames = ""; String datasetIds = ""; HashMap<String, String> datasetId = new HashMap<String, String>(); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { datasetId.put(it.next(), null); } if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } String firstGroupStr = ""; String secondGroupStr = ""; for (String dataset : firstGroup) { firstGroupStr += datasetId.get(dataset) + ","; } for (String dataset : secondGroup) { secondGroupStr += datasetId.get(dataset) + ","; } firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1); secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1); FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3); String dataAttributesInputDirs = ""; String noRelationship = ""; HashSet<String> dirs = new HashSet<String>(); String dataset1; String dataset2; String datasetId1; String datasetId2; for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = 
secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } datasetId1 = datasetId.get(dataset1); datasetId2 = datasetId.get(dataset2); if (dataset1.equals(dataset2)) continue; String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) { dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1); dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2); } else { noRelationship += datasetId1 + "-" + datasetId2 + ","; } } } if (dirs.isEmpty()) { System.out.println("All the relationships were already computed."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } for (String dir : dirs) { dataAttributesInputDirs += dir + ","; } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String jobName = "correlation"; String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/"; FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3); for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i))); } for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size", Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length)); } conf.set("dataset-keys", datasetIds); conf.set("dataset-names", datasetNames); conf.set("first-group", firstGroupStr); conf.set("second-group", secondGroupStr); conf.set("main-dataset-id", datasetId.get(shortDataset.get(0))); if (noRelationship.length() > 0) { conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1)); } conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); conf.set("mapreduce.task.timeout", "2400000"); if (s3) { machineConf.setMachineConfiguration(conf); conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); conf.set("bucket", s3bucket); } if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } Job job = new Job(conf); job.setJobName(jobName); job.setMapOutputKeyClass(PairAttributeWritable.class); job.setMapOutputValueClass(SpatioTemporalValueWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CorrelationTechniquesMapper.class); 
job.setReducerClass(CorrelationTechniquesReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); job.setInputFormatClass(SequenceFileInputFormat.class); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1)); FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir)); job.setJarByClass(CorrelationTechniques.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } if (dataset1.equals(dataset2)) continue; String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-" + dataset2 + "/"; String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } } }
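Earlier in the CorrelationTechniques example, the iterator seeds the datasetId map with null values (datasetId.put(it.next(), null)); the ids are filled in while reading the index file, and a second pass treats any remaining null as a missing dataset. A compact sketch of that seed-then-verify pattern with invented dataset names:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

class SeedAndVerify {
    public static void main(String[] args) {
        List<String> shortDataset = Arrays.asList("taxi", "weather", "crime");
        Map<String, String> datasetId = new HashMap<>();

        // Seed the map: every dataset starts without an id.
        Iterator<String> it = shortDataset.iterator();
        while (it.hasNext()) {
            datasetId.put(it.next(), null);
        }

        // ... ids would normally be filled in here from an index file ...
        datasetId.put("taxi", "1");
        datasetId.put("weather", "2");

        // Verify: a remaining null means the dataset was never registered.
        it = shortDataset.iterator();
        while (it.hasNext()) {
            String dataset = it.next();
            if (datasetId.get(dataset) == null) {
                System.out.println("No dataset id for " + dataset);
            }
        }
    }
}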
From source file:com.alkacon.opencms.registration.CmsRegistrationFormHandler.java
/**
 * As test case.<p>
 *
 * @param args not used
 */
public static void main(String[] args) {
    CmsUser user = new CmsUser(null, "/mylongouname/m.moossen@alkacon.com", "", "", "", "", 0, 0, 0, null);
    String code = getActivationCode(user);
    System.out.println(code);
    System.out.println(getUserName(code));

    CmsMacroResolver macroResolver = CmsMacroResolver.newInstance();
    macroResolver.setKeepEmptyMacros(true);

    // create macros for getters
    Method[] methods = CmsUser.class.getDeclaredMethods();
    for (int i = 0; i < methods.length; i++) {
        Method method = methods[i];
        if (method.getReturnType() != String.class) {
            continue;
        }
        if (method.getParameterTypes().length > 0) {
            continue;
        }
        if (!method.getName().startsWith("get") || (method.getName().length() < 4)
                || method.getName().equals("getPassword")) {
            continue;
        }
        String label = ("" + method.getName().charAt(3)).toLowerCase();
        if (method.getName().length() > 4) {
            label += method.getName().substring(4);
        }
        try {
            Object value = method.invoke(user, new Object[] {});
            if (value == null) {
                value = "";
            }
            macroResolver.addMacro(label, value.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // add addinfo values as macros
    Iterator itFields = user.getAdditionalInfo().entrySet().iterator();
    while (itFields.hasNext()) {
        Map.Entry entry = (Map.Entry) itFields.next();
        if ((entry.getValue() instanceof String) && (entry.getKey() instanceof String)) {
            macroResolver.addMacro(entry.getKey().toString(), entry.getValue().toString());
        }
    }

    // add login
    macroResolver.addMacro(FIELD_LOGIN, user.getSimpleName());
}
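The addinfo loop above uses a raw Iterator, so every next() result must be cast to Map.Entry before the key and value can be inspected. If the element types of the additional-info map are known, the generified form removes the cast; the String/Object types below are an assumption chosen for illustration, not taken from the OpenCms API.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

class RawVsGenericIterator {
    public static void main(String[] args) {
        Map<String, Object> additionalInfo = new HashMap<>();
        additionalInfo.put("city", "Cologne");
        additionalInfo.put("age", 42);

        // Raw iterator: next() returns Object, so a cast is required.
        Iterator itFields = additionalInfo.entrySet().iterator();
        while (itFields.hasNext()) {
            Map.Entry entry = (Map.Entry) itFields.next();
            System.out.println(entry.getKey() + "=" + entry.getValue());
        }

        // Generic iterator: next() is already typed, no cast needed.
        Iterator<Map.Entry<String, Object>> typed = additionalInfo.entrySet().iterator();
        while (typed.hasNext()) {
            Map.Entry<String, Object> entry = typed.next();
            System.out.println(entry.getKey() + "=" + entry.getValue());
        }
    }
}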
From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java
/** * @param args//from w w w . j a v a 2s . c om */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the index and events " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option thresholdOption = new Option("t", "use-custom-thresholds", false, "use custom thresholds for regular and rare events, defined in HDFS_HOME/" + FrameworkUtils.thresholdDir + " file"); thresholdOption.setRequired(false); options.addOption(thresholdOption); Option gOption = new Option("g", "group", true, "set group of datasets for which the indices and events" + " will be computed"); gOption.setRequired(true); gOption.setArgName("GROUP"); gOption.setArgs(Option.UNLIMITED_VALUES); options.addOption(gOption); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); 
awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } String datasetNames = ""; String datasetIds = ""; ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> shortDatasetIndex = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); HashMap<String, String> datasetId = new HashMap<String, String>(); HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>(); HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>(); Path path = null; FileSystem fs = FileSystem.get(new Configuration()); BufferedReader br; boolean removeExistingFiles = cmd.hasOption("f"); boolean isThresholdUserDefined = cmd.hasOption("t"); for (String dataset : cmd.getOptionValues("g")) { // getting aggregates String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3); if (aggregate.length == 0) { System.out.println("No aggregates found for " + dataset + "."); continue; } // getting aggregates header String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3); if (aggregatesHeaderFileName == null) { System.out.println("No aggregate header for " + dataset); continue; } String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName; shortDataset.add(dataset); datasetId.put(dataset, null); if (s3) { path = new Path(aggregatesHeader); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader); } br = new BufferedReader(new InputStreamReader(fs.open(path))); datasetAgg.put(dataset, br.readLine().split("\t")[1]); br.close(); if (s3) fs.close(); } if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } // getting dataset id if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } // getting user defined thresholds if (isThresholdUserDefined) { if (s3) { path = new Path(s3bucket + FrameworkUtils.thresholdDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir); } br = new BufferedReader(new InputStreamReader(fs.open(path))); line = br.readLine(); while (line != null) { // getting dataset name String dataset = line.trim(); 
            HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>();
            HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>();
            line = br.readLine();
            while ((line != null) && (line.split("\t").length > 1)) {
                // getting attribute ids and thresholds
                String[] keyVals = line.trim().split("\t");
                int att = Integer.parseInt(keyVals[0].trim());
                regThresholds.put(att, Double.parseDouble(keyVals[1].trim()));
                rareThresholds.put(att, Double.parseDouble(keyVals[2].trim()));
                line = br.readLine();
            }
            datasetRegThreshold.put(dataset, regThresholds);
            datasetRareThreshold.put(dataset, rareThresholds);
        }
        br.close();
    }
    if (s3)
        fs.close();

    // datasets that will use existing merge tree
    ArrayList<String> useMergeTree = new ArrayList<String>();

    // creating index for each spatio-temporal resolution
    FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3);

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/";

        if (removeExistingFiles) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3);
            FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3);
        } else if (datasetRegThreshold.containsKey(dataset)) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) {
                useMergeTree.add(dataset);
            }
        }

        if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) {
            input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset);
            shortDatasetIndex.add(dataset);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have indices.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    String aggregateDatasets = "";
    it = input.iterator();
    while (it.hasNext()) {
        aggregateDatasets += it.next() + ",";
    }

    Job icJob = null;
    Configuration icConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "index";
    String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/";

    FrameworkUtils.removeFile(indexOutputDir, s3conf, s3);

    icConf.set("dataset-name", datasetNames);
    icConf.set("dataset-id", datasetIds);

    if (!useMergeTree.isEmpty()) {
        String useMergeTreeStr = "";
        for (String dt : useMergeTree) {
            useMergeTreeStr += dt + ",";
        }
        icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1));
    }

    for (int i = 0; i < shortDataset.size(); i++) {
        String dataset = shortDataset.get(i);
        String id = datasetId.get(dataset);
        icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset));
        if (datasetRegThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset);
            String thresholds = "";
            for (int att : regThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ",";
            }
            icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
        if (datasetRareThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset);
            String thresholds = "";
            for (int att : rareThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ",";
            }
            icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
    }
icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); icConf.set("mapreduce.input.fileinputformat.split.minsize", "0"); icConf.set("mapreduce.task.io.sort.mb", "200"); icConf.set("mapreduce.task.io.sort.factor", "100"); //icConf.set("mapreduce.task.timeout", "1800000"); machineConf.setMachineConfiguration(icConf); if (s3) { machineConf.setMachineConfiguration(icConf); icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); icConf.set("bucket", s3bucket); } if (snappyCompression) { icConf.set("mapreduce.map.output.compress", "true"); icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { icConf.set("mapreduce.map.output.compress", "true"); icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } icJob = new Job(icConf); icJob.setJobName(jobName); icJob.setMapOutputKeyClass(AttributeResolutionWritable.class); icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class); icJob.setOutputKeyClass(AttributeResolutionWritable.class); icJob.setOutputValueClass(TopologyTimeSeriesWritable.class); //icJob.setOutputKeyClass(Text.class); //icJob.setOutputValueClass(Text.class); icJob.setMapperClass(IndexCreationMapper.class); icJob.setReducerClass(IndexCreationReducer.class); icJob.setNumReduceTasks(machineConf.getNumberReduces()); icJob.setInputFormatClass(SequenceFileInputFormat.class); //icJob.setOutputFormatClass(SequenceFileOutputFormat.class); LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class); //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(icJob, true); SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK); FileInputFormat.setInputDirRecursive(icJob, true); FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1)); FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir)); icJob.setJarByClass(IndexCreation.class); long start = System.currentTimeMillis(); icJob.submit(); icJob.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (String dataset : shortDatasetIndex) { String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/"; String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } }