List of usage examples for java.util.ArrayList.isEmpty()
public boolean isEmpty()
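isEmpty() returns true if the list contains no elements, that is, exactly when size() == 0. A minimal sketch of the contract:

import java.util.ArrayList;

public class IsEmptyDemo {
    public static void main(String[] args) {
        ArrayList<String> list = new ArrayList<>();
        System.out.println(list.isEmpty()); // true: a new list holds no elements
        list.add("x");
        System.out.println(list.isEmpty()); // false: size() is now 1
        list.clear();
        System.out.println(list.isEmpty()); // true again after clear()
    }
}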
From source file:Main.java
public static void main(String[] args) {
    ArrayList<Integer> arrlist = new ArrayList<Integer>(5);
    arrlist.add(25);
    // isEmpty() returns false once the list holds at least one element
    boolean retval = arrlist.isEmpty();
    if (retval) {
        System.out.println("list is empty");
    } else {
        System.out.println("list is not empty");
    }
}
From source file:TestMapObjInterface.java
public static void main(String[] args) {
    if (args.length < 3) {
        System.out.println("Usage: MapObjInterfaceTest <filename> <vfs name> <CN address>");
        System.out.println("MapObjInterfaceTest is a unit test of mproxy's map_obj interface."
                + " It expects as input a filename and a Parascale vfs name.");
        System.out.println("It returns a listing of the chunks for the specified file"
                + " in the following format:");
        System.out.println();
        System.out.println("[1275] ChunkInfo: offset: 85563801600 length: 67108864 17 : "
                + "Filelength 85899345920 <0>StorageNodeInfo: nodeName: "
                + "atg-sn2 primar addr: 10.10.249.222 enabled: true up: true ");
        System.out.println();
        System.out.println("Callers may use this information to determine if the "
                + "map_obj interface is returning correct data.");
        System.exit(1);
    }
    try {
        if (args[args.length - 1].equals("auto")) {
            if (runTest(args[0], args[1], args[2])) {
                System.out.println("pass");
            } else {
                System.out.println("FAIL");
            }
        } else {
            // Print the chunk listing only when the lookup returned results
            ArrayList<String> output = getChunks(args[0], args[1], args[2]);
            if (!output.isEmpty()) {
                for (int i = 0; i < output.size(); i++) {
                    System.out.println(output.get(i));
                }
            }
        }
    } catch (Exception e) {
        System.out.println(e);
    }
}
From source file:Main.java
public static void main(String[] args) {
    ArrayList<String> list = new ArrayList<>();
    list.add("A");
    list.add("B");
    list.add("C");
    list.add("D");
    list.add("E");
    list.add("F");
    list.add("G");
    list.add("H");
    System.out.println(list);
    // Remove elements in random order until the list is drained
    while (!list.isEmpty()) {
        int index = (int) Math.floor(Math.random() * list.size());
        System.out.println("Name " + list.get(index));
        list.remove(index);
    }
}
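The example above repeatedly picks a random index and removes it, using isEmpty() as the loop guard so the loop is guaranteed to terminate. An alternative sketch, not taken from the original source: shuffle once with java.util.Collections, then drain from the tail so each removal is O(1) for an ArrayList.

import java.util.ArrayList;
import java.util.Collections;

public class ShuffleDrain {
    public static void main(String[] args) {
        ArrayList<String> list = new ArrayList<>();
        Collections.addAll(list, "A", "B", "C", "D", "E", "F", "G", "H");
        Collections.shuffle(list); // randomize the order once
        while (!list.isEmpty()) {
            // removing the last element avoids shifting the remaining elements
            System.out.println("Name " + list.remove(list.size() - 1));
        }
    }
}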
From source file:accinapdf.ACCinaPDF.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    controller.Logger.create();
    controller.Bundle.getBundle();
    if (GraphicsEnvironment.isHeadless()) {
        // Headless: validate the PDF given on the command line and print a report
        String fich;
        if (args.length != 1) {
            System.err.println(Bundle.getBundle().getString("invalidArgs"));
            return;
        } else {
            fich = args[0];
        }
        try {
            System.out.println(Bundle.getBundle().getString("validating") + " " + fich);
            ArrayList<SignatureValidation> alSv = CCInstance.getInstance().validatePDF(fich, null);
            if (alSv.isEmpty()) {
                System.out.println(Bundle.getBundle().getString("notSigned"));
            } else {
                String newLine = System.getProperty("line.separator");
                String toWrite = "(";
                int numSigs = alSv.size();
                if (numSigs == 1) {
                    toWrite += "1 " + Bundle.getBundle().getString("signature");
                } else {
                    toWrite += numSigs + " " + Bundle.getBundle().getString("signatures");
                }
                toWrite += ")" + newLine;
                for (SignatureValidation sv : alSv) {
                    toWrite += "\t" + sv.getName() + " - ";
                    toWrite += (sv.isCertification()
                            ? WordUtils.capitalize(Bundle.getBundle().getString("certificate"))
                            : WordUtils.capitalize(Bundle.getBundle().getString("signed")))
                            + " " + Bundle.getBundle().getString("by") + " " + sv.getSignerName();
                    toWrite += newLine + "\t\t";
                    if (sv.isChanged()) {
                        toWrite += Bundle.getBundle().getString("certifiedChangedOrCorrupted");
                    } else {
                        if (sv.isCertification()) {
                            if (sv.isValid()) {
                                if (sv.isChanged() || !sv.isCoversEntireDocument()) {
                                    toWrite += Bundle.getBundle().getString("certifiedButChanged");
                                } else {
                                    toWrite += Bundle.getBundle().getString("certifiedOk");
                                }
                            } else {
                                toWrite += Bundle.getBundle().getString("changedAfterCertified");
                            }
                        } else {
                            if (sv.isValid()) {
                                if (sv.isChanged()) {
                                    toWrite += Bundle.getBundle().getString("signedButChanged");
                                } else {
                                    toWrite += Bundle.getBundle().getString("signedOk");
                                }
                            } else {
                                toWrite += Bundle.getBundle().getString("signedChangedOrCorrupted");
                            }
                        }
                    }
                    toWrite += newLine + "\t\t";
                    if (sv.getOcspCertificateStatus().equals(CertificateStatus.OK)
                            || sv.getCrlCertificateStatus().equals(CertificateStatus.OK)) {
                        toWrite += Bundle.getBundle().getString("certOK");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.REVOKED)
                            || sv.getCrlCertificateStatus().equals(CertificateStatus.REVOKED)) {
                        toWrite += Bundle.getBundle().getString("certRevoked");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.UNCHECKED)
                            && sv.getCrlCertificateStatus().equals(CertificateStatus.UNCHECKED)) {
                        toWrite += Bundle.getBundle().getString("certNotVerified");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.UNCHAINED)) {
                        toWrite += Bundle.getBundle().getString("certNotChained");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.EXPIRED)) {
                        toWrite += Bundle.getBundle().getString("certExpired");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.CHAINED_LOCALLY)) {
                        toWrite += Bundle.getBundle().getString("certChainedLocally");
                    }
                    toWrite += newLine + "\t\t";
                    if (sv.isValidTimeStamp()) {
                        toWrite += Bundle.getBundle().getString("validTimestamp");
                    } else {
                        toWrite += Bundle.getBundle().getString("signerDateTime");
                    }
                    toWrite += newLine + "\t\t";
                    toWrite += WordUtils.capitalize(Bundle.getBundle().getString("revision")) + ": "
                            + sv.getRevision() + " " + Bundle.getBundle().getString("of") + " "
                            + sv.getNumRevisions();
                    toWrite += newLine + "\t\t";
                    final DateFormat df = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
                    final SimpleDateFormat sdf = new SimpleDateFormat("Z");
                    if (sv.getSignature().getTimeStampToken() == null) {
                        Calendar cal = sv.getSignature().getSignDate();
                        // format the Date directly; passing a String to format() would fail
                        String date = df.format(cal.getTime());
                        toWrite += date + " " + sdf.format(cal.getTime()) + " ("
                                + Bundle.getBundle().getString("signerDateTimeSmall") + ")";
                    } else {
                        Calendar ts = sv.getSignature().getTimeStampDate();
                        String date = df.format(ts.getTime());
                        toWrite += Bundle.getBundle().getString("date") + " " + date + " "
                                + sdf.format(ts.getTime());
                    }
                    toWrite += newLine + "\t\t";
                    boolean ltv = (sv.getOcspCertificateStatus() == CertificateStatus.OK
                            || sv.getCrlCertificateStatus() == CertificateStatus.OK);
                    toWrite += Bundle.getBundle().getString("isLtv") + ": "
                            + (ltv ? Bundle.getBundle().getString("yes") : Bundle.getBundle().getString("no"));
                    String reason = sv.getSignature().getReason();
                    toWrite += newLine + "\t\t";
                    toWrite += Bundle.getBundle().getString("reason") + ": ";
                    if (reason == null || reason.isEmpty()) {
                        toWrite += Bundle.getBundle().getString("notDefined");
                    } else {
                        toWrite += reason;
                    }
                    String location = sv.getSignature().getLocation();
                    toWrite += newLine + "\t\t";
                    toWrite += Bundle.getBundle().getString("location") + ": ";
                    if (location == null || location.isEmpty()) {
                        toWrite += Bundle.getBundle().getString("notDefined");
                    } else {
                        toWrite += location;
                    }
                    toWrite += newLine + "\t\t";
                    toWrite += Bundle.getBundle().getString("allowsChanges") + ": ";
                    try {
                        int certLevel = CCInstance.getInstance().getCertificationLevel(sv.getFilename());
                        if (certLevel == PdfSignatureAppearance.CERTIFIED_FORM_FILLING) {
                            toWrite += Bundle.getBundle().getString("onlyAnnotations");
                        } else if (certLevel == PdfSignatureAppearance.CERTIFIED_FORM_FILLING_AND_ANNOTATIONS) {
                            toWrite += Bundle.getBundle().getString("annotationsFormFilling");
                        } else if (certLevel == PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED) {
                            toWrite += Bundle.getBundle().getString("no");
                        } else {
                            toWrite += Bundle.getBundle().getString("yes");
                        }
                    } catch (IOException ex) {
                        controller.Logger.getLogger().addEntry(ex);
                    }
                    toWrite += newLine + "\t\t";
                    if (sv.getOcspCertificateStatus() == CertificateStatus.OK
                            || sv.getCrlCertificateStatus() == CertificateStatus.OK) {
                        toWrite += (Bundle.getBundle().getString("validationCheck1") + " "
                                + (sv.getOcspCertificateStatus() == CertificateStatus.OK
                                        ? Bundle.getBundle().getString("validationCheck2") + ": "
                                                + CCInstance.getInstance().getCertificateProperty(
                                                        sv.getSignature().getOcsp().getCerts()[0].getSubject(), "CN")
                                                + " " + Bundle.getBundle().getString("at") + " "
                                                + df.format(sv.getSignature().getOcsp().getProducedAt())
                                        : (sv.getCrlCertificateStatus() == CertificateStatus.OK ? "CRL" : ""))
                                + (sv.getSignature().getTimeStampToken() != null
                                        ? Bundle.getBundle().getString("validationCheck3") + ": "
                                                + CCInstance.getInstance().getCertificateProperty(
                                                        sv.getSignature().getTimeStampToken().getSID().getIssuer(), "O")
                                        : ""));
                    } else if (sv.getSignature().getTimeStampToken() != null) {
                        toWrite += (Bundle.getBundle().getString("validationCheck3") + ": "
                                + CCInstance.getInstance().getCertificateProperty(
                                        sv.getSignature().getTimeStampToken().getSID().getIssuer(), "O"));
                    }
                    toWrite += newLine;
                }
                System.out.println(toWrite);
                System.out.println(Bundle.getBundle().getString("validationFinished"));
            }
        } catch (IOException | DocumentException | GeneralSecurityException ex) {
            System.err.println(Bundle.getBundle().getString("validationError"));
            Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
        }
    } else {
        // GUI: pick a platform look-and-feel, then show the splash screen
        CCSignatureSettings defaultSettings = new CCSignatureSettings(false);
        if (SystemUtils.IS_OS_WINDOWS) {
            for (LookAndFeelInfo info : UIManager.getInstalledLookAndFeels()) {
                if ("Windows".equals(info.getName())) {
                    try {
                        UIManager.setLookAndFeel(info.getClassName());
                    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
                            | UnsupportedLookAndFeelException ex) {
                        Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
                    }
                    break;
                }
            }
        } else if (SystemUtils.IS_OS_LINUX) {
            for (LookAndFeelInfo info : UIManager.getInstalledLookAndFeels()) {
                if ("Nimbus".equals(info.getName())) {
                    try {
                        UIManager.setLookAndFeel(info.getClassName());
                    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
                            | UnsupportedLookAndFeelException ex) {
                        Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
                    }
                    break;
                }
            }
        } else if (SystemUtils.IS_OS_MAC_OSX) {
            try {
                UIManager.setLookAndFeel("com.sun.java.swing.plaf.mac.MacLookAndFeel");
            } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
                    | UnsupportedLookAndFeelException ex) {
                Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        new SplashScreen().setVisible(true);
    }
}
From source file:isc_415_practica_1.ISC_415_Practica_1.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    String urlString;
    Scanner input = new Scanner(System.in);
    Document doc;
    try {
        urlString = input.next();
        if (urlString.equals("servlet")) {
            urlString = "http://localhost:8084/ISC_415_Practica1_Servlet/client";
        }
        urlString = urlString.contains("http://") || urlString.contains("https://")
                ? urlString
                : "http://" + urlString;
        doc = Jsoup.connect(urlString).get();
    } catch (Exception ex) {
        System.out.println("El URL ingresado no es válido.");
        return;
    }
    ArrayList<NameValuePair> formInputParams;
    formInputParams = new ArrayList<>();
    String[] plainTextDoc = new TextNode(doc.html(), "").getWholeText().split("\n");
    System.out.println(String.format("Número de lineas del documento: %d", plainTextDoc.length));
    System.out.println(String.format("Número de p tags: %d", doc.select("p").size()));
    System.out.println(String.format("Número de img tags: %d", doc.select("img").size()));
    System.out.println(String.format("Número de form tags: %d", doc.select("form").size()));
    Integer index = 1;
    ArrayList<NameValuePair> urlParameters = new ArrayList<>();
    for (Element e : doc.select("form")) {
        System.out.println(String.format("Form %d: Número de Input tags %d", index, e.select("input").size()));
        System.out.println(e.select("input"));
        for (Element formInput : e.select("input")) {
            // compare attribute values with isEmpty(), not with != on String references
            if (formInput.attr("id") != null && !formInput.attr("id").isEmpty()) {
                urlParameters.add(new BasicNameValuePair(formInput.attr("id"), "PRACTICA1"));
            } else if (formInput.attr("name") != null && !formInput.attr("name").isEmpty()) {
                urlParameters.add(new BasicNameValuePair(formInput.attr("name"), "PRACTICA1"));
            }
        }
        index++;
    }
    if (!urlParameters.isEmpty()) {
        try {
            CloseableHttpClient httpclient = HttpClients.createDefault();
            UrlEncodedFormEntity entity = new UrlEncodedFormEntity(urlParameters, Consts.UTF_8);
            HttpPost httpPost = new HttpPost(urlString);
            httpPost.setHeader("User-Agent", USER_AGENT);
            httpPost.setEntity(entity);
            HttpResponse response = httpclient.execute(httpPost);
            System.out.println(response.getStatusLine());
        } catch (IOException ex) {
            Logger.getLogger(ISC_415_Practica_1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
/**
 * @param args
 * @throws ParseException
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // An empty group falls back to the other one (G1 = G2 or G2 = G1)
    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }
    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids
    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";
    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;

            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg",
                datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/"
                    + dataset1 + "-" + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
From source file:com.sebuilder.interpreter.SeInterpreter.java
public static void main(String[] args) {
    if (args.length == 0) {
        System.out.println("Usage: [--driver=<drivername>] [--driver.<configkey>=<configvalue>...] "
                + "[--implicitlyWait=<ms>] [--pageLoadTimeout=<ms>] [--stepTypePackage=<package name>] <script path>...");
        System.exit(0);
    }

    Log log = LogFactory.getFactory().getInstance(SeInterpreter.class);
    WebDriverFactory wdf = DEFAULT_DRIVER_FACTORY;
    ScriptFactory sf = new ScriptFactory();
    StepTypeFactory stf = new StepTypeFactory();
    sf.setStepTypeFactory(stf);
    TestRunFactory trf = new TestRunFactory();
    sf.setTestRunFactory(trf);

    ArrayList<String> paths = new ArrayList<String>();
    HashMap<String, String> driverConfig = new HashMap<String, String>();
    for (String s : args) {
        if (s.startsWith("--")) {
            String[] kv = s.split("=", 2);
            if (kv.length < 2) {
                log.fatal("Driver configuration option \"" + s
                        + "\" is not of the form \"--driver=<name>\" or \"--driver.<key>=<value>\".");
                System.exit(1);
            }
            if (s.startsWith("--implicitlyWait")) {
                trf.setImplicitlyWaitDriverTimeout(Integer.parseInt(kv[1]));
            } else if (s.startsWith("--pageLoadTimeout")) {
                trf.setPageLoadDriverTimeout(Integer.parseInt(kv[1]));
            } else if (s.startsWith("--stepTypePackage")) {
                stf.setPrimaryPackage(kv[1]);
            } else if (s.startsWith("--driver.")) {
                driverConfig.put(kv[0].substring("--driver.".length()), kv[1]);
            } else if (s.startsWith("--driver")) {
                try {
                    wdf = (WebDriverFactory) Class
                            .forName("com.sebuilder.interpreter.webdriverfactory." + kv[1]).newInstance();
                } catch (ClassNotFoundException e) {
                    log.fatal("Unknown WebDriverFactory: "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                } catch (InstantiationException e) {
                    log.fatal("Could not instantiate WebDriverFactory "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                } catch (IllegalAccessException e) {
                    log.fatal("Could not instantiate WebDriverFactory "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                }
            } else {
                paths.add(s);
            }
        } else {
            paths.add(s);
        }
    }

    if (paths.isEmpty()) {
        log.info("Configuration successful but no paths to scripts specified. Exiting.");
        System.exit(0);
    }

    HashMap<String, String> cfg = new HashMap<String, String>(driverConfig);
    for (String path : paths) {
        try {
            TestRun lastRun = null;
            for (Script script : sf.parse(new File(path))) {
                for (Map<String, String> data : script.dataRows) {
                    try {
                        lastRun = script.testRunFactory.createTestRun(script, log, wdf, driverConfig,
                                data, lastRun);
                        if (lastRun.finish()) {
                            log.info(script.name + " succeeded");
                        } else {
                            log.info(script.name + " failed");
                        }
                    } catch (Exception e) {
                        log.info(script.name + " failed", e);
                    }
                }
            }
        } catch (Exception e) {
            log.fatal("Run error.", e);
            System.exit(1);
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.relationship_computation.Relationship.java
/**
 * @param args
 * @throws ParseException
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option scoreOption = new Option("sc", "score", true, "set threshold for relationship score");
    scoreOption.setRequired(false);
    scoreOption.setArgName("SCORE THRESHOLD");
    options.addOption(scoreOption);

    Option strengthOption = new Option("st", "strength", true, "set threshold for relationship strength");
    strengthOption.setRequired(false);
    strengthOption.setArgName("STRENGTH THRESHOLD");
    options.addOption(strengthOption);

    Option completeRandomizationOption = new Option("c", "complete-randomization", false,
            "use complete randomization when performing significance tests");
    completeRandomizationOption.setRequired(false);
    options.addOption(completeRandomizationOption);

    Option idOption = new Option("id", "ids", false, "output id instead of names for datasets and attributes");
    idOption.setRequired(false);
    options.addOption(idOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    Option removeOption = new Option("r", "remove-not-significant", false,
            "remove relationships that are not significant from the final output");
    removeOption.setRequired(false);
    options.addOption(removeOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeNotSignificant = cmd.hasOption("r");
    boolean removeExistingFiles = cmd.hasOption("f");
    boolean completeRandomization = cmd.hasOption("c");
    boolean hasScoreThreshold = cmd.hasOption("sc");
    boolean hasStrengthThreshold = cmd.hasOption("st");
    boolean outputIds = cmd.hasOption("id");
    String scoreThreshold = hasScoreThreshold ? cmd.getOptionValue("sc") : "";
    String strengthThreshold = hasStrengthThreshold ? cmd.getOptionValue("st") : "";

    // all datasets
    ArrayList<String> all_datasets = new ArrayList<String>();
    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        all_datasets.add(line.split("\t")[0]);
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();
    String[] all_datasets_array = new String[all_datasets.size()];
    all_datasets.toArray(all_datasets_array);

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : all_datasets_array;
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }
    if (firstGroup.isEmpty()) {
        System.out.println("No indices from datasets in G1.");
        System.exit(0);
    }
    if (secondGroup.isEmpty()) {
        System.out.println("No indices from datasets in G2.");
        System.exit(0);
    }

    // getting dataset ids
    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        all_datasets.add(dt[0]);
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    String relationshipsDir = "";
    if (outputIds) {
        relationshipsDir = FrameworkUtils.relationshipsIdsDir;
    } else {
        relationshipsDir = FrameworkUtils.relationshipsDir;
    }

    FrameworkUtils.createDir(s3bucket + relationshipsDir, s3conf, s3);

    String random = completeRandomization ? "complete" : "restricted";

    String indexInputDirs = "";
    String noRelationship = "";
    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;

            String correlationOutputFileName = s3bucket + relationshipsDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        indexInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "relationship" + "-" + random;
    String relationshipOutputDir = s3bucket + relationshipsDir + "/tmp/";

    FrameworkUtils.removeFile(relationshipOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg",
                datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("complete-random", String.valueOf(completeRandomization));
    conf.set("output-ids", String.valueOf(outputIds));
    conf.set("complete-random-str", random);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    conf.set("remove-not-significant", String.valueOf(removeNotSignificant));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }
    if (hasScoreThreshold) {
        conf.set("score-threshold", scoreThreshold);
    }
    if (hasStrengthThreshold) {
        conf.set("strength-threshold", strengthThreshold);
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(TopologyTimeSeriesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationMapper.class);
    job.setReducerClass(CorrelationReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, indexInputDirs.substring(0, indexInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(relationshipOutputDir));

    job.setJarByClass(Relationship.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + relationshipsDir + "/tmp/" + dataset1 + "-" + dataset2 + "/";
            String to = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java
/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the index and events even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option thresholdOption = new Option("t", "use-custom-thresholds", false,
            "use custom thresholds for regular and rare events, defined in HDFS_HOME/"
                    + FrameworkUtils.thresholdDir + " file");
    thresholdOption.setRequired(false);
    options.addOption(thresholdOption);

    Option gOption = new Option("g", "group", true,
            "set group of datasets for which the indices and events will be computed");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetIndex = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();
    HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>();
    HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>();

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());
    BufferedReader br;

    boolean removeExistingFiles = cmd.hasOption("f");
    boolean isThresholdUserDefined = cmd.hasOption("t");

    for (String dataset : cmd.getOptionValues("g")) {
        // getting aggregates
        String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3);
        if (aggregate.length == 0) {
            System.out.println("No aggregates found for " + dataset + ".");
            continue;
        }

        // getting aggregates header
        String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3);
        if (aggregatesHeaderFileName == null) {
            System.out.println("No aggregate header for " + dataset);
            continue;
        }
        String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName;

        shortDataset.add(dataset);
        datasetId.put(dataset, null);

        if (s3) {
            path = new Path(aggregatesHeader);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        datasetAgg.put(dataset, br.readLine().split("\t")[1]);
        br.close();
        if (s3)
            fs.close();
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id
    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    // getting user defined thresholds
    if (isThresholdUserDefined) {
        if (s3) {
            path = new Path(s3bucket + FrameworkUtils.thresholdDir);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        line = br.readLine();
        while (line != null) {
            // getting dataset name
            String dataset = line.trim();
            HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>();
            HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>();
            line = br.readLine();
            while ((line != null) && (line.split("\t").length > 1)) {
                // getting attribute ids and thresholds
                String[] keyVals = line.trim().split("\t");
                int att = Integer.parseInt(keyVals[0].trim());
                regThresholds.put(att, Double.parseDouble(keyVals[1].trim()));
                rareThresholds.put(att, Double.parseDouble(keyVals[2].trim()));
                line = br.readLine();
            }
            datasetRegThreshold.put(dataset, regThresholds);
            datasetRareThreshold.put(dataset, rareThresholds);
        }
        br.close();
    }
    if (s3)
        fs.close();

    // datasets that will use existing merge tree
    ArrayList<String> useMergeTree = new ArrayList<String>();

    // creating index for each spatio-temporal resolution
    FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3);

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {
        String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/";

        if (removeExistingFiles) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3);
            FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3);
        } else if (datasetRegThreshold.containsKey(dataset)) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) {
                useMergeTree.add(dataset);
            }
        }

        if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) {
            input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset);
            shortDatasetIndex.add(dataset);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have indices.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    String aggregateDatasets = "";
    it = input.iterator();
    while (it.hasNext()) {
        aggregateDatasets += it.next() + ",";
    }

    Job icJob = null;
    Configuration icConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "index";
    String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/";

    FrameworkUtils.removeFile(indexOutputDir, s3conf, s3);

    icConf.set("dataset-name", datasetNames);
    icConf.set("dataset-id", datasetIds);

    if (!useMergeTree.isEmpty()) {
        String useMergeTreeStr = "";
        for (String dt : useMergeTree) {
            useMergeTreeStr += dt + ",";
        }
        icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1));
    }

    for (int i = 0; i < shortDataset.size(); i++) {
        String dataset = shortDataset.get(i);
        String id = datasetId.get(dataset);
        icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset));
        if (datasetRegThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset);
            String thresholds = "";
            for (int att : regThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ",";
            }
            icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
        if (datasetRareThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset);
            String thresholds = "";
            for (int att : rareThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ",";
            }
            icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
    }

    icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    icConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    icConf.set("mapreduce.task.io.sort.mb", "200");
    icConf.set("mapreduce.task.io.sort.factor", "100");
    //icConf.set("mapreduce.task.timeout", "1800000");

    machineConf.setMachineConfiguration(icConf);

    if (s3) {
        machineConf.setMachineConfiguration(icConf);
        icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        icConf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    icJob = new Job(icConf);
    icJob.setJobName(jobName);

    icJob.setMapOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class);
    icJob.setOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setOutputValueClass(TopologyTimeSeriesWritable.class);
    //icJob.setOutputKeyClass(Text.class);
    //icJob.setOutputValueClass(Text.class);

    icJob.setMapperClass(IndexCreationMapper.class);
    icJob.setReducerClass(IndexCreationReducer.class);
    icJob.setNumReduceTasks(machineConf.getNumberReduces());

    icJob.setInputFormatClass(SequenceFileInputFormat.class);
    //icJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(icJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(icJob, true);
    FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1));
    FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir));

    icJob.setJarByClass(IndexCreation.class);

    long start = System.currentTimeMillis();
    icJob.submit();
    icJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetIndex) {
        String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }
}
From source file:Main.java
private static void putInto(Map<String, Object> map, String key, ArrayList<Object> list) {
    if (list.isEmpty()) {
        return;
    }
    map.put(key, list);
}
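A minimal usage sketch for the helper above (the class name and map contents are hypothetical, added only to show that empty lists are never stored):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

public class PutIntoDemo {
    public static void main(String[] args) {
        Map<String, Object> map = new HashMap<>();
        putInto(map, "empty", new ArrayList<>()); // skipped: the list is empty
        ArrayList<Object> names = new ArrayList<>();
        names.add("A");
        putInto(map, "names", names); // stored: the list has an element
        System.out.println(map); // prints {names=[A]}
    }

    private static void putInto(Map<String, Object> map, String key, ArrayList<Object> list) {
        if (list.isEmpty()) {
            return; // never store empty lists
        }
        map.put(key, list);
    }
}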