List of usage examples for java.util.ArrayList.isEmpty()
public boolean isEmpty()
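isEmpty() returns true if the list contains no elements, that is, exactly when size() == 0. A minimal sketch of the contract:

import java.util.ArrayList;

public class IsEmptyDemo {
    public static void main(String[] args) {
        ArrayList<String> list = new ArrayList<>();
        System.out.println(list.isEmpty()); // true: a new list holds no elements
        list.add("x");
        System.out.println(list.isEmpty()); // false: size() is now 1
        list.clear();
        System.out.println(list.isEmpty()); // true again after clear()
    }
}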
From source file:Main.java
public static void main(String[] args) {
    ArrayList<Integer> arrlist = new ArrayList<Integer>(5);
    arrlist.add(25);
    // isEmpty() returns false once the list holds at least one element
    boolean retval = arrlist.isEmpty();
    if (retval) {
        System.out.println("list is empty");
    } else {
        System.out.println("list is not empty");
    }
}
From source file:TestMapObjInterface.java
public static void main(String[] args) {
    if (args.length < 3) {
        System.out.println("Usage: MapObjInterfaceTest <filename> <vfs name> <CN address>");
        System.out.println("MapObjInterfaceTest is a unit test of mproxy's map_obj interface."
                + " It expects as input a filename and a Parascale vfs name.");
        System.out.println("It returns a listing of the chunks for the specified file"
                + " in the following format:");
        System.out.println();
        System.out.println("[1275] ChunkInfo: offset: 85563801600 length: 67108864 17 : "
                + "Filelength 85899345920 <0>StorageNodeInfo: nodeName: "
                + "atg-sn2 primar addr: 10.10.249.222 enabled: true up: true ");
        System.out.println();
        System.out.println("Callers may use this information to determine if the "
                + "map_obj interface is returning correct data.");
        System.exit(1);
    }
    try {
        if (args[args.length - 1].equals("auto")) {
            if (runTest(args[0], args[1], args[2])) {
                System.out.println("pass");
            } else {
                System.out.println("FAIL");
            }
        } else {
            // Print the chunk listing only when the lookup returned results
            ArrayList<String> output = getChunks(args[0], args[1], args[2]);
            if (!output.isEmpty()) {
                for (int i = 0; i < output.size(); i++) {
                    System.out.println(output.get(i));
                }
            }
        }
    } catch (Exception e) {
        System.out.println(e);
    }
}
From source file:Main.java
public static void main(String[] args) {
    ArrayList<String> list = new ArrayList<>();
    list.add("A");
    list.add("B");
    list.add("C");
    list.add("D");
    list.add("E");
    list.add("F");
    list.add("G");
    list.add("H");
    System.out.println(list);
    // Remove elements in random order until the list is drained
    while (!list.isEmpty()) {
        int index = (int) Math.floor(Math.random() * list.size());
        System.out.println("Name " + list.get(index));
        list.remove(index);
    }
}
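The example above repeatedly picks a random index and removes it, using isEmpty() as the loop guard so the loop is guaranteed to terminate. An alternative sketch, not taken from the original source: shuffle once with java.util.Collections, then drain from the tail so each removal is O(1) for an ArrayList.

import java.util.ArrayList;
import java.util.Collections;

public class ShuffleDrain {
    public static void main(String[] args) {
        ArrayList<String> list = new ArrayList<>();
        Collections.addAll(list, "A", "B", "C", "D", "E", "F", "G", "H");
        Collections.shuffle(list); // randomize the order once
        while (!list.isEmpty()) {
            // removing the last element avoids shifting the remaining elements
            System.out.println("Name " + list.remove(list.size() - 1));
        }
    }
}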
From source file:accinapdf.ACCinaPDF.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    controller.Logger.create();
    controller.Bundle.getBundle();
    if (GraphicsEnvironment.isHeadless()) {
        // Headless: validate the PDF given on the command line and print a report
        String fich;
        if (args.length != 1) {
            System.err.println(Bundle.getBundle().getString("invalidArgs"));
            return;
        } else {
            fich = args[0];
        }
        try {
            System.out.println(Bundle.getBundle().getString("validating") + " " + fich);
            ArrayList<SignatureValidation> alSv = CCInstance.getInstance().validatePDF(fich, null);
            if (alSv.isEmpty()) {
                System.out.println(Bundle.getBundle().getString("notSigned"));
            } else {
                String newLine = System.getProperty("line.separator");
                String toWrite = "(";
                int numSigs = alSv.size();
                if (numSigs == 1) {
                    toWrite += "1 " + Bundle.getBundle().getString("signature");
                } else {
                    toWrite += numSigs + " " + Bundle.getBundle().getString("signatures");
                }
                toWrite += ")" + newLine;
                for (SignatureValidation sv : alSv) {
                    toWrite += "\t" + sv.getName() + " - ";
                    toWrite += (sv.isCertification()
                            ? WordUtils.capitalize(Bundle.getBundle().getString("certificate"))
                            : WordUtils.capitalize(Bundle.getBundle().getString("signed")))
                            + " " + Bundle.getBundle().getString("by") + " " + sv.getSignerName();
                    toWrite += newLine + "\t\t";
                    if (sv.isChanged()) {
                        toWrite += Bundle.getBundle().getString("certifiedChangedOrCorrupted");
                    } else {
                        if (sv.isCertification()) {
                            if (sv.isValid()) {
                                if (sv.isChanged() || !sv.isCoversEntireDocument()) {
                                    toWrite += Bundle.getBundle().getString("certifiedButChanged");
                                } else {
                                    toWrite += Bundle.getBundle().getString("certifiedOk");
                                }
                            } else {
                                toWrite += Bundle.getBundle().getString("changedAfterCertified");
                            }
                        } else {
                            if (sv.isValid()) {
                                if (sv.isChanged()) {
                                    toWrite += Bundle.getBundle().getString("signedButChanged");
                                } else {
                                    toWrite += Bundle.getBundle().getString("signedOk");
                                }
                            } else {
                                toWrite += Bundle.getBundle().getString("signedChangedOrCorrupted");
                            }
                        }
                    }
                    toWrite += newLine + "\t\t";
                    if (sv.getOcspCertificateStatus().equals(CertificateStatus.OK)
                            || sv.getCrlCertificateStatus().equals(CertificateStatus.OK)) {
                        toWrite += Bundle.getBundle().getString("certOK");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.REVOKED)
                            || sv.getCrlCertificateStatus().equals(CertificateStatus.REVOKED)) {
                        toWrite += Bundle.getBundle().getString("certRevoked");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.UNCHECKED)
                            && sv.getCrlCertificateStatus().equals(CertificateStatus.UNCHECKED)) {
                        toWrite += Bundle.getBundle().getString("certNotVerified");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.UNCHAINED)) {
                        toWrite += Bundle.getBundle().getString("certNotChained");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.EXPIRED)) {
                        toWrite += Bundle.getBundle().getString("certExpired");
                    } else if (sv.getOcspCertificateStatus().equals(CertificateStatus.CHAINED_LOCALLY)) {
                        toWrite += Bundle.getBundle().getString("certChainedLocally");
                    }
                    toWrite += newLine + "\t\t";
                    if (sv.isValidTimeStamp()) {
                        toWrite += Bundle.getBundle().getString("validTimestamp");
                    } else {
                        toWrite += Bundle.getBundle().getString("signerDateTime");
                    }
                    toWrite += newLine + "\t\t";
                    toWrite += WordUtils.capitalize(Bundle.getBundle().getString("revision")) + ": "
                            + sv.getRevision() + " " + Bundle.getBundle().getString("of") + " "
                            + sv.getNumRevisions();
                    toWrite += newLine + "\t\t";
                    final DateFormat df = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
                    final SimpleDateFormat sdf = new SimpleDateFormat("Z");
                    if (sv.getSignature().getTimeStampToken() == null) {
                        Calendar cal = sv.getSignature().getSignDate();
                        // format the Date directly; passing a String to format() would fail
                        String date = df.format(cal.getTime());
                        toWrite += date + " " + sdf.format(cal.getTime()) + " ("
                                + Bundle.getBundle().getString("signerDateTimeSmall") + ")";
                    } else {
                        Calendar ts = sv.getSignature().getTimeStampDate();
                        String date = df.format(ts.getTime());
                        toWrite += Bundle.getBundle().getString("date") + " " + date + " "
                                + sdf.format(ts.getTime());
                    }
                    toWrite += newLine + "\t\t";
                    boolean ltv = (sv.getOcspCertificateStatus() == CertificateStatus.OK
                            || sv.getCrlCertificateStatus() == CertificateStatus.OK);
                    toWrite += Bundle.getBundle().getString("isLtv") + ": "
                            + (ltv ? Bundle.getBundle().getString("yes") : Bundle.getBundle().getString("no"));
                    String reason = sv.getSignature().getReason();
                    toWrite += newLine + "\t\t";
                    toWrite += Bundle.getBundle().getString("reason") + ": ";
                    if (reason == null || reason.isEmpty()) {
                        toWrite += Bundle.getBundle().getString("notDefined");
                    } else {
                        toWrite += reason;
                    }
                    String location = sv.getSignature().getLocation();
                    toWrite += newLine + "\t\t";
                    toWrite += Bundle.getBundle().getString("location") + ": ";
                    if (location == null || location.isEmpty()) {
                        toWrite += Bundle.getBundle().getString("notDefined");
                    } else {
                        toWrite += location;
                    }
                    toWrite += newLine + "\t\t";
                    toWrite += Bundle.getBundle().getString("allowsChanges") + ": ";
                    try {
                        int certLevel = CCInstance.getInstance().getCertificationLevel(sv.getFilename());
                        if (certLevel == PdfSignatureAppearance.CERTIFIED_FORM_FILLING) {
                            toWrite += Bundle.getBundle().getString("onlyAnnotations");
                        } else if (certLevel == PdfSignatureAppearance.CERTIFIED_FORM_FILLING_AND_ANNOTATIONS) {
                            toWrite += Bundle.getBundle().getString("annotationsFormFilling");
                        } else if (certLevel == PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED) {
                            toWrite += Bundle.getBundle().getString("no");
                        } else {
                            toWrite += Bundle.getBundle().getString("yes");
                        }
                    } catch (IOException ex) {
                        controller.Logger.getLogger().addEntry(ex);
                    }
                    toWrite += newLine + "\t\t";
                    if (sv.getOcspCertificateStatus() == CertificateStatus.OK
                            || sv.getCrlCertificateStatus() == CertificateStatus.OK) {
                        toWrite += (Bundle.getBundle().getString("validationCheck1") + " "
                                + (sv.getOcspCertificateStatus() == CertificateStatus.OK
                                        ? Bundle.getBundle().getString("validationCheck2") + ": "
                                                + CCInstance.getInstance().getCertificateProperty(
                                                        sv.getSignature().getOcsp().getCerts()[0].getSubject(), "CN")
                                                + " " + Bundle.getBundle().getString("at") + " "
                                                + df.format(sv.getSignature().getOcsp().getProducedAt())
                                        : (sv.getCrlCertificateStatus() == CertificateStatus.OK ? "CRL" : ""))
                                + (sv.getSignature().getTimeStampToken() != null
                                        ? Bundle.getBundle().getString("validationCheck3") + ": "
                                                + CCInstance.getInstance().getCertificateProperty(
                                                        sv.getSignature().getTimeStampToken().getSID().getIssuer(), "O")
                                        : ""));
                    } else if (sv.getSignature().getTimeStampToken() != null) {
                        toWrite += (Bundle.getBundle().getString("validationCheck3") + ": "
                                + CCInstance.getInstance().getCertificateProperty(
                                        sv.getSignature().getTimeStampToken().getSID().getIssuer(), "O"));
                    }
                    toWrite += newLine;
                }
                System.out.println(toWrite);
                System.out.println(Bundle.getBundle().getString("validationFinished"));
            }
        } catch (IOException | DocumentException | GeneralSecurityException ex) {
            System.err.println(Bundle.getBundle().getString("validationError"));
            Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
        }
    } else {
        // GUI: pick a platform look-and-feel, then show the splash screen
        CCSignatureSettings defaultSettings = new CCSignatureSettings(false);
        if (SystemUtils.IS_OS_WINDOWS) {
            for (LookAndFeelInfo info : UIManager.getInstalledLookAndFeels()) {
                if ("Windows".equals(info.getName())) {
                    try {
                        UIManager.setLookAndFeel(info.getClassName());
                    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
                            | UnsupportedLookAndFeelException ex) {
                        Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
                    }
                    break;
                }
            }
        } else if (SystemUtils.IS_OS_LINUX) {
            for (LookAndFeelInfo info : UIManager.getInstalledLookAndFeels()) {
                if ("Nimbus".equals(info.getName())) {
                    try {
                        UIManager.setLookAndFeel(info.getClassName());
                    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
                            | UnsupportedLookAndFeelException ex) {
                        Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
                    }
                    break;
                }
            }
        } else if (SystemUtils.IS_OS_MAC_OSX) {
            try {
                UIManager.setLookAndFeel("com.sun.java.swing.plaf.mac.MacLookAndFeel");
            } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
                    | UnsupportedLookAndFeelException ex) {
                Logger.getLogger(ACCinaPDF.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        new SplashScreen().setVisible(true);
    }
}
From source file:isc_415_practica_1.ISC_415_Practica_1.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    String urlString;
    Scanner input = new Scanner(System.in);
    Document doc;
    try {
        urlString = input.next();
        if (urlString.equals("servlet")) {
            urlString = "http://localhost:8084/ISC_415_Practica1_Servlet/client";
        }
        urlString = urlString.contains("http://") || urlString.contains("https://")
                ? urlString
                : "http://" + urlString;
        doc = Jsoup.connect(urlString).get();
    } catch (Exception ex) {
        System.out.println("El URL ingresado no es válido.");
        return;
    }
    ArrayList<NameValuePair> formInputParams;
    formInputParams = new ArrayList<>();
    String[] plainTextDoc = new TextNode(doc.html(), "").getWholeText().split("\n");
    System.out.println(String.format("Número de lineas del documento: %d", plainTextDoc.length));
    System.out.println(String.format("Número de p tags: %d", doc.select("p").size()));
    System.out.println(String.format("Número de img tags: %d", doc.select("img").size()));
    System.out.println(String.format("Número de form tags: %d", doc.select("form").size()));
    Integer index = 1;
    ArrayList<NameValuePair> urlParameters = new ArrayList<>();
    for (Element e : doc.select("form")) {
        System.out.println(String.format("Form %d: Número de Input tags %d", index, e.select("input").size()));
        System.out.println(e.select("input"));
        for (Element formInput : e.select("input")) {
            // compare attribute values with isEmpty(), not with != on String references
            if (formInput.attr("id") != null && !formInput.attr("id").isEmpty()) {
                urlParameters.add(new BasicNameValuePair(formInput.attr("id"), "PRACTICA1"));
            } else if (formInput.attr("name") != null && !formInput.attr("name").isEmpty()) {
                urlParameters.add(new BasicNameValuePair(formInput.attr("name"), "PRACTICA1"));
            }
        }
        index++;
    }
    if (!urlParameters.isEmpty()) {
        try {
            CloseableHttpClient httpclient = HttpClients.createDefault();
            UrlEncodedFormEntity entity = new UrlEncodedFormEntity(urlParameters, Consts.UTF_8);
            HttpPost httpPost = new HttpPost(urlString);
            httpPost.setHeader("User-Agent", USER_AGENT);
            httpPost.setEntity(entity);
            HttpResponse response = httpclient.execute(httpPost);
            System.out.println(response.getStatusLine());
        } catch (IOException ex) {
            Logger.getLogger(ISC_415_Practica_1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
/**
 * @param args
 * @throws ParseException
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // An empty group falls back to the other one (G1 = G2 or G2 = G1)
    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }
    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids
    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";
    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;

            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg",
                datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/"
                    + dataset1 + "-" + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
From source file:com.sebuilder.interpreter.SeInterpreter.java
public static void main(String[] args) {
    if (args.length == 0) {
        System.out.println("Usage: [--driver=<drivername>] [--driver.<configkey>=<configvalue>...] "
                + "[--implicitlyWait=<ms>] [--pageLoadTimeout=<ms>] [--stepTypePackage=<package name>] <script path>...");
        System.exit(0);
    }

    Log log = LogFactory.getFactory().getInstance(SeInterpreter.class);
    WebDriverFactory wdf = DEFAULT_DRIVER_FACTORY;
    ScriptFactory sf = new ScriptFactory();
    StepTypeFactory stf = new StepTypeFactory();
    sf.setStepTypeFactory(stf);
    TestRunFactory trf = new TestRunFactory();
    sf.setTestRunFactory(trf);

    ArrayList<String> paths = new ArrayList<String>();
    HashMap<String, String> driverConfig = new HashMap<String, String>();
    for (String s : args) {
        if (s.startsWith("--")) {
            String[] kv = s.split("=", 2);
            if (kv.length < 2) {
                log.fatal("Driver configuration option \"" + s
                        + "\" is not of the form \"--driver=<name>\" or \"--driver.<key>=<value>\".");
                System.exit(1);
            }
            if (s.startsWith("--implicitlyWait")) {
                trf.setImplicitlyWaitDriverTimeout(Integer.parseInt(kv[1]));
            } else if (s.startsWith("--pageLoadTimeout")) {
                trf.setPageLoadDriverTimeout(Integer.parseInt(kv[1]));
            } else if (s.startsWith("--stepTypePackage")) {
                stf.setPrimaryPackage(kv[1]);
            } else if (s.startsWith("--driver.")) {
                driverConfig.put(kv[0].substring("--driver.".length()), kv[1]);
            } else if (s.startsWith("--driver")) {
                try {
                    wdf = (WebDriverFactory) Class
                            .forName("com.sebuilder.interpreter.webdriverfactory." + kv[1]).newInstance();
                } catch (ClassNotFoundException e) {
                    log.fatal("Unknown WebDriverFactory: "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                } catch (InstantiationException e) {
                    log.fatal("Could not instantiate WebDriverFactory "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                } catch (IllegalAccessException e) {
                    log.fatal("Could not instantiate WebDriverFactory "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                }
            } else {
                paths.add(s);
            }
        } else {
            paths.add(s);
        }
    }

    if (paths.isEmpty()) {
        log.info("Configuration successful but no paths to scripts specified. Exiting.");
        System.exit(0);
    }

    HashMap<String, String> cfg = new HashMap<String, String>(driverConfig);
    for (String path : paths) {
        try {
            TestRun lastRun = null;
            for (Script script : sf.parse(new File(path))) {
                for (Map<String, String> data : script.dataRows) {
                    try {
                        lastRun = script.testRunFactory.createTestRun(script, log, wdf, driverConfig,
                                data, lastRun);
                        if (lastRun.finish()) {
                            log.info(script.name + " succeeded");
                        } else {
                            log.info(script.name + " failed");
                        }
                    } catch (Exception e) {
                        log.info(script.name + " failed", e);
                    }
                }
            }
        } catch (Exception e) {
            log.fatal("Run error.", e);
            System.exit(1);
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.relationship_computation.Relationship.java
/**
 * @param args
 * @throws ParseException
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option scoreOption = new Option("sc", "score", true, "set threshold for relationship score");
    scoreOption.setRequired(false);
    scoreOption.setArgName("SCORE THRESHOLD");
    options.addOption(scoreOption);

    Option strengthOption = new Option("st", "strength", true, "set threshold for relationship strength");
    strengthOption.setRequired(false);
    strengthOption.setArgName("STRENGTH THRESHOLD");
    options.addOption(strengthOption);

    Option completeRandomizationOption = new Option("c", "complete-randomization", false,
            "use complete randomization when performing significance tests");
    completeRandomizationOption.setRequired(false);
    options.addOption(completeRandomizationOption);

    Option idOption = new Option("id", "ids", false, "output id instead of names for datasets and attributes");
    idOption.setRequired(false);
    options.addOption(idOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    Option removeOption = new Option("r", "remove-not-significant", false,
            "remove relationships that are not significant from the final output");
    removeOption.setRequired(false);
    options.addOption(removeOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeNotSignificant = cmd.hasOption("r");
    boolean removeExistingFiles = cmd.hasOption("f");
    boolean completeRandomization = cmd.hasOption("c");
    boolean hasScoreThreshold = cmd.hasOption("sc");
    boolean hasStrengthThreshold = cmd.hasOption("st");
    boolean outputIds = cmd.hasOption("id");
    String scoreThreshold = hasScoreThreshold ? cmd.getOptionValue("sc") : "";
    String strengthThreshold = hasStrengthThreshold ? cmd.getOptionValue("st") : "";

    // all datasets
    ArrayList<String> all_datasets = new ArrayList<String>();
    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        all_datasets.add(line.split("\t")[0]);
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();
    String[] all_datasets_array = new String[all_datasets.size()];
    all_datasets.toArray(all_datasets_array);

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : all_datasets_array;
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }
    if (firstGroup.isEmpty()) {
        System.out.println("No indices from datasets in G1.");
        System.exit(0);
    }
    if (secondGroup.isEmpty()) {
        System.out.println("No indices from datasets in G2.");
        System.exit(0);
    }

    // getting dataset ids
    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        all_datasets.add(dt[0]);
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    String relationshipsDir = "";
    if (outputIds) {
        relationshipsDir = FrameworkUtils.relationshipsIdsDir;
    } else {
        relationshipsDir = FrameworkUtils.relationshipsDir;
    }

    FrameworkUtils.createDir(s3bucket + relationshipsDir, s3conf, s3);

    String random = completeRandomization ? "complete" : "restricted";

    String indexInputDirs = "";
    String noRelationship = "";
    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;

            String correlationOutputFileName = s3bucket + relationshipsDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        indexInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "relationship" + "-" + random;
    String relationshipOutputDir = s3bucket + relationshipsDir + "/tmp/";

    FrameworkUtils.removeFile(relationshipOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg",
                datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("complete-random", String.valueOf(completeRandomization));
    conf.set("output-ids", String.valueOf(outputIds));
    conf.set("complete-random-str", random);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    conf.set("remove-not-significant", String.valueOf(removeNotSignificant));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }
    if (hasScoreThreshold) {
        conf.set("score-threshold", scoreThreshold);
    }
    if (hasStrengthThreshold) {
        conf.set("strength-threshold", strengthThreshold);
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(TopologyTimeSeriesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationMapper.class);
    job.setReducerClass(CorrelationReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, indexInputDirs.substring(0, indexInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(relationshipOutputDir));

    job.setJarByClass(Relationship.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + relationshipsDir + "/tmp/" + dataset1 + "-" + dataset2 + "/";
            String to = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java
/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the index and events even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option thresholdOption = new Option("t", "use-custom-thresholds", false,
            "use custom thresholds for regular and rare events, defined in HDFS_HOME/"
                    + FrameworkUtils.thresholdDir + " file");
    thresholdOption.setRequired(false);
    options.addOption(thresholdOption);

    Option gOption = new Option("g", "group", true,
            "set group of datasets for which the indices and events will be computed");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetIndex = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();
    HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>();
    HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>();

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());
    BufferedReader br;

    boolean removeExistingFiles = cmd.hasOption("f");
    boolean isThresholdUserDefined = cmd.hasOption("t");

    for (String dataset : cmd.getOptionValues("g")) {
        // getting aggregates
        String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3);
        if (aggregate.length == 0) {
            System.out.println("No aggregates found for " + dataset + ".");
            continue;
        }

        // getting aggregates header
        String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3);
        if (aggregatesHeaderFileName == null) {
            System.out.println("No aggregate header for " + dataset);
            continue;
        }
        String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName;

        shortDataset.add(dataset);
        datasetId.put(dataset, null);

        if (s3) {
            path = new Path(aggregatesHeader);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        datasetAgg.put(dataset, br.readLine().split("\t")[1]);
        br.close();
        if (s3)
            fs.close();
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id
    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);

    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    // getting user defined thresholds
    if (isThresholdUserDefined) {
        if (s3) {
            path = new Path(s3bucket + FrameworkUtils.thresholdDir);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        line = br.readLine();
        while (line != null) {
            // getting dataset name
            String dataset = line.trim();
            HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>();
            HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>();
            line = br.readLine();
            while ((line != null) && (line.split("\t").length > 1)) {
                // getting attribute ids and thresholds
                String[] keyVals = line.trim().split("\t");
                int att = Integer.parseInt(keyVals[0].trim());
                regThresholds.put(att, Double.parseDouble(keyVals[1].trim()));
                rareThresholds.put(att, Double.parseDouble(keyVals[2].trim()));
                line = br.readLine();
            }
            datasetRegThreshold.put(dataset, regThresholds);
            datasetRareThreshold.put(dataset, rareThresholds);
        }
        br.close();
    }
    if (s3)
        fs.close();

    // datasets that will use existing merge tree
    ArrayList<String> useMergeTree = new ArrayList<String>();

    // creating index for each spatio-temporal resolution
    FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3);

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {
        String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/";

        if (removeExistingFiles) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3);
            FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3);
        } else if (datasetRegThreshold.containsKey(dataset)) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) {
                useMergeTree.add(dataset);
            }
        }

        if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) {
            input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset);
            shortDatasetIndex.add(dataset);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have indices.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    String aggregateDatasets = "";
    it = input.iterator();
    while (it.hasNext()) {
        aggregateDatasets += it.next() + ",";
    }

    Job icJob = null;
    Configuration icConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "index";
    String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/";

    FrameworkUtils.removeFile(indexOutputDir, s3conf, s3);

    icConf.set("dataset-name", datasetNames);
    icConf.set("dataset-id", datasetIds);

    if (!useMergeTree.isEmpty()) {
        String useMergeTreeStr = "";
        for (String dt : useMergeTree) {
            useMergeTreeStr += dt + ",";
        }
        icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1));
    }

    for (int i = 0; i < shortDataset.size(); i++) {
        String dataset = shortDataset.get(i);
        String id = datasetId.get(dataset);
        icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset));
        if (datasetRegThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset);
            String thresholds = "";
            for (int att : regThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ",";
            }
            icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
        if (datasetRareThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset);
            String thresholds = "";
            for (int att : rareThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ",";
            }
            icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
    }

    icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    icConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    icConf.set("mapreduce.task.io.sort.mb", "200");
    icConf.set("mapreduce.task.io.sort.factor", "100");
    //icConf.set("mapreduce.task.timeout", "1800000");

    machineConf.setMachineConfiguration(icConf);

    if (s3) {
        machineConf.setMachineConfiguration(icConf);
        icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        icConf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    icJob = new Job(icConf);
    icJob.setJobName(jobName);

    icJob.setMapOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class);
    icJob.setOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setOutputValueClass(TopologyTimeSeriesWritable.class);
    //icJob.setOutputKeyClass(Text.class);
    //icJob.setOutputValueClass(Text.class);

    icJob.setMapperClass(IndexCreationMapper.class);
    icJob.setReducerClass(IndexCreationReducer.class);
    icJob.setNumReduceTasks(machineConf.getNumberReduces());

    icJob.setInputFormatClass(SequenceFileInputFormat.class);
    //icJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(icJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(icJob, true);
    FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1));
    FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir));

    icJob.setJarByClass(IndexCreation.class);

    long start = System.currentTimeMillis();
    icJob.submit();
    icJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetIndex) {
        String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }
}
From source file:Main.java
private static void putInto(Map<String, Object> map, String key, ArrayList<Object> list) {
    if (list.isEmpty()) {
        return;
    }
    map.put(key, list);
}
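A minimal usage sketch for the helper above (the class name and map contents are hypothetical, added only to show that empty lists are never stored):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

public class PutIntoDemo {
    public static void main(String[] args) {
        Map<String, Object> map = new HashMap<>();
        putInto(map, "empty", new ArrayList<>()); // skipped: the list is empty
        ArrayList<Object> names = new ArrayList<>();
        names.add("A");
        putInto(map, "names", names); // stored: the list has an element
        System.out.println(map); // prints {names=[A]}
    }

    private static void putInto(Map<String, Object> map, String key, ArrayList<Object> list) {
        if (list.isEmpty()) {
            return; // never store empty lists
        }
        map.put(key, list);
    }
}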