Example usage for java.util Iterator next

List of usage examples for java.util Iterator next

Introduction

On this page you can find example usage for java.util Iterator next.

Prototype

E next();

Document

Returns the next element in the iteration.
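
The call is normally guarded by hasNext(), since invoking next() once the iteration is exhausted throws NoSuchElementException. Below is a minimal self-contained sketch of the pattern; the class name and list contents are illustrative only:

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class IteratorNextExample {
    public static void main(String[] args) {
        List<String> names = Arrays.asList("alpha", "beta", "gamma");
        Iterator<String> it = names.iterator();
        // hasNext() guards each call to next(); one more call after the
        // last element would throw NoSuchElementException.
        while (it.hasNext()) {
            String name = it.next(); // returns the next element and advances the iterator
            System.out.println(name);
        }
    }
}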

Usage

From source file:fr.iphc.grid.jobmonitor.CeList.java

public static void main(String[] args) throws Exception {

    SessionFactory.createSession(true);
    CeList command = new CeList();
    CommandLine line = command.parse(args);
    Integer timeout = 0;
    String TableSql = "monce";
    //      MySQLAccess sql = new MySQLAccess();
    if (line.getOptionValue(OPT_TIMEOUT) == null) {
        timeout = 15;
    } else {
        timeout = Integer.parseInt(line.getOptionValue(OPT_TIMEOUT));
    }
    timeout = timeout * 60; // convert to seconds
    Date start = new Date();
    String OutDir = line.getOptionValue(OPT_OUTDIR);

    if (OutDir == null) {
        OutDir = "/tmp/thread";
    }
    ArrayList<URL> CeList = null;
    if (line.getOptionValue(OPT_CEPATH) == null) {
        CeList = AvailableLdapCe();
        //         for (URL k : CeList) {
        //            // System.out.println(k);
        //         }
    } else {
        CeList = AvailableCe(line.getOptionValue(OPT_CEPATH));
    }
    Boolean ret = initDirectory(new File(OutDir));
    if (!ret) {
        System.out.println("ERROR: " + OutDir + " STOP");
        System.exit(-1);
    }

    // check if we can connect to the grid
    // try{
    // SessionFactory.createSession(true);
    // }catch(NoSuccessException e){
    // System.err.println("Could not connect to the grid at all
    // ("+e.getMessage()+")");
    // System.err.println("Aborting");
    // System.exit(0);
    //
    // }

    SubmitterThread[] st = new SubmitterThread[CeList.size()];

    Iterator<URL> i = CeList.iterator();
    int index = 0;
    while (i.hasNext()) {
        URL serviceURL = i.next();
        // Do not move this into the thread, because of the options.
        Properties prop = new Properties();
        prop.setProperty("Executable", "/bin/hostname");
        // prop.setProperty("Executable", "touch /dev/null");
        JobDescription desc = createJobDescription(prop);
        desc.setAttribute(JobDescription.INTERACTIVE, "true");
        desc.setAttribute(JobDescription.EXECUTABLE, "/bin/hostname");
        // proxy="/home/dsa/.globus/biomed.txt";
        // Context ctx = ContextFactory.createContext();
        // ctx.setAttribute(Context.TYPE, "VOMS");
        // ctx.setAttribute(Context.USERVO, "biomed");
        // ctx.setAttribute(Context.USERPROXY,proxy);
        // Session session = SessionFactory.createSession(false);
        // session.addContext(ctx);
        Session session = SessionFactory.createSession(true);
        st[index] = new SubmitterThread(serviceURL, session, desc, OutDir, timeout, start);
        st[index].setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
            @Override
            public void uncaughtException(Thread t, Throwable e) {
                // System.out.println("Error! An exception occured in "
                // + t.getName() + ". Cause: " + e.getMessage());
            }
        });
        st[index].start();
        // Thread.sleep(15*1000);
        // test if the file exists
        // System.out.println("Alive
        // "+OutDir+"/"+serviceURL.getHost()+"_"+serviceURL.getPath().replaceAll("/cream-","")+".out");
        // while ((!((new
        // File(OutDir+"/"+serviceURL.getHost()+"_"+serviceURL.getPath().replaceAll("/cream-","")+".out").exists())
        // ||
        // (new
        // File(OutDir+"/"+serviceURL.getHost()+"_"+serviceURL.getPath().replaceAll("/cream-","")+".err").exists()))))
        // {
        // Thread.sleep(500);
        // }
        // System.out.println("Alive "+serviceURL.getHost()+"-"+ index+"FILE
        // EXIST");
        index++;

    }

    // System.out.println("BEGIN LOOP: Max " + index);
    long date_start = System.currentTimeMillis();
    // System.out.println("BEGIN START: " + date_start);
    Integer time_out = (timeout + 180) * 1000; // in ms; the extra 180 s is a margin relative to the thread timeout
    Boolean Alive = true;
    //      int nb = 0;
    long now = System.currentTimeMillis();
    do {
        now = System.currentTimeMillis();
        Alive = false;
        //         nb = 0;
        for (int j = 0; j < index; j++) {
            if (st[j].isAlive()) {
                // System.out.println("Alive "+j);
                Alive = true;
                //               nb++;
            }
        }
        // System.out.println(nb);
        Thread.sleep(10000);
    } while ((Alive) && ((now - date_start) < time_out));

    for (int j = 0; j < index; j++) {
        if (st[j].isAlive()) {
            st[j].Requeststop();
        }
    }
    BilanCe(OutDir, CeList, TableSql);
    jobManagerLdap jm = new jobManagerLdap();
    jm.updateLdapCe();
    System.out.println("END " + new Date());
    // do some processing...
    System.exit(0);
}

From source file:edu.upenn.cis.FastAlign.java

/**
 * Prints alignments for options specified by command line arguments.
 * @param argv  parameters to be used by FastAlign.
 */
public static void main(String[] argv) {

    FastAlign align = FastAlign.initCommandLine(argv);
    if (align == null) {
        System.err.println("Usage: java " + FastAlign.class.getCanonicalName() + " -i file.fr-en\n"
                + " Standard options ([USE] = strongly recommended):\n" + "  -i: [REQ] Input parallel corpus\n"
                + "  -v: [USE] Use Dirichlet prior on lexical translation distributions\n"
                + "  -d: [USE] Favor alignment points close to the monotonic diagonal\n"
                + "  -o: [USE] Optimize how close to the diagonal alignment points should be\n"
                + "  -r: Run alignment in reverse (condition on target and predict source)\n"
                + "  -c: Output conditional probability table\n"
                + "  -e: Start with existing conditional probability table\n" + " Advanced options:\n"
                + "  -I: number of iterations in EM training (default = 5)\n"
                + "  -p: p_null parameter (default = 0.08)\n" + "  -N: No null word\n"
                + "  -a: alpha parameter for optional Dirichlet prior (default = 0.01)\n"
                + "  -T: starting lambda for diagonal distance parameter (default = 4)\n");
        System.exit(1);
    }
    boolean use_null = !align.no_null_word;
    if (align.variational_bayes && align.alpha <= 0.0) {
        System.err.println("--alpha must be > 0\n");
        System.exit(1);
    }
    double prob_align_not_null = 1.0 - align.prob_align_null;
    final int kNULL = align.d.Convert("<eps>");
    TTable s2t = new TTable();
    if (!align.existing_probability_filename.isEmpty()) {
        boolean success = s2t.ImportFromFile(align.existing_probability_filename, '\t', align.d);
        if (!success) {
            System.err.println("Can't read table " + align.existing_probability_filename);
            System.exit(1);
        }
    }
    Map<Pair, Integer> size_counts = new HashMap<Pair, Integer>();
    double tot_len_ratio = 0;
    double mean_srclen_multiplier = 0;
    List<Double> probs = new ArrayList<Double>();
    // E-M Iterations Loop TODO move this into a method?
    for (int iter = 0; iter < align.iterations || (iter == 0 && align.iterations == 0); ++iter) {
        final boolean final_iteration = (iter >= (align.iterations - 1));
        System.err.println("ITERATION " + (iter + 1) + (final_iteration ? " (FINAL)" : ""));
        Scanner in = null;
        try {
            in = new Scanner(new File(align.input));
            if (!in.hasNextLine()) {
                System.err.println("Can't read " + align.input);
                System.exit(1);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.err.println("Can't read " + align.input);
            System.exit(1);
        }

        double likelihood = 0;
        double denom = 0.0;
        int lc = 0;
        boolean flag = false;
        String line;
        //         String ssrc, strg;
        ArrayList<Integer> src = new ArrayList<Integer>();
        ArrayList<Integer> trg = new ArrayList<Integer>();
        double c0 = 0;
        double emp_feat = 0;
        double toks = 0;
        // Iterate over each line of the input file
        while (in.hasNextLine()) {
            line = in.nextLine();
            ++lc;
            if (lc % 1000 == 0) {
                System.err.print('.');
                flag = true;
            }
            if (lc % 50000 == 0) {
                System.err.println(" [" + lc + "]\n");
                System.err.flush();
                flag = false;
            }
            src.clear();
            trg.clear(); // TODO this is redundant; src and tgt cleared in ParseLine
            // Integerize and split source and target lines.
            align.ParseLine(line, src, trg);
            if (align.is_reverse) {
                ArrayList<Integer> tmp = src;
                src = trg;
                trg = tmp;
            }
            // TODO Empty lines break the parser. Should this be true?
            if (src.size() == 0 || trg.size() == 0) {
                System.err.println("Error in line " + lc + "\n" + line);
                System.exit(1);
            }
            if (iter == 0) {
                tot_len_ratio += ((double) trg.size()) / ((double) src.size());
            }
            denom += trg.size();
            probs.clear();
            // Add to pair length counts only if first iteration.
            if (iter == 0) {
                Pair pair = new Pair(trg.size(), src.size());
                Integer value = size_counts.get(pair);
                if (value == null)
                    value = 0;
                size_counts.put(pair, value + 1);
            }
            boolean first_al = true; // used when printing alignments
            toks += trg.size();
            // Iterate through the English tokens
            for (int j = 0; j < trg.size(); ++j) {
                final int f_j = trg.get(j);
                double sum = 0;
                double prob_a_i = 1.0 / (src.size() + (use_null ? 1 : 0)); // uniform (model 1)
                if (use_null) {
                    if (align.favor_diagonal) {
                        prob_a_i = align.prob_align_null;
                    }
                    probs.add(0, s2t.prob(kNULL, f_j) * prob_a_i);
                    sum += probs.get(0);
                }
                double az = 0;
                if (align.favor_diagonal)
                    az = DiagonalAlignment.computeZ(j + 1, trg.size(), src.size(), align.diagonal_tension)
                            / prob_align_not_null;
                for (int i = 1; i <= src.size(); ++i) {
                    if (align.favor_diagonal)
                        prob_a_i = DiagonalAlignment.unnormalizedProb(j + 1, i, trg.size(), src.size(),
                                align.diagonal_tension) / az;
                    probs.add(i, s2t.prob(src.get(i - 1), f_j) * prob_a_i);
                    sum += probs.get(i);
                }
                if (final_iteration) {
                    double max_p = -1;
                    int max_index = -1;
                    if (use_null) {
                        max_index = 0;
                        max_p = probs.get(0);
                    }
                    for (int i = 1; i <= src.size(); ++i) {
                        if (probs.get(i) > max_p) {
                            max_index = i;
                            max_p = probs.get(i);
                        }
                    }
                    if (max_index > 0) {
                        if (first_al)
                            first_al = false;
                        else
                            System.out.print(' ');
                        if (align.is_reverse)
                            System.out.print("" + j + '-' + (max_index - 1));
                        else
                            System.out.print("" + (max_index - 1) + '-' + j);
                    }
                } else {
                    if (use_null) {
                        double count = probs.get(0) / sum;
                        c0 += count;
                        s2t.Increment(kNULL, f_j, count);
                    }
                    for (int i = 1; i <= src.size(); ++i) {
                        final double p = probs.get(i) / sum;
                        s2t.Increment(src.get(i - 1), f_j, p);
                        emp_feat += DiagonalAlignment.feature(j, i, trg.size(), src.size()) * p;
                    }
                }
                likelihood += Math.log(sum);
            }
            if (final_iteration)
                System.out.println();
        }

        // log(e) = 1.0
        double base2_likelihood = likelihood / Math.log(2);

        if (flag) {
            System.err.println();
        }
        if (iter == 0) {
            mean_srclen_multiplier = tot_len_ratio / lc;
            System.err.println("expected target length = source length * " + mean_srclen_multiplier);
        }
        emp_feat /= toks;
        System.err.println("  log_e likelihood: " + likelihood);
        System.err.println("  log_2 likelihood: " + base2_likelihood);
        System.err.println("     cross entropy: " + (-base2_likelihood / denom));
        System.err.println("        perplexity: " + Math.pow(2.0, -base2_likelihood / denom));
        System.err.println("      posterior p0: " + c0 / toks);
        System.err.println(" posterior al-feat: " + emp_feat);
        //System.err.println("     model tension: " + mod_feat / toks );
        System.err.println("       size counts: " + size_counts.size());
        if (!final_iteration) {
            if (align.favor_diagonal && align.optimize_tension && iter > 0) {
                for (int ii = 0; ii < 8; ++ii) {
                    double mod_feat = 0;
                    Iterator<Map.Entry<Pair, Integer>> it = size_counts.entrySet().iterator();
                    for (; it.hasNext();) {
                        Map.Entry<Pair, Integer> entry = it.next();
                        final Pair p = entry.getKey();
                        for (int j = 1; j <= p.first; ++j)
                            mod_feat += entry.getValue() * DiagonalAlignment.computeDLogZ(j, p.first, p.second,
                                    align.diagonal_tension);
                    }
                    mod_feat /= toks;
                    System.err.println("  " + (ii + 1) + "  model al-feat: " + mod_feat + " (tension="
                            + align.diagonal_tension + ")");
                    align.diagonal_tension += (emp_feat - mod_feat) * 20.0;
                    if (align.diagonal_tension <= 0.1)
                        align.diagonal_tension = 0.1;
                    if (align.diagonal_tension > 14)
                        align.diagonal_tension = 14;
                }
                System.err.println("     final tension: " + align.diagonal_tension);
            }
            if (align.variational_bayes)
                s2t.NormalizeVB(align.alpha);
            else
                s2t.Normalize();
            //prob_align_null *= 0.8; // XXX
            //prob_align_null += (c0 / toks) * 0.2;
            prob_align_not_null = 1.0 - align.prob_align_null;
        }

    }
    if (!align.conditional_probability_filename.isEmpty()) {
        System.err.println("conditional probabilities: " + align.conditional_probability_filename);
        s2t.ExportToFile(align.conditional_probability_filename, align.d);
    }
    System.exit(0);
}

From source file:com.mch.registry.ccs.server.CcsClient.java

/**
 * Sends messages to registered devices.
 */
public static void main(String[] args) {

    Config config = new Config();
    final String projectId = config.getProjectId();
    final String key = config.getKey();

    final CcsClient ccsClient = CcsClient.prepareClient(projectId, key, true);

    try {
        ccsClient.connect();
    } catch (XMPPException e) {
        logger.log(Level.WARNING, "XMPP Exception ", e);
    }

    final Runnable sendNotifications = new Runnable() {
        public void run() {
            try {
                logger.log(Level.INFO, "Working Q!");
                if (!isOffHours()) {
                    //Prepare downstream message
                    String toRegId = "";
                    String message = "";
                    String messageId = "";
                    Map<String, String> payload = new HashMap<String, String>();
                    String collapseKey = null;
                    Long timeToLive = 10000L;
                    Boolean delayWhileIdle = true;
                    String messagePrefix = "";
                    int notificationQueueID = 0;
                    boolean successfullySent = false;

                    //Read from mysql database
                    MySqlHandler mysql = new MySqlHandler();
                    ArrayList<Notification> queue = new ArrayList<Notification>();

                    for (int i = 1; i < 3; i++) {
                        queue = mysql.getNotificationQueue(i);
                        if (queue.size() > 0) {

                            switch (i) {
                            case 1:
                                messagePrefix = "_V: ";
                                break;
                            case 2:
                                messagePrefix = "_R: ";
                                break;
                            default:
                                messagePrefix = "";
                                logger.log(Level.WARNING, "Unknown message type!");
                            }

                            Notification notification = new Notification();
                            Iterator<Notification> iterator = queue.iterator();

                            while (iterator.hasNext()) {
                                notification = iterator.next();

                                toRegId = notification.getGcmRegID();
                                message = notification.getNotificationText();
                                notificationQueueID = notification.getNotificationQueueID();
                                messageId = "m-" + Long.toString(random.nextLong());

                                payload = new HashMap<String, String>();
                                payload.put("message", messagePrefix + message);

                                try {
                                    // Send the downstream message to a device.
                                    ccsClient.send(createJsonMessage(toRegId, messageId, payload, collapseKey,
                                            timeToLive, delayWhileIdle));
                                    successfullySent = true;
                                    logger.log(Level.INFO, "Message sent. ID: " + notificationQueueID
                                            + ", RegID: " + toRegId + ", Text: " + message);
                                } catch (Exception e) {
                                    mysql.prepareNotificationForTheNextDay(notificationQueueID);
                                    successfullySent = false;
                                    logger.log(Level.WARNING,
                                            "Message could not be sent! ID: " + notificationQueueID
                                                    + ", RegID: " + toRegId + ", Text: " + message);
                                }

                                if (successfullySent) {
                                    mysql.moveNotificationToHistory(notificationQueueID);
                                }
                            }
                        } else {
                            logger.log(Level.INFO, "No notifications to send. Type: " + Integer.toString(i));
                        }
                    }
                }
            } catch (Exception e) {
                logger.log(Level.WARNING, "Exception ", e);
            }
        }
    };

    ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    //Start when server starts and every 30 minutes after
    ScheduledFuture task = executor.scheduleAtFixedRate(sendNotifications, 0, 30, TimeUnit.MINUTES);
    try {
        task.get();
    } catch (ExecutionException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    }
    task.cancel(false);

    try {
        executor.shutdown();
        executor.awaitTermination(30, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    }

}

From source file:de.micromata.tpsb.doc.StaticTestDocGenerator.java

public static void main(String[] args) {

    ParserConfig.Builder bCfg = new ParserConfig.Builder();
    ParserConfig.Builder tCfg = new ParserConfig.Builder();
    tCfg.generateIndividualFiles(true);
    bCfg.generateIndividualFiles(true);
    List<String> la = Arrays.asList(args);
    Iterator<String> it = la.iterator();
    boolean baseDirSet = false;
    boolean ignoreLocalSettings = false;
    List<String> addRepos = new ArrayList<String>();
    StringResourceLoader.setRepository(StringResourceLoader.REPOSITORY_NAME_DEFAULT,
            new StringResourceRepositoryImpl());
    try {
        while (it.hasNext()) {
            String arg = it.next();
            String value = null;

            if ((value = getArgumentOption(it, arg, "--project-root", "-pr")) != null) {
                File f = new File(value);
                if (f.exists() == false) {
                    System.err.print("project root doesn't exist: " + f.getAbsolutePath());
                    continue;
                }
                TpsbEnvironment.get().addProjectRoots(f);
                File ts = new File(f, "src/test");
                if (ts.exists() == true) {
                    tCfg.addSourceFileRespository(new FileSystemSourceFileRepository(ts.getAbsolutePath()));
                    bCfg.addSourceFileRespository(new FileSystemSourceFileRepository(ts.getAbsolutePath()));
                }
                continue;
            }
            if ((value = getArgumentOption(it, arg, "--test-input", "-ti")) != null) {
                File f = new File(value);
                if (f.exists() == false) {
                    System.err.print("test-input doesn't exist: " + f.getAbsolutePath());
                }
                tCfg.addSourceFileRespository(new FileSystemSourceFileRepository(value));
                bCfg.addSourceFileRespository(new FileSystemSourceFileRepository(value));
                continue;
            }
            if ((value = getArgumentOption(it, arg, "--output-path", "-op")) != null) {
                if (baseDirSet == false) {
                    tCfg.outputDir(value);
                    bCfg.outputDir(value);
                    TpsbEnvironment.setBaseDir(value);
                    baseDirSet = true;
                } else {
                    addRepos.add(value);
                }
                continue;
            }
            if ((value = getArgumentOption(it, arg, "--index-vmtemplate", "-ivt")) != null) {
                try {
                    String content = FileUtils.readFileToString(new File(value), CharEncoding.UTF_8);
                    StringResourceRepository repo = StringResourceLoader.getRepository();
                    repo.putStringResource("customIndexTemplate", content, CharEncoding.UTF_8);
                    tCfg.indexTemplate("customIndexTemplate");
                } catch (IOException ex) {
                    throw new RuntimeException(
                            "Cannot load file " + new File(value).getAbsolutePath() + ": " + ex.getMessage(),
                            ex);
                }
                continue;
            }
            if ((value = getArgumentOption(it, arg, "--test-vmtemplate", "-tvt")) != null) {
                try {
                    String content = FileUtils.readFileToString(new File(value), CharEncoding.UTF_8);
                    StringResourceRepository repo = StringResourceLoader.getRepository();
                    repo.putStringResource("customTestTemplate", content, CharEncoding.UTF_8);
                    tCfg.testTemplate("customTestTemplate");
                } catch (IOException ex) {
                    throw new RuntimeException(
                            "Cannot load file " + new File(value).getAbsolutePath() + ": " + ex.getMessage(),
                            ex);
                }
                continue;
            }
            if (arg.equals("--singlexml") == true) {
                tCfg.generateIndividualFiles(false);
                bCfg.generateIndividualFiles(false);
            } else if (arg.equals("--ignore-local-settings") == true) {
                ignoreLocalSettings = true;
                continue;
            }
        }
    } catch (RuntimeException ex) {
        System.err.print(ex.getMessage());
        return;
    }
    if (ignoreLocalSettings == false) {
        readLocalSettings(bCfg, tCfg);
    }
    bCfg// .addSourceFileFilter(new MatcherSourceFileFilter("*Builder,*App,*builder")) //
            .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbBuilder.class)) //
            .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbApplication.class)) //
    ;
    tCfg// .addSourceFileFilter(new MatcherSourceFileFilter("*Test,*TestCase")) //
            .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbTestSuite.class)) //
    ;

    StaticTestDocGenerator docGenerator = new StaticTestDocGenerator(bCfg.build(), tCfg.build());
    TpsbEnvironment env = TpsbEnvironment.get();
    if (addRepos.isEmpty() == false) {
        env.setIncludeRepos(addRepos);
    }
    docGenerator.parseTestBuilders();
    docGenerator.parseTestCases();
}

From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the aggregate functions " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions"
            + " will be computed, followed by their temporal and spatial attribute indices");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    String preProcessingDatasets = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetAggregation = new ArrayList<String>();
    HashMap<String, String> datasetTempAtt = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>();
    HashMap<String, String> preProcessingDataset = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");
    String[] datasetArgs = cmd.getOptionValues("g");

    for (int i = 0; i < datasetArgs.length; i += 3) {
        String dataset = datasetArgs[i];

        // getting pre-processing
        String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3);
        if (tempPreProcessing == null) {
            System.out.println("No pre-processing available for " + dataset);
            continue;
        }
        preProcessingDataset.put(dataset, tempPreProcessing);

        shortDataset.add(dataset);
        datasetTempAtt.put(dataset, ("null".equals(datasetArgs[i + 1]) ? null : datasetArgs[i + 1]));
        datasetSpatialAtt.put(dataset, ("null".equals(datasetArgs[i + 2]) ? null : datasetArgs[i + 2]));

        datasetId.put(dataset, null);
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3);

    // getting smallest resolution

    HashMap<String, String> tempResMap = new HashMap<String, String>();
    HashMap<String, String> spatialResMap = new HashMap<String, String>();

    HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>();

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String[] datasetArray = preProcessingDataset.get(dataset).split("-");

        String datasetTemporalStr = datasetArray[datasetArray.length - 2];
        int datasetTemporal = utils.temporalResolution(datasetTemporalStr);

        String datasetSpatialStr = datasetArray[datasetArray.length - 1];
        int datasetSpatial = utils.spatialResolution(datasetSpatialStr);

        // finding all possible resolutions

        String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal);
        String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial);

        String temporalResolution = "";
        String spatialResolution = "";

        String tempRes = "";
        String spatialRes = "";

        boolean dataAdded = false;

        for (int i = 0; i < temporalResolutions.length; i++) {
            for (int j = 0; j < spatialResolutions.length; j++) {

                temporalResolution = temporalResolutions[i];
                spatialResolution = spatialResolutions[j];

                String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";

                if (removeExistingFiles) {
                    FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3);
                }

                if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) {

                    dataAdded = true;

                    tempRes += temporalResolution + "-";
                    spatialRes += spatialResolution + "-";
                }
            }
        }

        if (dataAdded) {
            input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
            shortDatasetAggregation.add(dataset);

            tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1));
            spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1));

            datasetTemporalStrMap.put(dataset, datasetTemporalStr);
            datasetSpatialStrMap.put(dataset, datasetSpatialStr);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have aggregates.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    it = input.iterator();
    while (it.hasNext()) {
        preProcessingDatasets += it.next() + ",";
    }

    Job aggJob = null;
    String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/";
    String jobName = "aggregates";

    FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3);

    Configuration aggConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    aggConf.set("dataset-name", datasetNames);
    aggConf.set("dataset-id", datasetIds);

    for (int i = 0; i < shortDatasetAggregation.size(); i++) {
        String dataset = shortDatasetAggregation.get(i);
        String id = datasetId.get(dataset);
        aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset));
        aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset));
        aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset));

        if (s3)
            aggConf.set("dataset-" + id,
                    s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
        else
            aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/"
                    + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
    }

    aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    aggConf.set("mapreduce.task.io.sort.mb", "200");
    aggConf.set("mapreduce.task.io.sort.factor", "100");
    machineConf.setMachineConfiguration(aggConf);

    if (s3) {
        machineConf.setMachineConfiguration(aggConf);
        aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    if (snappyCompression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    aggJob = new Job(aggConf);
    aggJob.setJobName(jobName);

    aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setMapOutputValueClass(AggregationArrayWritable.class);
    aggJob.setOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setOutputValueClass(FloatArrayWritable.class);
    //aggJob.setOutputKeyClass(Text.class);
    //aggJob.setOutputValueClass(Text.class);

    aggJob.setMapperClass(AggregationMapper.class);
    aggJob.setCombinerClass(AggregationCombiner.class);
    aggJob.setReducerClass(AggregationReducer.class);
    aggJob.setNumReduceTasks(machineConf.getNumberReduces());

    aggJob.setInputFormatClass(SequenceFileInputFormat.class);
    //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(aggJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(aggJob, true);
    FileInputFormat.setInputPaths(aggJob,
            preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1));
    FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir));

    aggJob.setJarByClass(Aggregation.class);

    long start = System.currentTimeMillis();
    aggJob.submit();
    aggJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetAggregation) {
        String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}

From source file:examples.mail.IMAPExportMbox.java

public static void main(String[] args) throws IOException {
    int connect_timeout = CONNECT_TIMEOUT;
    int read_timeout = READ_TIMEOUT;

    int argIdx = 0;
    String eol = EOL_DEFAULT;
    boolean printHash = false;
    boolean printMarker = false;
    int retryWaitSecs = 0;

    for (argIdx = 0; argIdx < args.length; argIdx++) {
        if (args[argIdx].equals("-c")) {
            connect_timeout = Integer.parseInt(args[++argIdx]);
        } else if (args[argIdx].equals("-r")) {
            read_timeout = Integer.parseInt(args[++argIdx]);
        } else if (args[argIdx].equals("-R")) {
            retryWaitSecs = Integer.parseInt(args[++argIdx]);
        } else if (args[argIdx].equals("-LF")) {
            eol = LF;
        } else if (args[argIdx].equals("-CRLF")) {
            eol = CRLF;
        } else if (args[argIdx].equals("-.")) {
            printHash = true;
        } else if (args[argIdx].equals("-X")) {
            printMarker = true;
        } else {
            break;
        }
    }

    final int argCount = args.length - argIdx;

    if (argCount < 2) {
        System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]"
                + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
        System.err.println(
                "\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
        System.err.println("\t-c connect timeout in seconds (default 10)");
        System.err.println("\t-r read timeout in seconds (default 10)");
        System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
        System.err.println("\t-. print a . for each complete message received");
        System.err.println("\t-X print the X-IMAP line for each complete message received");
        System.err.println(
                "\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
        System.err.println(
                "\tPrefix filename with '+' to append to the file. Prefix with '-' to allow overwrite.");
        System.err.println(
                "\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
        System.err
                .println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]"
                        + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
        System.exit(1);
    }

    final URI uri = URI.create(args[argIdx++]);
    final String file = args[argIdx++];
    String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
    final String itemNames;
    // Handle 0, 1 or multiple item names
    if (argCount > 3) {
        if (argCount > 4) {
            StringBuilder sb = new StringBuilder();
            sb.append("(");
            for (int i = 4; i <= argCount; i++) {
                if (i > 4) {
                    sb.append(" ");
                }
                sb.append(args[argIdx++]);
            }
            sb.append(")");
            itemNames = sb.toString();
        } else {
            itemNames = args[argIdx++];
        }
    } else {
        itemNames = "(INTERNALDATE BODY.PEEK[])";
    }

    final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence?
    final MboxListener chunkListener;
    if (file.equals("-")) {
        chunkListener = null;
    } else if (file.startsWith("+")) {
        final File mbox = new File(file.substring(1));
        System.out.println("Appending to file " + mbox);
        chunkListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, true)), eol, printHash,
                printMarker, checkSequence);
    } else if (file.startsWith("-")) {
        final File mbox = new File(file.substring(1));
        System.out.println("Writing to file " + mbox);
        chunkListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, false)), eol, printHash,
                printMarker, checkSequence);
    } else {
        final File mbox = new File(file);
        if (mbox.exists()) {
            throw new IOException("mailbox file: " + mbox + " already exists!");
        }
        System.out.println("Creating file " + mbox);
        chunkListener = new MboxListener(new BufferedWriter(new FileWriter(mbox)), eol, printHash, printMarker,
                checkSequence);
    }

    String path = uri.getPath();
    if (path == null || path.length() < 1) {
        throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
    }
    String folder = path.substring(1); // skip the leading /

    // suppress login details
    final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
        @Override
        public void protocolReplyReceived(ProtocolCommandEvent event) {
            if (event.getReplyCode() != IMAPReply.PARTIAL) { // This is dealt with by the chunk listener
                super.protocolReplyReceived(event);
            }
        }
    };

    // Connect and login
    final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);

    String maxIndexInFolder = null;

    try {

        imap.setSoTimeout(read_timeout * 1000);

        if (!imap.select(folder)) {
            throw new IOException("Could not select folder: " + folder);
        }

        for (String line : imap.getReplyStrings()) {
            maxIndexInFolder = matches(line, PATEXISTS, 1);
            if (maxIndexInFolder != null) {
                break;
            }
        }

        if (chunkListener != null) {
            imap.setChunkListener(chunkListener);
        } // else the command listener displays the full output without processing

        while (true) {
            boolean ok = imap.fetch(sequenceSet, itemNames);
            // If the fetch failed, can we retry?
            if (!ok && retryWaitSecs > 0 && chunkListener != null && checkSequence) {
                final String replyString = imap.getReplyString(); //includes EOL
                if (startsWith(replyString, PATTEMPFAIL)) {
                    System.err.println("Temporary error detected, will retry in " + retryWaitSecs + " seconds");
                    sequenceSet = (chunkListener.lastSeq + 1) + ":*";
                    try {
                        Thread.sleep(retryWaitSecs * 1000);
                    } catch (InterruptedException e) {
                        // ignored
                    }
                } else {
                    throw new IOException(
                            "FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
                }
            } else {
                break;
            }
        }

    } catch (IOException ioe) {
        String count = chunkListener == null ? "?" : Integer.toString(chunkListener.total);
        System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count
                + " complete messages ");
        if (chunkListener != null) {
            System.err.println("Last complete response seen: " + chunkListener.lastFetched);
        }
        throw ioe;
    } finally {

        if (printHash) {
            System.err.println();
        }

        if (chunkListener != null) {
            chunkListener.close();
            final Iterator<String> missingIds = chunkListener.missingIds.iterator();
            if (missingIds.hasNext()) {
                StringBuilder sb = new StringBuilder();
                for (;;) {
                    sb.append(missingIds.next());
                    if (!missingIds.hasNext()) {
                        break;
                    }
                    sb.append(",");
                }
                System.err.println("*** Missing ids: " + sb.toString());
            }
        }
        imap.logout();
        imap.disconnect();
    }
    if (chunkListener != null) {
        System.out.println("Processed " + chunkListener.total + " messages.");
    }
    if (maxIndexInFolder != null) {
        System.out.println("Folder contained " + maxIndexInFolder + " messages.");
    }
}

From source file:com.github.xbn.examples.regexutil.non_xbn.BetweenLineMarkersButSkipFirstXmpl.java

public static final void main(String[] as_1RqdTxtFilePath) {
    Iterator<String> lineItr = null;
    try {
        lineItr = FileUtils.lineIterator(new File(as_1RqdTxtFilePath[0])); // throws NullPointerException if null
    } catch (IOException iox) {
        throw new RuntimeException("Attempting to open \"" + as_1RqdTxtFilePath[0] + "\"", iox);
    } catch (RuntimeException rx) {
        throw new RuntimeException("One required parameter: The path to the text file.", rx);
    }

    String LINE_SEP = System.getProperty("line.separator", "\n");

    ArrayList<String> alsItems = new ArrayList<String>();
    boolean bStartMark = false;
    boolean bLine1Skipped = false;
    StringBuilder sdCurrentItem = new StringBuilder();
    while (lineItr.hasNext()) {
        String sLine = lineItr.next().trim();
        if (!bStartMark) {
            if (sLine.startsWith(".START_SEQUENCE")) {
                bStartMark = true;
                continue;
            }
            throw new IllegalStateException("Start mark not found.");
        }
        if (!bLine1Skipped) {
            bLine1Skipped = true;
            continue;
        } else if (!sLine.equals(".END_SEQUENCE")) {
            sdCurrentItem.append(sLine).append(LINE_SEP);
        } else {
            alsItems.add(sdCurrentItem.toString());
            sdCurrentItem.setLength(0);
            bStartMark = false;
            bLine1Skipped = false;
            continue;
        }
    }

    for (String s : alsItems) {
        System.out.println("----------");
        System.out.print(s);
    }
}

From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
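
The example above walks shortDataset twice with an explicit Iterator: once to seed the datasetId map through it.next(), and once to verify that every dataset received an id. Below is a minimal, self-contained sketch of that hasNext()/next() idiom; the names are illustrative and not taken from the source above.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class IteratorSeedExample {
    public static void main(String[] args) {
        List<String> datasets = new ArrayList<String>();
        datasets.add("taxi");
        datasets.add("weather");

        // Seed a map with null values: each call to next() returns one element.
        Map<String, String> ids = new HashMap<String, String>();
        Iterator<String> it = datasets.iterator();
        while (it.hasNext()) {
            ids.put(it.next(), null);
        }

        // Re-iterate to validate; next() advances the cursor and returns the element.
        it = datasets.iterator();
        while (it.hasNext()) {
            String dataset = it.next();
            if (ids.get(dataset) == null) {
                System.out.println("No dataset id for " + dataset);
            }
        }
    }
}

For a collection with a known element type, the enhanced for loop compiles down to the same hasNext()/next() calls; the explicit form is mainly useful when the iterator itself must be stored or reused, as in the listing above.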

From source file:com.alkacon.opencms.registration.CmsRegistrationFormHandler.java

/**
 * A test case.<p>
 * 
 * @param args not used
 */
public static void main(String[] args) {

    CmsUser user = new CmsUser(null, "/mylongouname/m.moossen@alkacon.com", "", "", "", "", 0, 0, 0, null);
    String code = getActivationCode(user);
    System.out.println(code);
    System.out.println(getUserName(code));

    CmsMacroResolver macroResolver = CmsMacroResolver.newInstance();
    macroResolver.setKeepEmptyMacros(true);
    // create macros for getters 
    Method[] methods = CmsUser.class.getDeclaredMethods();
    for (int i = 0; i < methods.length; i++) {
        Method method = methods[i];
        if (method.getReturnType() != String.class) {
            continue;
        }
        if (method.getParameterTypes().length > 0) {
            continue;
        }
        if (!method.getName().startsWith("get") || (method.getName().length() < 4)
                || method.getName().equals("getPassword")) {
            continue;
        }
        String label = ("" + method.getName().charAt(3)).toLowerCase();
        if (method.getName().length() > 4) {
            label += method.getName().substring(4);
        }
        try {
            Object value = method.invoke(user, new Object[] {});
            if (value == null) {
                value = "";
            }
            macroResolver.addMacro(label, value.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // add addinfo values as macros
    Iterator itFields = user.getAdditionalInfo().entrySet().iterator();
    while (itFields.hasNext()) {
        Map.Entry entry = (Map.Entry) itFields.next();
        if ((entry.getValue() instanceof String) && (entry.getKey() instanceof String)) {
            macroResolver.addMacro(entry.getKey().toString(), entry.getValue().toString());
        }
    }
    // add login
    macroResolver.addMacro(FIELD_LOGIN, user.getSimpleName());

}
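
The addinfo loop above iterates a raw entrySet() Iterator and casts the result of next() to Map.Entry, matching the pre-generics OpenCms API it was written against. With a generic map, the same pattern needs no casts. A sketch assuming a Map<String, Object> of additional info (this map is an illustrative stand-in, not the CmsUser API):

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class EntryIteratorExample {
    public static void main(String[] args) {
        Map<String, Object> additionalInfo = new HashMap<String, Object>();
        additionalInfo.put("city", "Cologne");
        additionalInfo.put("age", Integer.valueOf(42));

        // next() is typed as Map.Entry<String, Object>, so no cast is needed.
        Iterator<Map.Entry<String, Object>> itFields = additionalInfo.entrySet().iterator();
        while (itFields.hasNext()) {
            Map.Entry<String, Object> entry = itFields.next();
            if (entry.getValue() instanceof String) {
                System.out.println(entry.getKey() + " -> " + entry.getValue());
            }
        }
    }
}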

From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the index and events " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option thresholdOption = new Option("t", "use-custom-thresholds", false,
            "use custom thresholds for regular and rare events, defined in HDFS_HOME/"
                    + FrameworkUtils.thresholdDir + " file");
    thresholdOption.setRequired(false);
    options.addOption(thresholdOption);

    Option gOption = new Option("g", "group", true,
            "set group of datasets for which the indices and events" + " will be computed");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true,
            "aws secrect access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(1);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
            System.exit(1);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetIndex = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();
    HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>();
    HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>();

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());
    BufferedReader br;

    boolean removeExistingFiles = cmd.hasOption("f");
    boolean isThresholdUserDefined = cmd.hasOption("t");

    for (String dataset : cmd.getOptionValues("g")) {

        // getting aggregates
        String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3);
        if (aggregate.length == 0) {
            System.out.println("No aggregates found for " + dataset + ".");
            continue;
        }

        // getting aggregates header
        String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3);
        if (aggregatesHeaderFileName == null) {
            System.out.println("No aggregate header for " + dataset);
            continue;
        }

        String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName;

        shortDataset.add(dataset);
        datasetId.put(dataset, null);

        if (s3) {
            path = new Path(aggregatesHeader);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader);
        }

        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        datasetAgg.put(dataset, br.readLine().split("\t")[1]);
        br.close();
        if (s3)
            fs.close();
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    // getting user defined thresholds

    if (isThresholdUserDefined) {
        if (s3) {
            path = new Path(s3bucket + FrameworkUtils.thresholdDir);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        line = br.readLine();
        while (line != null) {
            // getting dataset name
            String dataset = line.trim();
            HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>();
            HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>();
            line = br.readLine();
            while ((line != null) && (line.split("\t").length > 1)) {
                // getting attribute ids and thresholds
                String[] keyVals = line.trim().split("\t");
                int att = Integer.parseInt(keyVals[0].trim());
                regThresholds.put(att, Double.parseDouble(keyVals[1].trim()));
                rareThresholds.put(att, Double.parseDouble(keyVals[2].trim()));
                line = br.readLine();
            }
            datasetRegThreshold.put(dataset, regThresholds);
            datasetRareThreshold.put(dataset, rareThresholds);
        }
        br.close();
    }
    if (s3)
        fs.close();

    // datasets that will use existing merge tree
    ArrayList<String> useMergeTree = new ArrayList<String>();

    // creating index for each spatio-temporal resolution

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3);

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/";

        if (removeExistingFiles) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3);
            FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3);
        } else if (datasetRegThreshold.containsKey(dataset)) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) {
                useMergeTree.add(dataset);
            }
        }

        if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) {
            input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset);
            shortDatasetIndex.add(dataset);
        }

    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have indices.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    String aggregateDatasets = "";
    it = input.iterator();
    while (it.hasNext()) {
        aggregateDatasets += it.next() + ",";
    }

    Job icJob = null;
    Configuration icConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "index";
    String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/";

    FrameworkUtils.removeFile(indexOutputDir, s3conf, s3);

    icConf.set("dataset-name", datasetNames);
    icConf.set("dataset-id", datasetIds);

    if (!useMergeTree.isEmpty()) {
        String useMergeTreeStr = "";
        for (String dt : useMergeTree) {
            useMergeTreeStr += dt + ",";
        }
        icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1));
    }

    for (int i = 0; i < shortDataset.size(); i++) {
        String dataset = shortDataset.get(i);
        String id = datasetId.get(dataset);
        icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset));
        if (datasetRegThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset);
            String thresholds = "";
            for (int att : regThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ",";
            }
            icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1));
        }

        if (datasetRareThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset);
            String thresholds = "";
            for (int att : rareThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ",";
            }
            icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
    }

    icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    icConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    icConf.set("mapreduce.task.io.sort.mb", "200");
    icConf.set("mapreduce.task.io.sort.factor", "100");
    //icConf.set("mapreduce.task.timeout", "1800000");
    machineConf.setMachineConfiguration(icConf);

    if (s3) {
        machineConf.setMachineConfiguration(icConf);
        icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        icConf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    icJob = new Job(icConf);
    icJob.setJobName(jobName);

    icJob.setMapOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class);
    icJob.setOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setOutputValueClass(TopologyTimeSeriesWritable.class);
    //icJob.setOutputKeyClass(Text.class);
    //icJob.setOutputValueClass(Text.class);

    icJob.setMapperClass(IndexCreationMapper.class);
    icJob.setReducerClass(IndexCreationReducer.class);
    icJob.setNumReduceTasks(machineConf.getNumberReduces());

    icJob.setInputFormatClass(SequenceFileInputFormat.class);
    //icJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(icJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(icJob, true);
    FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1));
    FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir));

    icJob.setJarByClass(IndexCreation.class);

    long start = System.currentTimeMillis();
    icJob.submit();
    icJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetIndex) {
        String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}
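
Both MapReduce drivers above build comma-separated strings by calling next() only while hasNext() is true and trimming the trailing comma afterwards. Calling next() on an exhausted iterator throws NoSuchElementException. A minimal sketch of the guarded pattern and of the failure mode, with illustrative names:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

public class NextContractExample {
    public static void main(String[] args) {
        List<String> dirs = new ArrayList<String>();
        dirs.add("/aggregates/taxi");
        dirs.add("/aggregates/weather");

        // Guarded iteration: next() is called only after hasNext() returns true.
        StringBuilder joined = new StringBuilder();
        Iterator<String> it = dirs.iterator();
        while (it.hasNext()) {
            joined.append(it.next());
            if (it.hasNext()) {
                joined.append(',');  // delimiter only between elements, so no trailing trim is needed
            }
        }
        System.out.println(joined);  // /aggregates/taxi,/aggregates/weather

        // Unguarded call: the iterator is exhausted, so next() throws.
        try {
            it.next();
        } catch (NoSuchElementException e) {
            System.out.println("next() past the end: " + e);
        }
    }
}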