Example usage for java.util.ArrayList.iterator()

Introduction

This page collects example usages of java.util.ArrayList.iterator(), taken from real-world source files.

Prototype

public Iterator<E> iterator() 

Document

Returns an iterator over the elements in this list in proper sequence.
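
As a minimal, self-contained sketch of the behavior described above (the class and variable names are illustrative, not part of the API):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class IteratorSketch {
    public static void main(String[] args) {
        List<String> names = new ArrayList<String>();
        names.add("alpha");
        names.add("beta");

        // iterator() yields the elements in proper sequence: alpha, then beta
        Iterator<String> it = names.iterator();
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }
}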

Usage

From source file:ArrayListExample.java
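
Builds an ArrayList of Integer values, replaces the element at index 5 with set(), then walks the list with iterator() to print each element.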

public static void main(String[] args) {
    ArrayList<Integer> al = new ArrayList<Integer>();

    // autoboxing replaces the deprecated new Integer(int) constructor
    for (int n = 1; n <= 10; n++) {
        al.add(n);
    }

    // replace the element at index 5 (the value 6) with 66
    al.set(5, 66);

    for (Iterator<Integer> i = al.iterator(); i.hasNext();) {
        Integer integer = i.next();
        System.out.println(integer);
    }
}

From source file:ProducerTool.java
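
Starts a configurable number of ProducerTool threads, then repeatedly obtains an iterator over the thread list to count how many are still alive, sleeping one second between passes.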

public static void main(String[] args) {
    ArrayList<ProducerTool> threads = new ArrayList<ProducerTool>();
    ProducerTool producerTool = new ProducerTool();
    String[] unknown = CommandLineSupport.setOptions(producerTool, args);
    if (unknown.length > 0) {
        System.out.println("Unknown options: " + Arrays.toString(unknown));
        System.exit(-1);
    }
    producerTool.showParameters();
    for (int threadCount = 1; threadCount <= parallelThreads; threadCount++) {
        producerTool = new ProducerTool();
        CommandLineSupport.setOptions(producerTool, args);
        producerTool.start();
        threads.add(producerTool);
    }

    while (true) {
        Iterator<ProducerTool> itr = threads.iterator();
        int running = 0;
        while (itr.hasNext()) {
            ProducerTool thread = itr.next();
            if (thread.isAlive()) {
                running++;
            }
        }
        if (running <= 0) {
            System.out.println("All threads completed their work");
            break;
        }
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interruption and poll again
        }
    }
}

From source file:ConsumerTool.java
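
The consumer-side counterpart of the previous example: it starts ConsumerTool threads and polls them through an iterator until all have finished.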

public static void main(String[] args) {
    ArrayList<ConsumerTool> threads = new ArrayList<ConsumerTool>();
    ConsumerTool consumerTool = new ConsumerTool();
    String[] unknown = CommandLineSupport.setOptions(consumerTool, args);
    if (unknown.length > 0) {
        System.out.println("Unknown options: " + Arrays.toString(unknown));
        System.exit(-1);
    }
    consumerTool.showParameters();
    for (int threadCount = 1; threadCount <= parallelThreads; threadCount++) {
        consumerTool = new ConsumerTool();
        CommandLineSupport.setOptions(consumerTool, args);
        consumerTool.start();
        threads.add(consumerTool);
    }

    while (true) {
        Iterator<ConsumerTool> itr = threads.iterator();
        int running = 0;
        while (itr.hasNext()) {
            ConsumerTool thread = itr.next();
            if (thread.isAlive()) {
                running++;
            }
        }

        if (running <= 0) {
            System.out.println("All threads completed their work");
            break;
        }

        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interruption and poll again
        }
    }
    Iterator<ConsumerTool> itr = threads.iterator();
    while (itr.hasNext()) {
        ConsumerTool thread = itr.next();
    }
}
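
The polling loops in the two examples above can also be written with Thread.join(), which blocks until a thread dies. A minimal sketch, assuming ConsumerTool extends Thread as the calls to start() and isAlive() imply:

    for (ConsumerTool thread : threads) {
        try {
            thread.join(); // block until this consumer finishes
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the interrupt flag and stop waiting
            break;
        }
    }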

From source file:akori.Impact.java
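
Reads eye-tracking coordinates from text files into an ArrayList, iterates the list to fill a coordinate matrix, and writes one output matrix per image.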

static public void main(String[] args) throws IOException {
    String PATH = "E:\\Trabajos\\AKORI\\datosmatrizgino\\";
    String PATHIMG = "E:\\NetBeansProjects\\AKORI\\Proccess_1\\ImagesPages\\";
    for (int i = 1; i <= 32; ++i) {
        for (int k = 1; k <= 15; ++k) {
            System.out.println("Matrix " + i + "-" + k);
            BufferedImage img = null;
            try {
                img = ImageIO.read(new File(PATHIMG + i + ".png"));
            } catch (IOException ex) {
                ex.printStackTrace();
            }

            int ymax = img.getHeight();
            int xmax = img.getWidth();

            double[][] imagen = new double[ymax][xmax];

            BufferedReader in = null;
            try {
                in = new BufferedReader(new FileReader(PATH + i + "-" + k + ".txt"));
            } catch (FileNotFoundException ex) {
                ex.printStackTrace();
            }

            String linea;
            ArrayList<String> lista = new ArrayList<String>();
            try {
                while ((linea = in.readLine()) != null) {
                    String[] datos = linea.split(",");
                    int x = (int) Double.parseDouble(datos[1]);
                    int y = (int) Double.parseDouble(datos[2]);
                    if (x >= xmax || y >= ymax || x <= 0 || y <= 0) {
                        continue;
                    }
                    lista.add(x + "," + y);
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }

            try {
                in.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }

            Iterator<String> iter = lista.iterator();
            int[][] matrix = new int[lista.size()][2];

            for (int j = 0; iter.hasNext(); ++j) {
                String xy = iter.next();
                String[] datos = xy.split(",");
                matrix[j][0] = Integer.parseInt(datos[0]);
                matrix[j][1] = Integer.parseInt(datos[1]);
            }

            for (int j = 0; j < matrix.length; ++j) {
                int std = 50;
                int x = matrix[j][0];
                int y = matrix[j][1];
                imagen[y][x] += 1;
                normalMatrix(imagen, y, x, std);
            }

            FileWriter fw = new FileWriter(PATH + "Matrix" + i + "-" + k + ".txt");
            BufferedWriter bw = new BufferedWriter(fw);
            for (int j = 0; j < imagen.length; ++j) {
                for (int t = 0; t < imagen[j].length; ++t) {
                    if (t + 1 == imagen[j].length)
                        bw.write(imagen[j][t] + "");
                    else
                        bw.write(imagen[j][t] + ",");
                }
                bw.write("\n");
            }
            bw.close();
        }
    }
}

From source file:Batch.java
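
Collects user ids whose passwords can be cracked, then iterates the list to add a JDBC batch update that flags each account.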

static public void main(String[] args) {
    Connection conn = null;

    try {
        ArrayList<String> breakable = new ArrayList<String>();
        PreparedStatement stmt;
        Iterator<String> users;
        ResultSet rs;

        Class.forName(args[0]).newInstance();
        conn = DriverManager.getConnection(args[1], args[2], args[3]);
        stmt = conn.prepareStatement("SELECT user_id, password " + "FROM user");
        rs = stmt.executeQuery();
        while (rs.next()) {
            String uid = rs.getString(1);
            String pw = rs.getString(2);

            // Assume PasswordCracker is some class that provides
            // a single static method called crack() that attempts
            // to run password cracking routines on the password.
            // if (PasswordCracker.crack(uid, pw)) {
            //     breakable.add(uid);
            // }
        }
        stmt.close();
        if (breakable.size() < 1) {
            return;
        }
        stmt = conn.prepareStatement("UPDATE user " + "SET bad_password = 'Y' " + "WHERE uid = ?");
        users = breakable.iterator();
        while (users.hasNext()) {
            String uid = users.next();

            stmt.setString(1, uid);
            stmt.addBatch();
        }
        stmt.executeBatch();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (conn != null) {
            try {
                conn.close();
            } catch (Exception e) {
                // ignore errors on close
            }
        }
    }
}

From source file:com.mch.registry.ccs.server.CcsClient.java
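
Periodically reads pending notifications from MySQL into an ArrayList and iterates each queue to push the messages to devices over a CCS (XMPP) connection.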

/**
 * Sends messages to registered devices.
 */
public static void main(String[] args) {

    Config config = new Config();
    final String projectId = config.getProjectId();
    final String key = config.getKey();

    final CcsClient ccsClient = CcsClient.prepareClient(projectId, key, true);

    try {
        ccsClient.connect();
    } catch (XMPPException e) {
        logger.log(Level.WARNING, "XMPP Exception ", e);
    }

    final Runnable sendNotifications = new Runnable() {
        public void run() {
            try {
                logger.log(Level.INFO, "Working Q!");
                if (!isOffHours()) {
                    //Prepare downstream message
                    String toRegId = "";
                    String message = "";
                    String messageId = "";
                    Map<String, String> payload = new HashMap<String, String>();
                    String collapseKey = null;
                    Long timeToLive = 10000L;
                    Boolean delayWhileIdle = true;
                    String messagePrefix = "";
                    int notificationQueueID = 0;
                    boolean successfullySent = false;

                    //Read from mysql database
                    MySqlHandler mysql = new MySqlHandler();
                    ArrayList<Notification> queue = new ArrayList<Notification>();

                    for (int i = 1; i < 3; i++) {
                        queue = mysql.getNotificationQueue(i);
                        if (queue.size() > 0) {

                            switch (i) {
                            case 1:
                                messagePrefix = "_V: ";
                                break;
                            case 2:
                                messagePrefix = "_R: ";
                                break;
                            default:
                                messagePrefix = "";
                                logger.log(Level.WARNING, "Unknown message type!");
                            }

                            Notification notification = new Notification();
                            Iterator<Notification> iterator = queue.iterator();

                            while (iterator.hasNext()) {
                                notification = iterator.next();

                                toRegId = notification.getGcmRegID();
                                message = notification.getNotificationText();
                                notificationQueueID = notification.getNotificationQueueID();
                                messageId = "m-" + Long.toString(random.nextLong());

                                payload = new HashMap<String, String>();
                                payload.put("message", messagePrefix + message);

                                try {
                                    // Send the downstream message to a device.
                                    ccsClient.send(createJsonMessage(toRegId, messageId, payload, collapseKey,
                                            timeToLive, delayWhileIdle));
                                    successfullySent = true;
                                    logger.log(Level.INFO, "Message sent. ID: " + notificationQueueID
                                            + ", RegID: " + toRegId + ", Text: " + message);
                                } catch (Exception e) {
                                    mysql.prepareNotificationForTheNextDay(notificationQueueID);
                                    successfullySent = false;
                                    logger.log(Level.WARNING,
                                            "Message could not be sent! ID: " + notificationQueueID
                                                    + ", RegID: " + toRegId + ", Text: " + message);
                                }

                                if (successfullySent) {
                                    mysql.moveNotificationToHistory(notificationQueueID);
                                }
                            }
                        } else {
                            logger.log(Level.INFO, "No notifications to send. Type: " + Integer.toString(i));
                        }
                    }
                }
            } catch (Exception e) {
                logger.log(Level.WARNING, "Exception ", e);
            }
        }
    };

    ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    //Start when server starts and every 30 minutes after
    ScheduledFuture<?> task = executor.scheduleAtFixedRate(sendNotifications, 0, 30, TimeUnit.MINUTES);
    try {
        task.get();
    } catch (ExecutionException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    }
    task.cancel(false);

    try {
        executor.shutdown();
        executor.awaitTermination(30, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    }

}

From source file:fr.iphc.grid.jobmonitor.CeList.java
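
Discovers available grid computing elements, iterates the resulting URL list to launch one SubmitterThread per CE, and waits until every thread finishes or a timeout expires.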

public static void main(String[] args) throws Exception {

    SessionFactory.createSession(true);
    CeList command = new CeList();
    CommandLine line = command.parse(args);
    Integer timeout = 0;
    String TableSql = "monce";
    //      MySQLAccess sql = new MySQLAccess();
    if (line.getOptionValue(OPT_TIMEOUT) == null) {
        timeout = 15;
    } else {
        timeout = Integer.parseInt(line.getOptionValue(OPT_TIMEOUT));
    }
    timeout = timeout * 60; // convert to seconds
    Date start = new Date();
    String OutDir = line.getOptionValue(OPT_OUTDIR);

    if (OutDir == null) {
        OutDir = "/tmp/thread";
    }
    ArrayList<URL> CeList = null;
    if (line.getOptionValue(OPT_CEPATH) == null) {
        CeList = AvailableLdapCe();
        //         for (URL k : CeList) {
        //            // System.out.println(k);
        //         }
    } else {
        CeList = AvailableCe(line.getOptionValue(OPT_CEPATH));
    }
    Boolean ret = initDirectory(new File(OutDir));
    if (!ret) {
        System.out.println("ERROR: " + OutDir + "STOP");
        System.exit(-1);
    }

    // check if we can connect to the grid
    // try{
    // SessionFactory.createSession(true);
    // }catch(NoSuccessException e){
    // System.err.println("Could not connect to the grid at all
    // ("+e.getMessage()+")");
    // System.err.println("Aborting");
    // System.exit(0);
    //
    // }

    SubmitterThread[] st = new SubmitterThread[CeList.size()];

    Iterator<URL> i = CeList.iterator();
    int index = 0;
    while (i.hasNext()) {
        URL serviceURL = i.next();
        // Do not move this into the thread, because of the options.
        Properties prop = new Properties();
        prop.setProperty("Executable", "/bin/hostname");//
        // prop.setProperty("Executable", "touch /dev/null");
        JobDescription desc = createJobDescription(prop);
        desc.setAttribute(JobDescription.INTERACTIVE, "true");
        desc.setAttribute(JobDescription.EXECUTABLE, "/bin/hostname");
        // proxy="/home/dsa/.globus/biomed.txt";
        // Context ctx = ContextFactory.createContext();
        // ctx.setAttribute(Context.TYPE, "VOMS");
        // ctx.setAttribute(Context.USERVO, "biomed");
        // ctx.setAttribute(Context.USERPROXY,proxy);
        // Session session = SessionFactory.createSession(false);
        // session.addContext(ctx);
        Session session = SessionFactory.createSession(true);
        st[index] = new SubmitterThread(serviceURL, session, desc, OutDir, timeout, start);
        st[index].setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
            @Override
            public void uncaughtException(Thread t, Throwable e) {
                // System.out.println("Error! An exception occured in "
                // + t.getName() + ". Cause: " + e.getMessage());
            }
        });
        st[index].start();
        // Thread.sleep(15 * 1000);
        // Check whether the output file exists:
        // System.out.println("Alive " + OutDir + "/" + serviceURL.getHost() + "_"
        //         + serviceURL.getPath().replaceAll("/cream-", "") + ".out");
        // while (!(new File(OutDir + "/" + serviceURL.getHost() + "_"
        //         + serviceURL.getPath().replaceAll("/cream-", "") + ".out").exists()
        //         || new File(OutDir + "/" + serviceURL.getHost() + "_"
        //                 + serviceURL.getPath().replaceAll("/cream-", "") + ".err").exists())) {
        //     Thread.sleep(500);
        // }
        // System.out.println("Alive " + serviceURL.getHost() + "-" + index + " FILE EXIST");
        index++;

    }

    // System.out.println("BEGIN LOOP: Max " + index);
    long date_start = System.currentTimeMillis();
    // System.out.println("BEGIN START: " + date_start);
    Integer time_out = (timeout + 180) * 1000; // in ms; the extra 180 s is slack relative to the thread timeout
    Boolean Alive = true;
    //      int nb = 0;
    long now = System.currentTimeMillis();
    do {
        now = System.currentTimeMillis();
        Alive = false;
        //         nb = 0;
        for (int j = 0; j < index; j++) {
            if (st[j].isAlive()) {
                // System.out.println("Alive "+j);
                Alive = true;
                //               nb++;
            }
        }
        // System.out.println(nb);
        Thread.sleep(10000);
    } while ((Alive) && ((now - date_start) < time_out));

    for (int j = 0; j < index; j++) {
        if (st[j].isAlive()) {
            st[j].Requeststop();
        }
    }
    BilanCe(OutDir, CeList, TableSql);
    jobManagerLdap jm = new jobManagerLdap();
    jm.updateLdapCe();
    System.out.println("END " + new Date());
    // do further processing...
    System.exit(0);
}

From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
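
Parses command-line options, resolves dataset ids by iterating the dataset list, and configures and submits a Hadoop job that computes correlations between dataset pairs.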

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
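
Builds training and test sets from an ARFF file, uses an iterator to drop test partitions with fewer than 30 instances (via Iterator.remove()), and evaluates a similarity function over the remaining partitions.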

/**
 * @param args
 * @throws IOException
 * @throws FileNotFoundException
 */
public static void main(final String[] args) throws FileNotFoundException, IOException {
    final String experiment_file = args[0];
    final File root_directory;
    if (args.length > 1) {
        root_directory = new File(args[1]);
    } else {
        root_directory = new File(".");
    }
    final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file));
    final String experiment_name = FilenameUtils.getBaseName(experiment_file);

    final File expr_directory = new File(root_directory, experiment_name);
    expr_directory.mkdirs();

    final Csv.Writer csv = new Csv.Writer(
            new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv"))));
    final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma",
            "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier",
            "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth",
            "pairwise_classifier.max_branching", "training.label_noise" };
    csv.cell("domain").cell("abstraction");
    for (final String p : parameter_headers) {
        csv.cell(p);
    }
    csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline();

    for (int expr = 0; expr < csv_config.size(); ++expr) {
        try {
            final KeyValueStore expr_config = csv_config.get(expr);
            final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(),
                    expr_config);

            System.out.println("[Loading '" + config.training_data_single + "']");
            final Instances single = WekaUtil
                    .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff"));

            final Instances train = new Instances(single, 0);
            final int[] idx = Fn.range(0, single.size());
            int instance_counter = 0;
            Fn.shuffle(config.rng, idx);
            final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename?
            final double label_noise = config.getDouble("training.label_noise");
            final int Nlabels = train.classAttribute().numValues();
            assert (Nlabels > 0);
            for (int i = 0; i < Ntrain; ++i) {
                final Instance inst = single.get(idx[instance_counter++]);
                if (label_noise > 0 && config.rng.nextDouble() < label_noise) {
                    int noisy_label = 0;
                    do {
                        noisy_label = config.rng.nextInt(Nlabels);
                    } while (noisy_label == (int) inst.classValue());
                    System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")");
                    inst.setClassValue(noisy_label);
                }
                train.add(inst);
                inst.setDataset(train);
            }

            final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config);

            final int Ntest = config.Ntest_games;
            int Ntest_added = 0;
            final ArrayList<Instances> tests = new ArrayList<Instances>();
            while (instance_counter < single.size() && Ntest_added < Ntest) {
                final Instance inst = single.get(idx[instance_counter++]);
                boolean found = false;
                for (final Instances test : tests) {
                    // Note that 'plausible_p' should be transitive
                    if (plausible_p.apply(inst, test.get(0))) {
                        WekaUtil.addInstance(test, inst);
                        if (test.size() == 30) {
                            Ntest_added += test.size();
                        } else if (test.size() > 30) {
                            Ntest_added += 1;
                        }
                        found = true;
                        break;
                    }
                }

                if (!found) {
                    final Instances test = new Instances(single, 0);
                    WekaUtil.addInstance(test, inst);
                    tests.add(test);
                }
            }
            final Iterator<Instances> test_itr = tests.iterator();
            while (test_itr.hasNext()) {
                if (test_itr.next().size() < 30) {
                    test_itr.remove();
                }
            }
            System.out.println("=== tests.size() = " + tests.size());
            System.out.println("=== Ntest_added = " + Ntest_added);

            System.out.println("[Training]");
            final Evaluator evaluator = createEvaluator(config, train);
            //            final Instances transformed_test = evaluator.prepareInstances( test );

            System.out.println("[Evaluating]");

            final int Nxval = evaluator.isSensitiveToOrdering() ? 10 : 1;
            final MeanVarianceAccumulator ami = new MeanVarianceAccumulator();

            final MeanVarianceAccumulator errors = new MeanVarianceAccumulator();
            final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator();

            int c = 0;
            for (int xval = 0; xval < Nxval; ++xval) {
                for (final Instances test : tests) {
                    // TODO: Debugging
                    WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test);

                    //               transformed_test.randomize( new RandomAdaptor( config.rng ) );
                    //               final ClusterContingencyTable ct = evaluator.evaluate( transformed_test );
                    test.randomize(new RandomAdaptor(config.rng));
                    final ClusterContingencyTable ct = evaluator.evaluate(test);
                    System.out.println(ct);

                    int Nerrors = 0;
                    final MeanVarianceAccumulator mv = new MeanVarianceAccumulator();
                    for (int i = 0; i < ct.R; ++i) {
                        final int max = Fn.max(ct.n[i]);
                        Nerrors += (ct.a[i] - max);
                        mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]);
                    }
                    errors.add(Nerrors);
                    relative_error.add(mv.mean());

                    System.out.println("exemplar: " + test.get(0));
                    System.out.println("Nerrors = " + Nerrors);
                    final PrintStream ct_out = new PrintStream(
                            new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv")));
                    ct.writeCsv(ct_out);
                    ct_out.close();
                    final double ct_ami = ct.adjustedMutualInformation_max();
                    if (Double.isNaN(ct_ami)) {
                        System.out.println("! ct_ami = NaN");
                    } else {
                        ami.add(ct_ami);
                    }
                    System.out.println();
                }
            }
            System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")");
            System.out.println(
                    "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")");
            System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")");

            csv.cell(config.domain).cell(config.get("abstraction.discovery"));
            for (final String p : parameter_headers) {
                csv.cell(config.get(p));
            }
            csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline();
        } catch (final Exception ex) {
            ex.printStackTrace();
        }
    }
}

From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java
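
Determines which datasets still need aggregate functions computed, iterating the dataset and input lists, then configures and submits a Hadoop job that produces them.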

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the aggregate functions " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions"
            + " will be computed, followed by their temporal and spatial attribute indices");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    String preProcessingDatasets = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetAggregation = new ArrayList<String>();
    HashMap<String, String> datasetTempAtt = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>();
    HashMap<String, String> preProcessingDataset = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");
    String[] datasetArgs = cmd.getOptionValues("g");

    for (int i = 0; i < datasetArgs.length; i += 3) {
        String dataset = datasetArgs[i];

        // getting pre-processing
        String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3);
        if (tempPreProcessing == null) {
            System.out.println("No pre-processing available for " + dataset);
            continue;
        }
        preProcessingDataset.put(dataset, tempPreProcessing);

        shortDataset.add(dataset);
        datasetTempAtt.put(dataset, ("null".equals(datasetArgs[i + 1]) ? null : datasetArgs[i + 1]));
        datasetSpatialAtt.put(dataset, ("null".equals(datasetArgs[i + 2]) ? null : datasetArgs[i + 2]));

        datasetId.put(dataset, null);
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3);

    // getting smallest resolution

    HashMap<String, String> tempResMap = new HashMap<String, String>();
    HashMap<String, String> spatialResMap = new HashMap<String, String>();

    HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>();

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String[] datasetArray = preProcessingDataset.get(dataset).split("-");

        String datasetTemporalStr = datasetArray[datasetArray.length - 2];
        int datasetTemporal = utils.temporalResolution(datasetTemporalStr);

        String datasetSpatialStr = datasetArray[datasetArray.length - 1];
        int datasetSpatial = utils.spatialResolution(datasetSpatialStr);

        // finding all possible resolutions

        String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal);
        String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial);

        String temporalResolution = "";
        String spatialResolution = "";

        String tempRes = "";
        String spatialRes = "";

        boolean dataAdded = false;

        for (int i = 0; i < temporalResolutions.length; i++) {
            for (int j = 0; j < spatialResolutions.length; j++) {

                temporalResolution = temporalResolutions[i];
                spatialResolution = spatialResolutions[j];

                String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";

                if (removeExistingFiles) {
                    FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3);
                }

                if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) {

                    dataAdded = true;

                    tempRes += temporalResolution + "-";
                    spatialRes += spatialResolution + "-";
                }
            }
        }

        if (dataAdded) {
            input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
            shortDatasetAggregation.add(dataset);

            tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1));
            spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1));

            datasetTemporalStrMap.put(dataset, datasetTemporalStr);
            datasetSpatialStrMap.put(dataset, datasetSpatialStr);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have aggregates.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    it = input.iterator();
    while (it.hasNext()) {
        preProcessingDatasets += it.next() + ",";
    }

    Job aggJob = null;
    String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/";
    String jobName = "aggregates";

    FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3);

    Configuration aggConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    aggConf.set("dataset-name", datasetNames);
    aggConf.set("dataset-id", datasetIds);

    for (int i = 0; i < shortDatasetAggregation.size(); i++) {
        String dataset = shortDatasetAggregation.get(i);
        String id = datasetId.get(dataset);
        aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset));
        aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset));
        aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset));

        if (s3)
            aggConf.set("dataset-" + id,
                    s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
        else
            aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/"
                    + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
    }

    aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    aggConf.set("mapreduce.task.io.sort.mb", "200");
    aggConf.set("mapreduce.task.io.sort.factor", "100");
    machineConf.setMachineConfiguration(aggConf);

    if (s3) {
        machineConf.setMachineConfiguration(aggConf);
        aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    if (snappyCompression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    aggJob = new Job(aggConf);
    aggJob.setJobName(jobName);

    aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setMapOutputValueClass(AggregationArrayWritable.class);
    aggJob.setOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setOutputValueClass(FloatArrayWritable.class);
    //aggJob.setOutputKeyClass(Text.class);
    //aggJob.setOutputValueClass(Text.class);

    aggJob.setMapperClass(AggregationMapper.class);
    aggJob.setCombinerClass(AggregationCombiner.class);
    aggJob.setReducerClass(AggregationReducer.class);
    aggJob.setNumReduceTasks(machineConf.getNumberReduces());

    aggJob.setInputFormatClass(SequenceFileInputFormat.class);
    //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(aggJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(aggJob, true);
    FileInputFormat.setInputPaths(aggJob,
            preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1));
    FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir));

    aggJob.setJarByClass(Aggregation.class);

    long start = System.currentTimeMillis();
    aggJob.submit();
    aggJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetAggregation) {
        String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}