Example usage for java.util.ArrayList.iterator()

Introduction

This page collects example usages of java.util.ArrayList.iterator(), taken from real-world source files.

Prototype

public Iterator<E> iterator() 

Document

Returns an iterator over the elements in this list in proper sequence.
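
As a minimal, self-contained sketch of the behavior described above (the class and variable names are illustrative, not part of the API):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class IteratorSketch {
    public static void main(String[] args) {
        List<String> names = new ArrayList<String>();
        names.add("alpha");
        names.add("beta");

        // iterator() yields the elements in proper sequence: alpha, then beta
        Iterator<String> it = names.iterator();
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }
}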

Usage

From source file:ArrayListExample.java
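
Builds an ArrayList of Integer values, replaces the element at index 5 with set(), then walks the list with iterator() to print each element.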

public static void main(String[] args) {
    ArrayList<Integer> al = new ArrayList<Integer>();

    // autoboxing replaces the deprecated new Integer(int) constructor
    for (int n = 1; n <= 10; n++) {
        al.add(n);
    }

    // replace the element at index 5 (the value 6) with 66
    al.set(5, 66);

    for (Iterator<Integer> i = al.iterator(); i.hasNext();) {
        Integer integer = i.next();
        System.out.println(integer);
    }
}

From source file:ProducerTool.java
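
Starts a configurable number of ProducerTool threads, then repeatedly obtains an iterator over the thread list to count how many are still alive, sleeping one second between passes.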

public static void main(String[] args) {
    ArrayList<ProducerTool> threads = new ArrayList<ProducerTool>();
    ProducerTool producerTool = new ProducerTool();
    String[] unknown = CommandLineSupport.setOptions(producerTool, args);
    if (unknown.length > 0) {
        System.out.println("Unknown options: " + Arrays.toString(unknown));
        System.exit(-1);
    }
    producerTool.showParameters();
    for (int threadCount = 1; threadCount <= parallelThreads; threadCount++) {
        producerTool = new ProducerTool();
        CommandLineSupport.setOptions(producerTool, args);
        producerTool.start();
        threads.add(producerTool);
    }

    while (true) {
        Iterator<ProducerTool> itr = threads.iterator();
        int running = 0;
        while (itr.hasNext()) {
            ProducerTool thread = itr.next();
            if (thread.isAlive()) {
                running++;
            }
        }
        if (running <= 0) {
            System.out.println("All threads completed their work");
            break;
        }
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interruption and poll again
        }
    }
}

From source file:ConsumerTool.java
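
The consumer-side counterpart of the previous example: it starts ConsumerTool threads and polls them through an iterator until all have finished.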

public static void main(String[] args) {
    ArrayList<ConsumerTool> threads = new ArrayList<ConsumerTool>();
    ConsumerTool consumerTool = new ConsumerTool();
    String[] unknown = CommandLineSupport.setOptions(consumerTool, args);
    if (unknown.length > 0) {
        System.out.println("Unknown options: " + Arrays.toString(unknown));
        System.exit(-1);
    }
    consumerTool.showParameters();
    for (int threadCount = 1; threadCount <= parallelThreads; threadCount++) {
        consumerTool = new ConsumerTool();
        CommandLineSupport.setOptions(consumerTool, args);
        consumerTool.start();
        threads.add(consumerTool);
    }

    while (true) {
        Iterator<ConsumerTool> itr = threads.iterator();
        int running = 0;
        while (itr.hasNext()) {
            ConsumerTool thread = itr.next();
            if (thread.isAlive()) {
                running++;
            }
        }

        if (running <= 0) {
            System.out.println("All threads completed their work");
            break;
        }

        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interruption and poll again
        }
    }
    Iterator<ConsumerTool> itr = threads.iterator();
    while (itr.hasNext()) {
        ConsumerTool thread = itr.next();
    }
}
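
The polling loops in the two examples above can also be written with Thread.join(), which blocks until a thread dies. A minimal sketch, assuming ConsumerTool extends Thread as the calls to start() and isAlive() imply:

    for (ConsumerTool thread : threads) {
        try {
            thread.join(); // block until this consumer finishes
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the interrupt flag and stop waiting
            break;
        }
    }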

From source file:akori.Impact.java
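
Reads eye-tracking coordinates from text files into an ArrayList, iterates the list to fill a coordinate matrix, and writes one output matrix per image.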

static public void main(String[] args) throws IOException {
    String PATH = "E:\\Trabajos\\AKORI\\datosmatrizgino\\";
    String PATHIMG = "E:\\NetBeansProjects\\AKORI\\Proccess_1\\ImagesPages\\";
    for (int i = 1; i <= 32; ++i) {
        for (int k = 1; k <= 15; ++k) {
            System.out.println("Matrix " + i + "-" + k);
            BufferedImage img = null;
            try {
                img = ImageIO.read(new File(PATHIMG + i + ".png"));
            } catch (IOException ex) {
                ex.printStackTrace();
            }

            int ymax = img.getHeight();
            int xmax = img.getWidth();

            double[][] imagen = new double[ymax][xmax];

            BufferedReader in = null;
            try {
                in = new BufferedReader(new FileReader(PATH + i + "-" + k + ".txt"));
            } catch (FileNotFoundException ex) {
                ex.printStackTrace();
            }

            String linea;
            ArrayList<String> lista = new ArrayList<String>();
            try {
                while ((linea = in.readLine()) != null) {
                    String[] datos = linea.split(",");
                    int x = (int) Double.parseDouble(datos[1]);
                    int y = (int) Double.parseDouble(datos[2]);
                    if (x >= xmax || y >= ymax || x <= 0 || y <= 0) {
                        continue;
                    }
                    lista.add(x + "," + y);
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }

            try {
                in.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }

            Iterator<String> iter = lista.iterator();
            int[][] matrix = new int[lista.size()][2];

            for (int j = 0; iter.hasNext(); ++j) {
                String xy = iter.next();
                String[] datos = xy.split(",");
                matrix[j][0] = Integer.parseInt(datos[0]);
                matrix[j][1] = Integer.parseInt(datos[1]);
            }

            for (int j = 0; j < matrix.length; ++j) {
                int std = 50;
                int x = matrix[j][0];
                int y = matrix[j][1];
                imagen[y][x] += 1;
                normalMatrix(imagen, y, x, std);
            }

            FileWriter fw = new FileWriter(PATH + "Matrix" + i + "-" + k + ".txt");
            BufferedWriter bw = new BufferedWriter(fw);
            for (int j = 0; j < imagen.length; ++j) {
                for (int t = 0; t < imagen[j].length; ++t) {
                    if (t + 1 == imagen[j].length)
                        bw.write(imagen[j][t] + "");
                    else
                        bw.write(imagen[j][t] + ",");
                }
                bw.write("\n");
            }
            bw.close();
        }
    }
}

From source file:Batch.java
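
Collects user ids whose passwords can be cracked, then iterates the list to add a JDBC batch update that flags each account.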

static public void main(String[] args) {
    Connection conn = null;

    try {
        ArrayList<String> breakable = new ArrayList<String>();
        PreparedStatement stmt;
        Iterator<String> users;
        ResultSet rs;

        Class.forName(args[0]).newInstance();
        conn = DriverManager.getConnection(args[1], args[2], args[3]);
        stmt = conn.prepareStatement("SELECT user_id, password " + "FROM user");
        rs = stmt.executeQuery();
        while (rs.next()) {
            String uid = rs.getString(1);
            String pw = rs.getString(2);

            // Assume PasswordCracker is some class that provides
            // a single static method called crack() that attempts
            // to run password cracking routines on the password.
            // if (PasswordCracker.crack(uid, pw)) {
            //     breakable.add(uid);
            // }
        }
        stmt.close();
        if (breakable.size() < 1) {
            return;
        }
        stmt = conn.prepareStatement("UPDATE user " + "SET bad_password = 'Y' " + "WHERE uid = ?");
        users = breakable.iterator();
        while (users.hasNext()) {
            String uid = users.next();

            stmt.setString(1, uid);
            stmt.addBatch();
        }
        stmt.executeBatch();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (conn != null) {
            try {
                conn.close();
            } catch (Exception e) {
                // ignore errors on close
            }
        }
    }
}

From source file:com.mch.registry.ccs.server.CcsClient.java
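
Periodically reads pending notifications from MySQL into an ArrayList and iterates each queue to push the messages to devices over a CCS (XMPP) connection.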

/**
 * Sends messages to registered devices.
 */
public static void main(String[] args) {

    Config config = new Config();
    final String projectId = config.getProjectId();
    final String key = config.getKey();

    final CcsClient ccsClient = CcsClient.prepareClient(projectId, key, true);

    try {
        ccsClient.connect();
    } catch (XMPPException e) {
        logger.log(Level.WARNING, "XMPP Exception ", e);
    }

    final Runnable sendNotifications = new Runnable() {
        public void run() {
            try {
                logger.log(Level.INFO, "Working Q!");
                if (!isOffHours()) {
                    //Prepare downstream message
                    String toRegId = "";
                    String message = "";
                    String messageId = "";
                    Map<String, String> payload = new HashMap<String, String>();
                    String collapseKey = null;
                    Long timeToLive = 10000L;
                    Boolean delayWhileIdle = true;
                    String messagePrefix = "";
                    int notificationQueueID = 0;
                    boolean successfullySent = false;

                    //Read from mysql database
                    MySqlHandler mysql = new MySqlHandler();
                    ArrayList<Notification> queue = new ArrayList<Notification>();

                    for (int i = 1; i < 3; i++) {
                        queue = mysql.getNotificationQueue(i);
                        if (queue.size() > 0) {

                            switch (i) {
                            case 1:
                                messagePrefix = "_V: ";
                                break;
                            case 2:
                                messagePrefix = "_R: ";
                                break;
                            default:
                                messagePrefix = "";
                                logger.log(Level.WARNING, "Unknown message type!");
                            }

                            Notification notification = new Notification();
                            Iterator<Notification> iterator = queue.iterator();

                            while (iterator.hasNext()) {
                                notification = iterator.next();

                                toRegId = notification.getGcmRegID();
                                message = notification.getNotificationText();
                                notificationQueueID = notification.getNotificationQueueID();
                                messageId = "m-" + Long.toString(random.nextLong());

                                payload = new HashMap<String, String>();
                                payload.put("message", messagePrefix + message);

                                try {
                                    // Send the downstream message to a device.
                                    ccsClient.send(createJsonMessage(toRegId, messageId, payload, collapseKey,
                                            timeToLive, delayWhileIdle));
                                    successfullySent = true;
                                    logger.log(Level.INFO, "Message sent. ID: " + notificationQueueID
                                            + ", RegID: " + toRegId + ", Text: " + message);
                                } catch (Exception e) {
                                    mysql.prepareNotificationForTheNextDay(notificationQueueID);
                                    successfullySent = false;
                                    logger.log(Level.WARNING,
                                            "Message could not be sent! ID: " + notificationQueueID
                                                    + ", RegID: " + toRegId + ", Text: " + message);
                                }

                                if (successfullySent) {
                                    mysql.moveNotificationToHistory(notificationQueueID);
                                }
                            }
                        } else {
                            logger.log(Level.INFO, "No notifications to send. Type: " + Integer.toString(i));
                        }
                    }
                }
            } catch (Exception e) {
                logger.log(Level.WARNING, "Exception ", e);
            }
        }
    };

    ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    //Start when server starts and every 30 minutes after
    ScheduledFuture<?> task = executor.scheduleAtFixedRate(sendNotifications, 0, 30, TimeUnit.MINUTES);
    try {
        task.get();
    } catch (ExecutionException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    }
    task.cancel(false);

    try {
        executor.shutdown();
        executor.awaitTermination(30, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Exception ", e);
    }

}

From source file:fr.iphc.grid.jobmonitor.CeList.java
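
Discovers available grid computing elements, iterates the resulting URL list to launch one SubmitterThread per CE, and waits until every thread finishes or a timeout expires.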

public static void main(String[] args) throws Exception {

    SessionFactory.createSession(true);
    CeList command = new CeList();
    CommandLine line = command.parse(args);
    Integer timeout = 0;
    String TableSql = "monce";
    //      MySQLAccess sql = new MySQLAccess();
    if (line.getOptionValue(OPT_TIMEOUT) == null) {
        timeout = 15;
    } else {
        timeout = Integer.parseInt(line.getOptionValue(OPT_TIMEOUT));
    }
    timeout = timeout * 60; // convert to seconds
    Date start = new Date();
    String OutDir = line.getOptionValue(OPT_OUTDIR);

    if (OutDir == null) {
        OutDir = "/tmp/thread";
    }
    ArrayList<URL> CeList = null;
    if (line.getOptionValue(OPT_CEPATH) == null) {
        CeList = AvailableLdapCe();
        //         for (URL k : CeList) {
        //            // System.out.println(k);
        //         }
    } else {
        CeList = AvailableCe(line.getOptionValue(OPT_CEPATH));
    }
    Boolean ret = initDirectory(new File(OutDir));
    if (!ret) {
        System.out.println("ERROR: " + OutDir + "STOP");
        System.exit(-1);
    }

    // check if we can connect to the grid
    // try{
    // SessionFactory.createSession(true);
    // }catch(NoSuccessException e){
    // System.err.println("Could not connect to the grid at all
    // ("+e.getMessage()+")");
    // System.err.println("Aborting");
    // System.exit(0);
    //
    // }

    SubmitterThread[] st = new SubmitterThread[CeList.size()];

    Iterator<URL> i = CeList.iterator();
    int index = 0;
    while (i.hasNext()) {
        URL serviceURL = i.next();
        // Do not move this into the thread, because of the options.
        Properties prop = new Properties();
        prop.setProperty("Executable", "/bin/hostname");//
        // prop.setProperty("Executable", "touch /dev/null");
        JobDescription desc = createJobDescription(prop);
        desc.setAttribute(JobDescription.INTERACTIVE, "true");
        desc.setAttribute(JobDescription.EXECUTABLE, "/bin/hostname");
        // proxy="/home/dsa/.globus/biomed.txt";
        // Context ctx = ContextFactory.createContext();
        // ctx.setAttribute(Context.TYPE, "VOMS");
        // ctx.setAttribute(Context.USERVO, "biomed");
        // ctx.setAttribute(Context.USERPROXY,proxy);
        // Session session = SessionFactory.createSession(false);
        // session.addContext(ctx);
        Session session = SessionFactory.createSession(true);
        st[index] = new SubmitterThread(serviceURL, session, desc, OutDir, timeout, start);
        st[index].setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
            @Override
            public void uncaughtException(Thread t, Throwable e) {
                // System.out.println("Error! An exception occured in "
                // + t.getName() + ". Cause: " + e.getMessage());
            }
        });
        st[index].start();
        // Thread.sleep(15 * 1000);
        // Check whether the output file exists:
        // System.out.println("Alive " + OutDir + "/" + serviceURL.getHost() + "_"
        //         + serviceURL.getPath().replaceAll("/cream-", "") + ".out");
        // while (!(new File(OutDir + "/" + serviceURL.getHost() + "_"
        //         + serviceURL.getPath().replaceAll("/cream-", "") + ".out").exists()
        //         || new File(OutDir + "/" + serviceURL.getHost() + "_"
        //                 + serviceURL.getPath().replaceAll("/cream-", "") + ".err").exists())) {
        //     Thread.sleep(500);
        // }
        // System.out.println("Alive " + serviceURL.getHost() + "-" + index + " FILE EXIST");
        index++;

    }

    // System.out.println("BEGIN LOOP: Max " + index);
    long date_start = System.currentTimeMillis();
    // System.out.println("BEGIN START: " + date_start);
    Integer time_out = (timeout + 180) * 1000; // in ms; the extra 180 s is slack relative to the thread timeout
    Boolean Alive = true;
    //      int nb = 0;
    long now = System.currentTimeMillis();
    do {
        now = System.currentTimeMillis();
        Alive = false;
        //         nb = 0;
        for (int j = 0; j < index; j++) {
            if (st[j].isAlive()) {
                // System.out.println("Alive "+j);
                Alive = true;
                //               nb++;
            }
        }
        // System.out.println(nb);
        Thread.sleep(10000);
    } while ((Alive) && ((now - date_start) < time_out));

    for (int j = 0; j < index; j++) {
        if (st[j].isAlive()) {
            st[j].Requeststop();
        }
    }
    BilanCe(OutDir, CeList, TableSql);
    jobManagerLdap jm = new jobManagerLdap();
    jm.updateLdapCe();
    System.out.println("END " + new Date());
    // do further processing...
    System.exit(0);
}

From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
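
Parses command-line options, resolves dataset ids by iterating the dataset list, and configures and submits a Hadoop job that computes correlations between dataset pairs.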

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
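
Builds training and test sets from an ARFF file, uses an iterator to drop test partitions with fewer than 30 instances (via Iterator.remove()), and evaluates a similarity function over the remaining partitions.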

/**
 * @param args
 * @throws IOException
 * @throws FileNotFoundException
 */
public static void main(final String[] args) throws FileNotFoundException, IOException {
    final String experiment_file = args[0];
    final File root_directory;
    if (args.length > 1) {
        root_directory = new File(args[1]);
    } else {
        root_directory = new File(".");
    }
    final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file));
    final String experiment_name = FilenameUtils.getBaseName(experiment_file);

    final File expr_directory = new File(root_directory, experiment_name);
    expr_directory.mkdirs();

    final Csv.Writer csv = new Csv.Writer(
            new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv"))));
    final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma",
            "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier",
            "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth",
            "pairwise_classifier.max_branching", "training.label_noise" };
    csv.cell("domain").cell("abstraction");
    for (final String p : parameter_headers) {
        csv.cell(p);
    }
    csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline();

    for (int expr = 0; expr < csv_config.size(); ++expr) {
        try {
            final KeyValueStore expr_config = csv_config.get(expr);
            final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(),
                    expr_config);

            System.out.println("[Loading '" + config.training_data_single + "']");
            final Instances single = WekaUtil
                    .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff"));

            final Instances train = new Instances(single, 0);
            final int[] idx = Fn.range(0, single.size());
            int instance_counter = 0;
            Fn.shuffle(config.rng, idx);
            final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename?
            final double label_noise = config.getDouble("training.label_noise");
            final int Nlabels = train.classAttribute().numValues();
            assert (Nlabels > 0);
            for (int i = 0; i < Ntrain; ++i) {
                final Instance inst = single.get(idx[instance_counter++]);
                if (label_noise > 0 && config.rng.nextDouble() < label_noise) {
                    int noisy_label = 0;
                    do {
                        noisy_label = config.rng.nextInt(Nlabels);
                    } while (noisy_label == (int) inst.classValue());
                    System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")");
                    inst.setClassValue(noisy_label);
                }
                train.add(inst);
                inst.setDataset(train);
            }

            final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config);

            final int Ntest = config.Ntest_games;
            int Ntest_added = 0;
            final ArrayList<Instances> tests = new ArrayList<Instances>();
            while (instance_counter < single.size() && Ntest_added < Ntest) {
                final Instance inst = single.get(idx[instance_counter++]);
                boolean found = false;
                for (final Instances test : tests) {
                    // Note that 'plausible_p' should be transitive
                    if (plausible_p.apply(inst, test.get(0))) {
                        WekaUtil.addInstance(test, inst);
                        if (test.size() == 30) {
                            Ntest_added += test.size();
                        } else if (test.size() > 30) {
                            Ntest_added += 1;
                        }
                        found = true;
                        break;
                    }
                }

                if (!found) {
                    final Instances test = new Instances(single, 0);
                    WekaUtil.addInstance(test, inst);
                    tests.add(test);
                }
            }
            final Iterator<Instances> test_itr = tests.iterator();
            while (test_itr.hasNext()) {
                if (test_itr.next().size() < 30) {
                    test_itr.remove();
                }
            }
            System.out.println("=== tests.size() = " + tests.size());
            System.out.println("=== Ntest_added = " + Ntest_added);

            System.out.println("[Training]");
            final Evaluator evaluator = createEvaluator(config, train);
            //            final Instances transformed_test = evaluator.prepareInstances( test );

            System.out.println("[Evaluating]");

            final int Nxval = evaluator.isSensitiveToOrdering() ? 10 : 1;
            final MeanVarianceAccumulator ami = new MeanVarianceAccumulator();

            final MeanVarianceAccumulator errors = new MeanVarianceAccumulator();
            final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator();

            int c = 0;
            for (int xval = 0; xval < Nxval; ++xval) {
                for (final Instances test : tests) {
                    // TODO: Debugging
                    WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test);

                    //               transformed_test.randomize( new RandomAdaptor( config.rng ) );
                    //               final ClusterContingencyTable ct = evaluator.evaluate( transformed_test );
                    test.randomize(new RandomAdaptor(config.rng));
                    final ClusterContingencyTable ct = evaluator.evaluate(test);
                    System.out.println(ct);

                    int Nerrors = 0;
                    final MeanVarianceAccumulator mv = new MeanVarianceAccumulator();
                    for (int i = 0; i < ct.R; ++i) {
                        final int max = Fn.max(ct.n[i]);
                        Nerrors += (ct.a[i] - max);
                        mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]);
                    }
                    errors.add(Nerrors);
                    relative_error.add(mv.mean());

                    System.out.println("exemplar: " + test.get(0));
                    System.out.println("Nerrors = " + Nerrors);
                    final PrintStream ct_out = new PrintStream(
                            new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv")));
                    ct.writeCsv(ct_out);
                    ct_out.close();
                    final double ct_ami = ct.adjustedMutualInformation_max();
                    if (Double.isNaN(ct_ami)) {
                        System.out.println("! ct_ami = NaN");
                    } else {
                        ami.add(ct_ami);
                    }
                    System.out.println();
                }
            }
            System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")");
            System.out.println(
                    "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")");
            System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")");

            csv.cell(config.domain).cell(config.get("abstraction.discovery"));
            for (final String p : parameter_headers) {
                csv.cell(config.get(p));
            }
            csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline();
        } catch (final Exception ex) {
            ex.printStackTrace();
        }
    }
}

From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java
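
Determines which datasets still need aggregate functions computed, iterating the dataset and input lists, then configures and submits a Hadoop job that produces them.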

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the aggregate functions " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions"
            + " will be computed, followed by their temporal and spatial attribute indices");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    String preProcessingDatasets = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetAggregation = new ArrayList<String>();
    HashMap<String, String> datasetTempAtt = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>();
    HashMap<String, String> preProcessingDataset = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");
    String[] datasetArgs = cmd.getOptionValues("g");

    for (int i = 0; i < datasetArgs.length; i += 3) {
        String dataset = datasetArgs[i];

        // getting pre-processing
        String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3);
        if (tempPreProcessing == null) {
            System.out.println("No pre-processing available for " + dataset);
            continue;
        }
        preProcessingDataset.put(dataset, tempPreProcessing);

        shortDataset.add(dataset);
        datasetTempAtt.put(dataset, ("null".equals(datasetArgs[i + 1]) ? null : datasetArgs[i + 1]));
        datasetSpatialAtt.put(dataset, ("null".equals(datasetArgs[i + 2]) ? null : datasetArgs[i + 2]));

        datasetId.put(dataset, null);
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3);

    // getting smallest resolution

    HashMap<String, String> tempResMap = new HashMap<String, String>();
    HashMap<String, String> spatialResMap = new HashMap<String, String>();

    HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>();

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String[] datasetArray = preProcessingDataset.get(dataset).split("-");

        String datasetTemporalStr = datasetArray[datasetArray.length - 2];
        int datasetTemporal = utils.temporalResolution(datasetTemporalStr);

        String datasetSpatialStr = datasetArray[datasetArray.length - 1];
        int datasetSpatial = utils.spatialResolution(datasetSpatialStr);

        // finding all possible resolutions

        String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal);
        String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial);

        String temporalResolution = "";
        String spatialResolution = "";

        String tempRes = "";
        String spatialRes = "";

        boolean dataAdded = false;

        for (int i = 0; i < temporalResolutions.length; i++) {
            for (int j = 0; j < spatialResolutions.length; j++) {

                temporalResolution = temporalResolutions[i];
                spatialResolution = spatialResolutions[j];

                String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";

                if (removeExistingFiles) {
                    FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3);
                }

                if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) {

                    dataAdded = true;

                    tempRes += temporalResolution + "-";
                    spatialRes += spatialResolution + "-";
                }
            }
        }

        if (dataAdded) {
            input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
            shortDatasetAggregation.add(dataset);

            tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1));
            spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1));

            datasetTemporalStrMap.put(dataset, datasetTemporalStr);
            datasetSpatialStrMap.put(dataset, datasetSpatialStr);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have aggregates.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    it = input.iterator();
    while (it.hasNext()) {
        preProcessingDatasets += it.next() + ",";
    }

    Job aggJob = null;
    String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/";
    String jobName = "aggregates";

    FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3);

    Configuration aggConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    aggConf.set("dataset-name", datasetNames);
    aggConf.set("dataset-id", datasetIds);

    for (int i = 0; i < shortDatasetAggregation.size(); i++) {
        String dataset = shortDatasetAggregation.get(i);
        String id = datasetId.get(dataset);
        aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset));
        aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset));
        aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset));

        if (s3)
            aggConf.set("dataset-" + id,
                    s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
        else
            aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/"
                    + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
    }

    aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    aggConf.set("mapreduce.task.io.sort.mb", "200");
    aggConf.set("mapreduce.task.io.sort.factor", "100");
    machineConf.setMachineConfiguration(aggConf);

    if (s3) {
        machineConf.setMachineConfiguration(aggConf);
        aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    if (snappyCompression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    aggJob = new Job(aggConf);
    aggJob.setJobName(jobName);

    aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setMapOutputValueClass(AggregationArrayWritable.class);
    aggJob.setOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setOutputValueClass(FloatArrayWritable.class);
    //aggJob.setOutputKeyClass(Text.class);
    //aggJob.setOutputValueClass(Text.class);

    aggJob.setMapperClass(AggregationMapper.class);
    aggJob.setCombinerClass(AggregationCombiner.class);
    aggJob.setReducerClass(AggregationReducer.class);
    aggJob.setNumReduceTasks(machineConf.getNumberReduces());

    aggJob.setInputFormatClass(SequenceFileInputFormat.class);
    //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(aggJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(aggJob, true);
    FileInputFormat.setInputPaths(aggJob,
            preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1));
    FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir));

    aggJob.setJarByClass(Aggregation.class);

    long start = System.currentTimeMillis();
    aggJob.submit();
    aggJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetAggregation) {
        String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}