Example usage for org.apache.hadoop.conf.Configuration.setInt

Introduction

On this page you can find example usage for org.apache.hadoop.conf.Configuration.setInt.

Prototype

public void setInt(String name, int value) 

Document

Set the value of the name property to an int.
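
A minimal, self-contained sketch of the round trip (the property names below are arbitrary illustrations, not standard Hadoop keys): setInt stores the value internally as a string, and the matching getInt parses it back, returning the supplied default when the property is unset.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store an int property; Configuration keeps it as a String internally.
        conf.setInt("my.app.retries", 5);

        // getInt's second argument is the default returned when the
        // property has not been set (or cannot be parsed as an int).
        System.out.println(conf.getInt("my.app.retries", 1)); // prints 5
        System.out.println(conf.getInt("my.app.timeout.sec", 30)); // prints 30
    }
}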

Usage

From source file:ivory.driver.PreprocessTREC.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String collection = args[0];
    String indexRootPath = args[1];
    int numMappers = Integer.parseInt(args[2]);
    int numReducers = Integer.parseInt(args[3]);

    sLogger.info("Tool name: PreprocessTREC");
    sLogger.info(" - Collection path: " + collection);
    sLogger.info(" - Index path: " + indexRootPath);

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Create the index directory if it doesn't already exist.
    Path p = new Path(indexRootPath);
    if (!fs.exists(p)) {
        sLogger.info("index directory doesn't exist, creating...");
        fs.mkdirs(p);
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexRootPath, fs);

    // Look for the docno mapping, which maps from docid (String) to docno
    // (sequentially-numbered integer). If it doesn't exist, create it.
    Path mappingFile = env.getDocnoMappingData();
    Path mappingDir = env.getDocnoMappingDirectory();

    if (!fs.exists(mappingFile)) {
        sLogger.info("docno-mapping.dat doesn't exist, creating...");
        String[] arr = new String[] { collection, mappingDir.toString(), mappingFile.toString(),
                Integer.toString(numMappers) };
        NumberTrecDocuments tool = new NumberTrecDocuments();
        tool.setConf(conf);
        tool.run(arr);

        fs.delete(mappingDir, true);
    }

    // Now we're ready to start the preprocessing pipeline... set
    // appropriate properties.
    conf.setInt("Ivory.NumMapTasks", numMappers);
    conf.setInt("Ivory.NumReduceTasks", numReducers);

    conf.set("Ivory.CollectionName", "TREC_vol45");
    conf.set("Ivory.CollectionPath", collection);
    conf.set("Ivory.IndexPath", indexRootPath);
    conf.set("Ivory.InputFormat", "edu.umd.cloud9.collection.trec.TrecDocumentInputFormat");
    conf.set("Ivory.Tokenizer", "ivory.tokenize.GalagoTokenizer");
    conf.set("Ivory.DocnoMappingClass", "edu.umd.cloud9.collection.trec.TrecDocnoMapping");
    conf.set("Ivory.DocnoMappingFile", env.getDocnoMappingData().toString());

    conf.setInt("Ivory.DocnoOffset", 0); // docnos start at 1
    conf.setInt("Ivory.MinDf", 2); // toss away singleton terms
    conf.setInt("Ivory.MaxDf", Integer.MAX_VALUE);
    conf.setInt("Ivory.TermIndexWindow", 8);

    new BuildTermDocVectors(conf).run();
    new GetTermCount(conf).run();
    new BuildTermIdMap(conf).run();
    new BuildIntDocVectors(conf).run();

    new BuildIntDocVectorsForwardIndex(conf).run();
    new BuildTermDocVectorsForwardIndex(conf).run();

    return 0;
}

From source file:ivory.driver.PreprocessWt10g.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String collection = args[0];
    String indexRootPath = args[1];
    int numMappers = Integer.parseInt(args[2]);
    int numReducers = Integer.parseInt(args[3]);

    sLogger.info("Tool name: PreprocessWt10g");
    sLogger.info(" - Collection path: " + collection);
    sLogger.info(" - Index path: " + indexRootPath);

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Create the index directory if it doesn't already exist.
    Path p = new Path(indexRootPath);
    if (!fs.exists(p)) {
        sLogger.info("index directory doesn't exist, creating...");
        fs.mkdirs(p);
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexRootPath, fs);

    // Look for the docno mapping, which maps from docid (String) to docno
    // (sequentially-numbered integer). If it doesn't exist, create it.
    Path mappingFile = env.getDocnoMappingData();
    Path mappingDir = env.getDocnoMappingDirectory();

    if (!fs.exists(mappingFile)) {
        sLogger.info("docno-mapping.dat doesn't exist, creating...");
        String[] arr = new String[] { collection, mappingDir.toString(), mappingFile.toString(),
                Integer.toString(numMappers) };
        NumberTrecWebDocuments tool = new NumberTrecWebDocuments();
        tool.setConf(conf);
        tool.run(arr);

        fs.delete(mappingDir, true);
    }

    // Now we're ready to start the preprocessing pipeline... set
    // appropriate properties.
    conf.setInt("Ivory.NumMapTasks", numMappers);
    conf.setInt("Ivory.NumReduceTasks", numReducers);

    conf.set("Ivory.CollectionName", "Wt10g");
    conf.set("Ivory.CollectionPath", collection);
    conf.set("Ivory.IndexPath", indexRootPath);
    conf.set("Ivory.InputFormat", "org.apache.hadoop.mapred.SequenceFileInputFormat");
    conf.set("Ivory.Tokenizer", "ivory.tokenize.GalagoTokenizer");
    conf.set("Ivory.DocnoMappingClass", "edu.umd.cloud9.collection.trecweb.Wt10gDocnoMapping");
    conf.set("Ivory.DocnoMappingFile", mappingFile.toString());

    conf.setInt("Ivory.DocnoOffset", 0); // docnos start at 1
    conf.setInt("Ivory.MinDf", 10);
    conf.setInt("Ivory.MaxDf", Integer.MAX_VALUE);
    conf.setInt("Ivory.TermIndexWindow", 8);

    new BuildTermDocVectors(conf).run();
    new GetTermCount(conf).run();
    new BuildTermIdMap(conf).run();
    new BuildIntDocVectors(conf).run();

    new BuildIntDocVectorsForwardIndex(conf).run();
    new BuildTermDocVectorsForwardIndex(conf).run();

    return 0;
}

From source file:ivory.preprocess.BuildTermDocVectors2.java

License:Apache License

@SuppressWarnings("unchecked")
public int runTool() throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get(Constants.IndexPath);
    String collectionName = conf.get(Constants.CollectionName);
    String collectionPath = conf.get(Constants.CollectionPath);
    String inputFormat = conf.get(Constants.InputFormat);
    String tokenizer = conf.get(Constants.Tokenizer);
    String mappingClass = conf.get(Constants.DocnoMappingClass);
    int docnoOffset = conf.getInt(Constants.DocnoOffset, 0);

    LOG.info("PowerTool: BuildTermDocVectors2");
    LOG.info(String.format(" - %s: %s", Constants.IndexPath, indexPath));
    LOG.info(String.format(" - %s: %s", Constants.CollectionName, collectionName));
    LOG.info(String.format(" - %s: %s", Constants.CollectionPath, collectionPath));
    LOG.info(String.format(" - %s: %s", Constants.InputFormat, inputFormat));
    LOG.info(String.format(" - %s: %s", Constants.Tokenizer, tokenizer));
    LOG.info(String.format(" - %s: %s", Constants.DocnoMappingClass, mappingClass));
    LOG.info(String.format(" - %s: %s", Constants.DocnoOffset, docnoOffset));

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    Path mappingFile = env.getDocnoMappingData();

    if (!fs.exists(mappingFile)) {
        LOG.error("Error, docno mapping data file " + mappingFile + "doesn't exist!");
        return 0;
    }

    DistributedCache.addCacheFile(mappingFile.toUri(), conf);

    Path outputPath = new Path(env.getTermDocVectorsDirectory());
    if (fs.exists(outputPath)) {
        LOG.info("TermDocVectors already exist: Skipping!");
        return 0;
    }

    env.writeCollectionName(collectionName);
    env.writeCollectionPath(collectionPath);
    env.writeInputFormat(inputFormat);
    env.writeDocnoMappingClass(mappingClass);
    env.writeTokenizerClass(tokenizer);
    env.writeDocnoOffset(docnoOffset);

    Job job1 = new Job(conf, "BuildTermDocVectors2:" + collectionName);
    job1.setJarByClass(BuildTermDocVectors2.class);

    job1.setNumReduceTasks(0);

    FileInputFormat.addInputPaths(job1, collectionPath);
    FileOutputFormat.setOutputPath(job1, outputPath);
    SequenceFileOutputFormat.setOutputCompressionType(job1, SequenceFile.CompressionType.RECORD);

    job1.setInputFormatClass((Class<? extends InputFormat>) Class.forName(inputFormat));
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);

    job1.setMapOutputKeyClass(IntWritable.class);
    job1.setMapOutputValueClass(LazyTermDocVector.class);
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(LazyTermDocVector.class);

    job1.setMapperClass(MyMapper.class);

    long startTime = System.currentTimeMillis();
    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // write out the number of documents in the collection
    int collectionDocCount = (int) job1.getCounters().findCounter(Docs.Total).getValue();
    env.writeCollectionDocumentCount(collectionDocCount);

    Path dlFile = env.getDoclengthsData();
    if (fs.exists(dlFile)) {
        LOG.info("DocLength data exists: Skipping!");
        return 0;
    }

    conf.setInt(Constants.CollectionDocumentCount, collectionDocCount);
    conf.set(InputPath, env.getDoclengthsDirectory().toString());
    conf.set(DocLengthDataFile, dlFile.toString());

    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    LOG.info("Writing doc length data to " + dlFile + "...");

    Job job2 = new Job(conf, "DocLengthTable2:" + collectionName);
    job2.setJarByClass(BuildTermDocVectors2.class);

    job2.setNumReduceTasks(0);
    job2.setInputFormatClass(NullInputFormat.class);
    job2.setOutputFormatClass(NullOutputFormat.class);
    job2.setMapperClass(DocLengthDataWriterMapper.class);

    startTime = System.currentTimeMillis();
    job2.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    long collectionSumOfDocLengths = job2.getCounters().findCounter(DocLengths.SumOfDocLengths).getValue();
    env.writeCollectionAverageDocumentLength((float) collectionSumOfDocLengths / collectionDocCount);

    return 0;
}

From source file:ivory.preprocess.BuildTermIdMap2.java

License:Apache License

public int runTool() throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get(Constants.IndexPath);
    String collectionName = conf.get(Constants.CollectionName);

    LOG.info("PowerTool: BuildTermIdMap2");
    LOG.info(String.format(" - %s: %s", Constants.CollectionName, collectionName));
    LOG.info(String.format(" - %s: %s", Constants.IndexPath, indexPath));

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    if (!fs.exists(new Path(indexPath))) {
        LOG.error("index path doesn't existing: skipping!");
        return 0;
    }

    Path termsFilePath = new Path(env.getIndexTermsData());
    Path termIDsFilePath = new Path(env.getIndexTermIdsData());
    Path idToTermFilePath = new Path(env.getIndexTermIdMappingData());
    Path dfByTermFilePath = new Path(env.getDfByTermData());
    Path cfByTermFilePath = new Path(env.getCfByTermData());
    Path dfByIntFilePath = new Path(env.getDfByIntData());
    Path cfByIntFilePath = new Path(env.getCfByIntData());

    if (fs.exists(termsFilePath) || fs.exists(termIDsFilePath) || fs.exists(idToTermFilePath)
            || fs.exists(dfByTermFilePath) || fs.exists(cfByTermFilePath) || fs.exists(dfByIntFilePath)
            || fs.exists(cfByIntFilePath)) {
        LOG.info("term and term id data exist: skipping!");
        return 0;
    }

    conf.setInt(Constants.CollectionTermCount, (int) env.readCollectionTermCount());

    Path tmpPath = new Path(env.getTempDirectory());
    fs.delete(tmpPath, true);

    Job job = new Job(conf, "BuildTermIdMap2:" + collectionName);

    job.setJarByClass(BuildTermIdMap2.class);
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(env.getTermDfCfDirectory()));
    FileOutputFormat.setOutputPath(job, tmpPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfIntLong.class);
    job.setOutputKeyClass(Text.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    fs.delete(tmpPath, true);

    return 0;
}

From source file:ivory.ptc.driver.BuildAnchorTextInvertedIndex.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    // Command line arguments
    String inPath = args[0];
    String outPath = args[1];
    int numReducers = Integer.parseInt(args[2]);
    String weightingSchemeClass = args[3];
    String weightingSchemeParameters = args[4];
    int numMappers = 1;
    Path inputPath = new Path(inPath);
    if (!fs.exists(inputPath)) {
        LOG.warn("Input webgraph doesn't exist...");
        return -1;
    }

    conf.set("Ivory.InputPath", inPath);
    conf.set("Ivory.OutputPath", outPath);
    conf.setInt("Ivory.NumMapTasks", numMappers);
    conf.setInt("Ivory.NumReduceTasks", numReducers);
    conf.set("Ivory.WeightingScheme", weightingSchemeClass);
    conf.set("Ivory.WeightingSchemeParameters", weightingSchemeParameters);

    AnchorTextInvertedIndex indexTool = new AnchorTextInvertedIndex(conf);
    indexTool.run();
    return 0;
}

From source file:ivory.pwsim.RunPCP.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {

    if (args.length < 6) {
        printUsage();
        return -1;
    }

    String indexPath = args[0];
    int numMappers = Integer.parseInt(args[1]);
    int numReducers = Integer.parseInt(args[2]);

    Configuration config = new Configuration();

    config.setInt("Ivory.NumMapTasks", numMappers);
    config.setInt("Ivory.NumReduceTasks", numReducers);

    int dfCut = Integer.parseInt(args[3]);

    int blockSize = Integer.parseInt(args[4]);

    String scoringModel = args[5];
    String fn = args[5];
    int i = scoringModel.lastIndexOf(".");
    if (i >= 0)
        fn = scoringModel.substring(i + 1);

    int topN = -1;
    if (args.length == 7)
        topN = Integer.parseInt(args[6]);

    config.set("Ivory.IndexPath", indexPath);
    config.set("Ivory.OutputPath", indexPath + "/pcp-dfCut=" + dfCut + "-blk=" + blockSize + "-" + fn
            + (topN > 0 ? "-topN=" + topN : ""));

    config.set("Ivory.ScoringModel", scoringModel);

    config.setInt("Ivory.DfCut", dfCut);
    config.setInt("Ivory.BlockSize", blockSize);

    config.setInt("Ivory.TopN", topN);

    PCP pwsimTask = new PCP(config);
    pwsimTask.run();

    return 0;
}

From source file:jobs.CreateUniformDoublyStochastic.java

License:Apache License

public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    int N = Integer.parseInt(args[2]);
    conf.setInt("N", N);
    int sR = Integer.parseInt(args[3]);
    conf.setInt("SR", sR);
    int sC = Integer.parseInt(args[4]);
    conf.setInt("SC", sC);
    String delim = args[5];
    conf.set("DELIM", delim);
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[6]));

    conf.set("RESNAME", args[1]);

    //heap space - should be passed via -D options rather than set by the program
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    //Create the file that we will map over.

    //open the file in hdfs
    Path outFile = new Path(args[0]);
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream out = fs.create(outFile);

    //write out an entry for each block
    int nR = N / sR + (N % sR > 0 ? 1 : 0);
    int nC = N / sC + (N % sC > 0 ? 1 : 0);

    for (int r = 0; r < nR; r++) {
        for (int c = 0; c < nC; c++) {
            out.writeUTF(String.valueOf(r) + delim + String.valueOf(c) + "\n");
        }
    }

    //close file
    out.close();

    //job
    Job job1 = new Job(conf, "CreateUniformDoubleStochastic");
    job1.setJarByClass(CreateUniformDoublyStochastic.class);

    // Map
    FileInputFormat.addInputPath(job1, outFile);
    job1.setInputFormatClass(TextInputFormat.class);
    job1.setMapperClass(UniformDoublyStochasticMapper.class);

    //Reduce
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    //job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}

From source file:jobs.EdgeListToMatrixBlock.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    //get params
    conf.setInt("SR", Integer.parseInt(args[2]));
    conf.setInt("SC", Integer.parseInt(args[3]));
    conf.set("DELIM", args[4]);
    conf.setInt("ONE", Integer.parseInt(args[5]));

    //set # of reducers
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[6]));

    conf.set("RESNAME", args[1]);

    //heap space - this should be configurable
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    //job
    Job job1 = new Job(conf, "EdgeListToMatrixBlock");
    job1.setJarByClass(EdgeListToMatrixBlock.class);

    // Map
    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    job1.setInputFormatClass(TextInputFormat.class);
    job1.setMapperClass(EdgeListBlockEntryMapper.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(BlockEntry.class);

    //Reduce       
    job1.setReducerClass(BlockEntryMatrixBlockReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    //job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}

From source file:jobs.MatrixBlockAdd.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    conf.setFloat("ALPHA", Float.parseFloat(args[3]));
    conf.setFloat("BETA", Float.parseFloat(args[4]));
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[5]));

    if (args.length >= 7)
        conf.setInt("SR", Integer.parseInt(args[6]));

    if (args.length >= 8)
        conf.setInt("SC", Integer.parseInt(args[7]));

    conf.set("LEFTNAME", args[0]);
    conf.set("RESNAME", args[2]);

    //heap space - again, should be passed via -D options rather than set by the program
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    //job
    Job job1 = new Job(conf, "MatrixBlockAdd");
    job1.setJarByClass(MatrixBlockAdd.class);

    // No Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileInputFormat.addInputPath(job1, new Path(args[1]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setMapperClass(NoNameMapper.class);

    //Reduce
    job1.setReducerClass(MatrixBlockAddReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[2]));
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    //job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}

From source file:jobs.MatrixBlockMult.java

License:Apache License

public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    conf.setFloat("SCALAR", Float.parseFloat(args[3]));

    conf.setBoolean("LTRANS", Boolean.parseBoolean(args[4]));
    conf.setBoolean("RTRANS", Boolean.parseBoolean(args[5]));

    conf.setInt("NRL", Integer.parseInt(args[6]));
    conf.setInt("NCL", Integer.parseInt(args[7]));
    conf.setInt("NRR", Integer.parseInt(args[8]));
    conf.setInt("NCR", Integer.parseInt(args[9]));

    //set # of reducers
    conf.setInt("mapred.reduce.tasks", Integer.parseInt(args[10]));

    //Get optional blocksize parameters
    if (args.length >= 12)
        conf.setInt("SRL", Integer.parseInt(args[11]));

    if (args.length >= 13)
        conf.setInt("SCL", Integer.parseInt(args[12]));

    if (args.length >= 14)
        conf.setInt("SRR", Integer.parseInt(args[13]));

    if (args.length >= 15)
        conf.setInt("SCR", Integer.parseInt(args[14]));

    conf.set("LEFTNAME", args[0]);
    conf.set("RIGHTNAME", args[1]);
    conf.set("RESNAME", args[2]);

    //heap space - should be passed via -D options rather than set by the program
    conf.set("mapred.map.child.java.opts", "-Xmx3G");
    conf.set("mapred.reduce.child.java.opts", "-Xmx3G");

    //job
    Job job1 = new Job(conf, "MatrixBlockMult");
    job1.setJarByClass(MatrixBlockMult.class);

    // Map
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileInputFormat.addInputPath(job1, new Path(args[1]));
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setMapperClass(BlockMultiplicationGroupingMapper.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(MatrixBlock.class);

    //Reduce       
    job1.setReducerClass(MatrixBlockMultReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(MatrixBlock.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[2]));
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    //job1.setOutputFormatClass(TextOutputFormat.class);

    return job1.waitForCompletion(false) ? 0 : 1;
}