Example usage for org.apache.hadoop.conf Configuration Configuration

Introduction

This page collects usage examples for the no-argument constructor Configuration() of org.apache.hadoop.conf.Configuration.

Prototype

public Configuration() 

Document

A new configuration.
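
The snippets below come from real projects. As a minimal, self-contained sketch of the constructor itself (not taken from any of them; the property names are illustrative assumptions):

    // Loads core-default.xml and core-site.xml from the classpath, if present.
    Configuration conf = new Configuration();
    // Hypothetical override, e.g. to force the local file system in a test.
    conf.set("fs.defaultFS", "file:///");
    // Read a value back, falling back to a default if the key is unset.
    int replication = conf.getInt("dfs.replication", 3);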

Usage

From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License:Apache License

private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);

        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);

        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);

        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

        runJob(job, "Prefix Creation");
    }
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java

License:Apache License

private void getPgStartIndex(Configuration conf) {
    try {
        Path path = new Path(basePGDir);
        FileSystem fs = path.getFileSystem(new Configuration());

        if (!fs.exists(path)) {
            pgStartIndex = 0;
            return;
        }

        int largestIx = 0;
        for (FileStatus file : fs.listStatus(path)) {
            String tmp = file.getPath().toString();
            if (!tmp.contains("bucket")) {
                continue;
            }
            tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
            int ix = Integer.parseInt(tmp.split("-")[1]);
            largestIx = Math.max(largestIx, ix);
            pgStartIndex += 1;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

private Configuration createConfiguration() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(MIN_SUP_KEY, 1);
    conf.setInt(NUMBER_OF_MAPPERS_KEY, 2);
    conf.setInt(SUBDB_SIZE, 10);
    conf.setStrings(OUTPUT_DIR_KEY, "file:///out");
    return conf;
}

From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License:Apache License

private Configuration createConfiguration(int... numberOfLines) {
    Configuration conf = new Configuration();
    if (numberOfLines.length > 0) {
        conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
    }
    conf.set("fs.default.name", "file:///");
    conf.setBoolean("fs.file.impl.disable.cache", false);
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
    return conf;
}

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

/**
 * Cleans the Hadoop file system by deleting the specified files if they exist.
 *
 * @param files
 *          the files to delete
 */
public static void cleanDirs(String... files) {
    System.out.println("[Cleaning]: Cleaning HDFS");
    Configuration conf = new Configuration();
    for (String filename : files) {
        System.out.println("[Cleaning]: Trying to delete " + filename);
        Path path = new Path(filename);
        try {
            FileSystem fs = path.getFileSystem(conf);
            if (fs.exists(path)) {
                if (fs.delete(path, true)) {
                    System.out.println("[Cleaning]: Deleted " + filename);
                } else {
                    System.out.println("[Cleaning]: Error while deleting " + filename);
                }
            } else {
                System.out.println("[Cleaning]: " + filename + " does not exist on HDFS");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
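
A hypothetical invocation of the helper above (the paths are made-up examples, not from the project):

    Tools.cleanDirs("hdfs:///tmp/bigfim/tg1", "hdfs:///tmp/bigfim/pg-trieGroup1");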

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration());

    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
            String filename = sp[sp.length - 1];
            if (toKeep.contains(filename)) {
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
                continue;
            }
            cleanDirs(fs.getPath().toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:be.ugent.intec.halvade.Halvade.java

License:Open Source License

public static void runMapReduce(String[] args) {
    int res = 0;
    try {
        Configuration c = new Configuration();
        MapReduceRunner runner = new MapReduceRunner();
        res = ToolRunner.run(c, runner, args);
    } catch (Exception ex) {
        Logger.EXCEPTION(ex);
    }
    System.exit(res);
}

From source file:be.ugent.intec.halvade.uploader.HalvadeUploader.java

License:Open Source License

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    // TODO code application logic here
    Configuration c = new Configuration();
    HalvadeUploader hau = new HalvadeUploader();
    int res = ToolRunner.run(c, hau, args);
}

From source file:be.ugent.intec.halvade.uploader.input.BaseFileReader.java

protected static BufferedReader getReader(boolean readFromDistributedStorage, String file)
        throws FileNotFoundException, IOException {
    InputStream hdfsIn;
    if (readFromDistributedStorage) {
        Path pt = new Path(file);
        FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
        hdfsIn = fs.open(pt);
        // read the stream in the correct format!
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(hdfsIn, BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(hdfsIn);
            return new BufferedReader(new InputStreamReader(bzip2));
        } else
            return new BufferedReader(new InputStreamReader(hdfsIn));

    } else {
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(new FileInputStream(file), BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(new FileInputStream(file));
            return new BufferedReader(new InputStreamReader(bzip2));
        } else if (file.equals("-")) {
            return new BufferedReader(new InputStreamReader(System.in));
        } else
            return new BufferedReader(new FileReader(file));
    }
}