Example usage for org.apache.hadoop.conf Configuration Configuration

Introduction

This page collects usage examples for the no-argument constructor Configuration() of org.apache.hadoop.conf.Configuration.

Prototype

public Configuration() 

Document

A new configuration.
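
The snippets below come from real projects. As a minimal, self-contained sketch of the constructor itself (not taken from any of them; the property names are illustrative assumptions):

    // Loads core-default.xml and core-site.xml from the classpath, if present.
    Configuration conf = new Configuration();
    // Hypothetical override, e.g. to force the local file system in a test.
    conf.set("fs.defaultFS", "file:///");
    // Read a value back, falling back to a default if the key is unset.
    int replication = conf.getInt("dfs.replication", 3);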

Usage

From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License:Apache License

private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);

        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);

        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);

        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

        runJob(job, "Prefix Creation");
    }
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducer.java

License:Apache License

private void getPgStartIndex(Configuration conf) {
    try {
        Path path = new Path(basePGDir);
        FileSystem fs = path.getFileSystem(new Configuration());

        if (!fs.exists(path)) {
            pgStartIndex = 0;
            return;
        }

        int largestIx = 0;
        for (FileStatus file : fs.listStatus(path)) {
            String tmp = file.getPath().toString();
            if (!tmp.contains("bucket")) {
                continue;
            }
            tmp = tmp.substring(tmp.lastIndexOf('/'), tmp.length());
            int ix = Integer.parseInt(tmp.split("-")[1]);
            largestIx = Math.max(largestIx, ix);
            pgStartIndex += 1;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

private Configuration createConfiguration() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(MIN_SUP_KEY, 1);
    conf.setInt(NUMBER_OF_MAPPERS_KEY, 2);
    conf.setInt(SUBDB_SIZE, 10);
    conf.setStrings(OUTPUT_DIR_KEY, "file:///out");
    return conf;
}

From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License:Apache License

private Configuration createConfiguration(int... numberOfLines) {
    Configuration conf = new Configuration();
    if (numberOfLines.length > 0) {
        conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
    }
    conf.set("fs.default.name", "file:///");
    conf.setBoolean("fs.file.impl.disable.cache", false);
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
    return conf;
}

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

/**
 * Cleans the Hadoop file system by deleting the specified files if they exist.
 *
 * @param files
 *          the files to delete
 */
public static void cleanDirs(String... files) {
    System.out.println("[Cleaning]: Cleaning HDFS");
    Configuration conf = new Configuration();
    for (String filename : files) {
        System.out.println("[Cleaning]: Trying to delete " + filename);
        Path path = new Path(filename);
        try {
            FileSystem fs = path.getFileSystem(conf);
            if (fs.exists(path)) {
                if (fs.delete(path, true)) {
                    System.out.println("[Cleaning]: Deleted " + filename);
                } else {
                    System.out.println("[Cleaning]: Error while deleting " + filename);
                }
            } else {
                System.out.println("[Cleaning]: " + filename + " does not exist on HDFS");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
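
A hypothetical invocation of the helper above (the paths are made-up examples, not from the project):

    Tools.cleanDirs("hdfs:///tmp/bigfim/tg1", "hdfs:///tmp/bigfim/pg-trieGroup1");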

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration());

    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
            String filename = sp[sp.length - 1];
            if (toKeep.contains(filename)) {
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
                continue;
            }
            cleanDirs(fs.getPath().toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:be.ugent.intec.halvade.Halvade.java

License:Open Source License

public static void runMapReduce(String[] args) {
    int res = 0;
    try {
        Configuration c = new Configuration();
        MapReduceRunner runner = new MapReduceRunner();
        res = ToolRunner.run(c, runner, args);
    } catch (Exception ex) {
        Logger.EXCEPTION(ex);
    }
    System.exit(res);
}

From source file:be.ugent.intec.halvade.uploader.HalvadeUploader.java

License:Open Source License

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    // TODO code application logic here
    Configuration c = new Configuration();
    HalvadeUploader hau = new HalvadeUploader();
    int res = ToolRunner.run(c, hau, args);
}

From source file:be.ugent.intec.halvade.uploader.input.BaseFileReader.java

protected static BufferedReader getReader(boolean readFromDistributedStorage, String file)
        throws FileNotFoundException, IOException {
    InputStream hdfsIn;
    if (readFromDistributedStorage) {
        Path pt = new Path(file);
        FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
        hdfsIn = fs.open(pt);
        // read the stream in the correct format!
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(hdfsIn, BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(hdfsIn);
            return new BufferedReader(new InputStreamReader(bzip2));
        } else
            return new BufferedReader(new InputStreamReader(hdfsIn));

    } else {
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(new FileInputStream(file), BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(new FileInputStream(file));
            return new BufferedReader(new InputStreamReader(bzip2));
        } else if (file.equals("-")) {
            return new BufferedReader(new InputStreamReader(System.in));
        } else
            return new BufferedReader(new FileReader(file));
    }
}