Example usage for org.apache.hadoop.conf Configuration setClass

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.setClass.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Document

Set the value of the name property to the name of theClass, which must implement the given interface xface; a RuntimeException is thrown if it does not.
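
A minimal sketch of the set/get round trip (the property name "my.codec.class" and the codec choices are illustrative, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;

Configuration conf = new Configuration();
// setClass verifies that GzipCodec implements CompressionCodec before storing its name
conf.setClass("my.codec.class", GzipCodec.class, CompressionCodec.class);
// getClass reads the name back, falling back to DefaultCodec if the property is unset
Class<? extends CompressionCodec> codecClass =
        conf.getClass("my.codec.class", DefaultCodec.class, CompressionCodec.class);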

Usage

From source file:edu.umn.cs.spatialHadoop.visualization.Rasterizer.java

License:Open Source License

public static void setRasterizer(Configuration job, Class<? extends Rasterizer> rasterizerClass) {
    job.setClass(RasterizerClass, rasterizerClass, Rasterizer.class);
}
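
A matching getter would read the class back and instantiate it reflectively; a sketch, assuming the same RasterizerClass configuration key (SpatialHadoop ships a similar getRasterizer):

public static Rasterizer getRasterizer(Configuration job) {
    Class<? extends Rasterizer> klass = job.getClass(RasterizerClass, null, Rasterizer.class);
    if (klass == null)
        throw new RuntimeException("Rasterizer class not configured");
    try {
        return klass.newInstance();
    } catch (InstantiationException | IllegalAccessException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    }
}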

From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java

License:Open Source License

/**
 * Generates a single level using a MapReduce job and returns the created job.
 * @param inFiles the input files containing the shapes to plot
 * @param outFile the path of the output image
 * @param plotterClass the plotter implementation used to rasterize the shapes
 * @param params additional parameters for the operation
 * @return the submitted (or completed) job
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rasterizer", e);
    }

    Job job = new Job(params, "SingleLevelPlot");
    job.setJarByClass(SingleLevelPlot.class);
    job.setJobName("SingleLevelPlot");
    // Set plotter
    Configuration conf = job.getConfiguration();
    Plotter.setPlotter(conf, plotterClass);
    // Set input file MBR
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    Rectangle drawRect = (Rectangle) params.getShape("rect");
    if (inputMBR == null)
        inputMBR = drawRect != null ? drawRect : FileMBR.fileMBR(inFiles, params);
    OperationsParams.setShape(conf, InputMBR, inputMBR);
    if (drawRect != null)
        OperationsParams.setShape(conf, SpatialInputFormat3.InputQueryRange, drawRect);

    // Adjust width and height if aspect ratio is to be kept
    int imageWidth = conf.getInt("width", 1000);
    int imageHeight = conf.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) imageWidth / imageHeight) {
            // Fix width and change height
            imageHeight = (int) (inputMBR.getHeight() * imageWidth / inputMBR.getWidth());
            // Make divisible by two for compatibility with ffmpeg
            if (imageHeight % 2 == 1)
                imageHeight--;
            conf.setInt("height", imageHeight);
        } else {
            imageWidth = (int) (inputMBR.getWidth() * imageHeight / inputMBR.getHeight());
            conf.setInt("width", imageWidth);
        }
    }

    boolean merge = conf.getBoolean("merge", true);
    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFiles);
    if (conf.getBoolean("output", true)) {
        if (merge) {
            job.setOutputFormatClass(CanvasOutputFormat.class);
            conf.setClass("mapred.output.committer.class", CanvasOutputFormat.ImageWriterOld.class,
                    org.apache.hadoop.mapred.OutputCommitter.class);
        } else {
            job.setOutputFormatClass(ImageOutputFormat.class);
        }
        CanvasOutputFormat.setOutputPath(job, outFile);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Set mapper and reducer based on the partitioning scheme
    String partition = conf.get("partition", "none");
    ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
    if (partition.equalsIgnoreCase("none")) {
        LOG.info("Using no-partition plot");
        job.setMapperClass(NoPartitionPlotMap.class);
        job.setCombinerClass(NoPartitionPlotCombine.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(plotter.getCanvasClass());
        if (merge) {
            int numSplits = new SpatialInputFormat3().getSplits(job).size();
            job.setReducerClass(NoPartitionPlotReduce.class);
            // Set number of reduce tasks according to cluster status
            int maxReduce = Math.max(1, clusterStatus.getMaxReduceTasks() * 7 / 8);
            job.setNumReduceTasks(Math.max(1, Math.min(maxReduce, numSplits / maxReduce)));
        } else {
            job.setNumReduceTasks(0);
        }
    } else {
        LOG.info("Using repartition plot");
        Partitioner partitioner;
        if (partition.equals("pixel")) {
            // Special case for pixel level partitioning as it depends on the
            // visualization parameters
            partitioner = new GridPartitioner(inputMBR, imageWidth, imageHeight);
        } else if (partition.equals("grid")) {
            int numBlocks = 0;
            for (Path in : inFiles) {
                FileSystem fs = in.getFileSystem(params);
                long size = FileUtil.getPathSize(fs, in);
                long blockSize = fs.getDefaultBlockSize(in);
                numBlocks += Math.ceil(size / (double) blockSize);
            }
            int numPartitions = numBlocks * 1000;
            int gridSize = (int) Math.ceil(Math.sqrt(numPartitions));
            partitioner = new GridPartitioner(inputMBR, gridSize, gridSize);
        } else {
            // Use a standard partitioner as created by the indexer
            partitioner = Indexer.createPartitioner(inFiles, outFile, conf, partition);
        }
        Shape shape = params.getShape("shape");
        job.setMapperClass(RepartitionPlotMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setReducerClass(RepartitionPlotReduce.class);
        // Set number of reducers according to cluster size
        job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks() * 9 / 10));
        Partitioner.setPartitioner(conf, partitioner);
    }

    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}

From source file:eu.dnetlib.iis.core.javamapreduce.hack.AvroMultipleOutputs.java

License:Apache License

/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param job               job to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only, cannot be the word 'part' as
 *                          that is reserved for the default output.
 * @param outputFormatClass OutputFormat class.
 * @param keySchema          Schema for the Key
 * @param valueSchema        Schema for the Value (used in case of AvroKeyValueOutputFormat or null)
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, Class<? extends OutputFormat> outputFormatClass,
        Schema keySchema, Schema valueSchema) {
    checkNamedOutputName(job, namedOutput, true);
    Configuration conf = job.getConfiguration();
    conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.set(MO_PREFIX + namedOutput + ".keyschema", keySchema.toString());
    if (valueSchema != null) {
        conf.set(MO_PREFIX + namedOutput + ".valueschema", valueSchema.toString());
    }
}
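
A call site for this helper might look like the following; the output name "stats" and the use of Avro's AvroKeyOutputFormat are illustrative assumptions:

// Hypothetical key schema; the value schema may be null unless AvroKeyValueOutputFormat is used
Schema keySchema = Schema.create(Schema.Type.STRING);
AvroMultipleOutputs.addNamedOutput(job, "stats", AvroKeyOutputFormat.class, keySchema, null);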

From source file:gaffer.accumulo.bulkimport.BulkImportDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length < 3) {
        System.err.println("Usage: " + BulkImportDriver.class.getName()
                + " <inputpath> <output_path> <accumulo_properties_file>");
        return 1;
    }

    // Gets paths
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1] + "/data_for_accumulo/");
    Path splitsFilePath = new Path(args[1] + "/splits_file");
    String accumuloPropertiesFile = args[2];

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist - create the table before running this");
        return 1;
    }

    // Get the current splits from the table.
    // (This assumes that we have already created the table using <code>InitialiseTable</code>.)
    Collection<Text> splits = conn.tableOperations().getSplits(tableName);
    int numSplits = splits.size();
    System.out.println("Number of splits in table is " + numSplits);

    // Write current splits to a file (this is needed so that the following MapReduce
    // job can move them to the DistributedCache).
    IngestUtils.createSplitsFile(conn, tableName, fs, splitsFilePath);

    // Run MapReduce to output data suitable for bulk import to Accumulo
    // Conf and job
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Convert data to Accumulo format: input = " + inputPath + ", output = " + outputPath);

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, inputPath);

    // Mapper
    job.setMapperClass(BulkImportMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Partitioner
    job.setPartitionerClass(KeyRangePartitioner.class);
    KeyRangePartitioner.setSplitFile(job, splitsFilePath.toString());

    // Reducer
    job.setReducerClass(BulkImportReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(numSplits + 1);

    // Output
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    AccumuloFileOutputFormat.setOutputPath(job, outputPath);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    return 0;
}
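
Drivers like this are normally launched through ToolRunner, which wires command-line generic options into the configuration returned by getConf(); a sketch, assuming BulkImportDriver implements org.apache.hadoop.util.Tool:

public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new BulkImportDriver(), args);
    System.exit(exitCode);
}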

From source file:gaffer.accumulo.inputformat.example.ExampleDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();

    // Connect to Accumulo, so we can check connection and check that the
    // table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create AccumuloBackedGraph and set view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    //    - Time window
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    //  - Roll up over time and visibility iterator
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    //    - Seeds: if a seeds file was specified, read the seeds and restrict the query to them
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Example MapReduce against Gaffer data in Accumulo format: input = " + tableName
            + ", output = " + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(ExampleMapper.class);
    job.setMapOutputKeyClass(GraphElement.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Reducer - use default IdentityReducer for this example
    job.setOutputKeyClass(GraphElement.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    return 0;
}

From source file:gaffer.analytic.impl.GraphStatistics.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo, so we can check connection and check that the
    // table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create graph and update configuration based on the view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName + ", output = "
            + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(GraphStatisticsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Combiner
    job.setCombinerClass(GraphStatisticsReducer.class);

    // Reducer
    job.setReducerClass(GraphStatisticsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Write results out
    System.out.println("Summary of graph");
    for (FileStatus file : fs.listStatus(outputPath)) {
        if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            Text text = new Text();
            SetOfStatistics stats = new SetOfStatistics();
            while (reader.next(text, stats)) {
                System.out.println(text + ", " + stats);
            }
            reader.close();
        }
    }

    return 0;
}

From source file:hydrograph.engine.cascading.scheme.TextDelimitedAndFixedWidth.java

License:Apache License

@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));

    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", DelimitedAndFixedWidthInputFormat.class, InputFormat.class);
    conf.set("charsetName", charsetName);
    conf.set("quote", quote);
    conf.set("lengthsAndDelimiters", DelimitedAndFixedWidthHelper.arrayToString(lengthsAndDelimiters));
    conf.setStrings("lengthsAndDelimitersType", lengthsAndDelimitersType);
}

From source file:hydrograph.engine.cascading.scheme.TextDelimitedAndFixedWidth.java

License:Apache License

@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (tap.getFullIdentifier(conf).endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + getOutputPath(conf));
    conf.setBoolean("mapred.mapper.new-api", false);
    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.key.class", Text.class, Object.class);
    conf.setClass("mapred.output.value.class", Text.class, Object.class);
    conf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
}

From source file:io.prestosql.plugin.hive.HdfsConfigurationInitializer.java

License:Apache License

public void initializeConfiguration(Configuration config) {
    copy(resourcesConfiguration, config);

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, NoOpDNSToSwitchMapping.class,
            DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass(HADOOP_RPC_SOCKET_FACTORY_CLASS_DEFAULT_KEY, SocksSocketFactory.class,
                SocketFactory.class);
        config.set(HADOOP_SOCKS_SERVER_KEY, socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings(DFS_DOMAIN_SOCKET_PATH_KEY, domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get(DFS_DOMAIN_SOCKET_PATH_KEY, "").trim().isEmpty()) {
        config.setBooleanIfUnset(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
    }

    config.setInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY, toIntExact(dfsTimeout.toMillis()));
    config.setInt(IPC_PING_INTERVAL_KEY, toIntExact(ipcPingInterval.toMillis()));
    config.setInt(IPC_CLIENT_CONNECT_TIMEOUT_KEY, toIntExact(dfsConnectTimeout.toMillis()));
    config.setInt(IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, dfsConnectMaxRetries);

    if (isHdfsWireEncryptionEnabled) {
        config.set(HADOOP_RPC_PROTECTION, "privacy");
        config.setBoolean("dfs.encrypt.data.transfer", true);
    }

    config.setInt("fs.cache.max-size", fileSystemMaxCacheSize);

    config.setInt(LineRecordReader.MAX_LINE_LENGTH, textMaxLineLength);

    configureCompression(config, compressionCodec);

    s3ConfigurationUpdater.updateConfiguration(config);
    gcsConfigurationInitialize.updateConfiguration(config);
}
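
The NoOpDNSToSwitchMapping registered above only has to satisfy the DNSToSwitchMapping interface while doing no lookups; a minimal sketch of such a mapping (Presto's actual implementation may differ):

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetworkTopology;

public class NoOpDNSToSwitchMapping implements DNSToSwitchMapping {
    @Override
    public List<String> resolve(List<String> names) {
        // Place every node in the default rack without issuing any DNS queries
        return Collections.nCopies(names.size(), NetworkTopology.DEFAULT_RACK);
    }

    @Override
    public void reloadCachedMappings() {
        // no cache to reload
    }

    @Override
    public void reloadCachedMappings(List<String> names) {
        // no cache to reload
    }
}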

From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License:Apache License

private static void setupPipesJob(Job job) throws IOException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    // default map output types to Text
    if (!getIsJavaMapper(conf)) {
        job.setMapperClass(PipesMapper.class);
        // Save the user's partitioner and hook in ours.
        setJavaPartitioner(conf, job.getPartitionerClass());
        job.setPartitionerClass(PipesPartitioner.class);
    }
    if (!getIsJavaReducer(conf)) {
        job.setReducerClass(PipesReducer.class);
        if (!getIsJavaRecordWriter(conf)) {
            job.setOutputFormatClass(NullOutputFormat.class);
        }
    }
    String textClassname = Text.class.getName();
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);

    // Use PipesNonJavaInputFormat if necessary to handle progress reporting
    // from C++ RecordReaders ...
    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
        conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        job.setInputFormatClass(PipesNonJavaInputFormat.class);
    }

    if (avroInput != null) {
        if (explicitInputFormat) {
            conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        } // else let the bridge fall back to the appropriate Avro IF
        switch (avroInput) {
        case K:
            job.setInputFormatClass(PydoopAvroInputKeyBridge.class);
            break;
        case V:
            job.setInputFormatClass(PydoopAvroInputValueBridge.class);
            break;
        case KV:
            job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro input type");
        }
    }
    if (avroOutput != null) {
        if (explicitOutputFormat) {
            conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(), OutputFormat.class);
        } // else let the bridge fall back to the appropriate Avro OF
        conf.set(props.getProperty("AVRO_OUTPUT"), avroOutput.name());
        switch (avroOutput) {
        case K:
            job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class);
            break;
        case V:
            job.setOutputFormatClass(PydoopAvroOutputValueBridge.class);
            break;
        case KV:
            job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro output type");
        }
    }

    String exec = getExecutable(conf);
    if (exec == null) {
        String msg = "No application program defined.";
        throw new IllegalArgumentException(msg);
    }
    // add default debug script only when executable is expressed as
    // <path>#<executable>
    //FIXME: this is kind of useless if the pipes program is not in c++
    if (exec.contains("#")) {
        // set default gdb commands for map and reduce task
        String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript);
        setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript);
    }
    URI[] fileCache = DistributedCache.getCacheFiles(conf);
    if (fileCache == null) {
        fileCache = new URI[1];
    } else {
        URI[] tmp = new URI[fileCache.length + 1];
        System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
        fileCache = tmp;
    }
    try {
        fileCache[0] = new URI(exec);
    } catch (URISyntaxException e) {
        String msg = "Problem parsing executable URI " + exec;
        IOException ie = new IOException(msg);
        ie.initCause(e);
        throw ie;
    }
    DistributedCache.setCacheFiles(fileCache, conf);
}