Example usage for org.apache.hadoop.conf Configuration setInt

List of usage examples for org.apache.hadoop.conf Configuration setInt

Introduction

In this page you can find the example usage for org.apache.hadoop.conf Configuration setInt.

Prototype

public void setInt(String name, int value) 

Source Link

Document

Set the value of the name property to an int.

Usage

From source file:eu.scape_project.tpid.TomarPrepareInputdata.java

License:Apache License

/**
 * Start./*  w ww .  j  a  v a  2 s. c o m*/
 *
 * @param args Command line arguments
 * @throws IOException
 * @throws ParseException
 */
private static void start(String[] args) throws IOException, ParseException {

    // hadoop configuration
    Configuration hadoopConf = new Configuration();
    // Command line interface
    config = new TpidCliConfig();

    CommandLineParser cmdParser = new PosixParser();
    GenericOptionsParser gop = new GenericOptionsParser(hadoopConf, args);

    TpidOptions tpidOptions = new TpidOptions();
    CommandLine cmd = cmdParser.parse(tpidOptions.options, gop.getRemainingArgs());
    if ((args.length == 0) || (cmd.hasOption(tpidOptions.HELP_OPT))) {
        tpidOptions.exit("Help", 0);
    } else {
        tpidOptions.initOptions(cmd, config);
    }

    // configuration properties
    if (config.getPropertiesFilePath() != null) {
        pu = new PropertyUtil(config.getPropertiesFilePath(), true);
    } else {
        pu = new PropertyUtil("/eu/scape_project/tpid/config.properties", false);
    }

    // cli parameter has priority over default configuration
    int cliParamNumPerInv = config.getNumItemsPerInvokation();
    int defaultNumPerInv = Integer.parseInt(pu.getProp("default.itemsperinvokation"));
    int numPerInv = (cliParamNumPerInv != 0) ? cliParamNumPerInv : defaultNumPerInv;
    // setting hadoop configuration parameters so that they can be used
    // during MapReduce
    hadoopConf.setInt("num_items_per_task", numPerInv);
    hadoopConf.set("output_file_suffix", pu.getProp("default.outputfilesuffix"));
    hadoopConf.set("scape_platform_invoke", pu.getProp("tomar.invoke.command"));
    hadoopConf.set("unpack_hdfs_path", pu.getProp("default.hdfsdir.unpacked"));
    hadoopConf.set("joboutput_hdfs_path", pu.getProp("default.hdfsdir.joboutput"));
    hadoopConf.set("tooloutput_hdfs_path", pu.getProp("default.hdfsdir.toolout"));
    hadoopConf.set("container_file_suffix", pu.getProp("containerfilesuffix"));
    hadoopConf.set("tomar_param_pattern", pu.getProp("tomar.param.pattern"));
    hadoopConf.setBoolean("pseudo_distributed", config.isPseudoDistributed());
    startHadoopJob(hadoopConf);
}

From source file:example.TestLineRecordReader.java

License:Apache License

private void testSplitRecordsForFile(Configuration conf, long firstSplitLength, long testFileSize,
        Path testFilePath) throws IOException {
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);

    String delimiter = conf.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    }/*from  www . j  a  v a  2s.  co m*/
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data without splitting to count the records
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsNoSplits = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsNoSplits;
    }
    reader.close();

    // count the records in the first split
    split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsFirstSplit = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsFirstSplit;
    }
    reader.close();

    // count the records in the second split
    split = new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsRemainingSplits = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsRemainingSplits;
    }
    reader.close();
    assertEquals("Unexpected number of records in split ", numRecordsNoSplits,
            numRecordsFirstSplit + numRecordsRemainingSplits);
}

From source file:example.TestLineRecordReader.java

License:Apache License

public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {

    // Set up context
    File testFile = new File(testFileUrl.getFile());
    long testFileSize = testFile.length();
    Path testFilePath = new Path(testFile.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // Gather the records returned by the record reader
    ArrayList<String> records = new ArrayList<String>();

    long offset = 0;
    while (offset < testFileSize) {
        FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
        LineRecordReader reader = new LineRecordReader();
        reader.initialize(split, context);

        while (reader.nextKeyValue()) {
            records.add(reader.getCurrentValue().toString());
        }/*  w ww . j av  a2  s .c  om*/
        offset += splitSize;
    }
    return records;
}

From source file:example.TestLineRecordReader.java

License:Apache License

@Test
public void testStripBOM() throws IOException {
    // the test data contains a BOM at the start of the file
    // confirm the BOM is skipped by LineRecordReader
    String UTF8_BOM = "\uFEFF";
    URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt");
    assertNotNull("Cannot find testBOM.txt", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);/*from ww  w . j  a  va  2  s . c  o  m*/
    int numRecords = 0;
    boolean firstLine = true;
    boolean skipBOM = true;
    while (reader.nextKeyValue()) {
        if (firstLine) {
            firstLine = false;
            if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) {
                skipBOM = false;
            }
        }
        ++numRecords;
    }
    reader.close();

    assertTrue("BOM is not skipped", skipBOM);
}

From source file:example.TestLineRecordReader.java

License:Apache License

@Test
public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);// w w w  .  j  a v a2s  . c o  m

    //noinspection StatementWithEmptyBody
    while (reader.nextKeyValue())
        ;
    reader.close();
    reader.close();

    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.add(CodecPool.getDecompressor(codec));
    }
    assertEquals(10, decompressors.size());
}

From source file:example.TestLineRecordReader.java

License:Apache License

@Test
public void testUncompressedInput() throws Exception {
    Configuration conf = new Configuration();
    // single char delimiter, best case
    String inputData = "abc+def+ghi+jkl+mno+pqr+stu+vw +xyz";
    Path inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }//from   www . j  a  v a 2s  .c  o m
    }
    // multi char delimiter, best case
    inputData = "abc|+|def|+|ghi|+|jkl|+|mno|+|pqr|+|stu|+|vw |+|xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "|+|");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // single char delimiter with empty records
    inputData = "abc+def++ghi+jkl++mno+pqr++stu+vw ++xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with empty records
    inputData = "abc|+||+|defghi|+|jkl|+||+|mno|+|pqr|+||+|stu|+|vw |+||+|xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "|+|");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with starting part of the delimiter in the data
    inputData = "abc+def+-ghi+jkl+-mno+pqr+-stu+vw +-xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+-");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with newline as start of the delimiter
    inputData = "abc\n+def\n+ghi\n+jkl\n+mno";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "\n+");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with newline in delimiter and in data
    inputData = "abc\ndef+\nghi+\njkl\nmno";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+\n");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
}

From source file:example.TestLineRecordReader.java

License:Apache License

@Test
public void testUncompressedInputContainingCRLF() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "a\r\nb\rc\nd\r\n";
    Path inputFile = createInputFile(conf, inputData);
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }/*from  ww w  . j  a va 2s.  co m*/
    }
}

From source file:example.TestLineRecordReader.java

License:Apache License

@Test
public void testUncompressedInputCustomDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    String inputData = "abcdefghij++kl++mno";
    Path inputFile = createInputFile(conf, inputData);
    String delimiter = "++";
    byte[] recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    int splitLength = 15;
    FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);//from  w  w w .j  a  va 2  s  .  co m
    // Get first record: "abcdefghij"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    LongWritable key = reader.getCurrentKey();
    Text value = reader.getCurrentValue();
    assertEquals("Wrong length for record value", 10, value.getLength());
    assertEquals("Wrong position after record read", 0, key.get());
    // Get second record: "kl"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 2, value.getLength());
    // Key should be 12 right after "abcdefghij++"
    assertEquals("Wrong position after record read", 12, key.get());
    // Get third record: "mno"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Key should be 16 right after "abcdefghij++kl++"
    assertEquals("Wrong position after record read", 16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "abcdefghij++kl++mno"
    assertEquals("Wrong position after record read", 19, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // No record is in the second split because the second split dropped
    // the first record, which was already reported by the first split.
    assertFalse("Unexpected record returned", reader.nextKeyValue());
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();

    // multi char delimiter with starting part of the delimiter in the data
    inputData = "abcd+efgh++ijk++mno";
    inputFile = createInputFile(conf, inputData);
    splitLength = 5;
    split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcd+efgh"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    assertEquals("Wrong position after record read", 0, key.get());
    assertEquals("Wrong length for record value", 9, value.getLength());
    // should have jumped over the delimiter, no record
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 11, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    // next split: check for duplicate or dropped records
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get second record: "ijk" first in this split
    assertEquals("Wrong position after record read", 11, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Get third record: "mno" second in this split
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong position after record read", 16, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // should be at the end of the input
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 19, key.get());
    reader.close();

    inputData = "abcd|efgh|+|ij|kl|+|mno|pqr";
    inputFile = createInputFile(conf, inputData);
    delimiter = "|+|";
    recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    // walking over the buffer and split sizes checks for proper processing
    // of the ambiguous bytes of the delimiter
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            // track where we are in the inputdata
            int keyPosition = 0;
            conf.setInt("io.file.buffer.size", bufferSize);
            split = new FileSplit(inputFile, 0, bufferSize, (String[]) null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.initialize(split, context);
            // Get the first record: "abcd|efgh" always possible
            assertTrue("Expected record got nothing", reader.nextKeyValue());
            key = reader.getCurrentKey();
            value = reader.getCurrentValue();
            assertTrue("abcd|efgh".equals(value.toString()));
            // Position should be 0 right at the start
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // Position should be 12 right after the first "|+|"
            keyPosition = 12;
            // get the next record: "ij|kl" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "ij|kl"
                assertTrue("ij|kl".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be 20 after the second "|+|"
                keyPosition = 20;
            }
            // get the third record: "mno|pqr" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "mno|pqr"
                assertTrue("mno|pqr".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be the end of the input
                keyPosition = inputData.length();
            }
            assertFalse("Unexpected record returned", reader.nextKeyValue());
            // no more records can be read we should be at the last position
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // after refresh should be empty
            key = reader.getCurrentKey();
            assertNull("Unexpected key returned", key);
            reader.close();
        }
    }
}

From source file:example.TestLineRecordReader.java

License:Apache License

@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "1234567890\r\n12\r\n345";
    Path inputFile = createInputFile(conf, inputData);
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(null);
    reader.initialize(split, context);//from   w  w  w  .  j  a va 2s .c  om
    LongWritable key;
    Text value;
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record:"1234567890"
    assertEquals(10, value.getLength());
    assertEquals(0, key.get());
    reader.nextKeyValue();
    // Get second record:"12"
    assertEquals(2, value.getLength());
    // Key should be 12 right after "1234567890\r\n"
    assertEquals(12, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());

    split = new FileSplit(inputFile, 15, 4, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    // The second split dropped the first record "\n"
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get third record:"345"
    assertEquals(3, value.getLength());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "1234567890\r\n12\r\n345"
    assertEquals(19, key.get());

    inputData = "123456789\r\r\n";
    inputFile = createInputFile(conf, inputData);
    split = new FileSplit(inputFile, 0, 12, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record:"123456789"
    assertEquals(9, value.getLength());
    assertEquals(0, key.get());
    reader.nextKeyValue();
    // Get second record:""
    assertEquals(0, value.getLength());
    // Key should be 10 right after "123456789\r"
    assertEquals(10, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 12 right after "123456789\r\r\n"
    assertEquals(12, key.get());
}

From source file:format.OverlapLengthInputFormat.java

License:Apache License

/**
 * Set the length of each record//from w  w w  . java 2s  .  com
 * @param conf configuration
 * @param recordLength the length of a record
 */
public static void setRecordLength(Configuration conf, int recordLength) {
    conf.setInt(FIXED_RECORD_LENGTH, recordLength);
}