List of usage examples for org.apache.hadoop.conf Configuration setInt
public void setInt(String name, int value)
Set the value of the name property to an int.

Parameters:
name - property name
value - int value of the property
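Before the examples below, a minimal round-trip sketch; the property name example.max.retries and the class name SetIntDemo are hypothetical, used only for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetIntDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // setInt stores the value under the given property name
        // (Configuration keeps it internally as a string)
        conf.setInt("example.max.retries", 3);
        // getInt reads it back; the second argument is the default
        // returned if the property was never set
        int retries = conf.getInt("example.max.retries", 1);
        System.out.println(retries); // prints 3
    }
}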
From source file:eu.scape_project.tpid.TomarPrepareInputdata.java
License:Apache License
/**
 * Start.
 *
 * @param args Command line arguments
 * @throws IOException
 * @throws ParseException
 */
private static void start(String[] args) throws IOException, ParseException {
    // hadoop configuration
    Configuration hadoopConf = new Configuration();
    // Command line interface
    config = new TpidCliConfig();
    CommandLineParser cmdParser = new PosixParser();
    GenericOptionsParser gop = new GenericOptionsParser(hadoopConf, args);
    TpidOptions tpidOptions = new TpidOptions();
    CommandLine cmd = cmdParser.parse(tpidOptions.options, gop.getRemainingArgs());
    if ((args.length == 0) || (cmd.hasOption(tpidOptions.HELP_OPT))) {
        tpidOptions.exit("Help", 0);
    } else {
        tpidOptions.initOptions(cmd, config);
    }
    // configuration properties
    if (config.getPropertiesFilePath() != null) {
        pu = new PropertyUtil(config.getPropertiesFilePath(), true);
    } else {
        pu = new PropertyUtil("/eu/scape_project/tpid/config.properties", false);
    }
    // cli parameter has priority over default configuration
    int cliParamNumPerInv = config.getNumItemsPerInvokation();
    int defaultNumPerInv = Integer.parseInt(pu.getProp("default.itemsperinvokation"));
    int numPerInv = (cliParamNumPerInv != 0) ? cliParamNumPerInv : defaultNumPerInv;
    // setting hadoop configuration parameters so that they can be used
    // during MapReduce
    hadoopConf.setInt("num_items_per_task", numPerInv);
    hadoopConf.set("output_file_suffix", pu.getProp("default.outputfilesuffix"));
    hadoopConf.set("scape_platform_invoke", pu.getProp("tomar.invoke.command"));
    hadoopConf.set("unpack_hdfs_path", pu.getProp("default.hdfsdir.unpacked"));
    hadoopConf.set("joboutput_hdfs_path", pu.getProp("default.hdfsdir.joboutput"));
    hadoopConf.set("tooloutput_hdfs_path", pu.getProp("default.hdfsdir.toolout"));
    hadoopConf.set("container_file_suffix", pu.getProp("containerfilesuffix"));
    hadoopConf.set("tomar_param_pattern", pu.getProp("tomar.param.pattern"));
    hadoopConf.setBoolean("pseudo_distributed", config.isPseudoDistributed());
    startHadoopJob(hadoopConf);
}
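The int stored here with setInt ("num_items_per_task") is meant to be read back on the task side. A minimal sketch of how a mapper might do that; the mapper class and its key/value types are hypothetical, only the property name comes from the example above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ExampleMapper extends Mapper<LongWritable, Text, Text, Text> {
    private int numItemsPerTask;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        // getInt falls back to the given default (here 1) if the
        // driver never called setInt for this property
        numItemsPerTask = conf.getInt("num_items_per_task", 1);
    }
}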
From source file:example.TestLineRecordReader.java
License:Apache License
private void testSplitRecordsForFile(Configuration conf, long firstSplitLength, long testFileSize,
        Path testFilePath) throws IOException {
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);
    String delimiter = conf.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    }
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // read the data without splitting to count the records
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsNoSplits = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsNoSplits;
    }
    reader.close();
    // count the records in the first split
    split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsFirstSplit = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsFirstSplit;
    }
    reader.close();
    // count the records in the second split
    split = new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsRemainingSplits = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsRemainingSplits;
    }
    reader.close();
    assertEquals("Unexpected number of records in split ", numRecordsNoSplits,
            numRecordsFirstSplit + numRecordsRemainingSplits);
}
From source file:example.TestLineRecordReader.java
License:Apache License
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
    // Set up context
    File testFile = new File(testFileUrl.getFile());
    long testFileSize = testFile.length();
    Path testFilePath = new Path(testFile.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // Gather the records returned by the record reader
    ArrayList<String> records = new ArrayList<String>();
    long offset = 0;
    while (offset < testFileSize) {
        FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
        LineRecordReader reader = new LineRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            records.add(reader.getCurrentValue().toString());
        }
        offset += splitSize;
    }
    return records;
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test
public void testStripBOM() throws IOException {
    // the test data contains a BOM at the start of the file
    // confirm the BOM is skipped by LineRecordReader
    String UTF8_BOM = "\uFEFF";
    URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt");
    assertNotNull("Cannot find testBOM.txt", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    int numRecords = 0;
    boolean firstLine = true;
    boolean skipBOM = true;
    while (reader.nextKeyValue()) {
        if (firstLine) {
            firstLine = false;
            if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) {
                skipBOM = false;
            }
        }
        ++numRecords;
    }
    reader.close();
    assertTrue("BOM is not skipped", skipBOM);
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test
public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // read all records, then verify that close can be called repeatedly
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    //noinspection StatementWithEmptyBody
    while (reader.nextKeyValue()) ;
    reader.close();
    reader.close();
    // the decompressor should have been returned to the pool only once,
    // so ten fresh requests must yield ten distinct decompressors
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.add(CodecPool.getDecompressor(codec));
    }
    assertEquals(10, decompressors.size());
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test
public void testUncompressedInput() throws Exception {
    Configuration conf = new Configuration();
    // single char delimiter, best case
    String inputData = "abc+def+ghi+jkl+mno+pqr+stu+vw +xyz";
    Path inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter, best case
    inputData = "abc|+|def|+|ghi|+|jkl|+|mno|+|pqr|+|stu|+|vw |+|xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "|+|");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // single char delimiter with empty records
    inputData = "abc+def++ghi+jkl++mno+pqr++stu+vw ++xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with empty records
    inputData = "abc|+||+|defghi|+|jkl|+||+|mno|+|pqr|+||+|stu|+|vw |+||+|xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "|+|");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with starting part of the delimiter in the data
    inputData = "abc+def+-ghi+jkl+-mno+pqr+-stu+vw +-xyz";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+-");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with newline as start of the delimiter
    inputData = "abc\n+def\n+ghi\n+jkl\n+mno";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "\n+");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
    // multi char delimiter with newline in delimiter and in data
    inputData = "abc\ndef+\nghi+\njkl\nmno";
    inputFile = createInputFile(conf, inputData);
    conf.set("textinputformat.record.delimiter", "+\n");
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test
public void testUncompressedInputContainingCRLF() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "a\r\nb\rc\nd\r\n";
    Path inputFile = createInputFile(conf, inputData);
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            conf.setInt("io.file.buffer.size", bufferSize);
            testSplitRecordsForFile(conf, splitSize, inputData.length(), inputFile);
        }
    }
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test
public void testUncompressedInputCustomDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    String inputData = "abcdefghij++kl++mno";
    Path inputFile = createInputFile(conf, inputData);
    String delimiter = "++";
    byte[] recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    int splitLength = 15;
    FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcdefghij"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    LongWritable key = reader.getCurrentKey();
    Text value = reader.getCurrentValue();
    assertEquals("Wrong length for record value", 10, value.getLength());
    assertEquals("Wrong position after record read", 0, key.get());
    // Get second record: "kl"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 2, value.getLength());
    // Key should be 12 right after "abcdefghij++"
    assertEquals("Wrong position after record read", 12, key.get());
    // Get third record: "mno"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Key should be 16 right after "abcdefghij++kl++"
    assertEquals("Wrong position after record read", 16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "abcdefghij++kl++mno"
    assertEquals("Wrong position after record read", 19, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // No record is in the second split because the second split dropped
    // the first record, which was already reported by the first split.
    assertFalse("Unexpected record returned", reader.nextKeyValue());
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();

    // multi char delimiter with starting part of the delimiter in the data
    inputData = "abcd+efgh++ijk++mno";
    inputFile = createInputFile(conf, inputData);
    splitLength = 5;
    split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcd+efgh"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    assertEquals("Wrong position after record read", 0, key.get());
    assertEquals("Wrong length for record value", 9, value.getLength());
    // should have jumped over the delimiter, no record
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 11, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    // next split: check for duplicate or dropped records
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get second record: "ijk" first in this split
    assertEquals("Wrong position after record read", 11, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Get third record: "mno" second in this split
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong position after record read", 16, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // should be at the end of the input
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 19, key.get());
    reader.close();

    inputData = "abcd|efgh|+|ij|kl|+|mno|pqr";
    inputFile = createInputFile(conf, inputData);
    delimiter = "|+|";
    recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    // walking over the buffer and split sizes checks for proper processing
    // of the ambiguous bytes of the delimiter
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            // track where we are in the inputdata
            int keyPosition = 0;
            conf.setInt("io.file.buffer.size", bufferSize);
            split = new FileSplit(inputFile, 0, bufferSize, (String[]) null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.initialize(split, context);
            // Get the first record: "abcd|efgh" always possible
            assertTrue("Expected record got nothing", reader.nextKeyValue());
            key = reader.getCurrentKey();
            value = reader.getCurrentValue();
            assertTrue("abcd|efgh".equals(value.toString()));
            // Position should be 0 right at the start
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // Position should be 12 right after the first "|+|"
            keyPosition = 12;
            // get the next record: "ij|kl" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "ij|kl"
                assertTrue("ij|kl".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be 20 after the second "|+|"
                keyPosition = 20;
            }
            // get the third record: "mno|pqr" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "mno|pqr"
                assertTrue("mno|pqr".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be the end of the input
                keyPosition = inputData.length();
            }
            assertFalse("Unexpected record returned", reader.nextKeyValue());
            // no more records can be read we should be at the last position
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // after refresh should be empty
            key = reader.getCurrentKey();
            assertNull("Unexpected key returned", key);
            reader.close();
        }
    }
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "1234567890\r\n12\r\n345";
    Path inputFile = createInputFile(conf, inputData);
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(null);
    reader.initialize(split, context);
    LongWritable key;
    Text value;
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record: "1234567890"
    assertEquals(10, value.getLength());
    assertEquals(0, key.get());
    reader.nextKeyValue();
    // Get second record: "12"
    assertEquals(2, value.getLength());
    // Key should be 12 right after "1234567890\r\n"
    assertEquals(12, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());
    split = new FileSplit(inputFile, 15, 4, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    // The second split dropped the first record "\n"
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get third record: "345"
    assertEquals(3, value.getLength());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "1234567890\r\n12\r\n345"
    assertEquals(19, key.get());

    inputData = "123456789\r\r\n";
    inputFile = createInputFile(conf, inputData);
    split = new FileSplit(inputFile, 0, 12, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    reader.nextKeyValue();
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record: "123456789"
    assertEquals(9, value.getLength());
    assertEquals(0, key.get());
    reader.nextKeyValue();
    // Get second record: ""
    assertEquals(0, value.getLength());
    // Key should be 10 right after "123456789\r"
    assertEquals(10, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 12 right after "123456789\r\r\n"
    assertEquals(12, key.get());
}
From source file:format.OverlapLengthInputFormat.java
License:Apache License
/**
 * Set the length of each record
 * @param conf configuration
 * @param recordLength the length of a record
 */
public static void setRecordLength(Configuration conf, int recordLength) {
    conf.setInt(FIXED_RECORD_LENGTH, recordLength);
}
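A value written this way is normally read back with the matching getter. A minimal sketch of such a counterpart; the method name getRecordLength and the default of 0 are assumptions for illustration, not part of the source above:

/**
 * Sketch of a getter matching setRecordLength above.
 * The default of 0 (meaning "not configured") is an assumption.
 * @param conf configuration
 * @return the record length, or 0 if it was never set
 */
public static int getRecordLength(Configuration conf) {
    return conf.getInt(FIXED_RECORD_LENGTH, 0);
}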