List of usage examples for org.apache.hadoop.fs FileSystem setConf
@Override public void setConf(Configuration conf)
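All of the examples below share the same idiom: a FileSystem is constructed directly (for example new RawLocalFileSystem()) rather than obtained from FileSystem.get(), so it carries no Configuration until setConf(conf) is called. The following is a minimal sketch of that idiom, assuming nothing beyond the Hadoop classes used in the examples; the class name and the path are hypothetical and only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;

public class SetConfSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("io.file.buffer.size", "4096");

        // Constructed directly, so the Configuration has to be attached by hand
        // before the filesystem is handed to readers or writers.
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        // Hypothetical path, used only to show the configured filesystem in action.
        Path p = fs.makeQualified(new Path("/tmp/setconf-example.txt"));
        System.out.println(p + " buffer=" + fs.getConf().get("io.file.buffer.size"));
        fs.close();
    }
}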
From source file:com.dasasian.chok.testutil.GenerateMapFiles.java
License:Apache License
/**
 * This generates the very simple MapFiles in chok/src/test/testMapFile[AB]/.
 * These files are supposed to simulate taking 2 large MapFiles and splitting the first one
 * into 4 shards, the second into 2 shards. We do not provide such a tool yet.
 * The results are checked in, so you should not need to run this. It is provided
 * as a reference.
 * @param args the arguments
 * @throws java.lang.Exception when an error occurs
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "4096");
    FileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);
    //
    File f = new File("src/test/testMapFileA/a1");
    MapFile.Writer w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "a.txt", "This is a test");
    write(w, "b.xml", "<name>test</name>");
    write(w, "c.log", "1/1/2009: test");
    w.close();
    //
    f = new File("src/test/testMapFileA/a2");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "d.html", "<b>test</b>");
    write(w, "e.txt", "An e test");
    write(w, "f.log", "1/2/2009: test2");
    w.close();
    //
    f = new File("src/test/testMapFileA/a3");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "g.log", "1/3/2009: more test");
    write(w, "h.txt", "Test in part 3");
    w.close();
    //
    f = new File("src/test/testMapFileA/a4");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "i.xml", "<i>test</i>");
    write(w, "j.log", "1/4/2009: 4 test");
    write(w, "k.out", "test data");
    write(w, "l.txt", "line 4");
    w.close();
    //
    //
    f = new File("src/test/testMapFileB/b1");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "u.txt", "Test U text");
    write(w, "v.xml", "<victor>foo</victor>");
    write(w, "w.txt", "where is test");
    w.close();
    //
    f = new File("src/test/testMapFileB/b2");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "x.txt", "xrays ionize");
    write(w, "y.xml", "<yankee>foo</yankee>");
    write(w, "z.xml", "<zed>foo</zed>");
    w.close();
}
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/**
 * test PipesMapRunner - test the transfer of data from the reader
 *
 * @throws Exception
 */
@Test
public void testRunner() throws Exception {
    // clean old password files
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(Submitter.IS_JAVA_RR, "true");
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        job.setInputFormatClass(DummyInputFormat.class);
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);
        RecordReader<FloatWritable, NullWritable> rReader = input_format.createRecordReader(isplit, tcontext);
        TestMapContext context = new TestMapContext(conf, taskAttemptid, rReader, null, null, null, isplit);

        // stub for client
        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());

        // token for authorization
        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        PipesMapper<FloatWritable, NullWritable, IntWritable, Text> mapper =
                new PipesMapper<FloatWritable, NullWritable, IntWritable, Text>(context);

        initStdOut(conf);
        mapper.run(context);
        String stdOut = readStdOut(conf);

        // check part of the translated data; client and test share the client's stdOut file
        // check version
        assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
        // check key and value classes
        assertTrue(stdOut.contains("Key class:org.apache.hadoop.io.FloatWritable"));
        assertTrue(stdOut.contains("Value class:org.apache.hadoop.io.NullWritable"));
        // check that all data from the reader has been sent
        assertTrue(stdOut.contains("value:0.0"));
        assertTrue(stdOut.contains("value:9.0"));
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/**
 * test org.apache.hadoop.mapreduce.pipes.Application
 * tests internal functions:
 * MessageType.REGISTER_COUNTER, INCREMENT_COUNTER, STATUS, PROGRESS...
 *
 * @throws Throwable
 */
@Test
public void testApplication() throws Throwable {
    System.err.println("testApplication");
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationStub");
        //getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);
        DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(isplit, tcontext);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        MapContextImpl<IntWritable, Text, IntWritable, Text> context =
                new MapContextImpl<IntWritable, Text, IntWritable, Text>(
                        conf, taskAttemptid, null, writer, null, reporter, null);

        System.err.println("ready to launch application");
        Application<IntWritable, Text, IntWritable, Text> application =
                new Application<IntWritable, Text, IntWritable, Text>(context, reader);
        System.err.println("done");

        application.getDownlink().flush();
        application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
        application.getDownlink().flush();
        application.waitForFinish();

        // test getDownlink().mapItem();
        String stdOut = readStdOut(conf);
        assertTrue(stdOut.contains("key:3"));
        assertTrue(stdOut.contains("value:txt"));

        assertEquals(0.0, context.getProgress(), 0.01);
        assertNotNull(context.getCounter("group", "name"));

        // test status MessageType.STATUS
        assertEquals(context.getStatus(), "PROGRESS");
        // check MessageType.PROGRESS
        assertEquals(0.55f, reader.getProgress(), 0.001);
        application.getDownlink().close();

        // test MessageType.OUTPUT
        stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
        assertTrue(stdOut.contains("key:123"));
        assertTrue(stdOut.contains("value:value"));

        try {
            // try to abort
            application.abort(new Throwable());
            fail();
        } catch (IOException e) {
            // abort works?
            assertEquals("pipe child exception", e.getMessage());
        }
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/**
 * test org.apache.hadoop.mapreduce.pipes.PipesReducer
 * test the transfer of data: key and value
 *
 * @throws Exception
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();

        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context =
                new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(
                        conf, taskAttemptid, kvit, null, null, writer, null, null, null,
                        BooleanWritable.class, Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer =
                new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value :first"));
        assertTrue(stdOut.contains("reduce value :second"));
        assertTrue(stdOut.contains("reduce value :third"));
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
From source file:org.apache.giraph.io.hbase.TestHBaseRootMarkerVertextFormat.java
License:Apache License
@Test
public void testHBaseInputOutput() throws Exception {
    if (System.getProperty("prop.mapred.job.tracker") != null) {
        if (log.isInfoEnabled())
            log.info("testHBaseInputOutput: Ignore this test if not local mode.");
        return;
    }

    File jarTest = new File(System.getProperty("prop.jarLocation"));
    if (!jarTest.exists()) {
        fail("Could not find Giraph jar at " + "location specified by 'prop.jarLocation'. "
                + "Make sure you built the main Giraph artifact.");
    }

    FileSystem fs = null;
    Path hbaseRootdir = null;
    try {
        MiniHBaseCluster cluster = testUtil.startMiniCluster(1);
        cluster.waitForActiveAndReadyMaster();
        testUtil.startMiniMapReduceCluster();

        // Let's set up the hbase root directory.
        Configuration conf = testUtil.getConfiguration();
        try {
            fs = testUtil.getTestFileSystem();
            String randomStr = UUID.randomUUID().toString();
            String tmpdir = System.getProperty("java.io.tmpdir") + "/" + randomStr + "/";
            hbaseRootdir = fs.makeQualified(new Path(tmpdir));
            conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
            fs.mkdirs(hbaseRootdir);
        } catch (IOException ioe) {
            fail("Could not create hbase root directory.");
        }

        // First let's load some data using ImportTsv into our mock table.
        String INPUT_FILE = hbaseRootdir.toString() + "/graph.csv";
        String[] args = new String[] { "-Dimporttsv.columns=HBASE_ROW_KEY,cf:" + QUALIFER,
                "-Dimporttsv.separator=" + "\u002c", TABLE_NAME, INPUT_FILE };

        GenericOptionsParser opts = new GenericOptionsParser(testUtil.getConfiguration(), args);
        args = opts.getRemainingArgs();

        fs = FileSystem.get(conf);
        fs.setConf(conf);
        Path inputPath = fs.makeQualified(new Path(hbaseRootdir, "graph.csv"));
        FSDataOutputStream op = fs.create(inputPath, true);
        String line1 = "0001,0002\n";
        String line2 = "0002,0004\n";
        String line3 = "0003,0005\n";
        String line4 = "0004,-1\n";
        String line5 = "0005,-1\n";
        op.write(line1.getBytes());
        op.write(line2.getBytes());
        op.write(line3.getBytes());
        op.write(line4.getBytes());
        op.write(line5.getBytes());
        op.close();

        final byte[] FAM = Bytes.toBytes(FAMILY);
        final byte[] TAB = Bytes.toBytes(TABLE_NAME);

        HTableDescriptor desc = new HTableDescriptor(TAB);
        desc.addFamily(new HColumnDescriptor(FAM));
        HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
        if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
            hbaseAdmin.disableTable(TABLE_NAME);
            hbaseAdmin.deleteTable(TABLE_NAME);
        }
        hbaseAdmin.createTable(desc);

        // Do the import
        Job job = ImportTsv.createSubmittableJob(conf, args);
        job.waitForCompletion(false);
        assertTrue(job.isSuccessful());
        if (log.isInfoEnabled())
            log.info("ImportTsv successful. Running HBase Giraph job.");

        // Now operate over HBase using Vertex I/O formats
        conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
        conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

        GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
        GiraphConfiguration giraphConf = giraphJob.getConfiguration();
        setupConfiguration(giraphJob);
        giraphConf.setComputationClass(EdgeNotification.class);
        giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
        giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
        assertTrue(giraphJob.run(true));
        if (log.isInfoEnabled())
            log.info("Giraph job successful. Checking output qualifier.");

        // Do a get on row 0002; it should have a parent of 0001
        // if the outputFormat worked.
        HTable table = new HTable(conf, TABLE_NAME);
        Result result = table.get(new Get("0002".getBytes()));
        byte[] parentBytes = result.getValue(FAMILY.getBytes(), OUTPUT_FIELD.getBytes());
        assertNotNull(parentBytes);
        assertTrue(parentBytes.length > 0);
        assertEquals("0001", Bytes.toString(parentBytes));
    } finally {
        testUtil.shutdownMiniMapReduceCluster();
        testUtil.shutdownMiniCluster();
    }
}
From source file:org.lilyproject.hadooptestfw.fork.HBaseTestingUtility.java
License:Apache License
/**
 * Starts up mini hbase cluster. Usually used after call to
 * {@link #startMiniCluster(int, int)} when doing stepped startup of clusters.
 * Usually you won't want this. You'll usually want {@link #startMiniCluster()}.
 *
 * @return Reference to the mini hbase cluster.
 * @see {@link #startMiniCluster()}
 */
public MiniHBaseCluster startMiniHBaseCluster(final int numMasters, final int numSlaves)
        throws IOException, InterruptedException {
    // Now do the mini hbase cluster. Set the hbase.rootdir in config.
    createRootDir();

    // These settings will make the server wait until this exact number of
    // region servers are connected.
    if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1) == -1) {
        conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, numSlaves);
    }
    if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, -1) == -1) {
        conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, numSlaves);
    }

    Configuration c = new Configuration(this.conf);
    this.hbaseCluster = new MiniHBaseCluster(c, numMasters, numSlaves);

    // Don't leave here till we've done a successful scan of the .META.
    HTable t = new HTable(c, HConstants.META_TABLE_NAME);
    ResultScanner s = t.getScanner(new Scan());
    while (s.next() != null) {
        continue;
    }
    s.close();
    t.close();

    // Lily change: fix wrong master filesystem
    // TODO Remove this once HBASE-6441 is available in the currently-used version of HBase
    Configuration masterConf = hbaseCluster.getMaster().getConfiguration();
    FileSystem fs = FileSystem.get(masterConf);
    fs.setConf(masterConf);

    getHBaseAdmin(); // create the hbaseAdmin immediately
    LOG.info("Minicluster is up");
    return (MiniHBaseCluster) this.hbaseCluster;
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testBasics() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testBasics.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        @SuppressWarnings("unused")
        int l = 0;
        StringBuffer buf = new StringBuffer();

        String[] base = { "word1:Hello word2:world number:1 ", "word1:foo word2:bar number:2 ",
                "word1:cat word2:dog number:3 ", "word1:rock word2:paper number:4 ",
                "word1:red word2:blue, number:5 ", "word1:,green, word2:,, number:6 ", };

        int index = 0;
        while (reader.nextKeyValue()) {
            Geometry f = reader.getCurrentValue();
            String row = "";
            for (Map.Entry attr : f.getAllAttributes().entrySet()) {
                row += attr.getKey() + ":" + attr.getValue() + " ";
            }
            Assert.assertEquals("Error in row " + index, base[index++], row);
        }

        // This hash code will tell us if anything changes then it can be manually verified.
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testNullProcessing() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testNullValues.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        @SuppressWarnings("unused")
        int l = 0;
        //StringBuffer buf = new StringBuffer();

        // Test specific rows returned to make sure the values are as expected.
        Assert.assertTrue(reader.nextKeyValue());
        Geometry f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test1", f.getAttribute("string1"));
        Assert.assertEquals(1.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(1.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 2 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test2", f.getAttribute("string1"));
        Assert.assertEquals(2.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"), f.getAttribute("2"));

        // Row 3 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test3", f.getAttribute("string1"));
        Assert.assertEquals(3.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(3.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 4 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test4", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("1"));
        Assert.assertEquals(4.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 5 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test5", f.getAttribute("string1"));
        Assert.assertEquals(5.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(5.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);

        // Row 6 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test6", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals("", f.getAttribute("double1"));

        // Row 7 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test7", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("int1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"),
                f.getAttribute("double1"));

        Assert.assertFalse(reader.nextKeyValue());
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testNullIgnore() throws Exception {
    FileSystem fs = new RawLocalFileSystem();
    try {
        int lineCount = 0;

        // Write columns file which defines the columns title and type
        String cstr = "<?xml version='1.0' encoding='UTF-8'?>\n<AllColumns firstLineHeader='false'>\n";
        cstr += " <Column name='name' type='Nominal'/>\n";
        cstr += " <Column name='x' type='Numeric'/>\n";
        cstr += " <Column name='y' type='Numeric'/>\n";
        cstr += "</AllColumns>\n";
        FileOutputStream fos = new FileOutputStream(output + "/nulXY.csv.columns");
        PrintStream ps = new PrintStream(fos);
        ps.print(cstr);
        ps.close();

        // Write csv test data
        fos = new FileOutputStream(output + "/nullXY.csv");
        ps = new PrintStream(fos);
        // populated rows
        for (int ii = 0; ii < 10; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        // empty rows
        ps.print("ASDF,,1.0\n");
        ps.print("ASDF,1.0,\n");
        ps.print("ASDF,,\n");
        lineCount += 3;
        // populated rows
        for (int ii = 0; ii < 5; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        ps.close();

        System.out.println(output + "nulXY.csv");

        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(output, "nullXY.csv");
        testFile = fs.makeQualified(testFile);

        InputSplit split;
        long l;
        long start;

        TextInputFormat format = new TextInputFormat();
        split = new FileSplit(testFile, 0, lineCount * 1000, null);
        RecordReader<LongWritable, Text> reader2 = format.createRecordReader(split,
                HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        reader2.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        l = 0;
        start = System.currentTimeMillis();
        while (reader2.nextKeyValue()) {
            reader2.getCurrentValue().toString();
            l++;
        }
        Assert.assertEquals(lineCount, l);
        System.out.printf("text line reader with null x,y ignore: %d\n", System.currentTimeMillis() - start);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testBadValues() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testErrors.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        @SuppressWarnings("unused")
        int l = 0;
        //StringBuffer buf = new StringBuffer();

        // Test specific rows returned to make sure the values are as expected.
        Assert.assertTrue(reader.nextKeyValue());
        Geometry f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test1,1,1.5,30.0,40.0", f.getAttribute("string1"));
        Assert.assertNull(f.getAttribute("int1"));
        Assert.assertNull(f.getAttribute("double1"));
        Assert.assertNull(f.getAttribute("x"));
        Assert.assertNull(f.getAttribute("y"));

        // Row 2 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test2", f.getAttribute("string1"));
        Assert.assertEquals(2, Integer.parseInt(f.getAttribute("int1")));
        Assert.assertEquals("", f.getAttribute("double1"));
        Assert.assertEquals("30.abc", f.getAttribute("x"));
        Assert.assertEquals(40.0, Double.parseDouble(f.getAttribute("y")), EPSILON);

        // Row 3 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test3", f.getAttribute("string1"));
        Assert.assertEquals(3, Integer.parseInt(f.getAttribute("int1")));
        Assert.assertEquals(3.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        Assert.assertEquals(30.0, Double.parseDouble(f.getAttribute("x")), EPSILON);
        Assert.assertEquals("40.abc", f.getAttribute("y"));

        // Row 4 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test4", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals(4.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        Assert.assertEquals(30.0, Double.parseDouble(f.getAttribute("x")), EPSILON);
        Assert.assertNull(f.getAttribute("y"));

        // Row 5 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test5", f.getAttribute("string1"));
        Assert.assertEquals(5, Integer.parseInt(f.getAttribute("int1")));
        Assert.assertEquals(5.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        Assert.assertEquals("", f.getAttribute("x"));
        Assert.assertEquals(40.0, Double.parseDouble(f.getAttribute("y")), EPSILON);

        // Row 6 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test6", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals("", f.getAttribute("double1"));
        Assert.assertEquals("", f.getAttribute("x"));
        Assert.assertEquals("", f.getAttribute("y"));

        // end
        Assert.assertFalse(reader.nextKeyValue());
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}