Example usage for org.apache.hadoop.fs FileSystem setConf

List of usage examples for org.apache.hadoop.fs FileSystem setConf

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem setConf.

Prototype

@Override
public void setConf(Configuration conf)
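
Before the real-world usages, here is a minimal, self-contained sketch of the pattern most of the examples below share: construct a RawLocalFileSystem directly (rather than obtaining one through FileSystem.get()), hand it a Configuration via setConf, and then use it. The class name SetConfExample and the example path are illustrative assumptions, not taken from any of the quoted projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;

public class SetConfExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("io.file.buffer.size", "4096");

        // A RawLocalFileSystem created with 'new' has no configuration yet,
        // so setConf must be called before the filesystem is used.
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        // The configured filesystem can now resolve and qualify paths.
        Path p = fs.makeQualified(new Path("example.txt"));
        System.out.println("Qualified path: " + p);

        fs.close();
    }
}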

Usage

From source file:com.dasasian.chok.testutil.GenerateMapFiles.java

License:Apache License

/**
 * This generates the very simple MapFiles in chok/src/test/testMapFile[AB]/.
 * These files are supposed to simulate taking 2 large MapFiles and splitting the first one
 * into 4 shards, the second into 2 shards. We do not provide such a tool yet.
 * The results are checked in, so you should not need to run this. It is provided
 * as a reference.
 * @param args the arguments
 * @throws java.lang.Exception when an error occurs
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "4096");
    FileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);
    //
    File f = new File("src/test/testMapFileA/a1");
    MapFile.Writer w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "a.txt", "This is a test");
    write(w, "b.xml", "<name>test</name>");
    write(w, "c.log", "1/1/2009: test");
    w.close();
    //
    f = new File("src/test/testMapFileA/a2");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "d.html", "<b>test</b>");
    write(w, "e.txt", "An e test");
    write(w, "f.log", "1/2/2009: test2");
    w.close();
    //
    f = new File("src/test/testMapFileA/a3");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "g.log", "1/3/2009: more test");
    write(w, "h.txt", "Test in part 3");
    w.close();
    //
    f = new File("src/test/testMapFileA/a4");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "i.xml", "<i>test</i>");
    write(w, "j.log", "1/4/2009: 4 test");
    write(w, "k.out", "test data");
    write(w, "l.txt", "line 4");
    w.close();
    //
    //
    f = new File("src/test/testMapFileB/b1");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "u.txt", "Test U text");
    write(w, "v.xml", "<victor>foo</victor>");
    write(w, "w.txt", "where is test");
    w.close();
    //
    f = new File("src/test/testMapFileB/b2");
    w = new MapFile.Writer(conf, fs, f.getAbsolutePath(), Text.class, Text.class);
    write(w, "x.txt", "xrays ionize");
    write(w, "y.xml", "<yankee>foo</yankee>");
    write(w, "z.xml", "<zed>foo</zed>");
    w.close();
}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test PipesMapRunner: test the transfer of data from the reader
 *
 * @throws Exception
 */
@Test
public void testRunner() throws Exception {
    // clean old password files
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(Submitter.IS_JAVA_RR, "true");
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        job.setInputFormatClass(DummyInputFormat.class);
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);

        InputSplit isplit = isplits.get(0);

        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordReader<FloatWritable, NullWritable> rReader = input_format.createRecordReader(isplit, tcontext);

        TestMapContext context = new TestMapContext(conf, taskAttemptid, rReader, null, null, null, isplit);
        // stub for client
        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        // token for authorization
        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
        PipesMapper<FloatWritable, NullWritable, IntWritable, Text> mapper = new PipesMapper<FloatWritable, NullWritable, IntWritable, Text>(
                context);

        initStdOut(conf);
        mapper.run(context);
        String stdOut = readStdOut(conf);

        // check part of the translated data (the client and the test share
        // the same stdout file)
        // check version
        assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
        // check key and value classes
        assertTrue(stdOut.contains("Key class:org.apache.hadoop.io.FloatWritable"));
        assertTrue(stdOut.contains("Value class:org.apache.hadoop.io.NullWritable"));
        // verify that all data from the reader was sent
        assertTrue(stdOut.contains("value:0.0"));
        assertTrue(stdOut.contains("value:9.0"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test org.apache.hadoop.mapreduce.pipes.Application
 * test internal functions:
 *     MessageType.REGISTER_COUNTER,  INCREMENT_COUNTER, STATUS, PROGRESS...
 *
 * @throws Throwable
 */

@Test
public void testApplication() throws Throwable {

    System.err.println("testApplication");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationStub");
        //getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(isplit, tcontext);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        MapContextImpl<IntWritable, Text, IntWritable, Text> context = new MapContextImpl<IntWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, null, writer, null, reporter, null);

        System.err.println("ready to launch application");
        Application<IntWritable, Text, IntWritable, Text> application = new Application<IntWritable, Text, IntWritable, Text>(
                context, reader);
        System.err.println("done");

        application.getDownlink().flush();
        application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
        application.getDownlink().flush();
        application.waitForFinish();

        // test getDownlink().mapItem();
        String stdOut = readStdOut(conf);
        assertTrue(stdOut.contains("key:3"));
        assertTrue(stdOut.contains("value:txt"));

        assertEquals(0.0, context.getProgress(), 0.01);
        assertNotNull(context.getCounter("group", "name"));

        // test status MessageType.STATUS
        assertEquals(context.getStatus(), "PROGRESS");
        // check MessageType.PROGRESS
        assertEquals(0.55f, reader.getProgress(), 0.001);
        application.getDownlink().close();
        // test MessageType.OUTPUT
        stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
        assertTrue(stdOut.contains("key:123"));
        assertTrue(stdOut.contains("value:value"));
        try {
            // try to abort
            application.abort(new Throwable());
            fail();
        } catch (IOException e) {
            // verify that abort reports the expected error
            assertEquals("pipe child exception", e.getMessage());
        }
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test org.apache.hadoop.mapreduce.pipes.PipesReducer
 * test the transfer of data: key and value
 *
 * @throws Exception
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();

        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context = new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, kvit, null, null, writer, null, null, null, BooleanWritable.class,
                Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value  :first"));
        assertTrue(stdOut.contains("reduce value  :second"));
        assertTrue(stdOut.contains("reduce value  :third"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }

}

From source file:org.apache.giraph.io.hbase.TestHBaseRootMarkerVertextFormat.java

License:Apache License

@Test
public void testHBaseInputOutput() throws Exception {
    if (System.getProperty("prop.mapred.job.tracker") != null) {
        if (log.isInfoEnabled())
            log.info("testHBaseInputOutput: Ignore this test if not local mode.");
        return;
    }

    File jarTest = new File(System.getProperty("prop.jarLocation"));
    if (!jarTest.exists()) {
        fail("Could not find Giraph jar at " + "location specified by 'prop.jarLocation'. "
                + "Make sure you built the main Giraph artifact?.");
    }

    FileSystem fs = null;
    Path hbaseRootdir = null;
    try {
        MiniHBaseCluster cluster = testUtil.startMiniCluster(1);
        cluster.waitForActiveAndReadyMaster();
        testUtil.startMiniMapReduceCluster();

        // Let's set up the hbase root directory.
        Configuration conf = testUtil.getConfiguration();
        try {
            fs = testUtil.getTestFileSystem();
            String randomStr = UUID.randomUUID().toString();
            String tmpdir = System.getProperty("java.io.tmpdir") + "/" + randomStr + "/";
            hbaseRootdir = fs.makeQualified(new Path(tmpdir));

            conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
            fs.mkdirs(hbaseRootdir);
        } catch (IOException ioe) {
            fail("Could not create hbase root directory.");
        }

        //First let's load some data using ImportTsv into our mock table.
        String INPUT_FILE = hbaseRootdir.toString() + "/graph.csv";
        String[] args = new String[] { "-Dimporttsv.columns=HBASE_ROW_KEY,cf:" + QUALIFER,
                "-Dimporttsv.separator=" + "\u002c", TABLE_NAME, INPUT_FILE };

        GenericOptionsParser opts = new GenericOptionsParser(testUtil.getConfiguration(), args);
        args = opts.getRemainingArgs();

        fs = FileSystem.get(conf);
        fs.setConf(conf);
        Path inputPath = fs.makeQualified(new Path(hbaseRootdir, "graph.csv"));
        FSDataOutputStream op = fs.create(inputPath, true);
        String line1 = "0001,0002\n";
        String line2 = "0002,0004\n";
        String line3 = "0003,0005\n";
        String line4 = "0004,-1\n";
        String line5 = "0005,-1\n";
        op.write(line1.getBytes());
        op.write(line2.getBytes());
        op.write(line3.getBytes());
        op.write(line4.getBytes());
        op.write(line5.getBytes());
        op.close();

        final byte[] FAM = Bytes.toBytes(FAMILY);
        final byte[] TAB = Bytes.toBytes(TABLE_NAME);

        HTableDescriptor desc = new HTableDescriptor(TAB);
        desc.addFamily(new HColumnDescriptor(FAM));
        HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
        if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
            hbaseAdmin.disableTable(TABLE_NAME);
            hbaseAdmin.deleteTable(TABLE_NAME);
        }
        hbaseAdmin.createTable(desc);

        // Do the import
        Job job = ImportTsv.createSubmittableJob(conf, args);
        job.waitForCompletion(false);
        assertTrue(job.isSuccessful());
        if (log.isInfoEnabled())
            log.info("ImportTsv successful. Running HBase Giraph job.");

        // Now operate over HBase using Vertex I/O formats
        conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
        conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

        GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
        GiraphConfiguration giraphConf = giraphJob.getConfiguration();
        setupConfiguration(giraphJob);
        giraphConf.setComputationClass(EdgeNotification.class);
        giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
        giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);

        assertTrue(giraphJob.run(true));
        if (log.isInfoEnabled())
            log.info("Giraph job successful. Checking output qualifier.");

        // Do a get on row 0002, it should have a parent of 0001
        // if the outputFormat worked.
        HTable table = new HTable(conf, TABLE_NAME);
        Result result = table.get(new Get("0002".getBytes()));
        byte[] parentBytes = result.getValue(FAMILY.getBytes(), OUTPUT_FIELD.getBytes());
        assertNotNull(parentBytes);
        assertTrue(parentBytes.length > 0);
        assertEquals("0001", Bytes.toString(parentBytes));
    } finally {
        testUtil.shutdownMiniMapReduceCluster();
        testUtil.shutdownMiniCluster();
    }
}

From source file:org.lilyproject.hadooptestfw.fork.HBaseTestingUtility.java

License:Apache License

/**
 * Starts up mini hbase cluster.  Usually used after call to
 * {@link #startMiniCluster(int, int)} when doing stepped startup of clusters.
 * Usually you won't want this.  You'll usually want {@link #startMiniCluster()}.
 *
 * @return Reference to the mini HBase cluster.
 * @see {@link #startMiniCluster()}
 */
public MiniHBaseCluster startMiniHBaseCluster(final int numMasters, final int numSlaves)
        throws IOException, InterruptedException {
    // Now do the mini hbase cluster.  Set the hbase.rootdir in config.
    createRootDir();

    // These settings will make the server wait until this exact number of
    // region servers are connected.
    if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1) == -1) {
        conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, numSlaves);
    }
    if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, -1) == -1) {
        conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, numSlaves);
    }

    Configuration c = new Configuration(this.conf);
    this.hbaseCluster = new MiniHBaseCluster(c, numMasters, numSlaves);
    // Don't leave here till we've done a successful scan of the .META.
    HTable t = new HTable(c, HConstants.META_TABLE_NAME);
    ResultScanner s = t.getScanner(new Scan());
    while (s.next() != null) {
        continue;
    }
    s.close();
    t.close();

    // Lily change: fix wrong master filesystem
    // TODO Remove this once HBASE-6441 is available in the currently-used version of HBase
    Configuration masterConf = hbaseCluster.getMaster().getConfiguration();
    FileSystem fs = FileSystem.get(masterConf);
    fs.setConf(masterConf);

    getHBaseAdmin(); // create immediately the hbaseAdmin
    LOG.info("Minicluster is up");
    return (MiniHBaseCluster) this.hbaseCluster;
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testBasics() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testBasics.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        @SuppressWarnings("unused")
        int l = 0;

        StringBuffer buf = new StringBuffer();

        String[] base = { "word1:Hello word2:world number:1 ", "word1:foo word2:bar number:2 ",
                "word1:cat word2:dog number:3 ", "word1:rock word2:paper number:4 ",
                "word1:red word2:blue, number:5 ", "word1:,green, word2:,, number:6 ", };

        int index = 0;
        while (reader.nextKeyValue()) {
            Geometry f = reader.getCurrentValue();
            String row = "";
            for (Map.Entry attr : f.getAllAttributes().entrySet()) {
                row += attr.getKey() + ":" + attr.getValue() + " ";
            }
            Assert.assertEquals("Error in row " + index, base[index++], row);
        }

        // This hash code will tell us if anything changes, so it can be manually verified.
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testNullProcessing() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testNullValues.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        @SuppressWarnings("unused")
        int l = 0;

        //StringBuffer buf = new StringBuffer();

        // Test specific rows returned to make sure the values are as expected.
        Assert.assertTrue(reader.nextKeyValue());
        Geometry f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test1", f.getAttribute("string1"));
        Assert.assertEquals(1.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(1.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 2 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test2", f.getAttribute("string1"));
        Assert.assertEquals(2.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"), f.getAttribute("2"));
        // Row 3 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test3", f.getAttribute("string1"));
        Assert.assertEquals(3.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(3.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 4 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test4", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("1"));
        Assert.assertEquals(4.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 5 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test5", f.getAttribute("string1"));
        Assert.assertEquals(5.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(5.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 6 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test6", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals("", f.getAttribute("double1"));
        // Row 7 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test7", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("int1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"),
                f.getAttribute("double1"));
        Assert.assertFalse(reader.nextKeyValue());
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testNullIgnore() throws Exception {
    FileSystem fs = new RawLocalFileSystem();
    try {
        int lineCount = 0;

        // Write columns file which defines the columns title and type
        String cstr = "<?xml version='1.0' encoding='UTF-8'?>\n<AllColumns firstLineHeader='false'>\n";
        cstr += "  <Column name='name' type='Nominal'/>\n";
        cstr += "  <Column name='x' type='Numeric'/>\n";
        cstr += "  <Column name='y' type='Numeric'/>\n";
        cstr += "</AllColumns>\n";
        FileOutputStream fos = new FileOutputStream(output + "/nullXY.csv.columns");
        PrintStream ps = new PrintStream(fos);
        ps.print(cstr);
        ps.close();

        // Write csv test data
        fos = new FileOutputStream(output + "/nullXY.csv");
        ps = new PrintStream(fos);
        // populated rows
        for (int ii = 0; ii < 10; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        // empty rows
        ps.print("ASDF,,1.0\n");
        ps.print("ASDF,1.0,\n");
        ps.print("ASDF,,\n");
        lineCount += 3;
        // populated rows
        for (int ii = 0; ii < 5; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        ps.close();

        System.out.println(output + "/nullXY.csv");

        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(output, "nullXY.csv");
        testFile = fs.makeQualified(testFile);
        InputSplit split;
        long l;
        long start;

        TextInputFormat format = new TextInputFormat();
        split = new FileSplit(testFile, 0, lineCount * 1000, null);
        RecordReader<LongWritable, Text> reader2 = format.createRecordReader(split,
                HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        reader2.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        l = 0;
        start = System.currentTimeMillis();
        while (reader2.nextKeyValue()) {
            reader2.getCurrentValue().toString();
            l++;
        }
        Assert.assertEquals(lineCount, l);
        System.out.printf("text line reader with null x,y ignore: %d\n", System.currentTimeMillis() - start);

    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testBadValues() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testErrors.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        @SuppressWarnings("unused")
        int l = 0;

        //StringBuffer buf = new StringBuffer();

        // Test specific rows returned to make sure the values are as expected.
        Assert.assertTrue(reader.nextKeyValue());
        Geometry f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test1,1,1.5,30.0,40.0", f.getAttribute("string1"));
        Assert.assertNull(f.getAttribute("int1"));
        Assert.assertNull(f.getAttribute("double1"));
        Assert.assertNull(f.getAttribute("x"));
        Assert.assertNull(f.getAttribute("y"));
        // Row 2 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test2", f.getAttribute("string1"));
        Assert.assertEquals(2, Integer.parseInt(f.getAttribute("int1")));
        Assert.assertEquals("", f.getAttribute("double1"));
        Assert.assertEquals("30.abc", f.getAttribute("x"));
        Assert.assertEquals(40.0, Double.parseDouble(f.getAttribute("y")), EPSILON);
        // Row 3 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test3", f.getAttribute("string1"));
        Assert.assertEquals(3, Integer.parseInt(f.getAttribute("int1")));
        Assert.assertEquals(3.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        Assert.assertEquals(30.0, Double.parseDouble(f.getAttribute("x")), EPSILON);
        Assert.assertEquals("40.abc", f.getAttribute("y"));
        // Row 4 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test4", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals(4.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        Assert.assertEquals(30.0, Double.parseDouble(f.getAttribute("x")), EPSILON);
        Assert.assertNull(f.getAttribute("y"));
        // Row 5 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test5", f.getAttribute("string1"));
        Assert.assertEquals(5, Integer.parseInt(f.getAttribute("int1")));
        Assert.assertEquals(5.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        Assert.assertEquals("", f.getAttribute("x"));
        Assert.assertEquals(40.0, Double.parseDouble(f.getAttribute("y")), EPSILON);
        // Row 6 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertTrue(f.isEmpty());
        Assert.assertEquals("test6", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals("", f.getAttribute("double1"));
        Assert.assertEquals("", f.getAttribute("x"));
        Assert.assertEquals("", f.getAttribute("y"));

        // end
        Assert.assertFalse(reader.nextKeyValue());
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}