List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
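Before the project-specific examples, here is a minimal, self-contained sketch of the method itself (this demo class is illustrative and not drawn from any of the source files below). Text stores its contents as UTF-8 bytes; toString() decodes those bytes into a java.lang.String.

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
    public static void main(String[] args) {
        // Text holds a string as UTF-8 bytes; toString() decodes
        // those bytes back into a java.lang.String.
        Text text = new Text("hello, hadoop");
        String decoded = text.toString();
        System.out.println(decoded); // prints: hello, hadoop

        // The decoded String is a copy: mutating the Text afterwards
        // does not change a previously returned String.
        text.set("something else");
        System.out.println(decoded); // still prints: hello, hadoop
    }
}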
From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License: Apache License

@Test
public void testRun() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);
        copyMapper.setup(context);

        for (Path path : pathList) {
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fs.getFileStatus(path), context);
        }

        // Check that the maps worked.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
            Assert.assertEquals(fs.getFileStatus(path).getReplication(),
                    fs.getFileStatus(targetPath).getReplication());
            Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
                    fs.getFileStatus(targetPath).getBlockSize());
            Assert.assertTrue(
                    !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
        }

        Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue());

        // The target files are compressed, so compare the source file
        // lengths with the number of bytes read.
        long totalSize = 0;
        for (Path path : pathList) {
            totalSize += fs.getFileStatus(path).getLen();
        }
        Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue());

        long totalCounterValue = 0;
        for (Text value : writer.values()) {
            String[] tmp = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
            Assert.assertEquals(4, tmp.length);
            Long numOfMsgs = Long.parseLong(tmp[3]);
            totalCounterValue += numOfMsgs;
        }
        Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue);

        testCopyingExistingFiles(fs, copyMapper, context);
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        Assert.assertTrue(false);
    }
}
From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License: Apache License

private void doTestIgnoreFailures(boolean ignoreFailures) {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);

        Configuration configuration = context.getConfiguration();
        configuration.setBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), ignoreFailures);
        configuration.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), true);
        configuration.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), true);
        copyMapper.setup(context);

        // Delete each source file before mapping it, so every copy fails.
        for (Path path : pathList) {
            final FileStatus fileStatus = fs.getFileStatus(path);
            if (!fileStatus.isDir()) {
                fs.delete(path, true);
                copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), fileStatus,
                        context);
            }
        }
        if (ignoreFailures) {
            for (Text value : writer.values()) {
                Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("FAIL:"));
            }
        }
        Assert.assertTrue("There should have been an exception.", ignoreFailures);
    } catch (Exception e) {
        Assert.assertTrue("Unexpected exception: " + e.getMessage(), !ignoreFailures);
        e.printStackTrace();
    }
}
From source file: com.inmobi.conduit.distcp.tools.TestCopyListing.java
License: Apache License

@Test
public void testBuildListingForSingleFile() {
    FileSystem fs = null;
    String testRootString = "/singleFileListing";
    Path testRoot = new Path(testRootString);
    SequenceFile.Reader reader = null;
    try {
        fs = FileSystem.get(getConf());
        if (fs.exists(testRoot))
            TestDistCpUtils.delete(fs, testRootString);

        Path sourceFile = new Path(testRoot, "/source/foo/bar/source.txt");
        Path decoyFile = new Path(testRoot, "/target/moo/source.txt");
        Path targetFile = new Path(testRoot, "/target/moo/target.txt");

        TestDistCpUtils.createFile(fs, sourceFile.toString());
        TestDistCpUtils.createFile(fs, decoyFile.toString());
        TestDistCpUtils.createFile(fs, targetFile.toString());

        List<Path> srcPaths = new ArrayList<Path>();
        srcPaths.add(sourceFile);

        DistCpOptions options = new DistCpOptions(srcPaths, targetFile);

        CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);
        final Path listFile = new Path(testRoot, "/tmp/fileList.seq");
        listing.buildListing(listFile, options);

        reader = new SequenceFile.Reader(fs, listFile, getConf());
        FileStatus fileStatus = new FileStatus();
        Text relativePath = new Text();
        Assert.assertTrue(reader.next(relativePath, fileStatus));
        Assert.assertTrue(relativePath.toString().equals(""));
    } catch (Exception e) {
        // Log before failing; Assert.fail() throws, so nothing after it runs.
        LOG.error("Unexpected exception: ", e);
        Assert.fail("Unexpected exception encountered.");
    } finally {
        TestDistCpUtils.delete(fs, testRootString);
        IOUtils.closeStream(reader);
    }
}
From source file: com.inmobi.conduit.distcp.tools.TestFileBasedCopyListing.java
License: Apache License

private void checkResult(Path listFile, int count) throws IOException {
    if (count == 0) {
        return;
    }

    int recCount = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, listFile, config);
    try {
        Text relPath = new Text();
        FileStatus fileStatus = new FileStatus();
        while (reader.next(relPath, fileStatus)) {
            Assert.assertEquals(fileStatus.getPath().toUri().getPath(), map.get(relPath.toString()));
            recCount++;
        }
    } finally {
        IOUtils.closeStream(reader);
    }
    Assert.assertEquals(recCount, count);
}
From source file: com.inmobi.conduit.distcp.tools.TestGlobbedCopyListing.java
License: Apache License

private void verifyContents(Path listingPath) throws Exception {
    SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(), listingPath,
            new Configuration());
    Text key = new Text();
    FileStatus value = new FileStatus();
    Map<String, String> actualValues = new HashMap<String, String>();
    while (reader.next(key, value)) {
        actualValues.put(value.getPath().toString(), key.toString());
    }

    Assert.assertEquals(expectedValues.size(), actualValues.size());
    for (Map.Entry<String, String> entry : actualValues.entrySet()) {
        Assert.assertEquals(entry.getValue(), expectedValues.get(entry.getKey()));
    }
}
From source file: com.inmobi.conduit.local.CopyMapper.java
License: Apache License

@Override
public void map(Text key, FileStatus value, Context context) throws IOException, InterruptedException {
    Path src = value.getPath();
    String dest = key.toString();
    String collector = src.getParent().getName();
    String category = src.getParent().getParent().getName();

    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    Configuration srcConf = new Configuration();
    srcConf.set(FS_DEFAULT_NAME_KEY, context.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));

    FileSystem fs = FileSystem.get(srcConf);
    Path target = getTempPath(context, src, category, collector);
    if (FileUtil.gzip(src, target, srcConf, received)) {
        LOG.info("File " + src + " is empty hence returning without compressing");
        return;
    }
    // move to final destination
    fs.mkdirs(new Path(dest).makeQualified(fs));
    String destnFilename = collector + "-" + src.getName() + ".gz";
    Path destPath = new Path(dest + File.separator + destnFilename);
    LOG.info("Renaming file " + target + " to " + destPath);
    fs.rename(target, destPath);
    if (received != null) {
        for (Entry<Long, Long> entry : received.entrySet()) {
            String counterNameValue = getCounterNameValue(category, destnFilename, entry.getKey(),
                    entry.getValue());
            context.write(NullWritable.get(), new Text(counterNameValue));
        }
    }
}
From source file: com.inmobi.databus.local.CopyMapper.java
License: Apache License

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    Path src = new Path(key.toString());
    String dest = value.toString();
    String collector = src.getParent().getName();
    String category = src.getParent().getParent().getName();

    FileSystem fs = FileSystem.get(context.getConfiguration());
    Path target = getTempPath(context, src, category, collector);
    FileUtil.gzip(src, target, context.getConfiguration());
    // move to final destination
    fs.mkdirs(new Path(dest).makeQualified(fs));
    Path destPath = new Path(dest + File.separator + collector + "-" + src.getName() + ".gz");
    LOG.info("Renaming file " + target + " to " + destPath);
    fs.rename(target, destPath);
}
From source file: com.intel.hadoop.graphbuilder.idnormalize.mapreduce.HashIdMapper.java
License: Open Source License

@Override
public void map(LongWritable key, Text val, OutputCollector<IntWritable, Text> out, Reporter arg3)
        throws IOException {
    if (graphparser.isVertexData(val.toString())) {
        out.collect(new IntWritable(curId), new Text(val.toString()));
        ++curId;
    }
}
From source file: com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortDictMapper.java
License: Open Source License

@Override
public void map(LongWritable key, Text val, OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
    String line = val.toString();
    StringTokenizer tokenizer = new StringTokenizer(line);
    try {
        String vid = tokenizer.nextToken();
        if (hashRawVid) { // partition by old vid
            Object rawId = vidparser.getValue(tokenizer.nextToken());
            int hash = rawId.hashCode() % numChunks;
            if (hash < 0)
                hash += numChunks; // resolving negative hashcode
            out.collect(new IntWritable(hash), val);
        } else { // partition by new vid
            int hash = Long.valueOf(vid).hashCode() % numChunks;
            out.collect(new IntWritable(hash), val);
        }
    } catch (NoSuchElementException e) {
        e.printStackTrace();
        LOG.error("Error parsing vertex dictionary: " + val.toString());
    }
}
From source file: com.intel.hadoop.graphbuilder.idnormalize.mapreduce.TransEdgeMapper.java
License: Open Source License

@Override
public void map(LongWritable key, Text value, OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
    if (!graphparser.isEdgeData(value.toString()))
        return;

    Edge<VidType, ?> e = graphparser.parseEdge(value.toString(), vidparser, edataparser);
    int part = e.source().hashCode() % numChunks;
    if (part < 0)
        part += numChunks;
    if (part != dictionaryId) {
        dictionaryId = part;
        loadDictionary();
    }

    if (dict.containsKey(e.source())) {
        long srcId = dict.get(e.source());
        int targetHash = e.target().hashCode() % numChunks;
        if (targetHash < 0)
            targetHash += numChunks;
        Text output = new Text(srcId + "\t" + e.target().toString() + "\t" + e.EdgeData().toString());
        out.collect(new IntWritable(targetHash), output);
    } else {
        LOG.error("TransEdgeMapper: Cannot find key " + e.source().toString());
        LOG.error("Line: " + value.toString());
    }
}