List of usage examples for org.apache.hadoop.io.Text.set()
public void set(Text other)
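Before the project examples, here is a minimal standalone sketch of how Text.set is typically used. It is illustrative only and not taken from any of the source files below (the class name TextSetSketch is made up). It shows the set(String) and set(Text) overloads and the common pattern of reusing a single Text instance across records, which most of the examples that follow rely on.

import org.apache.hadoop.io.Text;

public class TextSetSketch {
  public static void main(String[] args) {
    // A single Text instance can be reused; set() replaces its contents in place.
    Text text = new Text();
    text.set("first record");   // set(String): stores the UTF-8 bytes of the string
    text.set("second record");  // reusing the same object avoids per-record allocation

    // set(Text other): copies the bytes of another Text instance
    Text copy = new Text();
    copy.set(text);
    System.out.println(copy);   // prints "second record"
  }
}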
From source file:org.apache.sqoop.connector.hdfs.TestHdfsBase.java
License:Apache License
protected void createSequenceInput(String indir, Class<? extends CompressionCodec> clz, int numberOfFiles,
    int numberOfRows) throws IOException, InstantiationException, IllegalAccessException {
  Configuration conf = new Configuration();
  CompressionCodec codec = null;
  if (clz != null) {
    codec = clz.newInstance();
    if (codec instanceof Configurable) {
      ((Configurable) codec).setConf(conf);
    }
  }
  int index = 1;
  for (int fi = 0; fi < numberOfFiles; fi++) {
    Path filepath = new Path(indir, UUID.randomUUID() + ".seq");
    SequenceFile.Writer filewriter;
    if (codec != null) {
      filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
          NullWritable.class, SequenceFile.CompressionType.BLOCK, codec);
    } else {
      filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
          NullWritable.class, SequenceFile.CompressionType.NONE);
    }
    Text text = new Text();
    for (int ri = 0; ri < numberOfRows; ri++) {
      String row = index + "," + (double) index + ",'" + index + "'";
      text.set(row);
      filewriter.append(text, NullWritable.get());
      index++;
    }
    filewriter.close();
  }
}
From source file:org.apache.sqoop.mapreduce.db.netezza.NetezzaExternalTableImportMapper.java
License:Apache License
public void map(Integer dataSliceId, NullWritable val, Context context) throws IOException, InterruptedException {
  conf = context.getConfiguration();
  dbc = new DBConfiguration(conf);
  numMappers = ConfigurationHelper.getConfNumMaps(conf);
  char rd = (char) conf.getInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY, '\n');
  initNetezzaExternalTableImport(dataSliceId);
  counter = new PerfCounters();
  counter.startClock();
  Text outputRecord = new Text();
  if (extTableThread.isAlive()) {
    try {
      String inputRecord = recordReader.readLine();
      while (inputRecord != null) {
        if (Thread.interrupted()) {
          if (!extTableThread.isAlive()) {
            break;
          }
        }
        outputRecord.set(inputRecord + rd);
        // Maybe we should set the output to be String for faster performance.
        // There is no real benefit in changing it to Text and then
        // converting it back in our case.
        writeRecord(outputRecord, context);
        counter.addBytes(1 + inputRecord.length());
        inputRecord = recordReader.readLine();
      }
    } finally {
      recordReader.close();
      extTableThread.join();
      counter.stopClock();
      LOG.info("Transferred " + counter.toString());
      if (extTableThread.hasExceptions()) {
        extTableThread.printException();
        throw new IOException(extTableThread.getException());
      }
    }
  }
}
From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java
License:Apache License
public SqoopRecord convertToSqoopRecord(HCatRecord hcr) throws IOException {
  Text key = new Text();
  for (Map.Entry<String, Object> e : sqoopRecord.getFieldMap().entrySet()) {
    String colName = e.getKey();
    String hfn = colName.toLowerCase();
    key.set(hfn);
    String javaColType = colTypesJava.get(key).toString();
    int sqlType = ((IntWritable) colTypesSql.get(key)).get();
    HCatFieldSchema field = hCatFullTableSchema.get(hfn);
    HCatFieldSchema.Type fieldType = field.getType();
    Object hCatVal = hcr.get(hfn, hCatFullTableSchema);
    String hCatTypeString = field.getTypeString();
    Object sqlVal = convertToSqoop(hCatVal, fieldType, javaColType, hCatTypeString);
    if (debugHCatExportMapper) {
      LOG.debug("hCatVal " + hCatVal + " of type "
          + (hCatVal == null ? null : hCatVal.getClass().getName()) + ",sqlVal " + sqlVal + " of type "
          + (sqlVal == null ? null : sqlVal.getClass().getName()) + ",java type " + javaColType
          + ", sql type = " + SqoopHCatUtilities.sqlTypeString(sqlType));
    }
    sqoopRecord.setField(colName, sqlVal);
  }
  return sqoopRecord;
}
From source file:org.apache.tez.mapreduce.examples.processor.FilterByWordInputProcessor.java
License:Apache License
@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }
  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }
  for (LogicalInput input : inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : outputs.values()) {
    output.start();
  }
  LogicalInput li = inputs.values().iterator().next();
  if (!(li instanceof MRInput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
  }
  LogicalOutput lo = outputs.values().iterator().next();
  if (!(lo instanceof UnorderedKVOutput)) {
    throw new IllegalStateException(
        "FilterByWordInputProcessor processor can only work with UnorderedKVOutput");
  }
  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;
  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }
  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object val = kvReader.getCurrentValue();
    Text valText = (Text) val;
    String readVal = valText.toString();
    if (readVal.contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      TextLongPair outVal = new TextLongPair(srcFile, lineNum);
      kvWriter.write(valText, outVal);
    }
  }
}
From source file:org.apache.tez.mapreduce.input.TestMultiMRInput.java
License:Apache License
public static LinkedHashMap<LongWritable, Text> createInputData(FileSystem fs, Path workDir, JobConf job,
    String filename, long startKey, long numKeys) throws IOException {
  LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();
  Path file = new Path(workDir, filename);
  LOG.info("Generating data at path: " + file);
  // create a file with length entries
  @SuppressWarnings("deprecation")
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
  try {
    Random r = new Random(System.currentTimeMillis());
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (long i = startKey; i < numKeys; i++) {
      key.set(i);
      value.set(Integer.toString(r.nextInt(10000)));
      data.put(new LongWritable(key.get()), new Text(value.toString()));
      writer.append(key, value);
      LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
    }
  } finally {
    writer.close();
  }
  return data;
}
From source file:org.apache.tez.mapreduce.processor.MapUtils.java
License:Apache License
private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
    throws IOException {
  FileInputFormat.setInputPaths(job, workDir);
  LOG.info("Generating data at path: " + file);
  // create a file with length entries
  @SuppressWarnings("deprecation")
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
  try {
    Random r = new Random(System.currentTimeMillis());
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 10; i > 0; i--) {
      key.set(r.nextInt(1000));
      value.set(Integer.toString(i));
      writer.append(key, value);
      LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
    }
  } finally {
    writer.close();
  }
  SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(job, 1);
  System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; "
      + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; " + "file = "
      + ((FileSplit) splits[0]).getPath());
  return splits[0];
}
From source file:org.apache.tez.processor.FilterByWordInputProcessor.java
License:Apache License
@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }
  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }
  LogicalInput li = inputs.values().iterator().next();
  if (!(li instanceof MRInput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
  }
  LogicalOutput lo = outputs.values().iterator().next();
  if (!(lo instanceof OnFileUnorderedKVOutput)) {
    throw new IllegalStateException(
        "FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
  }
  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  OnFileUnorderedKVOutput kvOutput = (OnFileUnorderedKVOutput) lo;
  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }
  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object val = kvReader.getCurrentValue();
    Text valText = (Text) val;
    String readVal = valText.toString();
    if (readVal.contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      TextLongPair outVal = new TextLongPair(srcFile, lineNum);
      kvWriter.write(valText, outVal);
    }
  }
}
From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License:Apache License
public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
    int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
  Partitioner partitioner = new HashPartitioner();
  ApplicationId appId = ApplicationId.newInstance(10000, 1);
  TezCounters counters = new TezCounters();
  String uniqueId = UUID.randomUUID().toString();
  OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
  Random random = new Random();
  Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
      HashPartitioner.class);
  CompressionCodec codec = null;
  if (shouldCompress) {
    codec = new DefaultCodec();
    ((Configurable) codec).setConf(conf);
  }
  int numRecordsWritten = 0;
  Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
  for (int i = 0; i < numPartitions; i++) {
    expectedValues.put(i, LinkedListMultimap.<String, String>create());
  }
  UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
      numPartitions, availableMemory);
  int sizePerBuffer = kvWriter.sizePerBuffer;
  BitSet partitionsWithData = new BitSet(numPartitions);
  Text keyText = new Text();
  Text valText = new Text();
  for (int i = 0; i < numRegularRecords; i++) {
    String key = createRandomString(Math.abs(random.nextInt(10)));
    String val = createRandomString(Math.abs(random.nextInt(20)));
    keyText.set(key);
    valText.set(val);
    int partition = partitioner.getPartition(keyText, valText, numPartitions);
    partitionsWithData.set(partition);
    expectedValues.get(partition).put(key, val);
    kvWriter.write(keyText, valText);
    numRecordsWritten++;
  }
  // Write large key records
  for (int i = 0; i < numLargeKeys; i++) {
    String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
    String val = createRandomString(Math.abs(random.nextInt(20)));
    keyText.set(key);
    valText.set(val);
    int partition = partitioner.getPartition(keyText, valText, numPartitions);
    partitionsWithData.set(partition);
    expectedValues.get(partition).put(key, val);
    kvWriter.write(keyText, valText);
    numRecordsWritten++;
  }
  // Write large value records
  for (int i = 0; i < numLargevalues; i++) {
    String key = createRandomString(Math.abs(random.nextInt(10)));
    String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
    keyText.set(key);
    valText.set(val);
    int partition = partitioner.getPartition(keyText, valText, numPartitions);
    partitionsWithData.set(partition);
    expectedValues.get(partition).put(key, val);
    kvWriter.write(keyText, valText);
    numRecordsWritten++;
  }
  // Write records where key + val are large (but both can fit in the buffer individually)
  for (int i = 0; i < numLargeKvPairs; i++) {
    String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
    String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
    keyText.set(key);
    valText.set(val);
    int partition = partitioner.getPartition(keyText, valText, numPartitions);
    partitionsWithData.set(partition);
    expectedValues.get(partition).put(key, val);
    kvWriter.write(keyText, valText);
    numRecordsWritten++;
  }
  List<Event> events = kvWriter.close();
  verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));
  TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
  assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());
  // Validate the event
  assertEquals(1, events.size());
  assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
  assertEquals(0, cdme.getSourceIndexStart());
  assertEquals(numPartitions, cdme.getCount());
  DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
      .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  assertFalse(eventProto.hasData());
  BitSet emptyPartitionBits = null;
  if (partitionsWithData.cardinality() != numPartitions) {
    assertTrue(eventProto.hasEmptyPartitions());
    byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
    emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
    assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
  } else {
    assertFalse(eventProto.hasEmptyPartitions());
    emptyPartitionBits = new BitSet(numPartitions);
  }
  assertEquals(HOST_STRING, eventProto.getHost());
  assertEquals(SHUFFLE_PORT, eventProto.getPort());
  assertEquals(uniqueId, eventProto.getPathComponent());
  // Verify the actual data
  TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
  Path outputFilePath = kvWriter.finalOutPath;
  Path spillFilePath = kvWriter.finalIndexPath;
  if (numRecordsWritten > 0) {
    assertTrue(localFs.exists(outputFilePath));
    assertTrue(localFs.exists(spillFilePath));
  } else {
    // Special case for 0 records.
    return;
  }
  TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
  DataInputBuffer keyBuffer = new DataInputBuffer();
  DataInputBuffer valBuffer = new DataInputBuffer();
  Text keyDeser = new Text();
  Text valDeser = new Text();
  for (int i = 0; i < numPartitions; i++) {
    if (emptyPartitionBits.get(i)) {
      continue;
    }
    TezIndexRecord indexRecord = spillRecord.getIndex(i);
    FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
    inStream.seek(indexRecord.getStartOffset());
    IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false, 0,
        -1);
    while (reader.nextRawKey(keyBuffer)) {
      reader.nextRawValue(valBuffer);
      keyDeser.readFields(keyBuffer);
      valDeser.readFields(valBuffer);
      int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
      assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
    }
    inStream.close();
  }
  for (int i = 0; i < numPartitions; i++) {
    assertEquals(0, expectedValues.get(i).size());
    expectedValues.remove(i);
  }
  assertEquals(0, expectedValues.size());
}
From source file:org.archive.giraph.InDegreeCountComputationVertexWithTextValue.java
License:Apache License
@Override
public void compute(Vertex<LongWritable, Text, Text> vertex, Iterable<Text> messages) {
  if (getSuperstep() == 0) {
    Iterable<Edge<LongWritable, Text>> edges = vertex.getEdges();
    for (Edge<LongWritable, Text> edge : edges) {
      sendMessage(edge.getTargetVertexId(), new Text("1"));
    }
  } else {
    long sum = 0;
    for (Text message : messages) {
      sum++;
    }
    Text vertexValue = vertex.getValue();
    vertexValue.set(Double.toString(sum));
    vertex.setValue(vertexValue);
    vertex.voteToHalt();
  }
}
From source file:org.archive.giraph.InDegreeCountVertexWithTextValue.java
License:Apache License
@Override
public void compute(Iterable<Text> messages) {
  if (getSuperstep() == 0) {
    Iterable<Edge<LongWritable, Text>> edges = getEdges();
    for (Edge<LongWritable, Text> edge : edges) {
      sendMessage(edge.getTargetVertexId(), new Text("1"));
    }
  } else {
    long sum = 0;
    for (Text message : messages) {
      sum++;
    }
    Text vertexValue = getValue();
    vertexValue.set(Double.toString(sum));
    setValue(vertexValue);
    voteToHalt();
  }
}