List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
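Before the project examples below, here is a minimal, self-contained sketch of the call itself (the class name TextToStringDemo is illustrative): Text stores its contents as UTF-8 bytes, and toString() decodes them back into a java.lang.String, which is then typically split into columns as the mappers below do.

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
    public static void main(String[] args) {
        // Text holds its contents as UTF-8 encoded bytes.
        Text text = new Text("hello\tworld");

        // toString() decodes those bytes back into a Java String.
        String decoded = text.toString();

        // A common pattern in the mappers below: split the decoded line on tabs.
        String[] columns = decoded.split("\t");
        System.out.println(decoded + " -> " + columns.length + " columns");
    }
}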
From source file:com.ifeng.vdn.loggroup.tool.VideologFilter.java
License:Apache License
public static final VideologPair filerByErrCode(Text value) {
    String result = "";
    VideologPair pair = null;
    if (value == null) {
        pair = new VideologPair("");
        pair.setValue("");
        return pair;
    }
    String errCode = "";
    String[] items = value.toString().split("\t");
    if (items != null && items.length == 14) {
        // extract the err code from the eighth column (index 7):
        errCode = items[7];
        // check that the err code is valid; if not, ignore the record.
        // (the prefix literals in the two startsWith() calls are missing from
        // the source as published, so as written they always match)
        if (VALID_ERR_CODE.contains(errCode) || errCode.startsWith("") || errCode.startsWith("")) {
            pair = new VideologPair(items[0] + "|" + items[4]);
            result = value.toString();
            pair.setValue(result);
        }
    } else {
        pair = new VideologPair("");
        pair.setValue("");
    }
    return pair;
}
From source file:com.ifeng.vdn.logparser.mapper.VideoLogMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    if (value != null) {
        String[] items = value.toString().split("\t");
        if (items.length == 24) {
            log.info("Key[{}] Value>>>>{}", key, value.toString());
            if (items[20].endsWith("zhvp1.0.16") || items[20].endsWith("nsvp1.0.18")) {
                // emit the player-version column as the key
                // (the published source wrote the literal string "items[20]")
                context.write(new Text(items[20]), value);
            }
        }
    }
}
From source file:com.ifeng.vdn.logparser.mapper.VideoLogReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    for (Text value : values) {
        log.info("Key = {} Value = {}", key.toString(), value.toString());
        context.write(key, value);
    }
}
From source file:com.ifeng.vdn.parser.VideoLogParseMapper.java
License:Apache License
/**
 * Expected log field layout (the field descriptions were partially lost in
 * extraction; the glosses below are inferred from the field names and examples):
 * <code>
 * -----------------------------------------------------------------------------------------------------
 *  3 ip       client IP
 *  4 ref      referrer URL         ref=http://v.ifeng.com/v/news/djmdnz/index.shtml#01c92b9c-37c7-4510-ac87-519a1224c263
 *  5 sid      session cookie[sid]  sid=3232F65C8864C995D82D087D8A15FF05kzzxc1
 *  6 uid      user ID              uid=1395896719356_cqf3nr8244
 *  9 loc
 * 12 tm       timestamp (ms)       tm=1424048309234
 * 13 url      video URL            url=http://ips.ifeng.com/video19.ifeng.com/video09/2015/02/15/2999516-102-2028.mp4
 * 15 dur      duration (from XML)  dur=155
 * 16 err      EventRetCode = EventCode(1) + ActionCode(2) + Data(3)   err=100000
 * 17 bt       total bytes (B)      bt=12451187
 * 18 bl       loaded bytes (B)     bl=12451187
 * 19 lt                            lt=139059
 * 21 vid      player version       vid=vNsPlayer_nsvp1.0.18
 * 23 cdnId    CDN (Sooner/Chinanet/Chinacache/...)   cdnId=ifengP2P
 * 24 netname  network name         netname=
 * -----------------------------------------------------------------------------------------------------
 * </code>
 */
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    if (value != null) {
        String ds = "";
        FileSplit split = (FileSplit) context.getInputSplit();
        String parentPath = split.getPath().getParent().toString();
        String fileName = split.getPath().getName();
        fileName = fileName.substring(0, 4);
        String[] parents = parentPath.split("/");
        if (parents.length > 0) {
            // the date string (ds) is the immediate parent directory name
            ds = parents[parents.length - 1];
        }
        VideologPair pair = VideologFilter.filte(value.toString(), ds, fileName);
        if (pair != null && pair.getKey() != null && pair.getValue() != null) {
            context.write(new Text(pair.getKey()), new Text(pair.getValue()));
        }
    }
}
From source file:com.ifeng.vdn.videolog.VideologFilter.java
License:Apache License
public static final VideologPair filerByErrCode(Text value) {
    String result = "";
    VideologPair pair = null;
    if (value == null) {
        pair = new VideologPair("");
        pair.setValue("");
        return pair;
    }
    String errCode = "";
    String[] items = value.toString().split("\t");
    if (items != null && items.length == 14) {
        // extract the err code from the eighth column (index 7):
        errCode = items[7];
        // check that the err code is valid; if not, ignore the record.
        // (the prefix literal in the startsWith() call is missing from the
        // source as published, so as written it always matches)
        if (VALID_ERR_CODE.contains(errCode) || errCode.startsWith("")) {
            pair = new VideologPair(items[0] + "|" + items[4]);
            result = value.toString();
            pair.setErrId(errCode);
            pair.setValue(result);
        }
    } else {
        pair = new VideologPair("");
        pair.setValue("");
    }
    return pair;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.ObjectNodeWritableComparable.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    final Text text = new Text();
    text.readFields(in);
    _object_node = (ObjectNode) _mapper.readTree(text.toString()); // (object node by construction)
}
From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java
License:Open Source License
public static BasicDBList getBsonFromSequenceFile(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws SAXException, IOException, ParserConfigurationException {
    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);

    @SuppressWarnings({ "unchecked", "rawtypes" })
    SequenceFileDirIterable<? extends Writable, ? extends Writable> seqFileDir =
            new SequenceFileDirIterable(pathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);

    // Very basic: only allow top-level fields, 1 level of nesting, and field removal
    HashSet<String> fieldLookup = null;
    if (null != fields) {
        fieldLookup = new HashSet<String>();
        String[] fieldArray = fields.split(",");
        for (String field : fieldArray) {
            String[] fieldDecomp = field.split(":");
            fieldLookup.add(fieldDecomp[0]);
        }
    } //TOTEST

    int nRecords = 0;
    for (Pair<? extends Writable, ? extends Writable> record : seqFileDir) {
        BasicDBObject element = new BasicDBObject();

        // KEY
        Writable key = record.getFirst();
        if (key instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) key;
            element.put("key", writable.toString());
        } else if (key instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) key;
            element.put("key", Double.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) key;
            element.put("key", Integer.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) key;
            element.put("key", Long.toString(writable.get()));
        } else if (key instanceof BSONWritable) {
            element.put("key", MongoDbUtil.convert((BSONWritable) key));
        }

        // VALUE
        Writable value = record.getSecond();
        if (value instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) value;
            element.put("value", writable.toString());
        } else if (value instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) value;
            element.put("value", Double.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) value;
            element.put("value", Integer.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) value;
            element.put("value", Long.toString(writable.get()));
        } else if (value instanceof BSONWritable) {
            element.put("value", MongoDbUtil.convert((BSONWritable) value));
        } else if (value instanceof org.apache.mahout.math.VectorWritable) {
            Vector vec = ((org.apache.mahout.math.VectorWritable) value).get();
            BasicDBList dbl2 = listFromMahoutVector(vec, "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.classify.WeightedVectorWritable) {
            org.apache.mahout.clustering.classify.WeightedVectorWritable vecW =
                    (org.apache.mahout.clustering.classify.WeightedVectorWritable) value;
            element.put("valueWeight", vecW.getWeight());
            BasicDBList dbl2 = listFromMahoutVector(vecW.getVector(), "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.iterator.ClusterWritable) {
            Cluster cluster = ((org.apache.mahout.clustering.iterator.ClusterWritable) value).getValue();
            BasicDBObject clusterVal = new BasicDBObject();
            clusterVal.put("center", listFromMahoutVector(cluster.getCenter(), "center", clusterVal));
            clusterVal.put("radius", listFromMahoutVector(cluster.getRadius(), "radius", clusterVal));
            element.put("value", clusterVal);
        } else {
            element.put("unknownValue", value.getClass().toString());
        }

        // Check the fields settings:
        // Only handle a few...
        if (null != fieldLookup) {
            for (String fieldToRemove : fieldLookup) {
                if (fieldToRemove.startsWith("value.")) {
                    fieldToRemove = fieldToRemove.substring(6);
                    // look up the nested value object
                    // (the published source looked up "value.", which never matches)
                    BasicDBObject nested = (BasicDBObject) element.get("value");
                    if (null != nested) {
                        nested.remove(fieldToRemove);
                    }
                } else {
                    element.remove(fieldToRemove);
                }
            } //TOTEST
        }

        dbl.add(element);
        nRecords++;
        if ((nLimit > 0) && (nRecords >= nLimit)) {
            break;
        }
    }
    return dbl;
}
From source file:com.impetus.code.examples.hadoop.mapred.earthquake.EarthQuakeMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    // skip the header line (offset 0)
    if (key.get() > 0) {
        String[] parsedData = value.toString().split(",");
        String date = DateCoverter.convertDate(parsedData[0]);
        if (date != null) {
            context.write(new Text(date), new IntWritable(1));
        }
    }
}
From source file:com.impetus.code.examples.hadoop.mapred.weather.MaxTempMapper.java
License:Apache License
@Override
public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String line = value.toString();
    String year = line.substring(15, 19);
    int airTemperature;
    if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
        airTemperature = Integer.parseInt(line.substring(88, 92));
    } else {
        airTemperature = Integer.parseInt(line.substring(87, 92));
    }
    String quality = line.substring(92, 93);
    if (airTemperature != MISSING && quality.matches("[01459]")) {
        context.write(new Text(year), new IntWritable(airTemperature));
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
/**
 * Implementation of the Mapper<>::map(). Does the copy.
 * @param relPath the target path
 * @param sourceFileStatus the source path
 * @throws IOException
 */
@Override
public void map(Text relPath, FileStatus sourceFileStatus, Context context)
        throws IOException, InterruptedException {
    Path sourcePath = sourceFileStatus.getPath();
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    if (LOG.isDebugEnabled())
        LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);
    Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString());

    EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context);

    final String description = "Copying " + sourcePath + " to " + target;
    context.setStatus(description);
    LOG.info(description);

    try {
        FileStatus sourceCurrStatus;
        FileSystem sourceFS;
        try {
            sourceFS = sourcePath.getFileSystem(conf);
            sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
        } catch (FileNotFoundException e) {
            throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
        }

        FileStatus targetStatus = null;
        try {
            targetStatus = targetFS.getFileStatus(target);
        } catch (FileNotFoundException ignore) {
        }

        if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) {
            throw new IOException("Can't replace " + target + ". Target is " + getFileType(targetStatus)
                    + ", Source is " + getFileType(sourceCurrStatus));
        }

        if (sourceCurrStatus.isDir()) {
            createTargetDirsWithRetry(description, target, context);
            return;
        }

        if (skipFile(sourceFS, sourceCurrStatus, target)) {
            LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target);
            updateSkipCounters(context, sourceCurrStatus);
        } else {
            String streamName = null;
            if (!relPath.toString().isEmpty()) {
                Path relativePath = new Path(relPath.toString());
                if (relativePath.depth() > 2) {
                    // path is for mirror service and is of format
                    // /conduit/streams/<streamName>/2013/09/12
                    Path tmpPath = relativePath;
                    while (tmpPath.getParent() != null
                            && !tmpPath.getParent().getName().equals("streams")) {
                        tmpPath = tmpPath.getParent();
                    }
                    streamName = tmpPath.getName();
                } else {
                    // path is for merge service and of form /<stream name>/filename.gz
                    streamName = relativePath.getParent().getName();
                }
            }
            copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received);
            // generate audit counters
            if (received != null) {
                for (Entry<Long, Long> entry : received.entrySet()) {
                    String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(),
                            entry.getKey(), entry.getValue());
                    context.write(NullWritable.get(), new Text(counterNameValue));
                }
            }
        }
        DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes);
    } catch (IOException exception) {
        handleFailures(exception, sourceFileStatus, target, context);
    }
}