List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
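Before the project-specific examples, here is a minimal standalone sketch of the call itself (class name and string contents are illustrative only): Text stores its contents as UTF-8 bytes, and toString() decodes those bytes back into a java.lang.String.

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text holds the string as UTF-8 bytes; toString() decodes them into a java.lang.String.
        Text text = new Text("hello hadoop");
        String s = text.toString();
        System.out.println(s);             // hello hadoop

        // A pattern that recurs in the examples below: trim and split the decoded line.
        String[] tokens = s.trim().split("\\s+");
        System.out.println(tokens.length); // 2
    }
}

Most of the examples below follow this shape: a record arrives as a Text (from a RecordReader, a Mapper, or a SerDe), toString() turns it into a String, and ordinary String/regex operations do the parsing.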
From source file: com.ibm.bi.dml.udf.lib.RemoveEmptyRows.java
License: Open Source License

@Override
public void execute() {
    Matrix mat = (Matrix) this.getFunctionInput(0);
    String fnameOld = mat.getFilePath();

    HashMap<Long, Long> keyMap = new HashMap<Long, Long>(); // old,new rowID

    try {
        // prepare input
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameOld);
        FileSystem fs = FileSystem.get(job);
        if (!fs.exists(path))
            throw new IOException("File " + fnameOld + " does not exist on HDFS.");
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);

        // prepare output
        String fnameNew = createOutputFilePathAndName(OUTPUT_FILE);
        DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream(fnameNew, true);

        // read and write if necessary
        InputSplit[] splits = informat.getSplits(job, 1);
        LongWritable key = new LongWritable();
        Text value = new Text();
        long ID = 1;

        try {
            // for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();

            for (InputSplit split : splits) {
                RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
                try {
                    while (reader.next(key, value)) {
                        String cellStr = value.toString().trim();
                        StringTokenizer st = new StringTokenizer(cellStr, " ");
                        long row = Integer.parseInt(st.nextToken());
                        long col = Integer.parseInt(st.nextToken());
                        double lvalue = Double.parseDouble(st.nextToken());

                        if (!keyMap.containsKey(row))
                            keyMap.put(row, ID++);
                        long rowNew = keyMap.get(row);

                        sb.append(rowNew);
                        sb.append(' ');
                        sb.append(col);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');

                        ostream.writeBytes(sb.toString());
                        sb.setLength(0);
                    }
                } finally {
                    if (reader != null)
                        reader.close();
                }
            }

            _ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
        } finally {
            if (ostream != null)
                ostream.close();
        }
    } catch (Exception ex) {
        throw new RuntimeException("Unable to execute external function.", ex);
    }
}
From source file: com.ibm.db2j.AccumuloVTI.java
License: Open Source License

/**
 * GaianDB extracts rows by calling this method repeatedly.
 * 'dvdRecord' contains the number of columns resolved in tableShapeRSMD.
 * However we only need to populate the projected column indexes.
 */
@Override
public int nextRow(final DataValueDescriptor[] dvdRecord) throws StandardException, SQLException {

    // logger.logDetail("Getting new relational record based on set of Accumulo rows. rowCount = " + rowCount +
    //         ", currenAccumuloRow: " + currentAccumuloRow );

    if (0 == rowCount) {
        numRowsReceivedFromAccumulo = 0;
        if (0 == projectedColumns.length || false == rowScanIterator.hasNext())
            return IFastPath.SCAN_COMPLETED; // empty table
        else
            currentAccumuloRow = rowScanIterator.next(); // kick-start row extraction
    }

    // Check if there are any Accumulo records left...
    if (null == currentAccumuloRow)
        return IFastPath.SCAN_COMPLETED;

    // Lots of info available off the Key: rowID, col name/family, col qualifier, visibility, timestamp
    Key key = currentAccumuloRow.getKey();
    Text rowID = key.getRow();

    // Look for a new record... until one is found that meets qualifiers, or until none are left
    do {
        // Check if there are any Accumulo records left...
        if (null == currentAccumuloRow)
            return IFastPath.SCAN_COMPLETED;

        numRowsReceivedFromAccumulo++;

        // Set rowID column before extracting others associated with it in the while loop
        if (1 == rowidColShift)
            dvdRecord[0].setValue(rowID.toString());

        // Initialise column cells to NULL value.
        for (int i = rowidColShift; i < projectedColumns.length; i++)
            dvdRecord[projectedColumns[i] - 1].setToNull();

        // Extract columns from Accumulo records for this rowID - note: Accumulo rows don't have to be complete
        Text previousRowID = rowID;
        while (rowID.equals(previousRowID)) {

            final String colName = key.getColumnFamily().toString();
            final Integer pColID = projectedColumnsNameToIndexMap.get(colName);

            if (null == pColID) {
                logger.logImportant(
                        "Encountered Accumulo column which was not requested as column family (skipped): " + colName);
                continue; // this column was not requested - should not happen
            }

            // Log info about the newly found column
            final String cellStringValue = isExtractAccumuloColumnQualifiersInPlaceOfValues
                    ? currentAccumuloRow.getKey().getColumnQualifier().toString()
                    : currentAccumuloRow.getValue().toString();

            // logger.logDetail("Setting ProjectedColID: " + pColID +
            //         ", from record with Key: " + key + " ==> ColFamily: " + key.getColumnFamily()
            //         + ( isExtractAccumuloColumnQualifiersInPlaceOfValues ? ", ColQualifier: " : ", Value: " ) + cellStringValue );

            // Set column value for the row - this also does type conversion.
            dvdRecord[pColID - 1].setValue(cellStringValue); // normalise to 0-based

            // Scroll to the next column - break if we run out of records (rows don't have to be complete)
            if (false == rowScanIterator.hasNext()) {
                currentAccumuloRow = null;
                break;
            }
            currentAccumuloRow = rowScanIterator.next();
            key = currentAccumuloRow.getKey();
            previousRowID = rowID;
            rowID = key.getRow();
        }

    } while (null != qualifiers && false == RowsFilter.testQualifiers(dvdRecord, qualifiers));

    rowCount++;
    return IFastPath.GOT_ROW;
}
From source file: com.ibm.jaql.io.hadoop.converter.FromJsonTextConverter.java
License: Apache License

@Override
protected ToJson<Text> createValueConverter() {
    return new ToJson<Text>() {
        JsonParser parser = new JsonParser();

        public JsonValue convert(Text src, JsonValue tgt) {
            if (src == null) {
                return null;
            }
            try {
                parser.ReInit(new StringReader(src.toString()));
                JsonValue value = parser.JsonVal();
                return value;
            } catch (ParseException pe) {
                throw new RuntimeException(pe);
            }
        }

        public JsonValue createTarget() {
            return null;
        }

        public Schema getSchema() {
            return SchemaFactory.anySchema();
        }
    };
}
From source file: com.ibm.jaql.io.hadoop.FromLinesConverter.java
License: Apache License

/** Converts the given line into a JSON value. */
@Override
public JsonValue convert(LongWritable key, Text value, JsonValue target) {
    String text = value.toString();
    if (text.equals(nullString)) {
        if (converter.isNullable()) {
            return null;
        } else {
            throw new RuntimeException("found null value, expected " + converter.getType());
        }
    }
    target = converter.convert(new JsonString(value.toString()), target);
    return target;
}
From source file: com.ibm.spss.hive.serde2.xml.XmlSerDe.java
License: Open Source License

/**
 * @see org.apache.hadoop.hive.serde2.Deserializer#deserialize(org.apache.hadoop.io.Writable)
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    Text text = (Text) writable;
    if (text == null || text.getLength() == 0) {
        return (Object) null;
    }
    try {
        return this.xmlProcessor.parse(text.toString());
    } catch (Exception e) {
        throw new SerDeException(e);
    }
}
From source file: com.ifeng.hadoop.thinker.LogMapper.java
License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    try {
        if (value != null && value.toString().trim().length() > 0) {
            String line = value.toString();
            String[] items = line.split("\\s+");
            if (items.length == 3) {
                if (items[0] != null && items[1] != null && items[2] != null) {
                    String name = items[0];
                    String swift = items[1].trim();
                    String origin = items[2].trim();

                    if (swift.split(":").length == 2 && origin.split(":").length == 2) {
                        swift = swift.split(":")[1];
                        origin = origin.split(":")[1];

                        long swfitSize = Long.parseLong(swift);
                        long originSize = Long.parseLong(origin);

                        LogModel model = new LogModel(name, swfitSize, originSize, (swfitSize - originSize));
                        context.write(new Text(name), new Text(model.toString()));
                    } else {
                        log.info("Invalid line: {}", value.toString());
                    }
                }
            } else {
                log.info("Invalid line: {}", value.toString());
            }
        }
    } catch (NumberFormatException e) {
        log.error(e.getMessage(), e);
    }
}
From source file: com.ifeng.ipserver.IPServerLogParseMapper.java
License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    String line = value.toString();

    if (line.contains("The node 59.53.89.94") || line.contains("http://59.53.89.94")) {
        log.info("Matched: {}", line);
        context.write(new Text(key.toString()), value);
    }
}
From source file: com.ifeng.logparser.NginxLogMapper.java
License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    //log.info("Key = {} -- value = {}", key, value);
    String line = value.toString();
    String[] logs = line.split(" - - ");

    if (logs.length == 2) {
        String seria = "\"NULL\"";
        String ip = logs[0];
        String factors = logs[1];

        //String[] msgs = factors.split("\"-\"");
        //String[] msgs = factors.split("\\\"[^\"]*\"");

        //\"[^"]*"
        Pattern pattern = Pattern.compile("\"[^\"]*\"");
        Matcher matcher = pattern.matcher(factors);
        matcher.find();
        matcher.find();
        if (matcher.find()) {
            seria = matcher.group().replaceAll("\"", "");
            log.info("Seria: {}", seria);
        }

        //log.info("length: {} == {}", msgs.length, msgs);

        /*if(msgs != null && msgs.length > 1){
            seria = msgs[2].trim();
        }*/

        seria = (seria.length() > 0 && (!seria.equals("-"))) ? seria : "NULL";

        for (int i = 0; i < (15 - ip.length()); i++) {
            ip += " ";
        }

        context.write(new Text(ip), new Text("\t" + seria));
        log.info("IP: {}, Mathine: {}", ip, seria);
    }
}
From source file: com.ifeng.sorter.LogSortMapper.java
License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    log.info("Value: {}", value.toString());
}
From source file: com.ifeng.vdn.iparea.parser.IPAreaMapper.java
License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    log.info("Key: {}, Value: {}", key, value);

    if (value != null) {
        String[] items = value.toString().split("\\|");
        String start = items[0];
        String end = items[1];

        List<String> ips = IPV4Handler.getAllFromRange(start, end);

        for (String ip : ips) {
            StringBuilder sb = new StringBuilder();
            //sb.append("\t");
            sb.append(items[2]);
            sb.append("\t");
            sb.append(items[3]);
            sb.append("\t");
            sb.append(items[4]);

            context.write(new Text(ip), new Text(sb.toString()));
        }
    }
}