List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
From source file:co.nubetech.hiho.similarity.ngram.NGramMapper.java
License:Apache License
/**
 * Emits one output record for every n-gram that {@code getNGrams} returns
 * for the key, using a gram size of 2.
 *
 * <p>The output key is the n-gram; the output value is the original key and
 * value joined by the literal {@code "delimiterBetweenKeyAndValue"}. Each
 * emitted pair is also logged at info level.
 *
 * @param key     input key whose text is broken into n-grams; must not be null
 * @param val     input value carried along in every emitted record
 * @param context sink for the emitted records
 * @throws IOException          if {@code key} is null or the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void map(Text key, Text val, Context context) throws IOException, InterruptedException {
    if (key == null) {
        throw new IOException("Key is null");
    }

    final int gramSize = 2;
    HashSet<String> nGramList = getNGrams(key, gramSize);
    if (nGramList.isEmpty()) {
        // Nothing to emit; return before touching val, exactly as the
        // per-iteration construction used to behave.
        return;
    }

    // The payload is identical for every n-gram of this record, so build it once.
    String value = key.toString() + "delimiterBetweenKeyAndValue" + val.toString();
    for (String nGrams : nGramList) {
        Text outKey = new Text(nGrams);
        Text outValue = new Text(value);
        context.write(outKey, outValue);
        logger.info("Key and Value in NGram Mapper is: " + outKey + ", " + outValue);
    }
}
From source file:co.nubetech.hiho.similarity.ngram.NGramMapper.java
License:Apache License
public HashSet<String> getNGrams(Text line, int gramSize) { ArrayList<String> words = new ArrayList<String>(); HashSet<String> nGrams = new HashSet<String>(); String[] tokens = line.toString().split(" "); for (String t : tokens) { words.add(t);//from w ww. j a va2s . co m } for (int i = 0; i < words.size() - gramSize + 1; i++) { String key = ""; for (int j = i; j < i + gramSize; j++) { key += words.get(j); if (j != (i + gramSize - 1)) { key += " "; } } nGrams.add(key); } return nGrams; }
From source file:com.acme.io.JsonLoader.java
License:Apache License
/** * Retrieves the next tuple to be processed. Implementations should NOT * reuse tuple objects (or inner member objects) they return across calls * and should return a different tuple object in each call. * @return the next tuple to be processed or null if there are no more * tuples to be processed.//from ww w . ja va2 s. c o m * @throws IOException if there is an exception while retrieving the next * tuple */ public Tuple getNext() throws IOException { Text val = null; try { // Read the next key value pair from the record reader. If it's // finished, return null if (!reader.nextKeyValue()) return null; // Get the current value. We don't use the key. val = (Text) reader.getCurrentValue(); } catch (InterruptedException ie) { throw new IOException(ie); } // Create a parser specific for this input line. This may not be the // most efficient approach. ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes()); JsonParser p = jsonFactory.createJsonParser(bais); // Create the tuple we will be returning. We create it with the right // number of fields, as the Tuple object is optimized for this case. Tuple t = tupleFactory.newTuple(fields.length); // Read the start object marker. Throughout this file if the parsing // isn't what we expect we return a tuple with null fields rather than // throwing an exception. That way a few mangled lines don't fail the // job. if (p.nextToken() != JsonToken.START_OBJECT) { log.warn("Bad record, could not find start of record " + val.toString()); return t; } // Read each field in the record for (int i = 0; i < fields.length; i++) { t.set(i, readField(p, fields[i], i)); } if (p.nextToken() != JsonToken.END_OBJECT) { log.warn("Bad record, could not find end of record " + val.toString()); return t; } p.close(); return t; }
From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.PhoenixCsvToKeyValueMapper.java
License:Apache License
@SuppressWarnings("deprecation") @Override/* www . j a v a 2 s . co m*/ protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String lineStr = value.toString(); // ? if (null != lineStr && lineStr.length() > 1) { // ?rowkey(hash?++md5?)???? lineStr = generateRowKey(lineStr) + separator + lineStr; } ImmutableBytesWritable outputKey = new ImmutableBytesWritable(); try { CSVRecord csvRecord = null; try { csvRecord = csvLineParser.parse(lineStr); } catch (IOException e) { context.getCounter(COUNTER_GROUP_NAME, "CSV Parser errors").increment(1L); } if (csvRecord == null) { context.getCounter(COUNTER_GROUP_NAME, "Empty records").increment(1L); return; } csvUpsertExecutor.execute(ImmutableList.of(csvRecord)); Iterator<Pair<byte[], List<KeyValue>>> uncommittedDataIterator = PhoenixRuntime .getUncommittedDataIterator(conn); while (uncommittedDataIterator.hasNext()) { Pair<byte[], List<KeyValue>> kvPair = uncommittedDataIterator.next(); List<KeyValue> keyValueList = kvPair.getSecond(); keyValueList = preUpdateProcessor.preUpsert(kvPair.getFirst(), keyValueList); for (KeyValue kv : keyValueList) { outputKey.set(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength()); context.write(outputKey, kv); } } conn.rollback(); } catch (Exception e) { throw new RuntimeException(e); } }
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImporterMapper.java
License:Apache License
/** * Convert a line of TSV text into an HBase table row. *///from ww w .j av a 2s . com @Override public void map(LongWritable offset, Text value, Context context) throws IOException { byte[] lineBytes = value.getBytes(); ts = System.currentTimeMillis(); try { MutipleColumnImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength()); String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey Put put = new Put(newRowKey.getBytes()); for (int i = 0; i < parsed.getColumnCount(); i++) { String columnQualifierStr = new String(parser.getQualifier(i)); String rowStr = newRowKey + new String(parser.getFamily(i) + columnQualifierStr); if (notNeedLoadColumnQulifiers.contains(columnQualifierStr)) { continue; } KeyValue kv = new KeyValue(rowStr.getBytes(), 0, newRowKey.getBytes().length, //roffset,rofflength parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); KeyValue newKv = new KeyValue(newRowKey.getBytes(), kv.getFamily(), kv.getQualifier(), ts, kv.getValue()); kv = null; put.add(newKv); } context.write(new ImmutableBytesWritable(newRowKey.getBytes()), put); } catch (MutipleColumnImportTsv.TsvParser.BadTsvLineException badLine) { if (skipBadLines) { System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage()); incrementBadLineCount(1); return; } else { throw new IOException(badLine); } } catch (IllegalArgumentException e) { if (skipBadLines) { System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage()); incrementBadLineCount(1); return; } else { throw new IOException(e); } } catch (InterruptedException e) { e.printStackTrace(); } catch (RowKeyGeneratorException e) { System.err.println("gen rowkey error, please check config in the ocnosqlTab.xml." 
+ e.getMessage()); throw new IOException(e); } finally { totalLineCount.increment(1); } }
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java
License:Apache License
/** * Convert a line of TSV text into an HBase table row. * /*from w w w. j ava2 s. c o m*/ */ @Override public void map(LongWritable offset, Text value, Context context) throws IOException { byte[] lineBytes = value.getBytes(); try { TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength()); // Text[] texts = new Text[parsed.getColumnCount()]; int index = 0; for (int i = 0; i < parsed.getColumnCount(); i++) { // if (i == parser.getRowKeyColumnIndex()){ // continue; // } text = new Text(); //? text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); texts[index] = text; index++; } writer.set(texts); /* //rowkey String oriRowKey = new String(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength()); // hash rowkey String newRowKey = oriRowKey; if(rowkeyGenerator != null){ newRowKey = (String)rowkeyGenerator.generate(oriRowKey); } */ String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey //LOG.info("single column newRowKey = " + newRowKey); context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer); } catch (BadTsvLineException badLine) { if (skipBadLines) { LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage()); badLineCount.increment(1); return; } else { throw new IOException(badLine); } } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:com.alectenharmsel.research.LineCountMapper.java
License:Apache License
/**
 * Counts the newline characters in the value and emits the count under the
 * unchanged input key.
 *
 * @param key      input key, passed through as the output key
 * @param contents text whose {@code '\n'} characters are counted
 * @param context  sink for the (key, count) pair
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
public void map(Text key, Text contents, Context context) throws IOException, InterruptedException {
    String text = contents.toString();
    long newlines = 0;
    // Jump from one newline to the next instead of inspecting every character.
    int pos = text.indexOf('\n');
    while (pos >= 0) {
        newlines++;
        pos = text.indexOf('\n', pos + 1);
    }
    context.write(key, new LongWritable(newlines));
}
From source file:com.alectenharmsel.research.MoabLicensesMapper.java
License:Apache License
/**
 * Extracts license figures from a log line that mentions "License".
 *
 * <p>The line is split on spaces and empty tokens are discarded. Only lines
 * with exactly 13 remaining tokens are used: the output key is token 4,
 * a dash, and token 0 with slashes turned into dashes; the output value is
 * token 5 and token 7 separated by a comma.
 *
 * @param key      input key (not used)
 * @param contents the log line
 * @param context  sink for the emitted pair
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text contents, Context context) throws IOException, InterruptedException {
    String line = contents.toString();
    if (!line.contains("License")) {
        return;
    }

    ArrayList<String> fields = new ArrayList<String>();
    for (String token : line.split(" ")) {
        if (!token.isEmpty()) {
            fields.add(token);
        }
    }
    if (fields.size() != 13) {
        return;
    }

    String date = fields.get(0).replace('/', '-');
    String pkgName = fields.get(4);
    String licenseInfo = fields.get(5) + "," + fields.get(7);
    context.write(new Text(pkgName + "-" + date), new Text(licenseInfo));
}
From source file:com.alectenharmsel.research.MoabLicensesReducer.java
License:Apache License
/**
 * Averages the comma-separated integer pairs received for a key.
 *
 * <p>Each value is parsed as {@code "first,second"}. The output value is the
 * average of the first numbers (always as a double), a comma, and the
 * average of the second numbers (as an integer when it divides evenly,
 * otherwise as a double). The output key rearranges the dash-separated
 * input key: the last two parts joined by a dash, a comma, then the
 * remaining leading parts joined by dashes.
 *
 * @param key     dash-separated input key
 * @param counts  values of the form {@code "first,second"}
 * @param context sink for the emitted pair
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
public void reduce(Text key, Iterable<Text> counts, Context context) throws IOException, InterruptedException {
    int availSum = 0;
    int totalSum = 0;
    int samples = 0;
    for (Text entry : counts) {
        String[] pair = entry.toString().split(",");
        availSum += Integer.parseInt(pair[0]);
        totalSum += Integer.parseInt(pair[1]);
        samples++;
    }

    double avgAvail = (double) availSum / (double) samples;
    String avgTotal;
    if (totalSum % samples != 0) {
        avgTotal = String.valueOf((double) totalSum / (double) samples);
    } else {
        avgTotal = String.valueOf(totalSum / samples);
    }

    String[] parts = key.toString().split("-");
    int tailStart = parts.length - 2;
    StringBuilder keyOut = new StringBuilder();
    keyOut.append(parts[tailStart]).append("-").append(parts[tailStart + 1]).append(",");
    for (int i = 0; i < tailStart; i++) {
        if (i > 0) {
            keyOut.append("-");
        }
        keyOut.append(parts[i]);
    }

    context.write(new Text(keyOut.toString()), new Text(avgAvail + "," + avgTotal));
}
From source file:com.alectenharmsel.research.MoabLogSearchMapper.java
License:Apache License
/**
 * Passes through every line that contains the text "ERROR", emitting it
 * under the constant key 0. All other lines are dropped.
 *
 * @param key      input key (not used)
 * @param contents the log line
 * @param context  sink for matching lines
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text contents, Context context) throws IOException, InterruptedException {
    boolean isError = contents.toString().indexOf("ERROR") >= 0;
    if (!isError) {
        return;
    }
    context.write(new LongWritable(0), contents);
}