List of usage examples for org.apache.hadoop.io.Text#set
public void set(Text other)
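Text#set replaces the contents of an existing Text instance in place rather than allocating a new object, which is why the examples below typically create one Text outside a loop and call set() once per record. A minimal, self-contained sketch of the common overloads (the class name TextSetExample is illustrative, not from any of the sources below):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text t = new Text();

        // set(String): replace contents with the UTF-8 encoding of the string
        t.set("hello");

        // set(Text other): copy the bytes of another Text
        t.set(new Text("world"));

        // set(byte[] utf8): adopt a whole UTF-8 byte array
        byte[] utf8 = "abcdef".getBytes(StandardCharsets.UTF_8);
        t.set(utf8);

        // set(byte[] utf8, int start, int len): copy a slice -> "bcd"
        t.set(utf8, 1, 3);

        System.out.println(t); // prints: bcd
    }
}

Reusing a single mutable Text this way avoids per-record allocation, the standard pattern in the MapReduce-style loops shown below.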
From source file:gobblin.compat.TextSerializerTest.java
License:Apache License
@Test
public void testDeserialize() throws IOException {
    // Use Hadoop's serializer, verify our deserializer can read the string back
    for (String textToSerialize : textsToSerialize) {
        ByteArrayOutputStream bOs = new ByteArrayOutputStream();
        DataOutputStream dataOutputStream = new DataOutputStream(bOs);

        Text hadoopText = new Text();
        hadoopText.set(textToSerialize);
        hadoopText.write(dataOutputStream);
        dataOutputStream.close();

        ByteArrayInputStream bIn = new ByteArrayInputStream(bOs.toByteArray());
        DataInputStream dataInputStream = new DataInputStream(bIn);

        String deserializedString = TextSerializer.readTextAsString(dataInputStream);
        Assert.assertEquals(deserializedString, textToSerialize);
    }
}
From source file:gobblin.runtime.JobState.java
License:Apache License
public void write(DataOutput out, boolean writeTasks) throws IOException {
    Text text = new Text();
    text.set(this.jobName);
    text.write(out);
    text.set(this.jobId);
    text.write(out);
    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);
    text.set(this.state.name());
    text.write(out);
    out.writeInt(this.taskCount);
    if (writeTasks) {
        out.writeInt(this.taskStates.size() + this.skippedTaskStates.size());
        for (TaskState taskState : this.taskStates.values()) {
            taskState.write(out);
        }
        for (TaskState taskState : this.skippedTaskStates.values()) {
            taskState.write(out);
        }
    } else {
        out.writeInt(0);
    }
    super.write(out);
}
From source file:gobblin.runtime.TaskState.java
License:Apache License
@Override
public void write(DataOutput out) throws IOException {
    Text text = new Text();
    text.set(this.jobId);
    text.write(out);
    text.set(this.taskId);
    text.write(out);
    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);
    super.write(out);
}
From source file:gov.jgi.meta.hadoop.input.FastaLineReader.java
License:Open Source License
/**
 * Read one line from the InputStream into the given Text. A line
 * can be terminated by one of the following: '\n' (LF), '\r' (CR),
 * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated
 * line.
 *
 * @param key the object to store the record id (the fasta header line)
 * @param str the object to store the given line (without newline)
 * @param maxLineLength the maximum number of bytes to store into str;
 *  the rest of the line is silently discarded.
 * @param maxBytesToConsume the maximum number of bytes to consume
 *  in this call. This is only a hint, because if the line crosses
 *  this threshold, we allow it to happen. It can overshoot
 *  potentially by as much as one buffer length.
 *
 * @return the number of bytes read including the (longest) newline
 *  found.
 *
 * @throws IOException if the underlying stream throws
 */
public int readLine(Text key, Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    int totalBytesRead = 0;
    int numRecordsRead = 0;
    Boolean eof = false;
    int startPosn;
    StringBuilder recordBlock = new StringBuilder(this.bufferSize);

    /* first thing to do is to move forward till you see a start character */
    startPosn = bufferPosn;
    do {
        if (bufferPosn >= bufferLength) {
            totalBytesRead += bufferPosn - startPosn;
            bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                eof = true;
                break; // EOF
            }
        }
    } while (buffer[bufferPosn++] != '>');

    /* if we hit the end of file already, then just return 0 bytes processed */
    if (eof)
        return totalBytesRead;

    /* now bufferPosn should be at the start of a fasta record */
    totalBytesRead += (bufferPosn - 1) - startPosn;
    startPosn = bufferPosn - 1; // startPosn guaranteed to be at a ">"

    /* find the next record start: first scan to end of the line */
    eof = false;
    do {
        if (bufferPosn >= bufferLength) {
            /* copy the current buffer before refreshing the buffer */
            int appendLength = bufferPosn - startPosn;
            for (int copyi = startPosn; copyi < startPosn + appendLength; copyi++) {
                recordBlock.append((char) buffer[copyi]);
            }
            //recordBlock.append(buffer, startPosn, appendLength);
            totalBytesRead += appendLength;
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                eof = true;
                break; // EOF
            }
        }
        bufferPosn++;
    } while (buffer[bufferPosn - 1] != CR && buffer[bufferPosn - 1] != LF);

    /* find the next record start: scan till next ">" */
    do {
        if (bufferPosn >= bufferLength) {
            /* copy the current buffer before refreshing the buffer */
            int appendLength = bufferPosn - startPosn;
            for (int copyi = startPosn; copyi < startPosn + appendLength; copyi++) {
                recordBlock.append((char) buffer[copyi]);
            }
            //recordBlock.append(buffer, startPosn, appendLength);
            totalBytesRead += appendLength;
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                eof = true;
                break; // EOF
            }
        }
    } while (buffer[bufferPosn++] != '>'); // only read one record at a time

    if (!eof) {
        bufferPosn--; // make sure we leave bufferPosn pointing to the next record
        int appendLength = bufferPosn - startPosn;
        for (int copyi = startPosn; copyi < startPosn + appendLength; copyi++) {
            recordBlock.append((char) buffer[copyi]);
        }
        //recordBlock.append(buffer, startPosn, appendLength);
        totalBytesRead += appendLength;
    }

    /* record block now has the byte array we want to process for reads */
    int i = 1; // skip initial record separator ">"
    int j = 1;
    do {
        key.clear();
        str.clear();

        /* first parse the key */
        i = j;
        Boolean junkOnLine = false;
        while (j < recordBlock.length()) {
            int c = recordBlock.charAt(j++);
            if (c == CR || c == LF) {
                break;
            }
            //else if (c == ' ' || c == '\t') {
            //    junkOnLine = true;
            //    break;
            //}
        }
        if (j == i) { // then we didn't parse out a proper id
            LOG.error("Unable to parse entry: " + recordBlock);
            str.clear();
            key.clear();
            return totalBytesRead;
        }
        key.set(recordBlock.substring(i, j - 1));

        /* in case there is additional metadata on the header line, ignore everything after the first word. */
        if (junkOnLine) {
            while (j < recordBlock.length() && recordBlock.charAt(j) != CR && recordBlock.charAt(j) != LF)
                j++;
        }

        //LOG.info("key = " + k.toString());

        /* now skip the newlines */
        while (j < recordBlock.length() && (recordBlock.charAt(j) == CR || recordBlock.charAt(j) == LF))
            j++;

        /* now read the sequence */
        StringBuilder sequenceTmp = new StringBuilder(recordBlock.length());
        do {
            i = j;
            while (j < recordBlock.length()) {
                int c = recordBlock.charAt(j++);
                if (c == CR || c == LF) {
                    break;
                }
            }
            //byte[] ba = recordBlock.getBytes();
            //if (ba.length <= i || ba.length <= j - i - 1) {
            //    LOG.fatal("hmm... ba.length = " + ba.length + " i = " + i + " j-i-1 = " + (j - i - 1));
            //}
            if (j == i) { // then we didn't parse out a proper id
                LOG.error("Unable to parse entry: " + recordBlock);
                str.clear();
                key.clear();
                return totalBytesRead;
            }
            for (int copyi = i; copyi < j - 1; copyi++) {
                sequenceTmp.append((char) recordBlock.charAt(copyi));
            }
            while (j < recordBlock.length() && (recordBlock.charAt(j) == CR || recordBlock.charAt(j) == LF))
                j++;
        } while (j < recordBlock.length() && recordBlock.charAt(j) != '>');

        str.set(sequenceTmp.toString());
        numRecordsRead++;

        /* now skip characters (newline or carriage return most likely) till record start */
        while (j < recordBlock.length() && recordBlock.charAt(j) != '>') {
            j++;
        }
        j++; // skip the ">"
    } while (j < recordBlock.length());

    //LOG.info("");
    //LOG.info("object key = " + key);

    byte[] strpacked = SequenceString.sequenceToByteArray(str.toString().toLowerCase());
    str.clear();
    str.append(strpacked, 0, strpacked.length);

    return totalBytesRead;
}
From source file:gr.ntua.h2rdf.partialJoin.HbaseJoinBGPReducer.java
License:Open Source License
private void writeOut(Text outKey2, Text outValue2, Context context) {
    if (isLast == 2) { // prepare data for MapReduce translation
        StringTokenizer list;
        list = new StringTokenizer(outKey2.toString());
        list.nextToken("!");
        StringTokenizer tok;
        String newline = "";
        while (list.hasMoreTokens()) {
            String binding = list.nextToken("!");
            tok = new StringTokenizer(binding);
            String pred = tok.nextToken("#");
            if (resultVars.contains(pred.substring(1))) {
                newline += binding + "!";
                pred += "#";
                if (!tok.hasMoreTokens()) {
                    System.exit(2);
                }
                String b = tok.nextToken("#");
                StringTokenizer tokenizer = new StringTokenizer(b);
                while (tokenizer.hasMoreTokens()) {
                    String temp = tokenizer.nextToken("_");
                    Integer id = CastLongToInt.castLong(Long.parseLong(temp));
                    trans_hash.add(id);
                }
            }
        }
        outKey2.set(newline);
    }
    /*if (isLast == 1) { // Index Translate
        StringTokenizer list;
        list = new StringTokenizer(outKey2.toString());
        String newline = "";
        list.nextToken("!");
        StringTokenizer tok;
        while (list.hasMoreTokens()) {
            String binding = list.nextToken("!");
            //if (binding.startsWith("?")) {
            tok = new StringTokenizer(binding);
            String pred = tok.nextToken("#");
            if (resultVars.contains(pred.substring(1))) {
                pred += "#";
                if (!tok.hasMoreTokens()) {
                    System.exit(2);
                }
                //byte[] b = Bytes.toBytes(tok.nextToken("#").toCharArray());
                String b = tok.nextToken("#");
                newline = transform(newline, b, pred);
            }
            //}
        }
        outKey2.set(newline);
    }*/
    try {
        context.write(outKey2, outValue2);
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:gr.ntua.h2rdf.partialJoin.ThreadedProcessor.java
License:Open Source License
public void run() {
    String key, tpat, else_value;
    ValueDoubleMerger value;
    Text outKey = new Text();
    try {
        int end = (threadId + 1) * size;
        if (end > keyTable.length)
            end = keyTable.length;
        long time = 0, nextTime;
        float averLatency = 0;
        int num = 0;
        for (int i = threadId * size; i < end; i++) {
            /*if (num % 100 == 0)
                System.out.println("Average Latency (ms): " + averLatency);*/
            key = keyTable[i];
            value = bindings.get(key);
            int bind_no = value.getTotalPatterns();
            if (nonJoinSize == 0) { // Full input
                //System.out.println("bind_no=" + bind_no + " in_no=" + in_no);
                if (bind_no == in_no) {
                    //out.writeBytes(sum + key + "!" + else_value + "\n");
                    if (value.itter()) {
                        while (value.hasMore()) {
                            String s1 = value.getValue();
                            if (!s1.equals("")) {
                                outKey.set(sum + key + "!" + value.getTotal() + "!" + s1);
                                SerialQueryProcessorThreaded.writeOut(outKey);
                            }
                        }
                    } else {
                        outKey.set(sum + key + "!" + value.getTotal() + "!");
                        SerialQueryProcessorThreaded.writeOut(outKey);
                    }
                    //outKey.set(sum + key + "!" + else_value + "!");
                    //SerialQueryProcessorThreaded.writeOut(outKey);
                }
            } else {
                int vid = SerialQueryProcessorThreaded.getvarind(var);
                int nonjno = SerialQueryProcessorThreaded.nonJoinSizeTab[vid];
                //System.out.println(vid);
                //System.out.println(nonjno);
                String foundkvals1 = null;
                //System.out.println("bind_no=" + bind_no + " in_no=" + in_no + " nonjno=" + nonjno);
                if (bind_no == in_no - nonjno) {
                    StringTokenizer vt = new StringTokenizer(key.toString());
                    String patvals = vt.nextToken("#");
                    String keyvals = vt.nextToken("#");
                    foundkvals1 = patvals + "#";
                    String foundkb = "";
                    int foundsize = 0;
                    boolean findDoub = SerialQueryProcessorThreaded.findDouble(vid);
                    StringTokenizer tokenizer1 = new StringTokenizer(keyvals);
                    while (tokenizer1.hasMoreTokens()) {
                        String temp2 = tokenizer1.nextToken("_");
                        //System.out.println(temp2);
                        byte[] temp3 = Bytes.toBytes(Long.parseLong(temp2.substring(temp2.indexOf("|") + 1)));
                        byte[] temp1 = new byte[totsize];
                        temp1[0] = (byte) new Byte(temp2.substring(0, temp2.indexOf("|")));
                        for (int j = 0; j < 8; j++) {
                            temp1[j + 1] = temp3[j];
                        }
                        int found = 0;
                        String outkeytemp = "";
                        //time = new Date().getTime();
                        for (int jj = 0; jj < nonjno; jj++) {
                            SerialQueryProcessorThreaded.nonJoinCol[vid][jj] =
                                    SerialQueryProcessorThreaded.nonJoinCol[vid][jj].replace(":", "");
                            //System.out.println(nonJoinCol[vid][jj]);
                            if (SerialQueryProcessorThreaded.nonJoinCol[vid][jj].contains("?")) {
                                StringTokenizer tt1 = new StringTokenizer(
                                        SerialQueryProcessorThreaded.nonJoinCol[vid][jj]);
                                String varname1 = tt1.nextToken("|");
                                String varname2 = tt1.nextToken("|");
                                //System.out.println(varname1 + " " + varname2);
                                byte[] b = new byte[totsize];
                                for (int j = 0; j < totsize; j++) {
                                    b[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + 1];
                                }
                                if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 2) { // osp
                                    if (patvals.equals(varname1)) { // osp
                                        byte pinakas = (byte) 2;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, b, temp1, varname2);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    } else { // pos
                                        byte pinakas = (byte) 3;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, temp1, b, varname1);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    }
                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 3) { // pos
                                    if (patvals.equals(varname1)) { // pos
                                        byte pinakas = (byte) 3;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, b, temp1, varname2);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    } else { // spo
                                        byte pinakas = (byte) 4;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, temp1, b, varname1);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    }
                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 4) { // spo
                                    if (patvals.equals(varname1)) { // spo
                                        byte pinakas = (byte) 4;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, b, temp1, varname2);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    } else { // osp
                                        byte pinakas = (byte) 2;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, temp1, b, varname1);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    }
                                }
                            } else { // have all three values: always go via osp
                                if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 2) { // osp
                                    byte pinakas = (byte) 2;
                                    byte[] b1 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + 1];
                                    }
                                    byte[] b2 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b2[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + totsize + 1];
                                    }
                                    found += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, b1, b2, temp1);
                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 3) { // pos
                                    byte[] b1 = new byte[totsize];
                                    byte[] b2 = new byte[totsize];
                                    byte[] b3 = new byte[totsize];
                                    int size = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj].length;
                                    byte pinakas = (byte) 2; // osp
                                    for (int j = 0; j < totsize; j++) {
                                        b2[j] = temp1[j];
                                    }
                                    for (int i1 = 0; i1 < totsize; i1++) {
                                        b3[i1] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][i1 + 1];
                                    }
                                    // find subclasses
                                    if (size > SerialQueryProcessorThreaded.rowlength) { // subclasses exist
                                        int ffound = 0;
                                        for (int ik = 0; ik < (size - totsize - 1) / totsize; ik++) {
                                            for (int j = 0; j < totsize; j++) {
                                                b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + totsize + 1 + ik * totsize];
                                            }
                                            //System.out.println(Bytes.toStringBinary(b1));
                                            ffound += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, b1, b2, b3);
                                        }
                                        if (ffound > 0) {
                                            found++;
                                        }
                                    } else { // no subclasses
                                        for (int j = 0; j < totsize; j++) {
                                            b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][totsize + 1 + j];
                                        }
                                        //System.out.println(Bytes.toStringBinary(b1));
                                        found += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, b1, b2, b3);
                                    }
                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 4) { // spo
                                    byte pinakas = (byte) 2;
                                    byte[] b1 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + 1];
                                    }
                                    byte[] b2 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b2[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + totsize + 1];
                                    }
                                    found += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, temp1, b1, b2);
                                }
                            }
                            if (found == nonjno) {
                                break;
                            }
                        }
                        if (found == nonjno) {
                            /*nextTime = new Date().getTime();
                            long latency = nextTime - time;
                            averLatency = (averLatency * num + latency) / (num + 1);
                            num++;*/
                            if (findDoub) {
                                String fkvals = temp2 + "_";
                                if (value.itter()) {
                                    while (value.hasMore()) {
                                        String s1 = value.getValue();
                                        if (!s1.equals("")) {
                                            outKey.set(sum + foundkvals1 + fkvals + "!" + value.getTotal() + "!" + s1);
                                            SerialQueryProcessorThreaded.writeOut(outKey);
                                        }
                                    }
                                } else {
                                    outKey.set(sum + foundkvals1 + fkvals + "!" + value.getTotal() + "!");
                                    SerialQueryProcessorThreaded.writeOut(outKey);
                                }
                                //outKey.set(sum + foundkvals1 + fkvals + "!" + outkeytemp + else_value + "!");
                                //SerialQueryProcessorThreaded.writeOut(outKey);
                            } else {
                                foundkb += temp2 + "_";
                                foundsize++;
                            }
                        }
                    }
                    if ((foundsize > 0) && (!findDoub)) {
                        /*nextTime = new Date().getTime();
                        long latency = nextTime - time;
                        averLatency = (averLatency * num + latency) / (num + 1);
                        num++;*/
                        foundkvals1 += foundkb;
                        if (value.itter()) {
                            while (value.hasMore()) {
                                String s1 = value.getValue();
                                if (!s1.equals("")) {
                                    outKey.set(sum + foundkvals1 + "!" + value.getTotal() + "!" + s1);
                                    SerialQueryProcessorThreaded.writeOut(outKey);
                                }
                            }
                        } else {
                            outKey.set(sum + foundkvals1 + "!" + value.getTotal() + "!");
                            SerialQueryProcessorThreaded.writeOut(outKey);
                        }
                    }
                }
            }
        }
        //System.out.println("Average Latency (ms): " + averLatency);
    } catch (NumberFormatException e) {
        e.printStackTrace();
    } catch (NotSupportedDatatypeException e) {
        e.printStackTrace();
    }
}
From source file:hadoop.examples.hdfs.SequenceFileWriteDemo.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String uri = "hdfs://exoplatform:9000/user/haint/temp.file";
    Path path = new Path(uri);
    FileSystem fs = FileSystem.get(URI.create(uri), conf);

    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file:hip.ch3.seqfile.writable.seqfile.SequenceFileStockWriter.java
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(outputPath),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(StockPriceWritable.class),
            SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    try {
        Text key = new Text();
        for (String line : FileUtils.readLines(inputFile)) {
            StockPriceWritable stock = StockPriceWritable.fromLine(line);
            System.out.println("Stock = " + stock);
            key.set(stock.getSymbol());
            writer.append(key, stock);
        }
    } finally {
        writer.close();
    }
    return 0;
}
From source file:hivemall.fm.FactorizationMachineUDTF.java
License:Apache License
private void forwardAsStringFeature(@Nonnull final FMStringFeatureMapModel model, final int factors)
        throws HiveException {
    final Text feature = new Text();
    final FloatWritable f_Wi = new FloatWritable(0.f);
    final FloatWritable[] f_Vi = HiveUtils.newFloatArray(factors, 0.f);

    final Object[] forwardObjs = new Object[3];
    forwardObjs[0] = feature;
    forwardObjs[1] = f_Wi;
    forwardObjs[2] = null;

    // W0
    feature.set("0");
    f_Wi.set(model.getW0());
    // V0 is null
    forward(forwardObjs);

    // Wi, Vif (i starts from 1..P)
    forwardObjs[2] = Arrays.asList(f_Vi);

    final IMapIterator<String, Entry> itor = model.entries();
    while (itor.next() != -1) {
        String i = itor.getKey();
        assert (i != null);
        // set i
        feature.set(i);
        Entry entry = itor.getValue();
        // set Wi
        f_Wi.set(entry.W);
        // set Vif
        final float[] Vi = entry.Vf;
        for (int f = 0; f < factors; f++) {
            float v = Vi[f];
            f_Vi[f].set(v);
        }
        forward(forwardObjs);
    }
}
From source file:hivemall.fm.FieldAwareFactorizationMachineUDTF.java
License:Apache License
@Override
protected void forwardModel() throws HiveException {
    this._model = null;
    this._fieldList = null;
    this._sumVfX = null;

    Text modelId = new Text();
    String taskId = HadoopUtils.getUniqueTaskIdString();
    modelId.set(taskId);

    FFMPredictionModel predModel = _ffmModel.toPredictionModel();
    this._ffmModel = null; // help GC

    if (LOG.isInfoEnabled()) {
        LOG.info("Serializing a model '" + modelId + "'... Configured # features: " + _numFeatures
                + ", Configured # fields: " + _numFields + ", Actual # features: "
                + predModel.getActualNumFeatures() + ", Estimated uncompressed bytes: "
                + NumberUtils.prettySize(predModel.approxBytesConsumed()));
    }

    byte[] serialized;
    try {
        serialized = predModel.serialize();
        predModel = null;
    } catch (IOException e) {
        throw new HiveException("Failed to serialize a model", e);
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Forwarding a serialized/compressed model '" + modelId + "' of size: "
                + NumberUtils.prettySize(serialized.length));
    }

    Text modelObj = new Text3(serialized);
    serialized = null;
    Object[] forwardObjs = new Object[] { modelId, modelObj };
    forward(forwardObjs);
}