List of usage examples for org.apache.hadoop.io.Text.getBytes()
@Override public byte[] getBytes()
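Before the project samples, a note on the contract every example below is working around: Text.getBytes() exposes the object's internal backing byte array, which is only valid up to getLength(). When a Text instance is reused (as Hadoop's record readers do), stale bytes linger past that point. A minimal, self-contained sketch of the safe idiom; the class name and values are illustrative, not taken from any project below:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.io.Text;

public class TextBytesDemo {
    public static void main(String[] args) {
        Text t = new Text("hello, world");
        // Simulate reuse: set() keeps the existing, larger backing array,
        // so bytes past getLength() are leftovers from the old value.
        t.set("hi".getBytes(StandardCharsets.UTF_8), 0, 2);

        byte[] raw = t.getBytes(); // whole backing array
        int len = t.getLength();   // number of valid bytes (2)

        // Safe decode and safe copy: bound everything by getLength().
        String s = new String(raw, 0, len, StandardCharsets.UTF_8);
        byte[] copy = Arrays.copyOf(raw, len);

        System.out.println(s + " (valid=" + len + ", capacity=" + raw.length + ")");
    }
}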
From source file:com.marklogic.contentpump.SingleDocumentWriter.java
License:Apache License
@Override
public void write(DocumentURI uri, MarkLogicDocument content) throws IOException, InterruptedException {
    OutputStream os = null;
    try {
        String childPath = URIUtil.getPathFromURI(uri);
        Path path;
        if (childPath.charAt(0) == '/') {
            // concatenate outputPath with path to form the path
            path = new Path(dir.toString() + childPath);
        } else {
            path = new Path(dir, childPath);
        }
        FileSystem fs = path.getFileSystem(conf);
        if (fs instanceof DistributedFileSystem) {
            os = fs.create(path, false);
        } else {
            File f = new File(path.toUri().getPath());
            if (!f.exists()) {
                f.getParentFile().mkdirs();
                f.createNewFile();
            }
            os = new FileOutputStream(f, false);
        }
        ContentType type = content.getContentType();
        if (ContentType.BINARY.equals(type)) {
            if (content.isStreamable()) {
                InputStream is = null;
                try {
                    is = content.getContentAsByteStream();
                    long size = content.getContentSize();
                    long bufSize = Math.min(size, 512 << 10);
                    byte[] buf = new byte[(int) bufSize];
                    for (long toRead = size, read = 0; toRead > 0; toRead -= read) {
                        read = is.read(buf, 0, (int) bufSize);
                        if (read > 0) {
                            os.write(buf, 0, (int) read);
                        } else {
                            LOG.error("Premature EOF: uri=" + uri + ",toRead=" + toRead);
                            break;
                        }
                    }
                } finally {
                    if (is != null) {
                        is.close();
                    }
                }
            } else {
                os.write(content.getContentAsByteArray());
            }
        } else if (ContentType.TEXT.equals(type) || ContentType.XML.equals(type)
                || ContentType.JSON.equals(type)) {
            if (encoding.equals("UTF-8")) {
                Text t = content.getContentAsText();
                os.write(t.getBytes(), 0, t.getLength());
            } else {
                String t = content.getContentAsString();
                os.write(t.getBytes(encoding));
            }
            if (LOG.isTraceEnabled()) {
                Text t = content.getContentAsText();
                LOG.trace(t);
                byte[] bytes = content.getContentAsByteArray();
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < bytes.length; i++) {
                    sb.append(Byte.toString(bytes[i]));
                    sb.append(" ");
                }
                LOG.trace(sb);
            }
        } else {
            LOG.error("Skipping " + uri + ". Unsupported content type: " + type.name());
        }
    } catch (Exception e) {
        LOG.error("Error saving: " + uri, e);
    } finally {
        if (os != null) {
            os.close();
        }
    }
}
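As an aside, Hadoop's own IOUtils helper performs the same chunked copy as the manual read/write loop in the binary branch above. This is a sketch, not the project's code, and it drops the original's premature-EOF check against getContentSize():

import org.apache.hadoop.io.IOUtils;

// Copy the content stream to the output in 512 KB chunks; pass
// close=false so the surrounding try/finally blocks keep ownership
// of both streams.
IOUtils.copyBytes(content.getContentAsByteStream(), os, 512 << 10, false);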
From source file:com.marklogic.mapreduce.MarkLogicInputSplit.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    start = in.readLong();
    length = in.readLong();
    Text forestIdText = new Text();
    forestIdText.readFields(in);
    // Caution: getBytes() returns the whole backing array, so BigInteger
    // sees every byte; this assumes the freshly read Text carries no
    // excess capacity past getLength().
    forestId = new BigInteger(forestIdText.getBytes());
    hostName = new String[1];
    hostName[0] = Text.readString(in);
    isLastSplit = in.readBoolean();
}
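A hedged aside on the split above: BigInteger(byte[]) consumes the entire array it is given, and getBytes() can hand it more than getLength() bytes. An illustrative drop-in for the forestId line (not the project's actual code) bounds the input first:

// Copy only the valid region before parsing, so excess capacity in
// the backing array can never leak into the value.
byte[] idBytes = java.util.Arrays.copyOf(forestIdText.getBytes(), forestIdText.getLength());
forestId = new BigInteger(idBytes);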
From source file:com.mortardata.pig.JsonLoader.java
License:Apache License
@Override
public Tuple getNext() throws IOException {
    Text val = null;
    try {
        if (!reader.nextKeyValue())
            return null;
        val = (Text) reader.getCurrentValue();
    } catch (Exception e) {
        throw new IOException(e);
    }

    // Create a parser specific for this input line.
    // This may not be the most efficient approach.
    // Bound the stream by getLength(): the backing array returned by
    // getBytes() may contain stale bytes past the end of this record.
    ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes(), 0, val.getLength());
    JsonParser p = jsonFactory.createJsonParser(bais);
    Tuple t;

    // schema provided
    if (!useDefaultSchema) {
        // Create a map of field names to ResourceFieldSchema's,
        // and a map of field names to positions in the tuple.
        // These are used during parsing to handle extra, missing,
        // and/or out-of-order fields properly.
        Map<String, ResourceFieldSchema> schemaMap = new HashMap<String, ResourceFieldSchema>();
        Map<String, Integer> schemaPositionMap = new HashMap<String, Integer>();

        if (requiredFields != null) {
            int count = 0;
            for (int i = 0; i < fields.length; i++) {
                if (requiredFields[i]) {
                    schemaMap.put(fields[i].getName(), fields[i]);
                    schemaPositionMap.put(fields[i].getName(), count);
                    count++;
                }
            }
            t = tupleFactory.newTuple(count);
        } else {
            for (int i = 0; i < fields.length; i++) {
                schemaMap.put(fields[i].getName(), fields[i]);
                schemaPositionMap.put(fields[i].getName(), i);
            }
            t = tupleFactory.newTuple(fields.length);
        }

        try {
            p.nextToken(); // move to start of object
            parseObjectIntoTuple(val.toString(), p, schemaMap, schemaPositionMap, t);
        } catch (JsonParseException jpe) {
            // If the line doesn't parse as a valid JSON object, log an error and move on
            log.error("Error parsing record: " + val + ": " + jpe.toString());
        }
    } else {
        // schema not provided: load the whole document as a map
        t = tupleFactory.newTuple(1);
        try {
            p.nextToken(); // move to start of object
            t.set(0, readField(val.toString(), p, schema.getFields()[0]));
        } catch (JsonParseException jpe) {
            log.error("Error parsing record: " + val + ": " + jpe.toString());
        }
    }
    p.close();
    return t;
}
From source file:com.mycustomloader.vsamloader.VSAMLoader.java
License:Apache License
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();

    boolean inField = false;
    boolean inQuotedField = false;
    boolean evenQuotesSeen = true;

    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int fieldID = 0;

        ByteBuffer fieldBuffer = ByteBuffer.allocate(len);

        for (int i = 0; i < len; i++) {
            byte b = buf[i];
            inField = true;
            if (inQuotedField) {
                if (b == DOUBLE_QUOTE) {
                    evenQuotesSeen = !evenQuotesSeen;
                    if (evenQuotesSeen) {
                        fieldBuffer.put(DOUBLE_QUOTE);
                    }
                } else if (!evenQuotesSeen && (b == FIELD_DEL || b == RECORD_DEL)) {
                    inQuotedField = false;
                    inField = false;
                    readField(fieldBuffer, fieldID++);
                } else {
                    fieldBuffer.put(b);
                }
            } else if (b == DOUBLE_QUOTE) {
                inQuotedField = true;
                evenQuotesSeen = true;
            } else if (b == FIELD_DEL) {
                inField = false;
                readField(fieldBuffer, fieldID++); // end of the field
            } else {
                evenQuotesSeen = true;
                fieldBuffer.put(b);
            }
        }
        if (inField)
            readField(fieldBuffer, fieldID++);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }

    Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
    return t;
}
From source file:com.naver.nelo2analyzer.udf.TextLoaderPlusTime.java
License:Apache License
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();
    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        timebytes = timeGenerator(sourcePath.toString()).getBytes();
        ba = value.getBytes();
        outputStream = new ByteArrayOutputStream();
        outputStream.write(timebytes);
        // getBytes() returns the backing array; write only the
        // getLength() bytes that belong to this record.
        outputStream.write(ba, 0, value.getLength());
        c = outputStream.toByteArray();
        // return mTupleFactory.newTupleNoCopy(mProtoTuple);
        System.err.println("ba = " + new String(ba, 0, value.getLength()));
        System.err.println("c = " + new String(c));
        forReturn = mTupleFactory.newTuple(new DataByteArray(c, 0, c.length));
        return forReturn;
    } catch (InterruptedException e) {
        throw new IOException("Error getting input", e);
    } finally {
        if (outputStream != null) {
            outputStream.close();
            outputStream = null;
        }
        c = null;
        ba = null;
        timebytes = null;
        System.gc();
    }
}
From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIdx1 = value.find("\t");
    int tabIdx2 = value.find("\t", tabIdx1 + 1);

    String userID = Text.decode(value.getBytes(), 0, tabIdx1);
    String pageRank = Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1));
    String CSVFollowingIDs = Text.decode(value.getBytes(), tabIdx2 + 1, value.getLength() - (tabIdx2 + 1));

    // System.out.print(userID);
    // System.out.print("\t");
    // System.out.print(pageRank);
    // System.out.print("\t");
    // System.out.println(CSVFollowingIDs);

    String[] followingIDs = CSVFollowingIDs.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER);
    Integer totalFollowingIDs = followingIDs.length;
    for (String followingID : followingIDs) {
        String pageRankWithTotalFollowing = pageRank + "\t" + totalFollowingIDs.toString();
        context.write(new Text(followingID), new Text(pageRankWithTotalFollowing));
    }
    context.write(new Text(userID), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + CSVFollowingIDs));
}
From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIndex = value.find("\t");
    userID = Text.decode(value.getBytes(), 0, tabIndex);
    followerID = Text.decode(value.getBytes(), tabIndex + 1, value.getLength() - (tabIndex + 1));
    context.write(new Text(followerID), new Text(userID));
}
From source file:com.pagerankcalculator.ordering.PageRankSortingMapper.java
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    int tabIdx1 = value.find("\t");
    int tabIdx2 = value.find("\t", tabIdx1 + 1);

    String username = Text.decode(value.getBytes(), 0, tabIdx1);
    Double pageRank = new Double(Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1)));

    context.write(new DoubleWritable(pageRank), new Text(username));
}
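The three PageRank mappers above share one idiom: Text.find locates a delimiter as a byte offset, and Text.decode converts only the slice that is needed, bounded by getLength(), without first materializing the whole line as a String. A small self-contained sketch of that pattern (class name and values are illustrative, not project code):

import org.apache.hadoop.io.Text;

public class TabSliceDemo {
    public static void main(String[] args) throws Exception {
        Text line = new Text("alice\t0.85");
        int tab = line.find("\t"); // byte offset of the first tab
        // Decode each slice straight from the backing array, using
        // the computed offset and getLength() as bounds.
        String user = Text.decode(line.getBytes(), 0, tab);
        String rank = Text.decode(line.getBytes(), tab + 1, line.getLength() - (tab + 1));
        System.out.println(user + " -> " + rank); // alice -> 0.85
    }
}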
From source file:com.qq.pig.udf.CustomJsonLoader.java
License:Apache License
public Tuple parseTuple(Text val) throws IOException {
    // Create a parser specific for this input line. This may not be the
    // most efficient approach.
    // TODO why make a byte copy?
    // Answer: getBytes() returns the backing array, which can be longer
    // than getLength(); copying the first getLength() bytes keeps the
    // parser from reading stale data past the record.
    byte[] newBytes = new byte[val.getLength()];
    System.arraycopy(val.getBytes(), 0, newBytes, 0, val.getLength());
    ByteArrayInputStream bais = new ByteArrayInputStream(newBytes);
    JsonParser p = jsonFactory.createJsonParser(bais);

    // Create the tuple we will be returning. We create it with the right
    // number of fields, as the Tuple object is optimized for this case.
    ResourceFieldSchema[] fields = schema.getFields();
    Tuple t = tupleFactory.newTuple(fields.length);

    // Read the start object marker. Throughout this file, if the parsing
    // isn't what we expect we return a tuple with null fields rather than
    // throwing an exception. That way a few mangled lines don't fail the job.
    if (p.nextToken() != JsonToken.START_OBJECT) {
        warn("Bad record, could not find start of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }
    readFields(p, t);
    p.close();
    return t;
}
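If the copy above ever mattered for throughput, the bounded ByteArrayInputStream constructor achieves the same safety without allocating. An illustrative variant of those two lines, not the project's code:

// Bound the stream to the valid region of the backing array
// instead of duplicating it.
ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes(), 0, val.getLength());
JsonParser p = jsonFactory.createJsonParser(bais);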
From source file:com.redsqirl.workflow.server.connect.HDFSInterface.java
License:Open Source License
/**
 * Read a sequence file.
 *
 * @param path the file or directory to read
 * @param delimiter the field delimiter (only logged here)
 * @param maxToRead the maximum number of rows to read
 * @param fields the typed field list used to decode each line
 * @return list of rows read from the path
 * @throws RemoteException
 */
public List<String> selectSeq(String path, String delimiter, int maxToRead, FieldList fields)
        throws RemoteException {
    Path p = new Path(path);
    List<String> ans = null;
    HdfsFileChecker fCh = new HdfsFileChecker(p);
    try {
        FileSystem fs = NameNodeVar.getFS();
        if (fCh.isDirectory()) {
            FileStatus[] fsA = fs.listStatus(p);
            int listSize = Math.min(maxToRead, fsA.length);
            ans = new ArrayList<String>(listSize);
            for (int i = 0; i < listSize; ++i) {
                ans.add(fsA[i].getPath().toString());
            }
        } else if (fCh.isFile()) {
            FSDataInputStream in = fs.open(p);
            LineReader reader = new LineReader(in);
            ans = new ArrayList<String>(maxToRead);
            Text line = new Text();
            reader.readLine(line); // the first line is read and skipped
            int lineNb = 0;
            maxToRead *= fields.getSize();
            int i = 0;
            String toWrite = "";
            logger.debug("delim : " + delimiter);
            while (reader.readLine(line) != 0 && lineNb < maxToRead) {
                logger.debug("line : " + line);
                ++lineNb;
                FieldType type = fields.getFieldType(fields.getFieldNames().get(i));
                if (type == FieldType.BOOLEAN) {
                    toWrite += BytesWritable.Comparator.readInt(line.getBytes(), 0);
                } else if (type == FieldType.INT) {
                    toWrite += BytesWritable.Comparator.readInt(line.getBytes(), 0);
                } else if (type == FieldType.FLOAT) {
                    toWrite += BytesWritable.Comparator.readFloat(line.getBytes(), 0);
                } else if (type == FieldType.DOUBLE) {
                    toWrite += BytesWritable.Comparator.readDouble(line.getBytes(), 0);
                } else if (type == FieldType.LONG) {
                    toWrite += BytesWritable.Comparator.readLong(line.getBytes(), 0);
                } else if (type == FieldType.STRING) {
                    // Decode only the valid bytes of the backing array;
                    // byte[].toString() would yield an identity hash, not text.
                    toWrite += new String(line.getBytes(), 0, line.getLength());
                }
                if ((i + 1) % fields.getSize() == 0) {
                    ans.add(toWrite);
                    toWrite = "";
                } else {
                    toWrite += '\001';
                }
                ++i;
                if (i >= fields.getSize()) {
                    i = 0;
                }
            }
        }
        // fs.close();
    } catch (IOException e) {
        logger.error("Cannot select the file or directory: " + p);
        logger.error(e.getMessage());
    }
    // fCh.close();
    return ans;
}