List of usage examples for org.apache.hadoop.mapred Reporter.NULL

Reporter.NULL is the no-op Reporter constant in the old org.apache.hadoop.mapred API. The snippets below, collected from several Apache projects, pass it to InputFormat.getRecordReader() (and, in one case, to RecordWriter.close()) when reading or writing data outside of a running MapReduce task, where there is no task context to report progress to. Each entry names the source file and license the snippet was taken from.
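Before the project-specific examples, here is a minimal sketch of the common pattern, assuming a plain text input file: build a JobConf and an InputFormat by hand, then pass Reporter.NULL to getRecordReader() because there is no running task to report progress to. The class name ReporterNullExample and the path /tmp/input.txt are placeholders for illustration, not taken from any of the projects below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullExample {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(job);
        FileInputFormat.addInputPath(job, new Path("/tmp/input.txt")); // placeholder input path

        for (InputSplit split : inputFormat.getSplits(job, 1)) {
            // Reporter.NULL satisfies the API when no progress reporting is needed.
            RecordReader<LongWritable, Text> reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
            LongWritable key = reader.createKey();
            Text value = reader.createValue();
            while (reader.next(key, value)) {
                System.out.println(value);
            }
            reader.close();
        }
    }
}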
From source file: org.apache.asterix.external.input.record.reader.hdfs.HDFSRecordReader.java
License: Apache License

@SuppressWarnings("unchecked")
private RecordReader<K, Writable> getRecordReader(int splitIndex) throws IOException {
    reader = (RecordReader<K, Writable>) inputFormat.getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL);
    if (key == null) {
        key = reader.createKey();
        value = (V) reader.createValue();
    }
    if (indexer != null) {
        try {
            indexer.reset(this);
        } catch (Exception e) {
            throw new HyracksDataException(e);
        }
    }
    return reader;
}
From source file: org.apache.asterix.external.input.stream.HDFSInputStream.java
License: Apache License

@SuppressWarnings("unchecked")
private RecordReader<Object, Text> getRecordReader(int splitIndex) throws IOException {
    reader = (RecordReader<Object, Text>) inputFormat.getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL);
    if (key == null) {
        key = reader.createKey();
        value = reader.createValue();
    }
    if (indexer != null) {
        try {
            indexer.reset(this);
        } catch (Exception e) {
            throw new HyracksDataException(e);
        }
    }
    return reader;
}
From source file: org.apache.crunch.io.orc.OrcFileReaderFactory.java
License: Apache License

@Override
public Iterator<T> read(FileSystem fs, final Path path) {
    try {
        if (!fs.isFile(path)) {
            throw new CrunchRuntimeException("Not a file: " + path);
        }
        inputFn.initialize();
        FileStatus status = fs.getFileStatus(path);
        FileSplit split = new FileSplit(path, 0, status.getLen(), new String[0]);
        JobConf conf = new JobConf();
        if (readColumns != null) {
            conf.setBoolean(OrcFileSource.HIVE_READ_ALL_COLUMNS, false);
            conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, OrcFileSource.getColumnIdsStr(readColumns));
        }
        final RecordReader<NullWritable, OrcStruct> reader = inputFormat.getRecordReader(split, conf, Reporter.NULL);
        return new UnmodifiableIterator<T>() {

            private boolean checked = false;
            private boolean hasNext;
            private OrcStruct value;
            private OrcWritable writable = new OrcWritable();

            @Override
            public boolean hasNext() {
                try {
                    if (value == null) {
                        value = reader.createValue();
                    }
                    if (!checked) {
                        hasNext = reader.next(NullWritable.get(), value);
                        checked = true;
                    }
                    return hasNext;
                } catch (Exception e) {
                    throw new CrunchRuntimeException("Error while reading local file: " + path, e);
                }
            }

            @Override
            public T next() {
                try {
                    if (value == null) {
                        value = reader.createValue();
                    }
                    if (!checked) {
                        reader.next(NullWritable.get(), value);
                    }
                    checked = false;
                    writable.set(value);
                    return inputFn.map(writable);
                } catch (Exception e) {
                    throw new CrunchRuntimeException("Error while reading local file: " + path, e);
                }
            }
        };
    } catch (Exception e) {
        throw new CrunchRuntimeException("Error while reading local file: " + path, e);
    }
}
From source file: org.apache.crunch.io.orc.OrcFileWriter.java
License: Apache License

@Override
public void close() throws IOException {
    writer.close(Reporter.NULL);
}
From source file: org.apache.drill.exec.store.easy.sequencefile.SequenceFileRecordReader.java
License: Apache License

private org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable> getRecordReader(
        final InputFormat<BytesWritable, BytesWritable> inputFormat, final JobConf jobConf)
        throws ExecutionSetupException {
    try {
        final UserGroupInformation ugi = ImpersonationUtil.createProxyUgi(this.opUserName, this.queryUserName);
        return ugi.doAs(
                new PrivilegedExceptionAction<org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable>>() {
                    @Override
                    public org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable> run() throws Exception {
                        return inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
                    }
                });
    } catch (IOException | InterruptedException e) {
        throw new ExecutionSetupException(
                String.format("Error in creating sequencefile reader for file: %s, start: %d, length: %d",
                        split.getPath(), split.getStart(), split.getLength()), e);
    }
}
From source file: org.apache.drill.exec.store.hive.HiveAbstractReader.java
License: Apache License

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);

    // Get the configured default val
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);

    Properties tableProperties;
    try {
        tableProperties = HiveUtilities.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties
                : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);

        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);

        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
            partitionOI = getStructOI(partitionSerDe);

            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }

        if (logger.isTraceEnabled()) {
            for (StructField field : finalOI.getAllStructFieldRefs()) {
                logger.trace("field in finalOI: {}", field.getClass().getName());
            }
            logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
        }

        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }

        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();

        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);

        for (String columnName : selectedColumnNames) {
            StructField fieldRef = finalOI.getStructFieldRef(columnName);
            selectedStructFieldRefs.add(fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();

            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());

            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }

        for (int i = 0; i < selectedColumnNames.size(); ++i) {
            logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}",
                    selectedColumnObjInspectors.get(i).getTypeName(),
                    selectedColumnObjInspectors.get(i).getClass().getName(),
                    selectedColumnTypes.get(i).toString(),
                    selectedColumnFieldConverters.get(i).getClass().getName());
        }

        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);

                if (partition != null) {
                    selectedPartitionValues.add(
                            HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
    }

    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }

        internalInit(tableProperties, reader);
    }
}
From source file: org.apache.drill.exec.store.hive.HiveInputReader.java
License: Apache License

public static void main(String args[]) throws Exception {
    /*
    String[] columnNames = {"n_nationkey", "n_name", "n_regionkey", "n_comment"};
    String[] columnTypes = {"bigint", "string", "bigint", "string"};

    List<FieldSchema> cols = Lists.newArrayList();

    for (int i = 0; i < columnNames.length; i++) {
        cols.add(new FieldSchema(columnNames[i], columnTypes[i], null));
    }
    String location = "file:///tmp/nation_s";

    String inputFormat = TextInputFormat.class.getCanonicalName();
    String serdeLib = LazySimpleSerDe.class.getCanonicalName();
    // String inputFormat = HiveHBaseTableInputFormat.class.getCanonicalName();
    // String serdeLib = HBaseSerDe.class.getCanonicalName();
    Map<String, String> serdeParams = new HashMap();
    // serdeParams.put("serialization.format", "1");
    // serdeParams.put("hbase.columns.mapping", ":key,f:name,f:regionkey,f:comment");
    serdeParams.put("serialization.format", "|");
    serdeParams.put("field.delim", "|");

    Map<String, String> tableParams = new HashMap();
    tableParams.put("hbase.table.name", "nation");

    SerDeInfo serDeInfo = new SerDeInfo(null, serdeLib, serdeParams);

    StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, null, false, -1,
            serDeInfo, null, null, null);

    Table table = new Table("table", "default", "sphillips", 0, 0, 0, storageDescriptor,
            new ArrayList<FieldSchema>(), tableParams, null, null, "MANAGED_TABLE");

    Properties properties = MetaStoreUtils.getTableMetadata(table);
    */

    HiveConf conf = new HiveConf();
    conf.set("hive.metastore.uris", "thrift://10.10.31.51:9083");
    HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
    Table table = client.getTable("default", "nation");
    Properties properties = MetaStoreUtils.getTableMetadata(table);

    Path path = new Path(table.getSd().getLocation());
    JobConf job = new JobConf();
    for (Object obj : properties.keySet()) {
        job.set((String) obj, (String) properties.get(obj));
    }
    // job.set("hbase.zookeeper.quorum", "10.10.31.51");
    // job.set("hbase.zookeeper.property.clientPort", "5181");
    InputFormat f = (InputFormat) Class.forName(table.getSd().getInputFormat()).getConstructor().newInstance();
    job.setInputFormat(f.getClass());

    FileInputFormat.addInputPath(job, path);
    InputFormat format = job.getInputFormat();

    SerDe serde = (SerDe) Class.forName(table.getSd().getSerdeInfo().getSerializationLib()).getConstructor()
            .newInstance();
    serde.initialize(job, properties);

    ObjectInspector inspector = serde.getObjectInspector();
    ObjectInspector.Category cat = inspector.getCategory();

    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(inspector);

    List<String> columns = null;
    List<TypeInfo> colTypes = null;
    List<ObjectInspector> fieldObjectInspectors = Lists.newArrayList();

    switch (typeInfo.getCategory()) {
    case STRUCT:
        columns = ((StructTypeInfo) typeInfo).getAllStructFieldNames();
        colTypes = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
        for (int i = 0; i < columns.size(); i++) {
            System.out.print(columns.get(i));
            System.out.print(" ");
            System.out.print(colTypes.get(i));
        }
        System.out.println("");
        for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) {
            fieldObjectInspectors.add(field.getFieldObjectInspector());
        }
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        String encoded = serializeInputSplit(split);
        System.out.println(encoded);
        InputSplit newSplit = deserializeInputSplit(encoded, split.getClass().getCanonicalName());
        System.out.print("Length: " + newSplit.getLength() + " ");
        System.out.print("Locations: ");
        for (String loc : newSplit.getLocations()) {
            System.out.print(loc + " ");
        }
        System.out.println();
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
        Object key = reader.createKey();
        Object value = reader.createValue();
        int count = 0;
        while (reader.next(key, value)) {
            List<Object> values = ((StructObjectInspector) inspector)
                    .getStructFieldsDataAsList(serde.deserialize((Writable) value));
            StructObjectInspector sInsp = (StructObjectInspector) inspector;
            Object obj = sInsp.getStructFieldData(serde.deserialize((Writable) value),
                    sInsp.getStructFieldRef("n_name"));
            System.out.println(obj);
            /*
            for (Object obj : values) {
                PrimitiveObjectInspector.PrimitiveCategory pCat = ((PrimitiveObjectInspector) fieldObjectInspectors.get(count)).getPrimitiveCategory();
                Object pObj = ((PrimitiveObjectInspector) fieldObjectInspectors.get(count)).getPrimitiveJavaObject(obj);
                System.out.print(pObj + " ");
            }
            */
            System.out.println("");
        }
    }
}
From source file: org.apache.drill.exec.store.hive.HiveRecordReader.java
License: Apache License

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);

    // Get the configured default val
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);

    Properties tableProperties;
    try {
        tableProperties = MetaStoreUtils.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties
                : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);

        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);

        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
            partitionOI = getStructOI(partitionSerDe);

            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }

        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }

        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();

        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);

        for (String columnName : selectedColumnNames) {
            ObjectInspector fieldOI = finalOI.getStructFieldRef(columnName).getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());

            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }

        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);

                if (partition != null) {
                    selectedPartitionValues.add(
                            HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing HiveRecordReader: " + e.getMessage(), e);
    }

    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }

        key = reader.createKey();
        skipRecordsInspector = new SkipRecordsInspector(tableProperties, reader);
    }
}
From source file: org.apache.drill.exec.store.hive.readers.HiveAbstractReader.java
License: Apache License

/**
 * Initializes next reader if available, will close previous reader if any.
 *
 * @param job map / reduce job configuration.
 * @return true if new reader was initialized, false if no more readers are available
 * @throws ExecutionSetupException if could not init record reader
 */
protected boolean initNextReader(JobConf job) throws ExecutionSetupException {
    if (inputSplitsIterator.hasNext()) {
        if (reader != null) {
            closeReader();
        }
        InputSplit inputSplit = inputSplitsIterator.next();
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        return true;
    }
    return false;
}
From source file: org.apache.drill.exec.store.text.DrillTextRecordReader.java
License: Apache License

public DrillTextRecordReader(FileSplit split, Configuration fsConf, FragmentContext context, char delimiter,
        List<SchemaPath> columns) {
    this.delimiter = (byte) delimiter;
    this.split = split;
    setColumns(columns);

    if (!isStarQuery()) {
        String pathStr;
        for (SchemaPath path : columns) {
            assert path.getRootSegment().isNamed();
            pathStr = path.getRootSegment().getPath();
            Preconditions.checkArgument(
                    pathStr.equals(COL_NAME) || (pathStr.equals("*") && path.getRootSegment().getChild() == null),
                    "Selected column(s) must have name 'columns' or must be plain '*'");

            if (path.getRootSegment().getChild() != null) {
                Preconditions.checkArgument(path.getRootSegment().getChild().isArray(),
                        "Selected column must be an array index");
                int index = path.getRootSegment().getChild().getArraySegment().getIndex();
                columnIds.add(index);
            }
        }
        Collections.sort(columnIds);
        numCols = columnIds.size();
    }
    TextInputFormat inputFormat = new TextInputFormat();
    JobConf job = new JobConf(fsConf);
    job.setInt("io.file.buffer.size", context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
    job.setInputFormat(inputFormat.getClass());
    try {
        reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
        key = reader.createKey();
        value = reader.createValue();
        totalRecordsRead = 0;
    } catch (Exception e) {
        handleAndRaise("Failure in creating record reader", e);
    }
}