Example usage for org.apache.hadoop.mapred Reporter NULL

List of usage examples for org.apache.hadoop.mapred Reporter NULL

Introduction

On this page you can find example usages of org.apache.hadoop.mapred Reporter NULL.

Prototype

public static final Reporter NULL

Document

A constant of Reporter type that does nothing.
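
Because Reporter.NULL is a no-op implementation, it is the usual value to pass wherever the old mapred API requires a Reporter but the code is not running inside a MapReduce task (for example, when reading splits directly, as most of the examples below do). The following minimal sketch illustrates that pattern; it is not taken from any of the projects listed below, and the class name ReporterNullExample and the command-line input path are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullExample {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(new Configuration());
        FileInputFormat.addInputPath(job, new Path(args[0])); // hypothetical input path

        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(job);

        // Outside of a running task there is no real Reporter to supply,
        // so Reporter.NULL (which does nothing) is passed instead.
        for (InputSplit split : inputFormat.getSplits(job, 1)) {
            RecordReader<LongWritable, Text> reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
            LongWritable key = reader.createKey();
            Text value = reader.createValue();
            while (reader.next(key, value)) {
                System.out.println(value);
            }
            reader.close();
        }
    }
}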

Usage

From source file:org.apache.asterix.external.input.record.reader.hdfs.HDFSRecordReader.java

License:Apache License

@SuppressWarnings("unchecked")
private RecordReader<K, Writable> getRecordReader(int splitIndex) throws IOException {
    reader = (RecordReader<K, Writable>) inputFormat.getRecordReader(inputSplits[splitIndex], conf,
            Reporter.NULL);
    if (key == null) {
        key = reader.createKey();
        value = (V) reader.createValue();
    }
    if (indexer != null) {
        try {
            indexer.reset(this);
        } catch (Exception e) {
            throw new HyracksDataException(e);
        }
    }
    return reader;
}

From source file:org.apache.asterix.external.input.stream.HDFSInputStream.java

License:Apache License

@SuppressWarnings("unchecked")
private RecordReader<Object, Text> getRecordReader(int splitIndex) throws IOException {
    reader = (RecordReader<Object, Text>) inputFormat.getRecordReader(inputSplits[splitIndex], conf,
            Reporter.NULL);
    if (key == null) {
        key = reader.createKey();
        value = reader.createValue();
    }
    if (indexer != null) {
        try {
            indexer.reset(this);
        } catch (Exception e) {
            throw new HyracksDataException(e);
        }
    }
    return reader;
}

From source file:org.apache.crunch.io.orc.OrcFileReaderFactory.java

License:Apache License

@Override
public Iterator<T> read(FileSystem fs, final Path path) {
    try {
        if (!fs.isFile(path)) {
            throw new CrunchRuntimeException("Not a file: " + path);
        }

        inputFn.initialize();

        FileStatus status = fs.getFileStatus(path);
        FileSplit split = new FileSplit(path, 0, status.getLen(), new String[0]);

        JobConf conf = new JobConf();
        if (readColumns != null) {
            conf.setBoolean(OrcFileSource.HIVE_READ_ALL_COLUMNS, false);
            conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
                    OrcFileSource.getColumnIdsStr(readColumns));
        }
        final RecordReader<NullWritable, OrcStruct> reader = inputFormat.getRecordReader(split, conf,
                Reporter.NULL);

        return new UnmodifiableIterator<T>() {

            private boolean checked = false;
            private boolean hasNext;
            private OrcStruct value;
            private OrcWritable writable = new OrcWritable();

            @Override
            public boolean hasNext() {
                try {
                    if (value == null) {
                        value = reader.createValue();
                    }
                    if (!checked) {
                        hasNext = reader.next(NullWritable.get(), value);
                        checked = true;
                    }
                    return hasNext;
                } catch (Exception e) {
                    throw new CrunchRuntimeException("Error while reading local file: " + path, e);
                }
            }

            @Override
            public T next() {
                try {
                    if (value == null) {
                        value = reader.createValue();
                    }
                    if (!checked) {
                        reader.next(NullWritable.get(), value);
                    }
                    checked = false;
                    writable.set(value);
                    return inputFn.map(writable);
                } catch (Exception e) {
                    throw new CrunchRuntimeException("Error while reading local file: " + path, e);
                }
            }

        };
    } catch (Exception e) {
        throw new CrunchRuntimeException("Error while reading local file: " + path, e);
    }
}

From source file:org.apache.crunch.io.orc.OrcFileWriter.java

License:Apache License

@Override
public void close() throws IOException {
    writer.close(Reporter.NULL);
}

From source file:org.apache.drill.exec.store.easy.sequencefile.SequenceFileRecordReader.java

License:Apache License

private org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable> getRecordReader(
        final InputFormat<BytesWritable, BytesWritable> inputFormat, final JobConf jobConf)
        throws ExecutionSetupException {
    try {
        final UserGroupInformation ugi = ImpersonationUtil.createProxyUgi(this.opUserName, this.queryUserName);
        return ugi.doAs(
                new PrivilegedExceptionAction<org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable>>() {
                    @Override
                    public org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable> run()
                            throws Exception {
                        return inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
                    }
                });
    } catch (IOException | InterruptedException e) {
        throw new ExecutionSetupException(
                String.format("Error in creating sequencefile reader for file: %s, start: %d, length: %d",
                        split.getPath(), split.getStart(), split.getLength()),
                e);
    }
}

From source file:org.apache.drill.exec.store.hive.HiveAbstractReader.java

License:Apache License

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);

    // Get the configured default val
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);

    Properties tableProperties;
    try {
        tableProperties = HiveUtilities.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties
                : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);

        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(),
                tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);

        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(),
                    partitionProperties);
            partitionOI = getStructOI(partitionSerDe);

            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }

        if (logger.isTraceEnabled()) {
            for (StructField field : finalOI.getAllStructFieldRefs()) {
                logger.trace("field in finalOI: {}", field.getClass().getName());
            }
            logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
        }
        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }

        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();

        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);

        for (String columnName : selectedColumnNames) {
            StructField fieldRef = finalOI.getStructFieldRef(columnName);
            selectedStructFieldRefs.add(fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();

            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());

            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }

        for (int i = 0; i < selectedColumnNames.size(); ++i) {
            logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}",
                    selectedColumnObjInspectors.get(i).getTypeName(),
                    selectedColumnObjInspectors.get(i).getClass().getName(),
                    selectedColumnTypes.get(i).toString(),
                    selectedColumnFieldConverters.get(i).getClass().getName());
        }

        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);

                if (partition != null) {
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType,
                            partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(),
                e);
    }

    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(),
                    inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException(
                    "Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }

        internalInit(tableProperties, reader);
    }
}

From source file:org.apache.drill.exec.store.hive.HiveInputReader.java

License:Apache License

public static void main(String args[]) throws Exception {
    /*
        String[] columnNames = {"n_nationkey", "n_name", "n_regionkey",   "n_comment"};
        String[] columnTypes = {"bigint", "string", "bigint", "string"};
            
        List<FieldSchema> cols = Lists.newArrayList();
            
        for (int i = 0; i < columnNames.length; i++) {
          cols.add(new FieldSchema(columnNames[i], columnTypes[i], null));
        }
        String location = "file:///tmp/nation_s";
        String inputFormat = TextInputFormat.class.getCanonicalName();
        String serdeLib = LazySimpleSerDe.class.getCanonicalName();
    //    String inputFormat = HiveHBaseTableInputFormat.class.getCanonicalName();
    //    String serdeLib = HBaseSerDe.class.getCanonicalName();
        Map<String, String> serdeParams = new HashMap();
    //    serdeParams.put("serialization.format", "1");
    //    serdeParams.put("hbase.columns.mapping", ":key,f:name,f:regionkey,f:comment");
        serdeParams.put("serialization.format", "|");
        serdeParams.put("field.delim", "|");
            
            
        Map<String, String> tableParams = new HashMap();
        tableParams.put("hbase.table.name", "nation");
        SerDeInfo serDeInfo = new SerDeInfo(null, serdeLib, serdeParams);
        StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, null, false, -1, serDeInfo, null, null, null);
        Table table = new Table("table", "default", "sphillips", 0, 0, 0, storageDescriptor, new ArrayList<FieldSchema>(), tableParams, null, null, "MANAGED_TABLE");
        Properties properties = MetaStoreUtils.getTableMetadata(table);
        */

    HiveConf conf = new HiveConf();
    conf.set("hive.metastore.uris", "thrift://10.10.31.51:9083");
    HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
    Table table = client.getTable("default", "nation");
    Properties properties = MetaStoreUtils.getTableMetadata(table);

    Path path = new Path(table.getSd().getLocation());
    JobConf job = new JobConf();
    for (Object obj : properties.keySet()) {
        job.set((String) obj, (String) properties.get(obj));
    }
    //    job.set("hbase.zookeeper.quorum", "10.10.31.51");
    //    job.set("hbase.zookeeper.property.clientPort", "5181");
    InputFormat f = (InputFormat) Class.forName(table.getSd().getInputFormat()).getConstructor().newInstance();
    job.setInputFormat(f.getClass());
    FileInputFormat.addInputPath(job, path);
    InputFormat format = job.getInputFormat();
    SerDe serde = (SerDe) Class.forName(table.getSd().getSerdeInfo().getSerializationLib()).getConstructor()
            .newInstance();
    serde.initialize(job, properties);
    ObjectInspector inspector = serde.getObjectInspector();
    ObjectInspector.Category cat = inspector.getCategory();
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(inspector);
    List<String> columns = null;
    List<TypeInfo> colTypes = null;
    List<ObjectInspector> fieldObjectInspectors = Lists.newArrayList();

    switch (typeInfo.getCategory()) {
    case STRUCT:
        columns = ((StructTypeInfo) typeInfo).getAllStructFieldNames();
        colTypes = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
        for (int i = 0; i < columns.size(); i++) {
            System.out.print(columns.get(i));
            System.out.print(" ");
            System.out.print(colTypes.get(i));
        }
        System.out.println("");
        for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) {
            fieldObjectInspectors.add(field.getFieldObjectInspector());
        }
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        String encoded = serializeInputSplit(split);
        System.out.println(encoded);
        InputSplit newSplit = deserializeInputSplit(encoded, split.getClass().getCanonicalName());
        System.out.print("Length: " + newSplit.getLength() + " ");
        System.out.print("Locations: ");
        for (String loc : newSplit.getLocations())
            System.out.print(loc + " ");
        System.out.println();
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
        Object key = reader.createKey();
        Object value = reader.createValue();
        int count = 0;
        while (reader.next(key, value)) {
            List<Object> values = ((StructObjectInspector) inspector)
                    .getStructFieldsDataAsList(serde.deserialize((Writable) value));
            StructObjectInspector sInsp = (StructObjectInspector) inspector;
            Object obj = sInsp.getStructFieldData(serde.deserialize((Writable) value),
                    sInsp.getStructFieldRef("n_name"));
            System.out.println(obj);
            /*
            for (Object obj : values) {
              PrimitiveObjectInspector.PrimitiveCategory pCat = ((PrimitiveObjectInspector)fieldObjectInspectors.get(count)).getPrimitiveCategory();
              Object pObj = ((PrimitiveObjectInspector)fieldObjectInspectors.get(count)).getPrimitiveJavaObject(obj);
              System.out.print(pObj + " ");
            }
            */
            System.out.println("");
        }
    }
}

From source file:org.apache.drill.exec.store.hive.HiveRecordReader.java

License:Apache License

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);

    // Get the configured default val
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);

    Properties tableProperties;
    try {
        tableProperties = MetaStoreUtils.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties
                : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);

        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(),
                tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);

        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(),
                    partitionProperties);
            partitionOI = getStructOI(partitionSerDe);

            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }

        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }

        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();

        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);

        for (String columnName : selectedColumnNames) {
            ObjectInspector fieldOI = finalOI.getStructFieldRef(columnName).getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());

            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }

        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);

                if (partition != null) {
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType,
                            partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing HiveRecordReader: " + e.getMessage(), e);
    }

    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
        } catch (Exception e) {
            throw new ExecutionSetupException(
                    "Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        key = reader.createKey();
        skipRecordsInspector = new SkipRecordsInspector(tableProperties, reader);
    }
}

From source file:org.apache.drill.exec.store.hive.readers.HiveAbstractReader.java

License:Apache License

/**
 * Initializes next reader if available, will close previous reader if any.
 *
 * @param job map / reduce job configuration.
 * @return true if new reader was initialized, false is no more readers are available
 * @throws ExecutionSetupException if could not init record reader
 */
protected boolean initNextReader(JobConf job) throws ExecutionSetupException {
    if (inputSplitsIterator.hasNext()) {
        if (reader != null) {
            closeReader();
        }
        InputSplit inputSplit = inputSplitsIterator.next();
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(),
                    inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException(
                    "Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        return true;
    }
    return false;
}

From source file:org.apache.drill.exec.store.text.DrillTextRecordReader.java

License:Apache License

public DrillTextRecordReader(FileSplit split, Configuration fsConf, FragmentContext context, char delimiter,
        List<SchemaPath> columns) {
    this.delimiter = (byte) delimiter;
    this.split = split;
    setColumns(columns);

    if (!isStarQuery()) {
        String pathStr;
        for (SchemaPath path : columns) {
            assert path.getRootSegment().isNamed();
            pathStr = path.getRootSegment().getPath();
            Preconditions.checkArgument(
                    pathStr.equals(COL_NAME)
                            || (pathStr.equals("*") && path.getRootSegment().getChild() == null),
                    "Selected column(s) must have name 'columns' or must be plain '*'");

            if (path.getRootSegment().getChild() != null) {
                Preconditions.checkArgument(path.getRootSegment().getChild().isArray(),
                        "Selected column must be an array index");
                int index = path.getRootSegment().getChild().getArraySegment().getIndex();
                columnIds.add(index);
            }
        }
        Collections.sort(columnIds);
        numCols = columnIds.size();
    }

    TextInputFormat inputFormat = new TextInputFormat();
    JobConf job = new JobConf(fsConf);
    job.setInt("io.file.buffer.size", context.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
    job.setInputFormat(inputFormat.getClass());
    try {
        reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
        key = reader.createKey();
        value = reader.createValue();
        totalRecordsRead = 0;
    } catch (Exception e) {
        handleAndRaise("Failure in creating record reader", e);
    }
}