Example usage for org.apache.hadoop.mapred InputSplit getLocations

List of usage examples for org.apache.hadoop.mapred InputSplit getLocations

Introduction

On this page you can find example usage of org.apache.hadoop.mapred InputSplit getLocations.

Prototype

String[] getLocations() throws IOException;

Document

Get the list of hostnames where the input split is located.
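
A minimal sketch of the call (the TextInputFormat, the /tmp/input path, and the split-count hint below are illustrative assumptions, not taken from the examples on this page): obtain splits from an InputFormat, then ask each split for its preferred hostnames.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitLocationsExample {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        job.setInputFormat(TextInputFormat.class);
        // Hypothetical input path; point this at a real local or HDFS directory.
        FileInputFormat.setInputPaths(job, new Path("/tmp/input"));

        // The split count passed here is only a hint; the InputFormat decides the actual number.
        InputSplit[] splits = job.getInputFormat().getSplits(job, 1);

        for (InputSplit split : splits) {
            // getLocations() returns the hostnames where the split's data resides,
            // which schedulers use to place tasks close to the data.
            String[] locations = split.getLocations();
            System.out.println(split + " -> " + Arrays.toString(locations));
        }
    }
}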

Usage

From source file:org.apache.hive.jdbc.TestJdbcWithMiniLlap.java

License:Apache License

private int processQuery(String query, int numSplits, RowProcessor rowProcessor) throws Exception {
    String url = miniHS2.getJdbcURL();
    String user = System.getProperty("user.name");
    String pwd = user;

    LlapRowInputFormat inputFormat = new LlapRowInputFormat();

    // Get splits
    JobConf job = new JobConf(conf);
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);

    InputSplit[] splits = inputFormat.getSplits(job, numSplits);
    assertTrue(splits.length > 0);

    // Fetch rows from splits
    boolean first = true;
    int rowCount = 0;
    for (InputSplit split : splits) {
        System.out.println("Processing split " + Arrays.toString(split.getLocations()));

        int numColumns = 2;
        RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
        Row row = reader.createValue();
        while (reader.next(NullWritable.get(), row)) {
            rowProcessor.process(row);
            ++rowCount;
        }
    }

    return rowCount;
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1Splitter.java

License:Apache License

/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(),
                        s.getLength()));
            } else
                res.add(HadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1Splitter.java

License:Apache License

/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(),
                        s.getLength()));
            } else
                res.add(GridHadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.reef.io.data.loading.impl.SingleDataCenterEvaluatorToPartitionStrategy.java

License:Apache License

@Override
protected void updateLocations(final NumberedSplit<InputSplit> numberedSplit) {
    try {
        final InputSplit split = numberedSplit.getEntry();
        final String[] locations = split.getLocations();
        for (final String location : locations) {
            BlockingQueue<NumberedSplit<InputSplit>> newSplitQue = new LinkedBlockingQueue<>();
            final BlockingQueue<NumberedSplit<InputSplit>> splitQue = locationToSplits.putIfAbsent(location,
                    newSplitQue);
            if (splitQue != null) {
                newSplitQue = splitQue;
            }
            newSplitQue.add(numberedSplit);
        }
    } catch (final IOException e) {
        throw new RuntimeException("Unable to get InputSplits using the specified InputFormat", e);
    }
}

From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License:Apache License

private static List<TaskLocationHint> createTaskLocationHintsFromSplits(
        org.apache.hadoop.mapreduce.InputSplit[] newFormatSplits) {
    Iterable<TaskLocationHint> iterable = Iterables.transform(Arrays.asList(newFormatSplits),
            new Function<org.apache.hadoop.mapreduce.InputSplit, TaskLocationHint>() {
                @Override
                public TaskLocationHint apply(org.apache.hadoop.mapreduce.InputSplit input) {
                    try {
                        if (input instanceof TezGroupedSplit) {
                            String rack = ((org.apache.hadoop.mapreduce.split.TezGroupedSplit) input).getRack();
                            if (rack == null) {
                                if (input.getLocations() != null) {
                                    return TaskLocationHint.createTaskLocationHint(
                                            new HashSet<String>(Arrays.asList(input.getLocations())), null);
                                } else {
                                    return TaskLocationHint.createTaskLocationHint(null, null);
                                }
                            } else {
                                return TaskLocationHint.createTaskLocationHint(null,
                                        Collections.singleton(rack));
                            }
                        } else {
                            return TaskLocationHint.createTaskLocationHint(
                                    new HashSet<String>(Arrays.asList(input.getLocations())), null);
                        }
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e);
                    }
                }
            });
    return Lists.newArrayList(iterable);
}

From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License:Apache License

private static List<TaskLocationHint> createTaskLocationHintsFromSplits(
        org.apache.hadoop.mapred.InputSplit[] oldFormatSplits) {
    Iterable<TaskLocationHint> iterable = Iterables.transform(Arrays.asList(oldFormatSplits),
            new Function<org.apache.hadoop.mapred.InputSplit, TaskLocationHint>() {
                @Override
                public TaskLocationHint apply(org.apache.hadoop.mapred.InputSplit input) {
                    try {
                        if (input instanceof org.apache.hadoop.mapred.split.TezGroupedSplit) {
                            String rack = ((org.apache.hadoop.mapred.split.TezGroupedSplit) input).getRack();
                            if (rack == null) {
                                if (input.getLocations() != null) {
                                    return TaskLocationHint.createTaskLocationHint(
                                            new HashSet<String>(Arrays.asList(input.getLocations())), null);
                                } else {
                                    return TaskLocationHint.createTaskLocationHint(null, null);
                                }
                            } else {
                                return TaskLocationHint.createTaskLocationHint(null,
                                        Collections.singleton(rack));
                            }
                        } else {
                            return TaskLocationHint.createTaskLocationHint(
                                    new HashSet<String>(Arrays.asList(input.getLocations())), null);
                        }
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            });
    return Lists.newArrayList(iterable);
}

From source file:org.apache.tez.mapreduce.processor.MapUtils.java

License:Apache License

private static void writeSplitFiles(FileSystem fs, JobConf conf, InputSplit split) throws IOException {
    Path jobSplitFile = new Path(conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR_DEFAULT), MRJobConfig.JOB_SPLIT);
    LOG.info("Writing split to: " + jobSplitFile);
    FSDataOutputStream out = FileSystem.create(fs, jobSplitFile, new FsPermission(JOB_FILE_PERMISSION));

    long offset = out.getPos();
    Text.writeString(out, split.getClass().getName());
    split.write(out);
    out.close();

    String[] locations = split.getLocations();

    SplitMetaInfo info = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());

    Path jobSplitMetaInfoFile = new Path(conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR),
            MRJobConfig.JOB_SPLIT_METAINFO);

    FSDataOutputStream outMeta = FileSystem.create(fs, jobSplitMetaInfoFile,
            new FsPermission(JOB_FILE_PERMISSION));
    outMeta.write(SplitMetaInfoReaderTez.META_SPLIT_FILE_HEADER);
    WritableUtils.writeVInt(outMeta, SplitMetaInfoReaderTez.META_SPLIT_VERSION);
    WritableUtils.writeVInt(outMeta, 1); // Only 1 split meta info being written
    info.write(outMeta);
    outMeta.close();
}

From source file:org.deeplearning4j.iterativereduce.runtime.yarn.appmaster.ApplicationMaster.java

License:Apache License

private Set<ConfigurationTuple> getConfigurationTuples() throws IOException {
    if (confTuples != null)
        return confTuples;
    Path inputPath = new Path(props.getProperty(ConfigFields.APP_INPUT_PATH));
    FileSystem fs = FileSystem.get(conf);
    FileStatus f = fs.getFileStatus(inputPath);
    //BlockLocation[] bl = fs.getFileBlockLocations(p, 0, f.getLen());
    Set<ConfigurationTuple> configTuples = new HashSet<>();
    int workerId = 0;

    JobConf job = new JobConf(new Configuration());

    job.setInputFormat((Class<? extends InputFormat>) this.inputFormatClass); //TextInputFormat.class);

    FileInputFormat.setInputPaths(job, inputPath);

    InputSplit[] splits = job.getInputFormat().getSplits(job, job.getNumMapTasks());

    for (InputSplit split : splits) {

        FileSplit convertedToMetronomeSplit = new FileSplit();

        org.apache.hadoop.mapred.FileSplit hadoopFileSplit = (org.apache.hadoop.mapred.FileSplit) split;

        if (hadoopFileSplit.getLength() - hadoopFileSplit.getStart() > 0) {
            convertedToMetronomeSplit.setLength(hadoopFileSplit.getLength());
            convertedToMetronomeSplit.setOffset(hadoopFileSplit.getStart());
            convertedToMetronomeSplit.setPath(hadoopFileSplit.getPath().toString());

            StartupConfiguration config = StartupConfiguration.newBuilder().setBatchSize(batchSize)
                    .setIterations(iterationCount).setOther(appConfig).setSplit(convertedToMetronomeSplit)
                    .build();

            String wid = "worker-" + workerId;
            ConfigurationTuple tuple = new ConfigurationTuple(split.getLocations()[0], wid, config);

            configTuples.add(tuple);
            workerId++;

            LOG.info("IR_AM_worker: " + wid + " added split: " + convertedToMetronomeSplit.toString());

        } else {
            LOG.info("IR_AM: Culled out 0 length Split: " + convertedToMetronomeSplit.toString());
        }

    }

    LOG.info("Total Splits/Workers: " + configTuples.size());

    confTuples = configTuples;
    return configTuples;
}

From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1Splitter.java

License:Open Source License

/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws GridException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobConf jobConf) throws GridException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(),
                        s.getLength()));
            } else
                res.add(GridHadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    } catch (IOException e) {
        throw new GridException(e);
    }
}

From source file:org.kiji.hive.KijiTableInputFormat.java

License:Apache License

/**
 * Returns an object responsible for generating records contained in a
 * given input split.
 *
 * @param split The input split to create a record reader for.
 * @param job The job configuration.
 * @param reporter A job info reporter (for counters, status, etc.).
 * @return The record reader.
 * @throws IOException If there is an error.
 */
@Override
public RecordReader<ImmutableBytesWritable, KijiRowDataWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    LOG.info("Getting record reader {}", split.getLocations());
    return new KijiTableRecordReader((KijiTableInputSplit) split, job);
}