List of usage examples for org.apache.hadoop.mapred.InputSplit#getLocations
String[] getLocations() throws IOException;
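Before the project examples below, here is a minimal, self-contained sketch of the call in isolation: it asks the old-API TextInputFormat for splits and prints the hosts each split reports. The class name GetLocationsExample and the use of the first program argument as the input path are illustrative assumptions, not taken from any of the projects listed.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class GetLocationsExample {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        FileInputFormat.setInputPaths(conf, new Path(args[0])); // existing file or directory

        TextInputFormat format = new TextInputFormat();
        format.configure(conf);

        // The numSplits argument is only a hint; the format decides how many splits it returns.
        InputSplit[] splits = format.getSplits(conf, 1);
        for (InputSplit split : splits) {
            // getLocations() returns the hostnames holding the split's data. It is a
            // scheduling hint: the array may be empty, and running the task on another
            // host is still correct, just less local.
            String[] locations = split.getLocations();
            System.out.println(split + " -> " + Arrays.toString(locations));
        }
    }
}

The examples that follow show the same call used for real scheduling decisions: mapping splits to blocks, building locality queues, and creating task location hints.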
From source file:org.apache.hive.jdbc.TestJdbcWithMiniLlap.java
License:Apache License
private int processQuery(String query, int numSplits, RowProcessor rowProcessor) throws Exception {
    String url = miniHS2.getJdbcURL();
    String user = System.getProperty("user.name");
    String pwd = user;
    LlapRowInputFormat inputFormat = new LlapRowInputFormat();

    // Get splits
    JobConf job = new JobConf(conf);
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);

    InputSplit[] splits = inputFormat.getSplits(job, numSplits);
    assertTrue(splits.length > 0);

    // Fetch rows from splits
    int rowCount = 0;
    for (InputSplit split : splits) {
        System.out.println("Processing split " + Arrays.toString(split.getLocations()));

        RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
        Row row = reader.createValue();
        while (reader.next(NullWritable.get(), row)) {
            rowProcessor.process(row);
            ++rowCount;
        }
    }
    return rowCount;
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1Splitter.java
License:Apache License
/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(HadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1Splitter.java
License:Apache License
/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(GridHadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.reef.io.data.loading.impl.SingleDataCenterEvaluatorToPartitionStrategy.java
License:Apache License
@Override
protected void updateLocations(final NumberedSplit<InputSplit> numberedSplit) {
    try {
        final InputSplit split = numberedSplit.getEntry();
        final String[] locations = split.getLocations();
        for (final String location : locations) {
            BlockingQueue<NumberedSplit<InputSplit>> newSplitQue = new LinkedBlockingQueue<>();
            final BlockingQueue<NumberedSplit<InputSplit>> splitQue = locationToSplits.putIfAbsent(location,
                    newSplitQue);
            if (splitQue != null) {
                newSplitQue = splitQue;
            }
            newSplitQue.add(numberedSplit);
        }
    } catch (final IOException e) {
        throw new RuntimeException("Unable to get InputSplits using the specified InputFormat", e);
    }
}
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
private static List<TaskLocationHint> createTaskLocationHintsFromSplits(
        org.apache.hadoop.mapreduce.InputSplit[] newFormatSplits) {
    Iterable<TaskLocationHint> iterable = Iterables.transform(Arrays.asList(newFormatSplits),
            new Function<org.apache.hadoop.mapreduce.InputSplit, TaskLocationHint>() {
                @Override
                public TaskLocationHint apply(org.apache.hadoop.mapreduce.InputSplit input) {
                    try {
                        if (input instanceof TezGroupedSplit) {
                            String rack = ((org.apache.hadoop.mapreduce.split.TezGroupedSplit) input).getRack();
                            if (rack == null) {
                                if (input.getLocations() != null) {
                                    return TaskLocationHint.createTaskLocationHint(
                                            new HashSet<String>(Arrays.asList(input.getLocations())), null);
                                } else {
                                    return TaskLocationHint.createTaskLocationHint(null, null);
                                }
                            } else {
                                return TaskLocationHint.createTaskLocationHint(null, Collections.singleton(rack));
                            }
                        } else {
                            return TaskLocationHint.createTaskLocationHint(
                                    new HashSet<String>(Arrays.asList(input.getLocations())), null);
                        }
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e);
                    }
                }
            });
    return Lists.newArrayList(iterable);
}
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
private static List<TaskLocationHint> createTaskLocationHintsFromSplits(
        org.apache.hadoop.mapred.InputSplit[] oldFormatSplits) {
    Iterable<TaskLocationHint> iterable = Iterables.transform(Arrays.asList(oldFormatSplits),
            new Function<org.apache.hadoop.mapred.InputSplit, TaskLocationHint>() {
                @Override
                public TaskLocationHint apply(org.apache.hadoop.mapred.InputSplit input) {
                    try {
                        if (input instanceof org.apache.hadoop.mapred.split.TezGroupedSplit) {
                            String rack = ((org.apache.hadoop.mapred.split.TezGroupedSplit) input).getRack();
                            if (rack == null) {
                                if (input.getLocations() != null) {
                                    return TaskLocationHint.createTaskLocationHint(
                                            new HashSet<String>(Arrays.asList(input.getLocations())), null);
                                } else {
                                    return TaskLocationHint.createTaskLocationHint(null, null);
                                }
                            } else {
                                return TaskLocationHint.createTaskLocationHint(null, Collections.singleton(rack));
                            }
                        } else {
                            return TaskLocationHint.createTaskLocationHint(
                                    new HashSet<String>(Arrays.asList(input.getLocations())), null);
                        }
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            });
    return Lists.newArrayList(iterable);
}
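Note the asymmetry between the two overloads above: the new-API org.apache.hadoop.mapreduce.InputSplit#getLocations declares InterruptedException in addition to IOException, so the new-format transform needs an extra catch block that the old-format (org.apache.hadoop.mapred) version can omit.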
From source file:org.apache.tez.mapreduce.processor.MapUtils.java
License:Apache License
private static void writeSplitFiles(FileSystem fs, JobConf conf, InputSplit split) throws IOException {
    Path jobSplitFile = new Path(conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR_DEFAULT), MRJobConfig.JOB_SPLIT);
    LOG.info("Writing split to: " + jobSplitFile);
    FSDataOutputStream out = FileSystem.create(fs, jobSplitFile, new FsPermission(JOB_FILE_PERMISSION));

    long offset = out.getPos();
    Text.writeString(out, split.getClass().getName());
    split.write(out);
    out.close();

    String[] locations = split.getLocations();

    SplitMetaInfo info = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());

    Path jobSplitMetaInfoFile = new Path(conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR),
            MRJobConfig.JOB_SPLIT_METAINFO);
    FSDataOutputStream outMeta = FileSystem.create(fs, jobSplitMetaInfoFile,
            new FsPermission(JOB_FILE_PERMISSION));

    outMeta.write(SplitMetaInfoReaderTez.META_SPLIT_FILE_HEADER);
    WritableUtils.writeVInt(outMeta, SplitMetaInfoReaderTez.META_SPLIT_VERSION);
    WritableUtils.writeVInt(outMeta, 1); // Only 1 split meta info being written
    info.write(outMeta);
    outMeta.close();
}
From source file:org.deeplearning4j.iterativereduce.runtime.yarn.appmaster.ApplicationMaster.java
License:Apache License
private Set<ConfigurationTuple> getConfigurationTuples() throws IOException {
    if (confTuples != null)
        return confTuples;
    Path inputPath = new Path(props.getProperty(ConfigFields.APP_INPUT_PATH));
    FileSystem fs = FileSystem.get(conf);
    FileStatus f = fs.getFileStatus(inputPath); // fails fast if the input path does not exist

    Set<ConfigurationTuple> configTuples = new HashSet<>();
    int workerId = 0;

    JobConf job = new JobConf(new Configuration());
    job.setInputFormat((Class<? extends InputFormat>) this.inputFormatClass); // e.g. TextInputFormat.class
    FileInputFormat.setInputPaths(job, inputPath);
    InputSplit[] splits = job.getInputFormat().getSplits(job, job.getNumMapTasks());

    for (InputSplit split : splits) {
        FileSplit convertedToMetronomeSplit = new FileSplit();
        org.apache.hadoop.mapred.FileSplit hadoopFileSplit = (org.apache.hadoop.mapred.FileSplit) split;

        if (hadoopFileSplit.getLength() - hadoopFileSplit.getStart() > 0) {
            convertedToMetronomeSplit.setLength(hadoopFileSplit.getLength());
            convertedToMetronomeSplit.setOffset(hadoopFileSplit.getStart());
            convertedToMetronomeSplit.setPath(hadoopFileSplit.getPath().toString());

            StartupConfiguration config = StartupConfiguration.newBuilder().setBatchSize(batchSize)
                    .setIterations(iterationCount).setOther(appConfig).setSplit(convertedToMetronomeSplit)
                    .build();

            // Pin the worker to the first host reported by getLocations().
            String wid = "worker-" + workerId;
            ConfigurationTuple tuple = new ConfigurationTuple(split.getLocations()[0], wid, config);

            configTuples.add(tuple);
            workerId++;
            LOG.info("IR_AM_worker: " + wid + " added split: " + convertedToMetronomeSplit.toString());
        } else {
            LOG.info("IR_AM: Culled out 0 length Split: " + convertedToMetronomeSplit.toString());
        }
    }

    LOG.info("Total Splits/Workers: " + configTuples.size());

    confTuples = configTuples;
    return configTuples;
}
From source file:org.gridgain.grid.kernal.processors.hadoop.v1.GridHadoopV1Splitter.java
License:Open Source License
/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws GridException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobConf jobConf) throws GridException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(GridHadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    }
    catch (IOException e) {
        throw new GridException(e);
    }
}
From source file:org.kiji.hive.KijiTableInputFormat.java
License:Apache License
/**
 * Returns an object responsible for generating records contained in a given input split.
 *
 * @param split The input split to create a record reader for.
 * @param job The job configuration.
 * @param reporter A job info reporter (for counters, status, etc.).
 * @return The record reader.
 * @throws IOException If there is an error.
 */
@Override
public RecordReader<ImmutableBytesWritable, KijiRowDataWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    LOG.info("Getting record reader {}", Arrays.toString(split.getLocations()));
    return new KijiTableRecordReader((KijiTableInputSplit) split, job);
}