List of usage examples for org.apache.hadoop.mapred.Reporter.NULL
Reporter.NULL is the no-op Reporter constant in Hadoop's old org.apache.hadoop.mapred API. It is passed wherever a Reporter (or Progressable) is required but no progress reporting is needed, most commonly when obtaining a RecordReader or configuring I/O outside of a running MapReduce job. The examples below show how several open-source projects use it; each entry names the source file and its license, followed by the relevant code.
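Before the individual examples, here is a minimal, self-contained sketch (not taken from any of the listed projects) of the pattern most of them share: reading a text file with the old mapred API outside of a running job, passing Reporter.NULL because no progress reporting is available. The class name ReporterNullExample and the input path are hypothetical placeholders.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullExample {
    public static void main(String[] args) throws IOException {
        // hypothetical input path; replace with a real file
        JobConf job = new JobConf();
        FileInputFormat.addInputPath(job, new Path("hdfs:///tmp/example.txt"));

        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);

        LongWritable key = new LongWritable();
        Text value = new Text();
        for (InputSplit split : informat.getSplits(job, 1)) {
            // Reporter.NULL satisfies the Reporter parameter without reporting progress
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value)) {
                    System.out.println(value.toString());
                }
            } finally {
                reader.close();
            }
        }
    }
}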
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java
License:Open Source License
/**
 * Get the list of hostnames where the input split is located.
 */
@Override
public String[] getLocations() throws IOException {
    //Timing time = new Timing();
    //time.start();

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
    reader.next(key, value);
    reader.close();

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<String, Integer>();

    if (t.getType() == TaskType.SET) {
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    } else //TaskType.RANGE
    {
        //since this is a serial process, we use just the first iteration
        //as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }

        /*
        int lFrom  = t.getIterations().get(0).getIntValue();
        int lTo    = t.getIterations().get(1).getIntValue();
        int lIncr  = t.getIterations().get(2).getIntValue();
        for( int i=lFrom; i<=lTo; i+=lIncr )
        {
            String fname = _fname+"/"+String.valueOf( ((i-_offset)/_blen+_offset) );
            FileSystem fs = FileSystem.get(job);
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for( BlockLocation bl : tmp1 )
                countHosts(hosts, bl.getHosts());
        }*/
    }

    //System.out.println("Get locations "+time.stop()+"");

    //majority consensus on top host
    return getTopHosts(hosts);
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * @param fnameNew
 * @param outMo
 * @param inMO
 * @throws DMLRuntimeException
 */
private void mergeTextCellWithoutComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO)
        throws DMLRuntimeException {
    try {
        //delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            return; //we're done
        }

        //actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fs = FileSystem.get(job);
        Path path = new Path(fnameNew);
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        String valueStr = null;

        try {
            for (MatrixObject in : inMO) //read/write all inputs
            {
                LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname="
                        + in.getFileName() + ") via stream merge");

                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                FileInputFormat.addInputPath(tmpJob, tmpPath);
                TextInputFormat informat = new TextInputFormat();
                informat.configure(tmpJob);
                InputSplit[] splits = informat.getSplits(tmpJob, 1);

                LongWritable key = new LongWritable();
                Text value = new Text();

                for (InputSplit split : splits) {
                    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, tmpJob,
                            Reporter.NULL);
                    try {
                        while (reader.next(key, value)) {
                            valueStr = value.toString().trim();
                            out.write(valueStr + "\n");
                        }
                    } finally {
                        if (reader != null)
                            reader.close();
                    }
                }
            }
        } finally {
            if (out != null)
                out.close();
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge text cell results.", ex);
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * @param fnameStaging
 * @param mo
 * @param ID
 * @throws IOException
 * @throws DMLRuntimeException
 */
private void createTextCellStagingFile(String fnameStaging, MatrixObject mo, long ID)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(mo.getFileName());
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);

    LinkedList<Cell> buffer = new LinkedList<Cell>();
    LongWritable key = new LongWritable();
    Text value = new Text();
    MatrixCharacteristics mc = mo.getMatrixCharacteristics();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    //long row = -1, col = -1; //FIXME needs reconsideration whenever textcell is used actively
    //NOTE MB: Originally, we used long row, col but this led reproducibly to JIT compilation
    //         errors during runtime; experienced under WINDOWS, Intel x86-64, IBM JDK 64bit/32bit.
    //         It works fine with int row, col but we require long for larger matrices.
    //         Since, textcell is never used for result merge (hybrid/hadoop: binaryblock, singlenode:binarycell)
    //         we just propose the to exclude it with -Xjit:exclude={package.method*}(count=0,optLevel=0)
    FastStringTokenizer st = new FastStringTokenizer(' ');

    for (InputSplit split : splits) {
        RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);

        try {
            while (reader.next(key, value)) {
                st.reset(value.toString()); //reset tokenizer
                long row = st.nextLong();
                long col = st.nextLong();
                double lvalue = Double.parseDouble(st.nextToken());

                Cell tmp = new Cell(row, col, lvalue);

                buffer.addLast(tmp);
                if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                {
                    appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                    buffer.clear();
                }
            }

            //final flush
            if (!buffer.isEmpty()) {
                appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                buffer.clear();
            }
        } finally {
            if (reader != null)
                reader.close();
        }
    }
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCell.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param dest
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
private void readTextCellMatrixFromHDFS(Path path, JobConf job, MatrixBlock dest, long rlen, long clen,
        int brlen, int bclen) throws IOException {
    boolean sparse = dest.isInSparseFormat();
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);

    LongWritable key = new LongWritable();
    Text value = new Text();
    int row = -1;
    int col = -1;

    try {
        FastStringTokenizer st = new FastStringTokenizer(' ');

        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);

            try {
                if (sparse) //SPARSE<-value
                {
                    while (reader.next(key, value)) {
                        st.reset(value.toString()); //reinit tokenizer
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        double lvalue = st.nextDouble();

                        dest.appendValue(row, col, lvalue);
                    }

                    dest.sortSparseRows();
                } else //DENSE<-value
                {
                    while (reader.next(key, value)) {
                        st.reset(value.toString()); //reinit tokenizer
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        double lvalue = st.nextDouble();

                        dest.setValueDenseUnsafe(row, col, lvalue);
                    }
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }
    } catch (Exception ex) {
        //post-mortem error handling and bounds checking
        if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen) {
            throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else {
            throw new IOException("Unable to read matrix in text cell format.", ex);
        }
    }
}
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSVParallel.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param hasHeader
 * @param delim
 * @return
 * @throws IOException
 * @throws DMLRuntimeException
 */
private MatrixBlock computeCSVSizeAndCreateOutputMatrixBlock(InputSplit[] splits, Path path, JobConf job,
        boolean hasHeader, String delim, long estnnz) throws IOException, DMLRuntimeException {
    int nrow = 0;
    int ncol = 0;

    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);

    // count no of entities in the first non-header row
    LongWritable key = new LongWritable();
    Text oneLine = new Text();
    RecordReader<LongWritable, Text> reader = informat.getRecordReader(splits[0], job, Reporter.NULL);
    try {
        if (reader.next(key, oneLine)) {
            String cellStr = oneLine.toString().trim();
            ncol = StringUtils.countMatches(cellStr, delim) + 1;
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }

    // count rows in parallel per split
    try {
        ExecutorService pool = Executors.newFixedThreadPool(_numThreads);
        ArrayList<CountRowsTask> tasks = new ArrayList<CountRowsTask>();
        for (InputSplit split : splits) {
            tasks.add(new CountRowsTask(split, informat, job, hasHeader));
            hasHeader = false;
        }
        pool.invokeAll(tasks);
        pool.shutdown();

        // collect row counts for offset computation
        // early error notify in case not all tasks successful
        _offsets = new SplitOffsetInfos(tasks.size());
        for (CountRowsTask rt : tasks) {
            if (!rt.getReturnCode())
                throw new IOException("Count task for csv input failed: " + rt.getErrMsg());
            _offsets.setOffsetPerSplit(tasks.indexOf(rt), nrow);
            _offsets.setLenghtPerSplit(tasks.indexOf(rt), rt.getRowCount());
            nrow = nrow + rt.getRowCount();
        }
    } catch (Exception e) {
        throw new IOException("Threadpool Error " + e.getMessage(), e);
    }

    // allocate target matrix block based on given size;
    // need to allocate sparse as well since lock-free insert into target
    return createOutputMatrixBlock(nrow, ncol, estnnz, true, true);
}
From source file:com.ibm.bi.dml.udf.lib.RemoveEmptyRows.java
License:Open Source License
@Override
public void execute() {
    Matrix mat = (Matrix) this.getFunctionInput(0);
    String fnameOld = mat.getFilePath();

    HashMap<Long, Long> keyMap = new HashMap<Long, Long>(); //old,new rowID

    try {
        //prepare input
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameOld);
        FileSystem fs = FileSystem.get(job);
        if (!fs.exists(path))
            throw new IOException("File " + fnameOld + " does not exist on HDFS.");
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);

        //prepare output
        String fnameNew = createOutputFilePathAndName(OUTPUT_FILE);
        DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream(fnameNew, true);

        //read and write if necessary
        InputSplit[] splits = informat.getSplits(job, 1);
        LongWritable key = new LongWritable();
        Text value = new Text();
        long ID = 1;

        try {
            //for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();

            for (InputSplit split : splits) {
                RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
                try {
                    while (reader.next(key, value)) {
                        String cellStr = value.toString().trim();
                        StringTokenizer st = new StringTokenizer(cellStr, " ");
                        long row = Integer.parseInt(st.nextToken());
                        long col = Integer.parseInt(st.nextToken());
                        double lvalue = Double.parseDouble(st.nextToken());

                        if (!keyMap.containsKey(row))
                            keyMap.put(row, ID++);
                        long rowNew = keyMap.get(row);

                        sb.append(rowNew);
                        sb.append(' ');
                        sb.append(col);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');

                        ostream.writeBytes(sb.toString());
                        sb.setLength(0);
                    }
                } finally {
                    if (reader != null)
                        reader.close();
                }
            }

            _ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
        } finally {
            if (ostream != null)
                ostream.close();
        }
    } catch (Exception ex) {
        throw new RuntimeException("Unable to execute external function.", ex);
    }
}
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopInputAdapter.java
License:Apache License
public void open() throws Exception {
    this.conf = new JobConf();
    if (Globals.getJobConf() != null)
        conf.setWorkingDirectory(Globals.getJobConf().getWorkingDirectory());

    this.reporter = Reporter.NULL;

    // write state to conf, pass in top-level args
    setSequential(conf);
    Globals.setJobConf(conf);

    // initialize the format from conf
    if (iFormat instanceof JobConfigurable)
        ((JobConfigurable) iFormat).configure(conf);
}
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java
License:Apache License
public void open() throws Exception {
    this.conf = new JobConf();
    this.reporter = Reporter.NULL;

    // Some OutputFormats (like FileOutputFormat) require that the job id/task id set.
    // So let's set it for all output formats, just in case they need it too.
    JobID jobid = new JobID("sequential", jobCounter.getAndIncrement());
    TaskAttemptID taskid = new TaskAttemptID(new TaskID(jobid, true, 0), 0);
    conf.set("mapred.task.id", taskid.toString());

    setSequential(conf);

    // Create a task so we can use committers.
    sequentialJob = new ExposeJobContext(conf, jobid);
    sequentialTask = new ExposeTaskAttemptContext(conf, taskid);

    // Give the commiter a chance initialize.
    OutputCommitter committer = conf.getOutputCommitter();
    // FIXME: We skip job setup for now because
    committer.setupJob(sequentialJob);
    committer.setupTask(sequentialTask);

    if (oFormat instanceof JobConfigurable)
        ((JobConfigurable) oFormat).configure(conf);
}
From source file:com.ibm.jaql.lang.expr.io.ReadSplitFn.java
License:Apache License
@Override
public JsonIterator iter(Context context) throws Exception {
    // Close the previous adapter, if still open:
    if (adapter != null) {
        adapter.close();
        adapter = null;
    }

    // evaluate the arguments
    JsonValue args = exprs[0].eval(context);
    JsonRecord splitRec = (JsonRecord) exprs[1].eval(context);

    if (splitRec == null) {
        return JsonIterator.EMPTY;
    }

    // get the InputAdapter according to the type
    HadoopInputAdapter hia = (HadoopInputAdapter) JaqlUtil.getAdapterStore().input.getAdapter(args);
    adapter = hia;
    JobConf conf = new JobConf(); // TODO: allow configuration
    hia.setParallel(conf); // right thing to do?

    JsonString jsplitClassName = (JsonString) splitRec.get(InputSplitsFn.CLASS_TAG);
    Class<? extends InputSplit> splitCls = (Class<? extends InputSplit>) ClassLoaderMgr
            .resolveClass(jsplitClassName.toString());
    InputSplit split = (InputSplit) ReflectionUtils.newInstance(splitCls, conf);

    DataInputBuffer in = new DataInputBuffer();
    JsonBinary rawSplit = (JsonBinary) splitRec.get(InputSplitsFn.SPLIT_TAG);
    in.reset(rawSplit.getInternalBytes(), rawSplit.bytesOffset(), rawSplit.bytesLength());
    split.readFields(in);

    RecordReader<JsonHolder, JsonHolder> rr = hia.getRecordReader(split, conf, Reporter.NULL);
    return new RecordReaderValueIter(rr);
}
From source file:com.ibm.jaql.lang.util.JsonSorter.java
License:Apache License
/**
 * @param comparator
 */
public JsonSorter(JsonComparator comparator) {
    conf.setMapOutputKeyClass(JsonHolderDefault.class);
    HadoopSerializationDefault.register(conf);
    if (comparator != null) {
        conf.setOutputKeyComparatorClass(comparator.getClass());
    } else {
        conf.setOutputKeyComparatorClass(DefaultJsonComparator.class);
    }
    // sorter.configure(conf); // done below using setComparator
    sorter.setInputBuffer(keyValBuffer);
    sorter.setProgressable(Reporter.NULL);
    if (comparator != null) {
        sorter.setComparator(comparator);
    } else {
        sorter.setComparator(new DefaultJsonComparator());
    }
}