Example usage for org.apache.hadoop.mapred Reporter NULL

Introduction

This page collects example usages of org.apache.hadoop.mapred Reporter.NULL.

Prototype

public static final Reporter NULL

Document

A constant of Reporter type that does nothing.
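
The pattern shared by the examples below is to pass Reporter.NULL wherever the old mapred API expects a Reporter but there is no running task to report progress to, typically when a RecordReader or OutputFormat is driven directly from client code. The following is a minimal sketch of that pattern; the class name and input path are placeholders rather than part of any example on this page.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullSketch {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        FileInputFormat.addInputPath(job, new Path("/tmp/input.txt")); // placeholder path

        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);

        LongWritable key = new LongWritable();
        Text value = new Text();

        for (InputSplit split : splits) {
            // no task context here, so progress updates and counters are discarded by Reporter.NULL
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value))
                    System.out.println(value);
            } finally {
                reader.close();
            }
        }
    }
}

Since Reporter extends Progressable, the same constant can also stand in where only a Progressable is required, as in the JsonSorter example at the end of this page.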

Usage

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java

License:Open Source License

/**
 * Get the list of hostnames where the input split is located.
 */
@Override
public String[] getLocations() throws IOException {
    //Timing time = new Timing();
    //time.start();

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
    reader.next(key, value);
    reader.close();

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<String, Integer>();

    if (t.getType() == TaskType.SET) {
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    } else //TaskType.RANGE
    {
        //since this is a serial process, we use just the first iteration
        //as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }

        /*
        int lFrom  = t.getIterations().get(0).getIntValue();
        int lTo    = t.getIterations().get(1).getIntValue();
        int lIncr  = t.getIterations().get(2).getIntValue();            
        for( int i=lFrom; i<=lTo; i+=lIncr )
        {
           String fname = _fname+"/"+String.valueOf( ((i-_offset)/_blen+_offset) );
           FileSystem fs = FileSystem.get(job);
           FileStatus status = fs.getFileStatus(new Path(fname)); 
           BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
           for( BlockLocation bl : tmp1 )
              countHosts(hosts, bl.getHosts());
        }*/
    }

    //System.out.println("Get locations "+time.stop()+"");

    //majority consensus on top host
    return getTopHosts(hosts);
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java

License:Open Source License

/**
 * Merges all input matrices into a single text cell output file via stream copy.
 * @param fnameNew
 * @param outMo
 * @param inMO
 * @throws DMLRuntimeException
 */
private void mergeTextCellWithoutComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO)
        throws DMLRuntimeException {
    try {
        //delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            return; //we're done
        }

        //actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fs = FileSystem.get(job);
        Path path = new Path(fnameNew);
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        String valueStr = null;

        try {
            for (MatrixObject in : inMO) //read/write all inputs
            {
                LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname="
                        + in.getFileName() + ") via stream merge");

                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                FileInputFormat.addInputPath(tmpJob, tmpPath);
                TextInputFormat informat = new TextInputFormat();
                informat.configure(tmpJob);
                InputSplit[] splits = informat.getSplits(tmpJob, 1);

                LongWritable key = new LongWritable();
                Text value = new Text();

                for (InputSplit split : splits) {
                    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, tmpJob,
                            Reporter.NULL);
                    try {
                        while (reader.next(key, value)) {
                            valueStr = value.toString().trim();
                            out.write(valueStr + "\n");
                        }
                    } finally {
                        if (reader != null)
                            reader.close();
                    }
                }
            }
        } finally {
            if (out != null)
                out.close();
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge text cell results.", ex);
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java

License:Open Source License

/**
 * Reads the given matrix in text cell format and writes its cells into block-wise staging files.
 * @param fnameStaging
 * @param mo
 * @param ID
 * @throws IOException
 * @throws DMLRuntimeException
 */

private void createTextCellStagingFile(String fnameStaging, MatrixObject mo, long ID)
        throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(mo.getFileName());
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);

    LinkedList<Cell> buffer = new LinkedList<Cell>();
    LongWritable key = new LongWritable();
    Text value = new Text();

    MatrixCharacteristics mc = mo.getMatrixCharacteristics();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    //long row = -1, col = -1; //FIXME needs reconsideration whenever textcell is used actively
    //NOTE MB: Originally, we used long row, col but this led reproducibly to JIT compilation
    // errors during runtime; experienced under WINDOWS, Intel x86-64, IBM JDK 64bit/32bit.
    // It works fine with int row, col but we require long for larger matrices.
    // Since textcell is never used for result merge (hybrid/hadoop: binaryblock, singlenode: binarycell),
    // we just propose to exclude it with -Xjit:exclude={package.method*}(count=0,optLevel=0)

    FastStringTokenizer st = new FastStringTokenizer(' ');

    for (InputSplit split : splits) {
        RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
        try {
            while (reader.next(key, value)) {
                st.reset(value.toString()); //reset tokenizer
                long row = st.nextLong();
                long col = st.nextLong();
                double lvalue = Double.parseDouble(st.nextToken());

                Cell tmp = new Cell(row, col, lvalue);

                buffer.addLast(tmp);
                if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                {
                    appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                    buffer.clear();
                }
            }

            //final flush
            if (!buffer.isEmpty()) {
                appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                buffer.clear();
            }
        } finally {
            if (reader != null)
                reader.close();
        }
    }
}

From source file:com.ibm.bi.dml.runtime.io.ReaderTextCell.java

License:Open Source License

/**
 * Reads a matrix in text cell format from HDFS into the given destination matrix block.
 * @param path
 * @param job
 * @param dest
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
private void readTextCellMatrixFromHDFS(Path path, JobConf job, MatrixBlock dest, long rlen, long clen,
        int brlen, int bclen) throws IOException {
    boolean sparse = dest.isInSparseFormat();
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);

    LongWritable key = new LongWritable();
    Text value = new Text();
    int row = -1;
    int col = -1;

    try {
        FastStringTokenizer st = new FastStringTokenizer(' ');

        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);

            try {
                if (sparse) //SPARSE<-value
                {
                    while (reader.next(key, value)) {
                        st.reset(value.toString()); //reinit tokenizer
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        double lvalue = st.nextDouble();
                        dest.appendValue(row, col, lvalue);
                    }

                    dest.sortSparseRows();
                } else //DENSE<-value
                {
                    while (reader.next(key, value)) {
                        st.reset(value.toString()); //reinit tokenizer
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        double lvalue = st.nextDouble();
                        dest.setValueDenseUnsafe(row, col, lvalue);
                    }
                }
            } finally {
                if (reader != null)
                    reader.close();
            }
        }
    } catch (Exception ex) {
        //post-mortem error handling and bounds checking
        if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen) {
            throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else {
            throw new IOException("Unable to read matrix in text cell format.", ex);
        }
    }
}

From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSVParallel.java

License:Open Source License

/**
 * Determines the CSV dimensions (columns from the first row, rows counted in parallel per split) and allocates the output matrix block.
 * @param path
 * @param job
 * @param hasHeader
 * @param delim
 * @return
 * @throws IOException
 * @throws DMLRuntimeException 
 */
private MatrixBlock computeCSVSizeAndCreateOutputMatrixBlock(InputSplit[] splits, Path path, JobConf job,
        boolean hasHeader, String delim, long estnnz) throws IOException, DMLRuntimeException {
    int nrow = 0;
    int ncol = 0;

    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);

    // count the number of columns in the first non-header row
    LongWritable key = new LongWritable();
    Text oneLine = new Text();
    RecordReader<LongWritable, Text> reader = informat.getRecordReader(splits[0], job, Reporter.NULL);
    try {
        if (reader.next(key, oneLine)) {
            String cellStr = oneLine.toString().trim();
            ncol = StringUtils.countMatches(cellStr, delim) + 1;
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }

    // count rows in parallel per split
    try {
        ExecutorService pool = Executors.newFixedThreadPool(_numThreads);
        ArrayList<CountRowsTask> tasks = new ArrayList<CountRowsTask>();
        for (InputSplit split : splits) {
            tasks.add(new CountRowsTask(split, informat, job, hasHeader));
            hasHeader = false;
        }
        pool.invokeAll(tasks);
        pool.shutdown();

        // collect row counts for offset computation
        // fail early in case not all tasks were successful
        _offsets = new SplitOffsetInfos(tasks.size());
        for (CountRowsTask rt : tasks) {
            if (!rt.getReturnCode())
                throw new IOException("Count task for csv input failed: " + rt.getErrMsg());
            _offsets.setOffsetPerSplit(tasks.indexOf(rt), nrow);
            _offsets.setLenghtPerSplit(tasks.indexOf(rt), rt.getRowCount());
            nrow = nrow + rt.getRowCount();
        }
    } catch (Exception e) {
        throw new IOException("Threadpool Error " + e.getMessage(), e);
    }

    // allocate target matrix block based on given size; 
    // need to allocate sparse as well since lock-free insert into target
    return createOutputMatrixBlock(nrow, ncol, estnnz, true, true);
}

From source file:com.ibm.bi.dml.udf.lib.RemoveEmptyRows.java

License:Open Source License

@Override
public void execute() {
    Matrix mat = (Matrix) this.getFunctionInput(0);
    String fnameOld = mat.getFilePath();

    HashMap<Long, Long> keyMap = new HashMap<Long, Long>(); //old,new rowID

    try {
        //prepare input
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameOld);
        FileSystem fs = FileSystem.get(job);
        if (!fs.exists(path))
            throw new IOException("File " + fnameOld + " does not exist on HDFS.");
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);

        //prepare output
        String fnameNew = createOutputFilePathAndName(OUTPUT_FILE);
        DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream(fnameNew, true);

        //read and write if necessary
        InputSplit[] splits = informat.getSplits(job, 1);

        LongWritable key = new LongWritable();
        Text value = new Text();
        long ID = 1;

        try {
            //for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();

            for (InputSplit split : splits) {
                RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
                try {
                    while (reader.next(key, value)) {
                        String cellStr = value.toString().trim();
                        StringTokenizer st = new StringTokenizer(cellStr, " ");
                        long row = Integer.parseInt(st.nextToken());
                        long col = Integer.parseInt(st.nextToken());
                        double lvalue = Double.parseDouble(st.nextToken());

                        if (!keyMap.containsKey(row))
                            keyMap.put(row, ID++);
                        long rowNew = keyMap.get(row);

                        sb.append(rowNew);
                        sb.append(' ');
                        sb.append(col);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');

                        ostream.writeBytes(sb.toString());
                        sb.setLength(0);
                    }
                } finally {
                    if (reader != null)
                        reader.close();
                }
            }

            _ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
        } finally {
            if (ostream != null)
                ostream.close();
        }
    } catch (Exception ex) {
        throw new RuntimeException("Unable to execute external function.", ex);
    }
}

From source file:com.ibm.jaql.io.hadoop.DefaultHadoopInputAdapter.java

License:Apache License

public void open() throws Exception {
    this.conf = new JobConf();
    if (Globals.getJobConf() != null)
        conf.setWorkingDirectory(Globals.getJobConf().getWorkingDirectory());

    this.reporter = Reporter.NULL;

    // write state to conf, pass in top-level args
    setSequential(conf);
    Globals.setJobConf(conf);
    // initialize the format from conf
    if (iFormat instanceof JobConfigurable)
        ((JobConfigurable) iFormat).configure(conf);
}

From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java

License:Apache License

public void open() throws Exception {
    this.conf = new JobConf();
    this.reporter = Reporter.NULL;

    // Some OutputFormats (like FileOutputFormat) require that the job id/task id be set.
    // So let's set it for all output formats, just in case they need it too.
    JobID jobid = new JobID("sequential", jobCounter.getAndIncrement());
    TaskAttemptID taskid = new TaskAttemptID(new TaskID(jobid, true, 0), 0);
    conf.set("mapred.task.id", taskid.toString());

    setSequential(conf);

    // Create a task so we can use committers.
    sequentialJob = new ExposeJobContext(conf, jobid);
    sequentialTask = new ExposeTaskAttemptContext(conf, taskid);

    // Give the committer a chance to initialize.
    OutputCommitter committer = conf.getOutputCommitter();
    // FIXME: We skip job setup for now because  
    committer.setupJob(sequentialJob);
    committer.setupTask(sequentialTask);

    if (oFormat instanceof JobConfigurable)
        ((JobConfigurable) oFormat).configure(conf);
}

From source file:com.ibm.jaql.lang.expr.io.ReadSplitFn.java

License:Apache License

@Override
public JsonIterator iter(Context context) throws Exception {
    // Close the previous adapter, if still open:
    if (adapter != null) {
        adapter.close();
        adapter = null;
    }

    // evaluate the arguments
    JsonValue args = exprs[0].eval(context);
    JsonRecord splitRec = (JsonRecord) exprs[1].eval(context);

    if (splitRec == null) {
        return JsonIterator.EMPTY;
    }

    // get the InputAdapter according to the type
    HadoopInputAdapter hia = (HadoopInputAdapter) JaqlUtil.getAdapterStore().input.getAdapter(args);
    adapter = hia;
    JobConf conf = new JobConf(); // TODO: allow configuration
    hia.setParallel(conf); // right thing to do?

    JsonString jsplitClassName = (JsonString) splitRec.get(InputSplitsFn.CLASS_TAG);
    Class<? extends InputSplit> splitCls = (Class<? extends InputSplit>) ClassLoaderMgr
            .resolveClass(jsplitClassName.toString());
    InputSplit split = (InputSplit) ReflectionUtils.newInstance(splitCls, conf);

    DataInputBuffer in = new DataInputBuffer();
    JsonBinary rawSplit = (JsonBinary) splitRec.get(InputSplitsFn.SPLIT_TAG);
    in.reset(rawSplit.getInternalBytes(), rawSplit.bytesOffset(), rawSplit.bytesLength());
    split.readFields(in);

    RecordReader<JsonHolder, JsonHolder> rr = hia.getRecordReader(split, conf, Reporter.NULL);
    return new RecordReaderValueIter(rr);
}

From source file:com.ibm.jaql.lang.util.JsonSorter.java

License:Apache License

/**
 * @param comparator
 */
public JsonSorter(JsonComparator comparator) {
    conf.setMapOutputKeyClass(JsonHolderDefault.class);
    HadoopSerializationDefault.register(conf);
    if (comparator != null) {
        conf.setOutputKeyComparatorClass(comparator.getClass());
    } else {
        conf.setOutputKeyComparatorClass(DefaultJsonComparator.class);
    }
    //    sorter.configure(conf); // done below using setComparator    
    sorter.setInputBuffer(keyValBuffer);
    sorter.setProgressable(Reporter.NULL);
    if (comparator != null) {
        sorter.setComparator(comparator);
    } else {
        sorter.setComparator(new DefaultJsonComparator());
    }
}