Example usage for org.apache.hadoop.io Text set

List of usage examples for org.apache.hadoop.io Text set

Introduction

On this page you can find example usage for org.apache.hadoop.io Text.set.

Prototype

public void set(Text other) 

Source Link

Document

Copy a text.
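
For orientation before the examples below, here is a minimal sketch of the call (class and variable names are illustrative only, not taken from any of the sources): set(Text other) copies the bytes of another Text into this object, and the String overload used throughout the examples replaces the contents with the UTF-8 bytes of a string.

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text source = new Text("hello");
        Text target = new Text();
        target.set(source); // copy the bytes of another Text instance
        target.set("world"); // String overload: replace the contents with the UTF-8 bytes of "world"
        System.out.println(target); // prints "world"
    }
}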

Usage

From source file:com.blackberry.logdriver.mapred.boom.BoomRecordReader.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public boolean next(LogLineData key, Text value) throws IOException {
    while (lines.size() == 0) {
        // If we're out of lines in the current record, then get the next record -
        // unless we're out of records or past the end of where we should be.
        while (reader == null || reader.hasNext() == false || reader.pastSync(end)) {
            // If we have another file, then init that and keep going, otherwise,
            // just return false.
            currentFile++;
            if (currentFile >= split.getNumPaths()) {
                return false;
            }
            initCurrentFile();
        }

        Record record = reader.next();
        lld = new LogLineData();
        lld.setBlockNumber((Long) record.get("blockNumber"));
        lld.setCreateTime((Long) record.get("createTime"));
        second = (Long) record.get("second");
        lineNumber = 0;

        lines.addAll((List<Record>) record.get("logLines"));
    }

    Record line = lines.pollFirst();
    long ms = (Long) line.get("ms");
    String message = line.get("message").toString();
    int eventId = (Integer) line.get("eventId");

    ++lineNumber;

    key.set(lld);
    key.setLineNumber(lineNumber);
    key.setTimestamp(second * 1000 + ms);
    key.setEventId(eventId);

    value.set(message);

    pos = reader.tell();

    return true;
}

From source file:com.chimpler.example.bayes.DataToSeq.java

License:Apache License

public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input tsv file] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"), Text.class,
            Text.class);

    int count = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inputFileName));
    Text key = new Text();
    Text value = new Text();
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] tokens = line.split("\t", 2);
        if (tokens.length != 2) {
            System.out.println("Skip line: " + line);
            continue;
        }
        String category = tokens[0];
        //   String id = tokens[1];
        String message = tokens[1];
        key.set("/" + category + "/");
        value.set(message);
        writer.append(key, value);
        count++;
    }
    reader.close();
    writer.close();
    System.out.println("Wrote " + count + " entries.");
}

From source file:com.chinamobile.bcbsp.io.titan.TitanRecordWriter.java

License:Apache License

@Override
public void write(Text keyValue) throws IOException, InterruptedException {
    Text key = new Text();
    Text value = new Text();
    StringTokenizer str1 = new StringTokenizer(keyValue.toString(), "\t");
    if (str1.hasMoreElements()) {
        key.set(str1.nextToken());
    }
    if (str1.hasMoreElements()) {
        value.set(str1.nextToken());
    }
    if (key.getLength() == 0) { // no key parsed from the input line
        return;
    }
    String[] vertexInfo = key.toString().split(":");
    String vertexID = vertexInfo[0];
    String vertexValue = vertexInfo[1];
    if (value.getLength() == 0) { // no edge list: just add or update the vertex
        try {
            if (!hasVertex(vertexID)) {
                client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'" + vertexValue + "'])");
            } else {
                client.execute(
                        "g.V('vertexID','" + vertexID + "').sideEffect{it.value = '" + vertexValue + "'}");
            }
        } catch (RexProException e) {
            LOG.error("Can not write record to database!");
            return;
        }
        return;
    }
    String[] strs = value.toString().split(" ");
    String[] outgoingVertexIDs = new String[strs.length];
    String[] weights = new String[strs.length];
    for (int i = 0; i < strs.length; i++) {
        String[] str = strs[i].split(":");
        outgoingVertexIDs[i] = str[0];
        weights[i] = str[1];
    }
    try {
        if (!hasVertex(vertexID)) {
            client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'" + vertexValue + "'])");
        } else {
            client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '" + vertexValue + "'}");
        }
        for (int i = 0; i < outgoingVertexIDs.length; i++) {
            if (!hasVertex(outgoingVertexIDs[i])) {
                client.execute("g.addVertex([vertexID:'" + outgoingVertexIDs[i] + "', value:''])");
            } /*
               * else { client.execute("g.V('vertexID','" + outgoingVertexIDs[i] +
               * "')"); }
               */
            client.execute("g.addEdge(g.V('vertexID','" + vertexID + "').next(), g.V('vertexID','"
                    + outgoingVertexIDs[i] + "').next(), 'outgoing', [weight:" + weights[i] + "])");
        }
    } catch (RexProException e) {
        LOG.error("Can not write record to database!");
        return;
    }
}

From source file:com.cloudera.dataflow.spark.HadoopFileFormatPipelineTest.java

License:Open Source License

private void populateFile() throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Writer writer = SequenceFile.createWriter(new Configuration(), Writer.keyClass(IntWritable.class),
            Writer.valueClass(Text.class), Writer.file(new Path(this.inputFile.toURI())))) {
        for (int i = 0; i < 5; i++) {
            key.set(i);
            value.set("value-" + i);
            writer.append(key, value);
        }
    }
}

From source file:com.cloudera.sa.ExcelRecordReader.java

License:Apache License

private Text getCellValue(Cell cell) {
    Text out = new Text();
    CellType cellType = cell.getCellTypeEnum();

    if (cellType == CellType.STRING) {
        out.set(cell.getStringCellValue());
    } else if (cellType == CellType.NUMERIC) {
        out.set(String.valueOf(cell.getNumericCellValue()));
    } else if (cellType == CellType.FORMULA) {
        out.set(cell.getCellFormula());
    } else if (cellType == CellType.ERROR) {
        out.set(String.valueOf(cell.getErrorCellValue()));
    } else if (cellType == CellType.BOOLEAN) {
        out.set(String.valueOf(cell.getBooleanCellValue()));
    } else {
        out.set("");
    }

    return out;
}

From source file:com.cloudera.science.avro.streaming.AvroAsJSONRecordReader.java

License:Open Source License

@Override
public boolean next(Text key, Text value) throws IOException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        return false;
    }
    datum = reader.next(datum);
    key.set(datum.toString());
    return true;
}

From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayMapper.java

License:Open Source License

@Override
protected void map(BytesWritable key, Text value, Mapper<BytesWritable, Text, Text, ScoreInfo>.Context context)
        throws IOException, InterruptedException {
    String converted = ConvertUtil.convertEncoding(value, "GBK");
    if (converted == null) {
        return;
    }
    String[] values = converted.split("\t");
    for (int i = 0; i < values.length; i++) {
        logger.info("the i=" + i + " value=" + values[i]);

    }

    Map<String, String> map = new HashMap<String, String>();
    for (ValueExtractor extractor : extractors) {
        String result = extractor.validateAndExtract(values);
        map.put(extractor.getMapping(), result);
    }
    String enttid = map.get(ENTT_ID);
    if (enttid == null) {
        // skip records without an entity id
        return;
    }

    ScoreInfo valueout = new ScoreInfo();

    valueout.setEnttId(map.get(ENTT_ID));
    valueout.setScoreType(map.get(SCORE_TYPE));
    valueout.setScoreBalance(map.get(SCORE_BALANCE));
    valueout.setStatDate(context.getConfiguration().get(STAT_MONTH));
    valueout.setStatMonth(context.getConfiguration().get(STAT_MONTH));
    logger.info("ScoreInfo tostr:" + valueout.toString());
    Text keyout = new Text();
    keyout.set(enttid);
    /*   context.write(keyout, valueout);*/

}

From source file:com.datasalt.utils.commons.io.DumpTextFileAsSequenceFile.java

License:Apache License

public static void dump(String input, String output) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.get(conf);

    BufferedReader reader = new BufferedReader(new FileReader(new File(input)));
    String line = "";
    Text t1 = new Text();
    Text t2 = new Text();

    SequenceFile.Writer writer = new SequenceFile.Writer(fS, conf, new Path(output), Text.class, Text.class);

    while ((line = reader.readLine()) != null) {
        String[] fields = line.split("\t");
        t1.set(fields[0]);
        t2.set(fields[1]);
        writer.append(t1, t2);
    }
    writer.close();
    reader.close();
}

From source file:com.datascience.hadoop.CsvRecordReader.java

License:Apache License

@Override
public boolean next(LongWritable key, ListWritable<Text> value) throws IOException {
    value.clear();
    try {
        if (iterator.hasNext()) {
            CSVRecord record = iterator.next();
            position++;
            colLength = colLength == null ? record.size() : colLength;
            if ((!record.isConsistent() || record.size() != colLength) && strict) {
                String message = String.format("%s: %s", "inconsistent record at position", position);
                throw new CsvParseException(message);
            }

            key.set(record.getRecordNumber());

            for (int i = 0; i < record.size(); i++) {
                String item = record.get(i);
                if (item == null) {
                    value.add(null);
                } else {
                    Text text = cache[i];
                    if (text == null) {
                        text = new Text();
                        cache[i] = text;
                    }
                    text.set(item);
                    value.add(text);
                }
            }
            //position = record.getCharacterPosition();
            return true;
        }

    } catch (Exception e) {
        LOGGER.warn("failed to parse record at position: " + position);
        if (strict) {
            throw e;
        } else {
            return next(key, value);
        }
    }
    return false;
}

From source file:com.datatorrent.contrib.hdht.HadoopFilePerformanceTest.java

License:Open Source License

private void writeMapFile() throws Exception {
    Path path = Testfile.MAPFILE.filepath();

    Text key = new Text();
    Text value = new Text();

    long fsMinBlockSize = conf.getLong("dfs.namenode.fs-limits.min-block-size", 0);

    long testBlockSize = (blockSize < fsMinBlockSize) ? fsMinBlockSize : (long) blockSize;

    MapFile.Writer writer = new MapFile.Writer(conf, path, MapFile.Writer.keyClass(key.getClass()),
            MapFile.Writer.valueClass(value.getClass()),
            MapFile.Writer.compression(SequenceFile.CompressionType.NONE),
            SequenceFile.Writer.blockSize(testBlockSize), SequenceFile.Writer.bufferSize((int) testBlockSize));
    for (int i = 0; i < testSize; i++) {
        key.set(getKey(i));
        value.set(getValue());
        writer.append(key, value);
    }
    IOUtils.closeStream(writer);
}