List of usage examples for org.apache.hadoop.io.Text.set
public void set(Text other)
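Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects; the class name and values are illustrative) of what Text.set(Text other) does: it copies the byte contents of another Text instance into this one, after which the two objects can be mutated independently.

import org.apache.hadoop.io.Text;

public class TextSetExample {
    public static void main(String[] args) {
        Text original = new Text("hello");
        Text copy = new Text();

        // Copy the byte contents of 'original' into 'copy'.
        copy.set(original);

        // 'copy' owns its own buffer, so changing 'original' afterwards
        // does not affect it.
        original.set("goodbye");

        System.out.println(copy);     // prints "hello"
        System.out.println(original); // prints "goodbye"
    }
}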
From source file:com.blackberry.logdriver.mapred.boom.BoomRecordReader.java
License:Apache License
@SuppressWarnings("unchecked")
@Override
public boolean next(LogLineData key, Text value) throws IOException {
    while (lines.size() == 0) {
        // If we're out of lines in the current record, then get the next record -
        // unless we're out of records or past the end of where we should be.
        while (reader == null || reader.hasNext() == false || reader.pastSync(end)) {
            // If we have another file, then init that and keep going, otherwise,
            // just return false.
            currentFile++;
            if (currentFile >= split.getNumPaths()) {
                return false;
            }
            initCurrentFile();
        }

        Record record = reader.next();
        lld = new LogLineData();
        lld.setBlockNumber((Long) record.get("blockNumber"));
        lld.setCreateTime((Long) record.get("createTime"));
        second = (Long) record.get("second");
        lineNumber = 0;

        lines.addAll((List<Record>) record.get("logLines"));
    }

    Record line = lines.pollFirst();
    long ms = (Long) line.get("ms");
    String message = line.get("message").toString();
    int eventId = (Integer) line.get("eventId");
    ++lineNumber;

    key.set(lld);
    key.setLineNumber(lineNumber);
    key.setTimestamp(second * 1000 + ms);
    key.setEventId(eventId);
    value.set(message);

    pos = reader.tell();

    return true;
}
From source file:com.chimpler.example.bayes.DataToSeq.java
License:Apache License
public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input tsv file] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];

    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"),
            Text.class, Text.class);

    int count = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inputFileName));
    Text key = new Text();
    Text value = new Text();
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] tokens = line.split("\t", 2);
        if (tokens.length != 2) {
            System.out.println("Skip line: " + line);
            continue;
        }
        String category = tokens[0];
        // String id = tokens[1];
        String message = tokens[1];
        key.set("/" + category + "/");
        value.set(message);
        writer.append(key, value);
        count++;
    }
    reader.close();
    writer.close();
    System.out.println("Wrote " + count + " entries.");
}
From source file:com.chinamobile.bcbsp.io.titan.TitanRecordWriter.java
License:Apache License
@Override
public void write(Text keyValue) throws IOException, InterruptedException {
    Text key = new Text();
    Text value = new Text();
    StringTokenizer str1 = new StringTokenizer(keyValue.toString(), "\t");
    if (str1.hasMoreElements()) {
        key.set(str1.nextToken());
    }
    if (str1.hasMoreElements()) {
        value.set(str1.nextToken());
    }
    // Nothing to write if no key token was found.
    // (The original compared against a fresh Text with ==, which is always false.)
    if (key.getLength() == 0) {
        return;
    }
    String[] vertexInfo = key.toString().split(":");
    String vertexID = vertexInfo[0];
    String vertexValue = vertexInfo[1];
    // No adjacency list: just add or update the vertex itself.
    if (value.getLength() == 0) {
        try {
            if (!hasVertex(vertexID)) {
                client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'" + vertexValue + "'])");
            } else {
                client.execute(
                        "g.V('vertexID','" + vertexID + "').sideEffect{it.value = '" + vertexValue + "'}");
            }
        } catch (RexProException e) {
            LOG.error("Can not write record to database!");
            return;
        }
        return;
    }
    String[] strs = value.toString().split(" ");
    String[] outgoingVertexIDs = new String[strs.length];
    String[] weights = new String[strs.length];
    for (int i = 0; i < strs.length; i++) {
        String[] str = strs[i].split(":");
        outgoingVertexIDs[i] = str[0];
        weights[i] = str[1];
    }
    try {
        if (!hasVertex(vertexID)) {
            client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'" + vertexValue + "'])");
        } else {
            client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '" + vertexValue + "'}");
        }
        for (int i = 0; i < outgoingVertexIDs.length; i++) {
            if (!hasVertex(outgoingVertexIDs[i])) {
                client.execute("g.addVertex([vertexID:'" + outgoingVertexIDs[i] + "', value:''])");
            }
            /*
             * else { client.execute("g.V('vertexID','" + outgoingVertexIDs[i] +
             * "')"); }
             */
            client.execute("g.addEdge(g.V('vertexID','" + vertexID + "').next(), g.V('vertexID','"
                    + outgoingVertexIDs[i] + "').next(), 'outgoing', [weight:" + weights[i] + "])");
        }
    } catch (RexProException e) {
        LOG.error("Can not write record to database!");
        return;
    }
}
From source file:com.cloudera.dataflow.spark.HadoopFileFormatPipelineTest.java
License:Open Source License
private void populateFile() throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Writer writer = SequenceFile.createWriter(new Configuration(), Writer.keyClass(IntWritable.class),
            Writer.valueClass(Text.class), Writer.file(new Path(this.inputFile.toURI())))) {
        for (int i = 0; i < 5; i++) {
            key.set(i);
            value.set("value-" + i);
            writer.append(key, value);
        }
    }
}
From source file:com.cloudera.sa.ExcelRecordReader.java
License:Apache License
private Text getCellValue(Cell cell) {
    Text out = new Text();
    CellType cellType = cell.getCellTypeEnum();
    if (cellType == CellType.STRING) {
        out.set(cell.getStringCellValue());
    } else if (cellType == CellType.NUMERIC) {
        out.set(String.valueOf(cell.getNumericCellValue()));
    } else if (cellType == CellType.FORMULA) {
        out.set(cell.getCellFormula());
    } else if (cellType == CellType.ERROR) {
        out.set(String.valueOf(cell.getErrorCellValue()));
    } else if (cellType == CellType.BOOLEAN) {
        out.set(String.valueOf(cell.getBooleanCellValue()));
    } else {
        out.set("");
    }
    return out;
}
From source file:com.cloudera.science.avro.streaming.AvroAsJSONRecordReader.java
License:Open Source License
@Override
public boolean next(Text key, Text value) throws IOException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        return false;
    }
    datum = reader.next(datum);
    key.set(datum.toString());
    return true;
}
From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayMapper.java
License:Open Source License
@Override
protected void map(BytesWritable key, Text value,
        Mapper<BytesWritable, Text, Text, ScoreInfo>.Context context) throws IOException, InterruptedException {
    String converted = ConvertUtil.convertEncoding(value, "GBK");
    if (converted == null) {
        return;
    }
    String[] values = converted.split("\t");
    for (int i = 0; i < values.length; i++) {
        logger.info("the i=" + i + " value=" + values[i]);
    }
    Map<String, String> map = new HashMap<String, String>();
    for (ValueExtractor extractor : extractors) {
        String result = extractor.validateAndExtract(values);
        map.put(extractor.getMapping(), result);
    }
    String enttid = map.get(ENTT_ID);
    if (enttid == null) {
        // Skip records that have no entity id.
        return;
    }
    ScoreInfo valueout = new ScoreInfo();
    valueout.setEnttId(map.get(ENTT_ID));
    valueout.setScoreType(map.get(SCORE_TYPE));
    valueout.setScoreBalance(map.get(SCORE_BALANCE));
    valueout.setStatDate(context.getConfiguration().get(STAT_MONTH));
    valueout.setStatMonth(context.getConfiguration().get(STAT_MONTH));
    logger.info("ScoreInfo tostr:" + valueout.toString());

    Text keyout = new Text();
    keyout.set(enttid);
    /* context.write(keyout, valueout); */
}
From source file:com.datasalt.utils.commons.io.DumpTextFileAsSequenceFile.java
License:Apache License
public static void dump(String input, String output) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.get(conf);
    BufferedReader reader = new BufferedReader(new FileReader(new File(input)));
    String line = "";
    Text t1 = new Text();
    Text t2 = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(fS, conf, new Path(output), Text.class, Text.class);
    while ((line = reader.readLine()) != null) {
        String[] fields = line.split("\t");
        t1.set(fields[0]);
        t2.set(fields[1]);
        writer.append(t1, t2);
    }
    writer.close();
    reader.close();
}
From source file:com.datascience.hadoop.CsvRecordReader.java
License:Apache License
@Override
public boolean next(LongWritable key, ListWritable<Text> value) throws IOException {
    value.clear();
    try {
        if (iterator.hasNext()) {
            CSVRecord record = iterator.next();
            position++;
            colLength = colLength == null ? record.size() : colLength;
            if ((!record.isConsistent() || record.size() != colLength) && strict) {
                String message = String.format("%s: %s", "inconsistent record at position", position);
                throw new CsvParseException(message);
            }

            key.set(record.getRecordNumber());
            for (int i = 0; i < record.size(); i++) {
                String item = record.get(i);
                if (item == null) {
                    value.add(null);
                } else {
                    Text text = cache[i];
                    if (text == null) {
                        text = new Text();
                        cache[i] = text;
                    }
                    text.set(item);
                    value.add(text);
                }
            }
            // position = record.getCharacterPosition();
            return true;
        }
    } catch (Exception e) {
        LOGGER.warn("failed to parse record at position: " + position);
        if (strict) {
            throw e;
        } else {
            return next(key, value);
        }
    }
    return false;
}
From source file:com.datatorrent.contrib.hdht.HadoopFilePerformanceTest.java
License:Open Source License
private void writeMapFile() throws Exception {
    Path path = Testfile.MAPFILE.filepath();

    Text key = new Text();
    Text value = new Text();

    long fsMinBlockSize = conf.getLong("dfs.namenode.fs-limits.min-block-size", 0);
    long testBlockSize = (blockSize < fsMinBlockSize) ? fsMinBlockSize : (long) blockSize;

    MapFile.Writer writer = new MapFile.Writer(conf, path, MapFile.Writer.keyClass(key.getClass()),
            MapFile.Writer.valueClass(value.getClass()),
            MapFile.Writer.compression(SequenceFile.CompressionType.NONE),
            SequenceFile.Writer.blockSize(testBlockSize), SequenceFile.Writer.bufferSize((int) testBlockSize));
    for (int i = 0; i < testSize; i++) {
        key.set(getKey(i));
        value.set(getValue());
        writer.append(key, value);
    }
    IOUtils.closeStream(writer);
}