Example usage for org.apache.hadoop.io Text set

List of usage examples for org.apache.hadoop.io Text set

Introduction

On this page you can find example usages of the set method of org.apache.hadoop.io.Text.

Prototype

public void set(Text other) 

Source Link

Document

Copies the contents of another Text into this one.

Usage

From source file:gobblin.compat.TextSerializerTest.java

License:Apache License

/**
 * Round-trip check: every sample string is written with Hadoop's {@code Text} serializer and
 * must come back unchanged through {@code TextSerializer.readTextAsString}.
 */
@Test
public void testDeserialize() throws IOException {
    for (String expected : textsToSerialize) {
        // Encode the sample with Hadoop's own writable into an in-memory sink.
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        DataOutputStream encoder = new DataOutputStream(sink);
        Text writable = new Text();
        writable.set(expected);
        writable.write(encoder);
        encoder.close();

        // Decode the captured bytes with the deserializer under test.
        DataInputStream decoder = new DataInputStream(new ByteArrayInputStream(sink.toByteArray()));
        String actual = TextSerializer.readTextAsString(decoder);

        Assert.assertEquals(actual, expected);
    }
}

From source file:gobblin.runtime.JobState.java

License:Apache License

/**
 * Serializes this job state to {@code out}.
 *
 * <p>Field order: job name, job id, start time, end time, duration, state name, task count,
 * then the number of serialized task states followed by those task states, and finally the
 * superclass payload.
 *
 * @param out destination of the serialized form
 * @param writeTasks when {@code true} the regular and skipped task states are written; when
 *  {@code false} a task-state count of 0 is written instead
 * @throws IOException if writing to {@code out} fails
 */
public void write(DataOutput out, boolean writeTasks) throws IOException {
    // One Text instance is reused for every string field.
    Text scratch = new Text();

    scratch.set(this.jobName);
    scratch.write(out);

    scratch.set(this.jobId);
    scratch.write(out);

    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);

    scratch.set(this.state.name());
    scratch.write(out);

    out.writeInt(this.taskCount);

    if (!writeTasks) {
        out.writeInt(0);
    } else {
        int serializedTaskStates = this.taskStates.size() + this.skippedTaskStates.size();
        out.writeInt(serializedTaskStates);
        for (TaskState regular : this.taskStates.values()) {
            regular.write(out);
        }
        for (TaskState skipped : this.skippedTaskStates.values()) {
            skipped.write(out);
        }
    }

    super.write(out);
}

From source file:gobblin.runtime.TaskState.java

License:Apache License

/**
 * Serializes this task state to {@code out}: job id, task id, start time, end time and
 * duration, followed by the superclass payload.
 *
 * @param out destination of the serialized form
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    // One Text instance is reused for both identifier fields.
    Text scratch = new Text();

    scratch.set(this.jobId);
    scratch.write(out);

    scratch.set(this.taskId);
    scratch.write(out);

    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);

    super.write(out);
}

From source file:gov.jgi.meta.hadoop.input.FastaLineReader.java

License:Open Source License

/**
 * Read one line from the InputStream into the given Text.  A line
 * can be terminated by one of the following: '\n' (LF) , '\r' (CR),
 * or '\r\n' (CR+LF).  EOF also terminates an otherwise unterminated
 * line./*  w  w  w .  j a  v  a2s .c  o  m*/
 *
 * @param str the object to store the given line (without newline)
 * @param maxLineLength the maximum number of bytes to store into str;
 *  the rest of the line is silently discarded.
 * @param maxBytesToConsume the maximum number of bytes to consume
 *  in this call.  This is only a hint, because if the line cross
 *  this threshold, we allow it to happen.  It can overshoot
 *  potentially by as much as one buffer length.
 *
 * @return the number of bytes read including the (longest) newline
 * found.
 *
 * @throws IOException if the underlying stream throws
 */
public int readLine(Text key, Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    int totalBytesRead = 0;
    int numRecordsRead = 0;
    Boolean eof = false;
    int startPosn;
    StringBuilder recordBlock = new StringBuilder(this.bufferSize);

    /*
    first thing to do is to move forward till you see a start character
     */
    startPosn = bufferPosn;
    do {
        if (bufferPosn >= bufferLength) {
            totalBytesRead += bufferPosn - startPosn;
            bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                eof = true;
                break; // EOF
            }
        }
    } while (buffer[bufferPosn++] != '>');

    /*
    if we hit the end of file already, then just return 0 bytes processed
     */
    if (eof)
        return totalBytesRead;

    /*
    now bufferPosn should be at the start of a fasta record
     */
    totalBytesRead += (bufferPosn - 1) - startPosn;
    startPosn = bufferPosn - 1; // startPosn guaranteed to be at a ">"

    /*
    find the next record start:  first scan to end of the line
     */
    eof = false;
    do {
        if (bufferPosn >= bufferLength) {

            /*
            copy the current buffer before refreshing the buffer
             */
            int appendLength = bufferPosn - startPosn;
            for (int copyi = startPosn; copyi < startPosn + appendLength; copyi++) {
                recordBlock.append((char) buffer[copyi]);
            }
            //recordBlock.append(buffer, startPosn, appendLength);
            totalBytesRead += appendLength;

            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                eof = true;
                break; // EOF
            }
        }
        bufferPosn++;
    } while (buffer[bufferPosn - 1] != CR && buffer[bufferPosn - 1] != LF);

    /*
    find the next record start:  scan till next ">"
     */
    do {
        if (bufferPosn >= bufferLength) {

            /*
            copy the current buffer before refreshing the buffer
             */
            int appendLength = bufferPosn - startPosn;
            for (int copyi = startPosn; copyi < startPosn + appendLength; copyi++) {
                recordBlock.append((char) buffer[copyi]);
            }
            //recordBlock.append(buffer, startPosn, appendLength);
            totalBytesRead += appendLength;

            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0) {
                eof = true;
                break; // EOF
            }
        }
    } while (buffer[bufferPosn++] != '>'); // only read one record at a time

    if (!eof) {
        bufferPosn--; // make sure we leave bufferPosn pointing to the next record
        int appendLength = bufferPosn - startPosn;
        for (int copyi = startPosn; copyi < startPosn + appendLength; copyi++) {
            recordBlock.append((char) buffer[copyi]);
        }
        //recordBlock.append(buffer, startPosn, appendLength);
        totalBytesRead += appendLength;
    }

    /*
    record block now has the byte array we want to process for reads
     */

    int i = 1; // skip initial record seperator ">"
    int j = 1;
    do {
        key.clear();
        str.clear();
        /*
        first parse the key
         */
        i = j;
        Boolean junkOnLine = false;
        while (j < recordBlock.length()) {
            int c = recordBlock.charAt(j++);
            if (c == CR || c == LF) {
                break;
            } //else if (c == ' ' || c == '\t') {
              //  junkOnLine = true;
              //  break;
              //}
        }
        if (j == i) {
            // then we didn't parse out a proper id
            LOG.error("Unable to parse entry: " + recordBlock);
            str.clear();
            key.clear();
            return totalBytesRead;
        }
        key.set(recordBlock.substring(i, j - 1));

        /*
        in case there is additional metadata on the header line, ignore everything after
        the first word.
         */
        if (junkOnLine) {
            while (j < recordBlock.length() && recordBlock.charAt(j) != CR && recordBlock.charAt(j) != LF)
                j++;
        }

        //LOG.info ("key = " + k.toString());

        /*
        now skip the newlines
        */
        while (j < recordBlock.length() && (recordBlock.charAt(j) == CR || recordBlock.charAt(j) == LF))
            j++;

        /*
        now read the sequence
        */
        StringBuilder sequenceTmp = new StringBuilder(recordBlock.length());
        do {
            i = j;
            while (j < recordBlock.length()) {
                int c = recordBlock.charAt(j++);
                if (c == CR || c == LF) {
                    break;
                }
            }
            //byte[] ba = recordBlock.getBytes();
            //if (ba.length <= i || ba.length <= j - i - 1) {
            //    LOG.fatal("hmm... ba.length = " + ba.length + " i = " + i + " j-i-1 = " + (j-i-1));
            //}

            if (j == i) {
                // then we didn't parse out a proper id
                LOG.error("Unable to parse entry: " + recordBlock);
                str.clear();
                key.clear();
                return totalBytesRead;
            }
            for (int copyi = i; copyi < j - 1; copyi++) {
                sequenceTmp.append((char) recordBlock.charAt(copyi));
            }

            while (j < recordBlock.length() && (recordBlock.charAt(j) == CR || recordBlock.charAt(j) == LF))
                j++;

        } while (j < recordBlock.length() && recordBlock.charAt(j) != '>');
        str.set(sequenceTmp.toString());

        numRecordsRead++;

        /*
        now skip characters (newline or carige return most likely) till record start
        */
        while (j < recordBlock.length() && recordBlock.charAt(j) != '>') {
            j++;
        }

        j++; // skip the ">"

    } while (j < recordBlock.length());

    //        LOG.info("");
    //        LOG.info("object key = " + key);
    byte[] strpacked = SequenceString.sequenceToByteArray(str.toString().toLowerCase());

    str.clear();
    str.append(strpacked, 0, strpacked.length);

    return totalBytesRead;
}

From source file:gr.ntua.h2rdf.partialJoin.HbaseJoinBGPReducer.java

License:Open Source License

/**
 * Writes one key/value pair to {@code context}.
 *
 * <p>When {@code isLast == 2} the key is first rewritten: its first {@code '!'}-delimited field
 * is dropped, and of the remaining fields only those whose variable name (the text before the
 * first {@code '#'}, minus its first character) is contained in {@code resultVars} are kept,
 * each followed by {@code '!'}. For every kept field, the {@code '_'}-separated numbers after
 * the {@code '#'} are parsed as longs, converted with {@code CastLongToInt.castLong} and added
 * to {@code trans_hash}.
 *
 * <p>Failures of {@code context.write} are printed and not propagated; an interrupt is
 * additionally re-asserted on the current thread so callers can still observe it.
 *
 * @param outKey2 key to emit; modified in place when {@code isLast == 2}
 * @param outValue2 value to emit
 * @param context output collector
 */
private void writeOut(Text outKey2, Text outValue2, Context context) {

    if (isLast == 2) {// prepare data for MapReduce translation
        StringTokenizer list = new StringTokenizer(outKey2.toString());
        list.nextToken("!"); // the first field is not part of the translated key
        StringBuilder newline = new StringBuilder();
        while (list.hasMoreTokens()) {
            String binding = list.nextToken("!");
            StringTokenizer tok = new StringTokenizer(binding);
            String pred = tok.nextToken("#");
            if (resultVars.contains(pred.substring(1))) {
                newline.append(binding).append('!');
                if (!tok.hasMoreTokens()) {
                    // NOTE(review): a binding without values terminates the whole JVM;
                    // kept as-is because callers may rely on the exit code.
                    System.exit(2);
                }
                String b = tok.nextToken("#");
                StringTokenizer tokenizer = new StringTokenizer(b);
                while (tokenizer.hasMoreTokens()) {
                    String temp = tokenizer.nextToken("_");
                    Integer id = CastLongToInt.castLong(Long.parseLong(temp));
                    trans_hash.add(id);
                }
            }
        }
        outKey2.set(newline.toString());
    }

    try {
        context.write(outKey2, outValue2);
    } catch (InterruptedException e) {
        // Restore the interrupt flag instead of silently consuming it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:gr.ntua.h2rdf.partialJoin.ThreadedProcessor.java

License:Open Source License

/**
 * Processes this thread's slice of {@code keyTable} and emits the resulting output keys
 * through {@code SerialQueryProcessorThreaded.writeOut}.
 *
 * <p>The slice is {@code [threadId * size, (threadId + 1) * size)}, clipped to
 * {@code keyTable.length}. For each key the merged bindings are looked up in {@code bindings}.
 * When {@code nonJoinSize == 0} a key is emitted if its pattern count equals {@code in_no};
 * otherwise each value encoded in the key is checked against the non-join patterns of the
 * variable via {@code reduceJoin} / {@code reduceJoinAllVar} and only matching values are
 * emitted.
 *
 * <p>{@code NumberFormatException} and {@code NotSupportedDatatypeException} are caught and
 * only printed; they end the processing of the slice.
 */
public void run() {
    // NOTE(review): tpat and else_value are never assigned in the live code below.
    String key, tpat, else_value;
    ValueDoubleMerger value;
    Text outKey = new Text();
    try {
        // Exclusive upper bound of this thread's slice, clipped to the table length.
        int end = (threadId + 1) * size;
        if (end > keyTable.length)
            end = keyTable.length;

        // NOTE(review): time, nextTime, averLatency and num are only referenced from the
        // commented-out latency measurements below.
        long time = 0, nextTime;
        float averLatency = 0;
        int num = 0;
        for (int i = threadId * size; i < end; i++) {

            /*if(num%100==0)
               System.out.println("Average Latency (ms): "+averLatency);*/

            key = keyTable[i];
            value = bindings.get(key);
            int bind_no = value.getTotalPatterns();

            if (nonJoinSize == 0) {//Full input
                //System.out.println("bind_no="+bind_no+" in_no="+in_no);
                // Emit only keys whose pattern count equals in_no.
                if (bind_no == in_no) {
                    //out.writeBytes(sum+key+"!"+else_value+"\n");
                    if (value.itter()) {
                        // One output key per non-empty value returned by the merger.
                        while (value.hasMore()) {
                            String s1 = value.getValue();
                            if (!s1.equals("")) {
                                outKey.set(sum + key + "!" + value.getTotal() + "!" + s1);
                                SerialQueryProcessorThreaded.writeOut(outKey);
                            }
                        }
                    } else {
                        outKey.set(sum + key + "!" + value.getTotal() + "!");
                        SerialQueryProcessorThreaded.writeOut(outKey);
                    }

                    //outKey.set(sum+key+"!"+else_value+"!");
                    //SerialQueryProcessorThreaded.writeOut(outKey);
                }
            } else {

                int vid = SerialQueryProcessorThreaded.getvarind(var);
                int nonjno = SerialQueryProcessorThreaded.nonJoinSizeTab[vid];
                //System.out.println(vid);
                //System.out.println(nonjno);
                String foundkvals1 = null;
                //System.out.println("bind_no="+bind_no+" in_no="+in_no+" nonjno="+nonjno);
                // The nonjno non-join patterns are checked below, so the key only needs to
                // have matched the remaining in_no - nonjno patterns.
                if (bind_no == in_no - nonjno) {
                    // The key is split on '#': first token into patvals, second into keyvals.
                    StringTokenizer vt = new StringTokenizer(key.toString());
                    String patvals = vt.nextToken("#");
                    String keyvals = vt.nextToken("#");
                    foundkvals1 = patvals + "#";
                    // Accumulates matching values when they are emitted together (findDoub false).
                    String foundkb = "";
                    int foundsize = 0;
                    boolean findDoub = SerialQueryProcessorThreaded.findDouble(vid);
                    // keyvals is a '_'-separated list; each entry is "<byte>|<long>".
                    StringTokenizer tokenizer1 = new StringTokenizer(keyvals);
                    while (tokenizer1.hasMoreTokens()) {
                        String temp2 = tokenizer1.nextToken("_");
                        //System.out.println(temp2);
                        // Pack the entry into temp1: index 0 holds the byte before '|',
                        // indices 1..8 hold the first 8 bytes of the encoded long.
                        // assumes totsize >= 9 -- TODO confirm
                        byte[] temp3 = Bytes.toBytes(Long.parseLong(temp2.substring(temp2.indexOf("|") + 1)));
                        byte[] temp1 = new byte[totsize];
                        temp1[0] = (byte) new Byte(temp2.substring(0, temp2.indexOf("|")));
                        for (int j = 0; j < 8; j++) {
                            temp1[j + 1] = temp3[j];
                        }
                        // Number of non-join patterns satisfied by this value so far.
                        int found = 0;
                        // NOTE(review): outkeytemp is accumulated but only read from commented-out code.
                        String outkeytemp = "";

                        //time = new Date().getTime();

                        for (int jj = 0; jj < nonjno; jj++) {
                            // Strips ':' from the shared pattern column in place on every pass.
                            SerialQueryProcessorThreaded.nonJoinCol[vid][jj] = SerialQueryProcessorThreaded.nonJoinCol[vid][jj]
                                    .replace(":", "");
                            //System.out.println(nonJoinCol[vid][jj]);
                            if (SerialQueryProcessorThreaded.nonJoinCol[vid][jj].contains("?")) {
                                // The column holds two '|'-separated variable names.
                                StringTokenizer tt1 = new StringTokenizer(
                                        SerialQueryProcessorThreaded.nonJoinCol[vid][jj]);
                                String varname1 = tt1.nextToken("|");
                                String varname2 = tt1.nextToken("|");
                                //System.out.println(varname1+" "+varname2);
                                // b = the totsize bytes following the leading table-id byte of the start row.
                                byte[] b = new byte[totsize];
                                for (int j = 0; j < totsize; j++) {
                                    b[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + 1];
                                }
                                // Byte 0 of the start row selects the table (2 = osp, 3 = pos, 4 = spo
                                // per the inline comments); "pinakas" is Greek for "table".
                                // Which of patvals/varname1 matches decides the table id and the
                                // argument order passed to reduceJoin.
                                if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 2) {//osp
                                    if (patvals.equals(varname1)) {
                                        //osp
                                        byte pinakas = (byte) 2;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, b,
                                                temp1, varname2);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    } else {
                                        //pos
                                        byte pinakas = (byte) 3;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas,
                                                temp1, b, varname1);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }

                                    }
                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 3) {//pos
                                    if (patvals.equals(varname1)) {
                                        //pos
                                        byte pinakas = (byte) 3;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, b,
                                                temp1, varname2);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    } else {
                                        //spo
                                        byte pinakas = (byte) 4;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas,
                                                temp1, b, varname1);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    }
                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 4) {//spo
                                    if (patvals.equals(varname1)) {//spo
                                        byte pinakas = (byte) 4;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas, b,
                                                temp1, varname2);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    } else {
                                        //osp
                                        byte pinakas = (byte) 2;
                                        String outkeytemp1 = SerialQueryProcessorThreaded.reduceJoin(pinakas,
                                                temp1, b, varname1);
                                        value.merge(outkeytemp1, "K" + jj);
                                        if (!outkeytemp1.equals("")) {
                                            found++;
                                            outkeytemp += outkeytemp1;
                                        }
                                    }
                                }
                            } else {//have all three values: always go to the osp table

                                if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 2) {//osp
                                    byte pinakas = (byte) 2;
                                    // b1 and b2 are the two totsize-byte fields after the table-id byte.
                                    byte[] b1 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + 1];
                                    }
                                    byte[] b2 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b2[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j
                                                + totsize + 1];
                                    }
                                    found += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, b1, b2,
                                            temp1);
                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 3) {//pos
                                    byte[] b1 = new byte[totsize];
                                    byte[] b2 = new byte[totsize];
                                    byte[] b3 = new byte[totsize];
                                    // This local size is the start-row length; it hides the size
                                    // used for slicing at the top of the method.
                                    int size = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj].length;
                                    byte pinakas = (byte) 2;//osp
                                    // b2 = the current key value, b3 = the first field of the start row.
                                    for (int j = 0; j < totsize; j++) {
                                        b2[j] = temp1[j];
                                    }
                                    for (int i1 = 0; i1 < totsize; i1++) {
                                        b3[i1] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][i1 + 1];
                                    }
                                    //find subclasses
                                    if (size > SerialQueryProcessorThreaded.rowlength) {//subclasses exist
                                        // Try every further totsize-byte field of the start row;
                                        // any number of hits counts as a single match.
                                        int ffound = 0;
                                        for (int ik = 0; ik < (size - totsize - 1) / totsize; ik++) {
                                            for (int j = 0; j < totsize; j++) {
                                                b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j
                                                        + totsize + 1 + ik * totsize];
                                            }
                                            //System.out.println(Bytes.toStringBinary(b1));
                                            ffound += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, b1,
                                                    b2, b3);

                                        }
                                        if (ffound > 0) {
                                            found++;
                                        }
                                    } else {//no subclasses
                                        for (int j = 0; j < totsize; j++) {
                                            b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][totsize
                                                    + 1 + j];
                                        }
                                        //System.out.println(Bytes.toStringBinary(b1));
                                        found += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, b1, b2,
                                                b3);
                                    }

                                } else if (SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][0] == (byte) 4) {//spo
                                    byte pinakas = (byte) 2;
                                    byte[] b1 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b1[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j + 1];
                                    }
                                    byte[] b2 = new byte[totsize];
                                    for (int j = 0; j < totsize; j++) {
                                        b2[j] = SerialQueryProcessorThreaded.nonJoinStartRow[vid][jj][j
                                                + totsize + 1];
                                    }
                                    found += SerialQueryProcessorThreaded.reduceJoinAllVar(pinakas, temp1, b1,
                                            b2);
                                }
                            }
                            // Stop early once the match count has reached nonjno.
                            if (found == nonjno) {
                                break;
                            }
                        }
                        // The value is kept only if the match count equals nonjno.
                        if (found == nonjno) {

                            /*nextTime = new Date().getTime();
                            long latency = nextTime -time;
                            averLatency = (averLatency*num+latency)/(num+1);
                            num++;*/

                            if (findDoub) {
                                // Emit each matching value on its own.
                                String fkvals = temp2 + "_";
                                if (value.itter()) {
                                    while (value.hasMore()) {
                                        String s1 = value.getValue();
                                        if (!s1.equals("")) {
                                            outKey.set(sum + foundkvals1 + fkvals + "!" + value.getTotal() + "!"
                                                    + s1);
                                            SerialQueryProcessorThreaded.writeOut(outKey);
                                        }
                                    }
                                } else {
                                    outKey.set(sum + foundkvals1 + fkvals + "!" + value.getTotal() + "!");
                                    SerialQueryProcessorThreaded.writeOut(outKey);
                                }

                                //outKey.set(sum+foundkvals1+fkvals+"!"+outkeytemp+else_value+"!");
                                //SerialQueryProcessorThreaded.writeOut(outKey);
                            } else {
                                // Collect the value; all matches of this key are emitted together below.
                                foundkb += temp2 + "_";
                                foundsize++;
                            }

                        }

                    }
                    // Deferred emission of all collected matching values under one key.
                    if ((foundsize > 0) && (!findDoub)) {

                        /*nextTime = new Date().getTime();
                        long latency = nextTime -time;
                        averLatency = (averLatency*num+latency)/(num+1);
                        num++;*/

                        foundkvals1 += foundkb;
                        if (value.itter()) {
                            while (value.hasMore()) {
                                String s1 = value.getValue();
                                if (!s1.equals("")) {
                                    outKey.set(sum + foundkvals1 + "!" + value.getTotal() + "!" + s1);
                                    SerialQueryProcessorThreaded.writeOut(outKey);
                                }
                            }
                        } else {
                            outKey.set(sum + foundkvals1 + "!" + value.getTotal() + "!");
                            SerialQueryProcessorThreaded.writeOut(outKey);
                        }

                    }
                }

            }

        }
        //System.out.println("Average Latency (ms): "+averLatency);
    } catch (NumberFormatException e) {
        // A malformed numeric token aborts the rest of this slice; the error is only printed.
        e.printStackTrace();
    } catch (NotSupportedDatatypeException e) {
        // Likewise only printed; the remaining keys of the slice are not processed.
        e.printStackTrace();
    }
}

From source file:hadoop.examples.hdfs.SequenceFileWriteDemo.java

License:Open Source License

/**
 * Demo entry point: writes 100 records to a fixed HDFS sequence file. Keys count down from
 * 100 to 1 and values cycle through the {@code DATA} array. Before each record is appended,
 * the writer's current length, the key and the value are printed.
 *
 * @param args ignored
 * @throws Exception if the file system or the writer fails
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    String target = "hdfs://exoplatform:9000/user/haint/temp.file";
    Path targetPath = new Path(target);
    FileSystem fileSystem = FileSystem.get(URI.create(target), configuration);

    IntWritable recordKey = new IntWritable();
    Text recordValue = new Text();
    SequenceFile.Writer out = null;
    try {
        out = SequenceFile.createWriter(fileSystem, configuration, targetPath, recordKey.getClass(),
                recordValue.getClass());
        // countdown is the key; 100 - countdown is the zero-based record index.
        for (int countdown = 100; countdown > 0; countdown--) {
            int index = 100 - countdown;
            recordKey.set(countdown);
            recordValue.set(DATA[index % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", out.getLength(), recordKey, recordValue);
            out.append(recordKey, recordValue);
        }
    } finally {
        // closeStream accepts null, so a failed createWriter is handled as well.
        IOUtils.closeStream(out);
    }
}

From source file:hip.ch3.seqfile.writable.seqfile.SequenceFileStockWriter.java

/**
 * Write the sequence file./* w  w  w  . j a  va2 s .c o  m*/
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();

    if (result != 0) {
        return result;
    }

    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
    Configuration conf = super.getConf();
    SequenceFile.Writer writer = //<co id="ch03_comment_seqfile_write1"/>
            SequenceFile.createWriter(conf, SequenceFile.Writer.file(outputPath),
                    SequenceFile.Writer.keyClass(Text.class),
                    SequenceFile.Writer.valueClass(StockPriceWritable.class),
                    SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    try {
        Text key = new Text();
        for (String line : FileUtils.readLines(inputFile)) {

            StockPriceWritable stock = StockPriceWritable.fromLine(line);
            System.out.println("Stock = " + stock);

            key.set(stock.getSymbol());

            writer.append(key, stock); //<co id="ch03_comment_seqfile_write4"/>

        }
    } finally {
        writer.close();
    }
    return 0;
}

From source file:hivemall.fm.FactorizationMachineUDTF.java

License:Apache License

/**
 * Forwards the model as rows of (feature, Wi, Vi).
 *
 * <p>The first row carries the feature {@code "0"} with {@code model.getW0()} and a
 * {@code null} factor list. One further row is forwarded per model entry, carrying the entry
 * key, its {@code W} and its first {@code factors} values of {@code Vf}. The same writable
 * objects are reused for every row.
 *
 * @param model the model whose entries are forwarded
 * @param factors number of factor values copied per entry
 * @throws HiveException declared for {@code forward}
 */
private void forwardAsStringFeature(@Nonnull final FMStringFeatureMapModel model, final int factors)
        throws HiveException {
    final Text feature = new Text();
    final FloatWritable f_Wi = new FloatWritable(0.f);
    final FloatWritable[] f_Vi = HiveUtils.newFloatArray(factors, 0.f);

    // Row layout: [feature, Wi, Vi]; the Vi slot stays null for the W0 row.
    final Object[] row = new Object[] { feature, f_Wi, null };

    // W0 (V0 is null)
    feature.set("0");
    f_Wi.set(model.getW0());
    forward(row);

    // Wi, Vif (i starts from 1..P): from here on the row exposes the factor array.
    row[2] = Arrays.asList(f_Vi);

    final IMapIterator<String, Entry> entries = model.entries();
    while (entries.next() != -1) {
        final String name = entries.getKey();
        assert (name != null);
        feature.set(name);

        final Entry current = entries.getValue();
        f_Wi.set(current.W);

        final float[] weights = current.Vf;
        int f = 0;
        while (f < factors) {
            f_Vi[f].set(weights[f]);
            f++;
        }
        forward(row);
    }
}

From source file:hivemall.fm.FieldAwareFactorizationMachineUDTF.java

License:Apache License

/**
 * Converts the trained model to its prediction form, serializes it, and forwards a single
 * row of (model id, serialized model). The model id is the unique task id string.
 * Training-time references are nulled along the way so they can be garbage collected.
 *
 * @throws HiveException if serializing the model fails, or from {@code forward}
 */
@Override
protected void forwardModel() throws HiveException {
    // Training-time state is no longer needed.
    this._model = null;
    this._fieldList = null;
    this._sumVfX = null;

    Text modelId = new Text();
    modelId.set(HadoopUtils.getUniqueTaskIdString());

    FFMPredictionModel prediction = _ffmModel.toPredictionModel();
    this._ffmModel = null; // help GC

    if (LOG.isInfoEnabled()) {
        String summary = "Serializing a model '" + modelId + "'... Configured # features: " + _numFeatures
                + ", Configured # fields: " + _numFields + ", Actual # features: "
                + prediction.getActualNumFeatures() + ", Estimated uncompressed bytes: "
                + NumberUtils.prettySize(prediction.approxBytesConsumed());
        LOG.info(summary);
    }

    byte[] payload;
    try {
        payload = prediction.serialize();
        prediction = null;
    } catch (IOException e) {
        throw new HiveException("Failed to serialize a model", e);
    }

    if (LOG.isInfoEnabled()) {
        String summary = "Forwarding a serialized/compressed model '" + modelId + "' of size: "
                + NumberUtils.prettySize(payload.length);
        LOG.info(summary);
    }

    Text modelObj = new Text3(payload);
    payload = null;

    forward(new Object[] { modelId, modelObj });
}