List of usage examples for org.apache.hadoop.io.Text: Text()
public Text()
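Before the collected examples, here is a minimal standalone sketch (not taken from any of the source files below; the class name and sample values are illustrative) of the pattern most of these examples use: construct one empty Text with the no-argument constructor and reuse it via set() for each record instead of allocating a new instance every time.

import org.apache.hadoop.io.Text;

public class TextConstructorExample {
    public static void main(String[] args) {
        // Create an empty, reusable Text; its backing byte buffer grows as needed on set()
        Text key = new Text();
        String[] ids = { "alpha", "beta", "gamma" };
        for (String id : ids) {
            key.set("/category/" + id); // reuse the same instance for each record
            System.out.println(key + " (byte length = " + key.getLength() + ")");
        }
    }
}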
From source file:PostgresToSeq.java
License:Apache License
public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input postgres table] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"),
            Text.class, Text.class);
    Connection c = null;
    Statement stmt = null;
    try {
        Class.forName("org.postgresql.Driver");
        c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres", "dbwpsdkdl");
        c.setAutoCommit(false);
        System.out.println("Opened database successfully");
        stmt = c.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT * FROM " + inputFileName);
        int count = 0;
        Text key = new Text();
        Text value = new Text();
        while (rs.next()) {
            String seq = rs.getString("seq");
            String rep = rs.getString("rep");
            String body = rs.getString("body");
            String category = rep;
            String id = seq;
            String message = body;
            key.set("/" + category + "/" + id);
            value.set(message);
            writer.append(key, value);
            count++;
        }
        rs.close();
        stmt.close();
        c.close();
        writer.close();
        System.out.println("Wrote " + count + " entries.");
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
        System.exit(0);
    }
}
From source file:Job1RecordReader.java
License:Apache License
public Text createKey() { return new Text(); }
From source file:Job1RecordReader.java
License:Apache License
public Text createValue() { return new Text(); }
From source file:DocToSeq.java
License:Apache License
public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input tsv file] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"),
            Text.class, Text.class);
    int count = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inputFileName));
    Text key = new Text();
    Text value = new Text();
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] tokens = line.split("\t", 3);
        if (tokens.length != 3) {
            System.out.println("Skip line: " + line);
            continue;
        }
        String category = tokens[0];
        String id = tokens[1];
        String message = tokens[2];
        key.set("/" + category + "/" + id);
        value.set(message);
        writer.append(key, value);
        count++;
    }
    reader.close();
    writer.close();
    System.out.println("Wrote " + count + " entries.");
}
From source file:accumulo.ingest.AccumuloLiveCsv.java
License:Apache License
@Override
public void run() {
    FileReader fileReader = null;
    CSVReader reader = null;
    final Text rowId = new Text();
    long totalRecordsInserted = 0;
    for (File f : inputs.getInputFiles()) {
        String absoluteFileName;
        try {
            absoluteFileName = f.getCanonicalPath();
        } catch (IOException e) {
            log.error("Could not determine path for file: {}", f, e);
            continue;
        }
        log.info("Starting to process {}", absoluteFileName);
        absoluteFileName += ROW_SEPARATOR;
        Text fileName = new Text(absoluteFileName);
        try {
            try {
                fileReader = new FileReader(f);
            } catch (FileNotFoundException e) {
                log.error("Could not read file {}", f.toString());
                continue;
            }
            reader = new CSVReader(fileReader);
            String[] header;
            try {
                header = reader.readNext();
            } catch (IOException e) {
                log.error("Error reading header", e);
                continue;
            }
            try {
                writeSchema(fileName, header);
            } catch (AccumuloException e) {
                log.error("Could not write header to schema table", e);
                continue;
            } catch (AccumuloSecurityException e) {
                log.error("Could not write header to schema table", e);
                continue;
            }
            String[] record;
            long recordCount = 0l;
            try {
                while (null != (record = reader.readNext())) {
                    // Make a unique row id from the filename and record offset
                    setRowId(rowId, fileName, recordCount);
                    try {
                        writeRecord(header, record, rowId, fileName);
                    } catch (AccumuloException e) {
                        log.error("Could not write record to record table", e);
                    } catch (AccumuloSecurityException e) {
                        log.error("Could not write record to record table", e);
                    }
                    recordCount++;
                    totalRecordsInserted++;
                    if (0 == totalRecordsInserted % 1000) {
                        mtbw.flush();
                    }
                }
            } catch (IOException e) {
                log.error("Error reading records from CSV file", e);
                continue;
            } catch (MutationsRejectedException e) {
                log.error("Error flushing mutations to server", e);
                throw new RuntimeException(e);
            } finally {
                log.info("Processed {} records from {}", recordCount, absoluteFileName);
            }
        } finally {
            if (null != reader) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.error("Error closing CSV reader", e);
                }
            }
            if (null != fileReader) {
                try {
                    fileReader.close();
                } catch (IOException e) {
                    log.error("Error closing file reader", e);
                }
            }
        }
    }
    log.info("Processed {} records in total", totalRecordsInserted);
}
From source file:accumulo.ingest.AccumuloLiveCsv.java
License:Apache License
protected void writeRecord(String[] header, String[] record, Text rowId, Text fileName)
        throws AccumuloException, AccumuloSecurityException {
    Preconditions.checkArgument(header.length >= record.length,
            "Cannot have more columns in record (%s) than defined in header (%s)",
            new Object[] { header.length, record.length });
    final BatchWriter recordBw, schemaBw;
    try {
        recordBw = mtbw.getBatchWriter(recordTableName);
        schemaBw = mtbw.getBatchWriter(schemaTableName);
    } catch (TableNotFoundException e) {
        log.error("Table(s) ({}, {}) were deleted", recordTableName, schemaTableName, e);
        throw new RuntimeException(e);
    }
    // Some temp Texts to avoid lots of object allocations
    final Text cfHolder = new Text();
    final HashMap<String, Long> counts = new HashMap<String, Long>();
    // write records
    Mutation recordMutation = new Mutation(rowId);
    for (int i = 0; i < record.length; i++) {
        final String columnName = header[i];
        final String columnValue = record[i];
        if (counts.containsKey(columnName)) {
            counts.put(columnName, counts.get(columnName) + 1);
        } else {
            counts.put(columnName, 1l);
        }
        cfHolder.set(columnName);
        recordMutation.put(cfHolder, EMPTY_TEXT, new Value(columnValue.getBytes()));
    }
    recordBw.addMutation(recordMutation);
    // update counts in schema
    for (Entry<String, Long> schemaUpdate : counts.entrySet()) {
        Mutation schemaMutation = new Mutation(schemaUpdate.getKey());
        schemaMutation.put(SCHEMA_COLUMN_FREQ, fileName, longToValue(schemaUpdate.getValue()));
        schemaBw.addMutation(schemaMutation);
    }
}
From source file:ar.edu.ungs.garules.CensusJob.java
License:Apache License
/**
 * Reads the reducer output from the distributed file system and loads it into the in-memory "ocurrencias" map.
 * @param conf
 * @param path
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private static void llenarOcurrencias(Configuration conf, String path) throws IOException {
    FileSystem fs = new DistributedFileSystem(
            new InetSocketAddress(DEFAULT_FILE_SYSTEM_HOST, DEFAULT_FILE_SYSTEM_PORT), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(path + "/part-r-00000"), conf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.next(key, value))
        ocurrencias.put(key.toString(), value.get());
    reader.close();
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCF.java
License:Apache License
@Override
public boolean load(String path, boolean lazy) {
    this.m_isLazyLoadModel = lazy;
    this.m_modelPath = path;
    if (lazy == false) {
        Path dataPath = new Path(m_modelPath);
        Configuration conf = new Configuration();
        try {
            FileSystem fs = dataPath.getFileSystem(conf);
            LinkedList<Path> files = new LinkedList<Path>();
            if (!fs.exists(dataPath)) {
                this.m_isLazyLoadModel = false;
                this.m_modelPath = null;
                return false;
            }
            if (!fs.isFile(dataPath)) {
                for (int i = 0; i < 100000; i++) {
                    Path partFile = new Path(
                            m_modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6));
                    if (fs.exists(partFile)) {
                        files.add(partFile);
                    } else {
                        break;
                    }
                }
            } else {
                files.add(dataPath);
            }
            LOG.info("loading model from " + path);
            for (Path file : files) {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
                Text key = new Text();
                PipesVectorWritable value = new PipesVectorWritable();
                String strKey = null;
                Long actualKey = null;
                String firstSymbol = null;
                while (reader.next(key, value) != false) {
                    strKey = key.toString();
                    firstSymbol = strKey.substring(0, 1);
                    try {
                        actualKey = Long.valueOf(strKey.substring(1));
                    } catch (Exception e) {
                        actualKey = new Long(0);
                    }
                    if (firstSymbol.equals(OnlineCF.DFLT_MODEL_ITEM_DELIM)) {
                        m_modelItemFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else if (firstSymbol.equals(OnlineCF.DFLT_MODEL_USER_DELIM)) {
                        m_modelUserFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else {
                        // unknown prefix
                        continue;
                    }
                }
                reader.close();
            }
            LOG.info("loaded: " + m_modelUserFactorizedValues.size() + " users, "
                    + m_modelItemFactorizedValues.size() + " items");
        } catch (Exception e) {
            e.printStackTrace();
            this.m_isLazyLoadModel = false;
            this.m_modelPath = null;
            return false;
        }
    }
    return true;
}
From source file:at.illecker.hama.hybrid.examples.summation.SummationBSP.java
License:Apache License
@Override
public void bsp(BSPPeer<Text, Text, Text, DoubleWritable, DoubleWritable> peer)
        throws IOException, SyncException, InterruptedException {
    BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
    FileSystem fs = FileSystem.get(peer.getConfiguration());
    FSDataOutputStream outStream = fs
            .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));
    outStream.writeChars("SummationBSP.bsp executed on CPU!\n");
    double intermediateSum = 0.0;
    Text key = new Text();
    Text value = new Text();
    while (peer.readNext(key, value)) {
        outStream.writeChars("SummationBSP.bsp key: " + key + " value: " + value + "\n");
        intermediateSum += Double.parseDouble(value.toString());
    }
    outStream.writeChars("SummationBSP.bsp send intermediateSum: " + intermediateSum + "\n");
    peer.send(m_masterTask, new DoubleWritable(intermediateSum));
    peer.sync();
    // Consume messages
    if (peer.getPeerName().equals(m_masterTask)) {
        outStream.writeChars("SummationBSP.bsp consume messages...\n");
        double sum = 0.0;
        int msg_count = peer.getNumCurrentMessages();
        for (int i = 0; i < msg_count; i++) {
            DoubleWritable msg = peer.getCurrentMessage();
            outStream.writeChars("SummationBSP.bsp message: " + msg.get() + "\n");
            sum += msg.get();
        }
        outStream.writeChars("SummationBSP.bsp write Sum: " + sum + "\n");
        peer.write(new Text("Sum"), new DoubleWritable(sum));
    }
    outStream.close();
}
From source file:at.illecker.hama.hybrid.examples.summation.SummationBSP.java
License:Apache License
static void printOutput(BSPJob job, BigDecimal sum) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] listStatus = fs.listStatus(FileOutputFormat.getOutputPath(job));
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            try {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(),
                        job.getConfiguration());
                Text key = new Text();
                DoubleWritable value = new DoubleWritable();
                if (reader.next(key, value)) {
                    LOG.info("Output File: " + status.getPath());
                    LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + sum.doubleValue() + "'");
                    Assert.assertEquals("Expected value: '" + sum.doubleValue() + "' != '" + value + "'",
                            sum.doubleValue(), value.get(), Math.pow(10, (DOUBLE_PRECISION * -1)));
                }
                reader.close();
            } catch (IOException e) {
                if (status.getLen() > 0) {
                    System.out.println("Output File " + status.getPath());
                    FSDataInputStream in = fs.open(status.getPath());
                    IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                    in.close();
                }
            }
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}