Example usage for org.apache.hadoop.io.Text Text()

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text no-argument constructor, Text().

Prototype

public Text() 
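
A minimal sketch of what this constructor is typically used for: creating an empty, mutable Text buffer that is filled in later with set(). The class name TextConstructorExample is only illustrative and is not taken from the sources listed under Usage.

import org.apache.hadoop.io.Text;

public class TextConstructorExample {
    public static void main(String[] args) {
        // Start with an empty, reusable buffer
        Text text = new Text();

        // Fill it afterwards; the same instance can be reused for many values
        text.set("hello hadoop");
        System.out.println(text.toString()); // hello hadoop
        System.out.println(text.getLength()); // length in bytes of the UTF-8 encoding
    }
}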

Usage

From source file: PostgresToSeq.java

License: Apache License

public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input postgres table] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"), Text.class,
            Text.class);
    Connection c = null;
    Statement stmt = null;
    try {
        Class.forName("org.postgresql.Driver");
        c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres", "dbwpsdkdl");
        c.setAutoCommit(false);
        System.out.println("Opened database successfully");
        stmt = c.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT * FROM " + inputFileName);
        int count = 0;
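        // Reusable key/value Text buffers; set() refills them for each row before append()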
        Text key = new Text();
        Text value = new Text();

        while (rs.next()) {
            String seq = rs.getString("seq");
            String rep = rs.getString("rep");
            String body = rs.getString("body");
            String category = rep;
            String id = seq;
            String message = body;
            key.set("/" + category + "/" + id);
            value.set(message);
            writer.append(key, value);
            count++;
        }
        rs.close();
        stmt.close();
        c.close();
        writer.close();
        System.out.println("Wrote " + count + " entries.");
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
        System.exit(1);
    }
}

From source file: Job1RecordReader.java

License: Apache License

public Text createKey() {
    return new Text();
}

From source file: Job1RecordReader.java

License: Apache License

public Text createValue() {
    return new Text();
}

From source file: DocToSeq.java

License: Apache License

public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input tsv file] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"), Text.class,
            Text.class);

    int count = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inputFileName));
    Text key = new Text();
    Text value = new Text();
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] tokens = line.split("\t", 3);
        if (tokens.length != 3) {
            System.out.println("Skip line: " + line);
            continue;
        }
        String category = tokens[0];
        String id = tokens[1];
        String message = tokens[2];
        key.set("/" + category + "/" + id);
        value.set(message);
        writer.append(key, value);
        count++;
    }
    reader.close();
    writer.close();
    System.out.println("Wrote " + count + " entries.");
}

From source file: accumulo.ingest.AccumuloLiveCsv.java

License: Apache License

@Override
public void run() {
    FileReader fileReader = null;
    CSVReader reader = null;
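    // Row id buffer reused for every record across all input files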
    final Text rowId = new Text();
    long totalRecordsInserted = 0;

    for (File f : inputs.getInputFiles()) {
        String absoluteFileName;
        try {
            absoluteFileName = f.getCanonicalPath();
        } catch (IOException e) {
            log.error("Could not determine path for file: {}", f, e);
            continue;
        }

        log.info("Starting to process {}", absoluteFileName);

        absoluteFileName += ROW_SEPARATOR;
        Text fileName = new Text(absoluteFileName);

        try {
            try {
                fileReader = new FileReader(f);
            } catch (FileNotFoundException e) {
                log.error("Could not read file {}", f.toString());
                continue;
            }

            reader = new CSVReader(fileReader);

            String[] header;
            try {
                header = reader.readNext();
            } catch (IOException e) {
                log.error("Error reading header", e);
                continue;
            }

            try {
                writeSchema(fileName, header);
            } catch (AccumuloException e) {
                log.error("Could not write header to schema table", e);
                continue;
            } catch (AccumuloSecurityException e) {
                log.error("Could not write header to schema table", e);
                continue;
            }

            String[] record;
            long recordCount = 0L;

            try {
                while (null != (record = reader.readNext())) {
                    // Make a unique row id from the filename and record offset
                    setRowId(rowId, fileName, recordCount);

                    try {
                        writeRecord(header, record, rowId, fileName);
                    } catch (AccumuloException e) {
                        log.error("Could not write record to record table", e);
                    } catch (AccumuloSecurityException e) {
                        log.error("Could not write record to record table", e);
                    }

                    recordCount++;
                    totalRecordsInserted++;

                    if (0 == totalRecordsInserted % 1000) {
                        mtbw.flush();
                    }
                }
            } catch (IOException e) {
                log.error("Error reading records from CSV file", e);
                continue;
            } catch (MutationsRejectedException e) {
                log.error("Error flushing mutations to server", e);
                throw new RuntimeException(e);
            } finally {
                log.info("Processed {} records from {}", recordCount, absoluteFileName);
            }
        } finally {

            if (null != reader) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.error("Error closing CSV reader", e);
                }
            }

            if (null != fileReader) {
                try {
                    fileReader.close();
                } catch (IOException e) {
                    log.error("Error closing file reader", e);
                }
            }
        }
    }

    log.info("Processed {} records in total", totalRecordsInserted);
}

From source file: accumulo.ingest.AccumuloLiveCsv.java

License: Apache License

protected void writeRecord(String[] header, String[] record, Text rowId, Text fileName)
        throws AccumuloException, AccumuloSecurityException {
    Preconditions.checkArgument(header.length >= record.length,
            "Cannot have more columns in record (%s) than defined in header (%s)", record.length,
            header.length);

    final BatchWriter recordBw, schemaBw;
    try {
        recordBw = mtbw.getBatchWriter(recordTableName);
        schemaBw = mtbw.getBatchWriter(schemaTableName);
    } catch (TableNotFoundException e) {
        log.error("Table(s) ({}, {}) were deleted", recordTableName, schemaTableName, e);
        throw new RuntimeException(e);
    }

    // Some temp Texts to avoid lots of object allocations
    final Text cfHolder = new Text();
    final HashMap<String, Long> counts = new HashMap<String, Long>();

    // write records
    Mutation recordMutation = new Mutation(rowId);
    for (int i = 0; i < record.length; i++) {
        final String columnName = header[i];
        final String columnValue = record[i];

        if (counts.containsKey(columnName)) {
            counts.put(columnName, counts.get(columnName) + 1);
        } else {
            counts.put(columnName, 1L);
        }

        cfHolder.set(columnName);

        recordMutation.put(cfHolder, EMPTY_TEXT, new Value(columnValue.getBytes()));
    }

    recordBw.addMutation(recordMutation);

    // update counts in schema
    for (Entry<String, Long> schemaUpdate : counts.entrySet()) {
        Mutation schemaMutation = new Mutation(schemaUpdate.getKey());

        schemaMutation.put(SCHEMA_COLUMN_FREQ, fileName, longToValue(schemaUpdate.getValue()));
        schemaBw.addMutation(schemaMutation);
    }
}

From source file: ar.edu.ungs.garules.CensusJob.java

License: Apache License

/**
 * Reads the reducer output from the distributed file system and loads it into the in-memory "ocurrencias" map
 * @param conf
 * @param path
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private static void llenarOcurrencias(Configuration conf, String path) throws IOException {
    FileSystem fs = new DistributedFileSystem(
            new InetSocketAddress(DEFAULT_FILE_SYSTEM_HOST, DEFAULT_FILE_SYSTEM_PORT), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(path + "/part-r-00000"), conf);

    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.next(key, value))
        ocurrencias.put(key.toString(), value.get());
    reader.close();
}

From source file: at.illecker.hama.hybrid.examples.onlinecf.OnlineCF.java

License: Apache License

@Override
public boolean load(String path, boolean lazy) {
    this.m_isLazyLoadModel = lazy;
    this.m_modelPath = path;

    if (!lazy) {
        Path dataPath = new Path(m_modelPath);
        Configuration conf = new Configuration();
        try {
            FileSystem fs = dataPath.getFileSystem(conf);
            LinkedList<Path> files = new LinkedList<Path>();

            if (!fs.exists(dataPath)) {
                this.m_isLazyLoadModel = false;
                this.m_modelPath = null;
                return false;
            }

            if (!fs.isFile(dataPath)) {
                for (int i = 0; i < 100000; i++) {
                    Path partFile = new Path(
                            m_modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6));
                    if (fs.exists(partFile)) {
                        files.add(partFile);
                    } else {
                        break;
                    }
                }
            } else {
                files.add(dataPath);
            }

            LOG.info("loading model from " + path);
            for (Path file : files) {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);

                Text key = new Text();
                PipesVectorWritable value = new PipesVectorWritable();
                String strKey = null;
                Long actualKey = null;
                String firstSymbol = null;

                while (reader.next(key, value)) {
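                    // Model keys look like "<delim><id>": the first character marks item vs. user vectors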
                    strKey = key.toString();
                    firstSymbol = strKey.substring(0, 1);
                    try {
                        actualKey = Long.valueOf(strKey.substring(1));
                    } catch (Exception e) {
                        actualKey = 0L;
                    }

                    if (firstSymbol.equals(OnlineCF.DFLT_MODEL_ITEM_DELIM)) {
                        // LOG.info("loaded itemId: " + actualKey + " itemVector: "
                        // + value.getVector());
                        m_modelItemFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else if (firstSymbol.equals(OnlineCF.DFLT_MODEL_USER_DELIM)) {
                        // LOG.info("loaded userId: " + actualKey + " userVector: "
                        // + value.getVector());
                        m_modelUserFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else {
                        // unknown
                        continue;
                    }
                }
                reader.close();
            }

            LOG.info("loaded: " + m_modelUserFactorizedValues.size() + " users, "
                    + m_modelItemFactorizedValues.size() + " items");
            // for (Long user : m_modelUserFactorizedValues.keySet()) {
            // LOG.info("userId: " + user + " userVector: "
            // + m_modelUserFactorizedValues.get(user));
            // }
            // for (Long item : m_modelItemFactorizedValues.keySet()) {
            // LOG.info("itemId: " + item + " itemVector: "
            // + m_modelItemFactorizedValues.get(item));
            // }

        } catch (Exception e) {
            e.printStackTrace();
            this.m_isLazyLoadModel = false;
            this.m_modelPath = null;
            return false;
        }
    }
    return true;
}

From source file: at.illecker.hama.hybrid.examples.summation.SummationBSP.java

License: Apache License

@Override
public void bsp(BSPPeer<Text, Text, Text, DoubleWritable, DoubleWritable> peer)
        throws IOException, SyncException, InterruptedException {

    BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
    FileSystem fs = FileSystem.get(peer.getConfiguration());
    FSDataOutputStream outStream = fs
            .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));

    outStream.writeChars("SummationBSP.bsp executed on CPU!\n");

    double intermediateSum = 0.0;
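    // Reusable input key/value buffers filled by peer.readNext()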
    Text key = new Text();
    Text value = new Text();

    while (peer.readNext(key, value)) {
        outStream.writeChars("SummationBSP.bsp key: " + key + " value: " + value + "\n");
        intermediateSum += Double.parseDouble(value.toString());
    }

    outStream.writeChars("SummationBSP.bsp send intermediateSum: " + intermediateSum + "\n");

    peer.send(m_masterTask, new DoubleWritable(intermediateSum));
    peer.sync();

    // Consume messages
    if (peer.getPeerName().equals(m_masterTask)) {
        outStream.writeChars("SummationBSP.bsp consume messages...\n");

        double sum = 0.0;
        int msg_count = peer.getNumCurrentMessages();

        for (int i = 0; i < msg_count; i++) {
            DoubleWritable msg = peer.getCurrentMessage();
            outStream.writeChars("SummationBSP.bsp message: " + msg.get() + "\n");
            sum += msg.get();
        }

        outStream.writeChars("SummationBSP.bsp write Sum: " + sum + "\n");
        peer.write(new Text("Sum"), new DoubleWritable(sum));
    }
    outStream.close();
}

From source file: at.illecker.hama.hybrid.examples.summation.SummationBSP.java

License: Apache License

static void printOutput(BSPJob job, BigDecimal sum) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] listStatus = fs.listStatus(FileOutputFormat.getOutputPath(job));
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            try {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(),
                        job.getConfiguration());

                Text key = new Text();
                DoubleWritable value = new DoubleWritable();

                if (reader.next(key, value)) {
                    LOG.info("Output File: " + status.getPath());
                    LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + sum.doubleValue() + "'");
                    Assert.assertEquals("Expected value: '" + sum.doubleValue() + "' != '" + value + "'",
                            sum.doubleValue(), value.get(), Math.pow(10, (DOUBLE_PRECISION * -1)));
                }
                reader.close();

            } catch (IOException e) {
                if (status.getLen() > 0) {
                    System.out.println("Output File " + status.getPath());
                    FSDataInputStream in = fs.open(status.getPath());
                    IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                    in.close();
                }
            }
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}