List of usage examples for org.apache.hadoop.io.Text: Text()
public Text()
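Before the collected examples, here is a minimal standalone sketch (not taken from any of the source files below; the class name and sample values are illustrative) of the pattern most of these examples use: construct one empty Text with the no-argument constructor and reuse it via set() for each record instead of allocating a new instance every time.

import org.apache.hadoop.io.Text;

public class TextConstructorExample {
    public static void main(String[] args) {
        // Create an empty, reusable Text; its backing byte buffer grows as needed on set()
        Text key = new Text();
        String[] ids = { "alpha", "beta", "gamma" };
        for (String id : ids) {
            key.set("/category/" + id); // reuse the same instance for each record
            System.out.println(key + " (byte length = " + key.getLength() + ")");
        }
    }
}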
From source file:PostgresToSeq.java
License:Apache License
public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input postgres table] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"),
            Text.class, Text.class);
    Connection c = null;
    Statement stmt = null;
    try {
        Class.forName("org.postgresql.Driver");
        c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres", "dbwpsdkdl");
        c.setAutoCommit(false);
        System.out.println("Opened database successfully");
        stmt = c.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT * FROM " + inputFileName);
        int count = 0;
        Text key = new Text();
        Text value = new Text();
        while (rs.next()) {
            String seq = rs.getString("seq");
            String rep = rs.getString("rep");
            String body = rs.getString("body");
            String category = rep;
            String id = seq;
            String message = body;
            key.set("/" + category + "/" + id);
            value.set(message);
            writer.append(key, value);
            count++;
        }
        rs.close();
        stmt.close();
        c.close();
        writer.close();
        System.out.println("Wrote " + count + " entries.");
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
        System.exit(0);
    }
}
From source file:Job1RecordReader.java
License:Apache License
public Text createKey() { return new Text(); }
From source file:Job1RecordReader.java
License:Apache License
public Text createValue() { return new Text(); }
From source file:DocToSeq.java
License:Apache License
public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input tsv file] [output sequence file]");
        return;
    }
    String inputFileName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"),
            Text.class, Text.class);
    int count = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inputFileName));
    Text key = new Text();
    Text value = new Text();
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] tokens = line.split("\t", 3);
        if (tokens.length != 3) {
            System.out.println("Skip line: " + line);
            continue;
        }
        String category = tokens[0];
        String id = tokens[1];
        String message = tokens[2];
        key.set("/" + category + "/" + id);
        value.set(message);
        writer.append(key, value);
        count++;
    }
    reader.close();
    writer.close();
    System.out.println("Wrote " + count + " entries.");
}
From source file:accumulo.ingest.AccumuloLiveCsv.java
License:Apache License
@Override
public void run() {
    FileReader fileReader = null;
    CSVReader reader = null;
    final Text rowId = new Text();
    long totalRecordsInserted = 0;
    for (File f : inputs.getInputFiles()) {
        String absoluteFileName;
        try {
            absoluteFileName = f.getCanonicalPath();
        } catch (IOException e) {
            log.error("Could not determine path for file: {}", f, e);
            continue;
        }
        log.info("Starting to process {}", absoluteFileName);
        absoluteFileName += ROW_SEPARATOR;
        Text fileName = new Text(absoluteFileName);
        try {
            try {
                fileReader = new FileReader(f);
            } catch (FileNotFoundException e) {
                log.error("Could not read file {}", f.toString());
                continue;
            }
            reader = new CSVReader(fileReader);
            String[] header;
            try {
                header = reader.readNext();
            } catch (IOException e) {
                log.error("Error reading header", e);
                continue;
            }
            try {
                writeSchema(fileName, header);
            } catch (AccumuloException e) {
                log.error("Could not write header to schema table", e);
                continue;
            } catch (AccumuloSecurityException e) {
                log.error("Could not write header to schema table", e);
                continue;
            }
            String[] record;
            long recordCount = 0l;
            try {
                while (null != (record = reader.readNext())) {
                    // Make a unique row id from the filename and record offset
                    setRowId(rowId, fileName, recordCount);
                    try {
                        writeRecord(header, record, rowId, fileName);
                    } catch (AccumuloException e) {
                        log.error("Could not write record to record table", e);
                    } catch (AccumuloSecurityException e) {
                        log.error("Could not write record to record table", e);
                    }
                    recordCount++;
                    totalRecordsInserted++;
                    if (0 == totalRecordsInserted % 1000) {
                        mtbw.flush();
                    }
                }
            } catch (IOException e) {
                log.error("Error reading records from CSV file", e);
                continue;
            } catch (MutationsRejectedException e) {
                log.error("Error flushing mutations to server", e);
                throw new RuntimeException(e);
            } finally {
                log.info("Processed {} records from {}", recordCount, absoluteFileName);
            }
        } finally {
            if (null != reader) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.error("Error closing CSV reader", e);
                }
            }
            if (null != fileReader) {
                try {
                    fileReader.close();
                } catch (IOException e) {
                    log.error("Error closing file reader", e);
                }
            }
        }
    }
    log.info("Processed {} records in total", totalRecordsInserted);
}
From source file:accumulo.ingest.AccumuloLiveCsv.java
License:Apache License
protected void writeRecord(String[] header, String[] record, Text rowId, Text fileName)
        throws AccumuloException, AccumuloSecurityException {
    Preconditions.checkArgument(header.length >= record.length,
            "Cannot have more columns in record (%s) than defined in header (%s)",
            new Object[] { header.length, record.length });
    final BatchWriter recordBw, schemaBw;
    try {
        recordBw = mtbw.getBatchWriter(recordTableName);
        schemaBw = mtbw.getBatchWriter(schemaTableName);
    } catch (TableNotFoundException e) {
        log.error("Table(s) ({}, {}) were deleted", recordTableName, schemaTableName, e);
        throw new RuntimeException(e);
    }
    // Some temp Texts to avoid lots of object allocations
    final Text cfHolder = new Text();
    final HashMap<String, Long> counts = new HashMap<String, Long>();
    // write records
    Mutation recordMutation = new Mutation(rowId);
    for (int i = 0; i < record.length; i++) {
        final String columnName = header[i];
        final String columnValue = record[i];
        if (counts.containsKey(columnName)) {
            counts.put(columnName, counts.get(columnName) + 1);
        } else {
            counts.put(columnName, 1l);
        }
        cfHolder.set(columnName);
        recordMutation.put(cfHolder, EMPTY_TEXT, new Value(columnValue.getBytes()));
    }
    recordBw.addMutation(recordMutation);
    // update counts in schema
    for (Entry<String, Long> schemaUpdate : counts.entrySet()) {
        Mutation schemaMutation = new Mutation(schemaUpdate.getKey());
        schemaMutation.put(SCHEMA_COLUMN_FREQ, fileName, longToValue(schemaUpdate.getValue()));
        schemaBw.addMutation(schemaMutation);
    }
}
From source file:ar.edu.ungs.garules.CensusJob.java
License:Apache License
/**
 * Reads the reducer output from the distributed file system and loads it into the in-memory "ocurrencias" map.
 * @param conf
 * @param path
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private static void llenarOcurrencias(Configuration conf, String path) throws IOException {
    FileSystem fs = new DistributedFileSystem(
            new InetSocketAddress(DEFAULT_FILE_SYSTEM_HOST, DEFAULT_FILE_SYSTEM_PORT), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(path + "/part-r-00000"), conf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.next(key, value))
        ocurrencias.put(key.toString(), value.get());
    reader.close();
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCF.java
License:Apache License
@Override
public boolean load(String path, boolean lazy) {
    this.m_isLazyLoadModel = lazy;
    this.m_modelPath = path;
    if (lazy == false) {
        Path dataPath = new Path(m_modelPath);
        Configuration conf = new Configuration();
        try {
            FileSystem fs = dataPath.getFileSystem(conf);
            LinkedList<Path> files = new LinkedList<Path>();
            if (!fs.exists(dataPath)) {
                this.m_isLazyLoadModel = false;
                this.m_modelPath = null;
                return false;
            }
            if (!fs.isFile(dataPath)) {
                for (int i = 0; i < 100000; i++) {
                    Path partFile = new Path(
                            m_modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6));
                    if (fs.exists(partFile)) {
                        files.add(partFile);
                    } else {
                        break;
                    }
                }
            } else {
                files.add(dataPath);
            }
            LOG.info("loading model from " + path);
            for (Path file : files) {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
                Text key = new Text();
                PipesVectorWritable value = new PipesVectorWritable();
                String strKey = null;
                Long actualKey = null;
                String firstSymbol = null;
                while (reader.next(key, value) != false) {
                    strKey = key.toString();
                    firstSymbol = strKey.substring(0, 1);
                    try {
                        actualKey = Long.valueOf(strKey.substring(1));
                    } catch (Exception e) {
                        actualKey = new Long(0);
                    }
                    if (firstSymbol.equals(OnlineCF.DFLT_MODEL_ITEM_DELIM)) {
                        m_modelItemFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else if (firstSymbol.equals(OnlineCF.DFLT_MODEL_USER_DELIM)) {
                        m_modelUserFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else {
                        // unknown prefix
                        continue;
                    }
                }
                reader.close();
            }
            LOG.info("loaded: " + m_modelUserFactorizedValues.size() + " users, "
                    + m_modelItemFactorizedValues.size() + " items");
        } catch (Exception e) {
            e.printStackTrace();
            this.m_isLazyLoadModel = false;
            this.m_modelPath = null;
            return false;
        }
    }
    return true;
}
From source file:at.illecker.hama.hybrid.examples.summation.SummationBSP.java
License:Apache License
@Override
public void bsp(BSPPeer<Text, Text, Text, DoubleWritable, DoubleWritable> peer)
        throws IOException, SyncException, InterruptedException {
    BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
    FileSystem fs = FileSystem.get(peer.getConfiguration());
    FSDataOutputStream outStream = fs
            .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));
    outStream.writeChars("SummationBSP.bsp executed on CPU!\n");
    double intermediateSum = 0.0;
    Text key = new Text();
    Text value = new Text();
    while (peer.readNext(key, value)) {
        outStream.writeChars("SummationBSP.bsp key: " + key + " value: " + value + "\n");
        intermediateSum += Double.parseDouble(value.toString());
    }
    outStream.writeChars("SummationBSP.bsp send intermediateSum: " + intermediateSum + "\n");
    peer.send(m_masterTask, new DoubleWritable(intermediateSum));
    peer.sync();
    // Consume messages
    if (peer.getPeerName().equals(m_masterTask)) {
        outStream.writeChars("SummationBSP.bsp consume messages...\n");
        double sum = 0.0;
        int msg_count = peer.getNumCurrentMessages();
        for (int i = 0; i < msg_count; i++) {
            DoubleWritable msg = peer.getCurrentMessage();
            outStream.writeChars("SummationBSP.bsp message: " + msg.get() + "\n");
            sum += msg.get();
        }
        outStream.writeChars("SummationBSP.bsp write Sum: " + sum + "\n");
        peer.write(new Text("Sum"), new DoubleWritable(sum));
    }
    outStream.close();
}
From source file:at.illecker.hama.hybrid.examples.summation.SummationBSP.java
License:Apache License
static void printOutput(BSPJob job, BigDecimal sum) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] listStatus = fs.listStatus(FileOutputFormat.getOutputPath(job));
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            try {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(),
                        job.getConfiguration());
                Text key = new Text();
                DoubleWritable value = new DoubleWritable();
                if (reader.next(key, value)) {
                    LOG.info("Output File: " + status.getPath());
                    LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + sum.doubleValue() + "'");
                    Assert.assertEquals("Expected value: '" + sum.doubleValue() + "' != '" + value + "'",
                            sum.doubleValue(), value.get(), Math.pow(10, (DOUBLE_PRECISION * -1)));
                }
                reader.close();
            } catch (IOException e) {
                if (status.getLen() > 0) {
                    System.out.println("Output File " + status.getPath());
                    FSDataInputStream in = fs.open(status.getPath());
                    IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                    in.close();
                }
            }
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}