List of usage examples for org.apache.hadoop.conf.Configuration
public Configuration()
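Before the source-file examples, a minimal sketch (not taken from any of the files below) of the no-argument constructor in isolation: it creates an empty Configuration that, by default, loads core-default.xml and core-site.xml from the classpath, after which properties can be set and read back. The property name "my.example.key" is purely illustrative.

    import org.apache.hadoop.conf.Configuration;

    public class ConfigurationSketch {
        public static void main(String[] args) {
            // Empty Configuration; picks up core-default.xml and core-site.xml if present on the classpath.
            Configuration conf = new Configuration();
            // "my.example.key" is an illustrative property name, not a real Hadoop key.
            conf.set("my.example.key", "42");
            System.out.println(conf.getInt("my.example.key", 0));     // prints 42
            System.out.println(conf.get("fs.defaultFS", "file:///")); // falls back to the default if unset
        }
    }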
From source file:ParascaleFsTestCase.java
License:Apache License
    /**
     * Creates a new Hadoop Configuration object.
     *
     * @return a new Hadoop configuration object
     * @see Configuration
     */
    protected Configuration getConf() {
        final Configuration conf = new Configuration();
        if (setDefaultBlockSize) {
            conf.setLong(RawParascaleFileSystem.PS_DEFAULT_BLOCKSIZE, defaultBlockSize);
        }
        if (setDefaultReplication) {
            conf.setLong(RawParascaleFileSystem.PS_DEFAULT_REPLICATION, defaultReplication);
        }
        if (setMountPoint) {
            conf.set(RawParascaleFileSystem.PS_MOUNT_POINT, String.format("%s/%s", getTempDir(), mountPoint));
        }
        if (setDefaultFsName) {
            conf.set(FS_DEFAULT_NAME, String.format("%s%s@%s", fsScheme, virtualFs, controlNode));
        }
        return conf;
    }
From source file:MapFileRW.java
License:Open Source License
    static void write() throws IOException {
        String filename = "/indextest/testmapfile";
        Configuration conf = new Configuration();
        MapFile.Writer writer = new MapFile.Writer(conf, FileSystem.get(conf), filename, IndexKey.class,
                IndexValue.class);
        writer.close();
    }
From source file:MapFileRW.java
License:Open Source License
    static void read(String filename, int num) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        MapFile.Reader reader = new MapFile.Reader(fs, filename, conf);
        IndexKey key = new IndexKey();
        IndexValue value = new IndexValue();
        int i = 0;
        while (reader.next(key, value)) {
            System.out.print(value.getFileindex() + " ");
            System.out.println(value.getRowid());
            key.reset();
            if ((i++) >= num) {
                break;
            }
        }
    }
From source file:DumpPageRankRecordsToPlainText.java
License:Apache License
    /**
     * Runs this tool.
     */
    @SuppressWarnings({ "static-access" })
    public int run(String[] args) throws Exception {
        Options options = new Options();
        options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
        options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));

        CommandLine cmdline;
        CommandLineParser parser = new GnuParser();
        try {
            cmdline = parser.parse(options, args);
        } catch (ParseException exp) {
            System.err.println("Error parsing command line: " + exp.getMessage());
            return -1;
        }

        if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
            System.out.println("args: " + Arrays.toString(args));
            HelpFormatter formatter = new HelpFormatter();
            formatter.setWidth(120);
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            return -1;
        }

        String inputPath = cmdline.getOptionValue(INPUT);
        String outputPath = cmdline.getOptionValue(OUTPUT);

        LOG.info("Tool name: " + DumpPageRankRecordsToPlainText.class.getSimpleName());
        LOG.info(" - input: " + inputPath);
        LOG.info(" - output: " + outputPath);

        Configuration conf = new Configuration();
        conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

        Job job = Job.getInstance(conf);
        job.setJobName(DumpPageRankRecordsToPlainText.class.getSimpleName());
        job.setJarByClass(DumpPageRankRecordsToPlainText.class);
        job.setNumReduceTasks(0);

        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(PageRankNode.class);

        // Delete the output directory if it exists already.
        FileSystem.get(conf).delete(new Path(outputPath), true);

        job.waitForCompletion(true);

        return 0;
    }
From source file:SleepJob.java
License:Apache License
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new SleepJob(), args);
        System.exit(res);
    }
From source file:PostgresToSeq.java
License:Apache License
    public static void main(String args[]) throws Exception {
        if (args.length != 2) {
            System.err.println("Arguments: [input postgres table] [output sequence file]");
            return;
        }
        String inputFileName = args[0];
        String outputDirName = args[1];

        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(configuration);
        Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"),
                Text.class, Text.class);

        Connection c = null;
        Statement stmt = null;
        try {
            Class.forName("org.postgresql.Driver");
            c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres",
                    "dbwpsdkdl");
            c.setAutoCommit(false);
            System.out.println("Opened database successfully");

            stmt = c.createStatement();
            ResultSet rs = stmt.executeQuery("SELECT * FROM " + inputFileName);
            int count = 0;
            Text key = new Text();
            Text value = new Text();
            while (rs.next()) {
                String seq = rs.getString("seq");
                String rep = rs.getString("rep");
                String body = rs.getString("body");
                String category = rep;
                String id = seq;
                String message = body;
                key.set("/" + category + "/" + id);
                value.set(message);
                writer.append(key, value);
                count++;
            }
            rs.close();
            stmt.close();
            c.close();
            writer.close();
            System.out.println("Wrote " + count + " entries.");
        } catch (Exception e) {
            System.err.println(e.getClass().getName() + ": " + e.getMessage());
            System.exit(0);
        }
    }
From source file:JavaCustomReceiver.java
License:Apache License
    /** Create a socket connection and receive data until receiver is stopped */
    private void receive() {
        Socket socket = null;
        String userInput = null;
        try {
            // connect to the server
            socket = new Socket(host, port);
            // BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));

            // Path pt = new Path("hdfs://192.168.0.1:9000/equinox-sanjose.20120119-netflow.txt");
            // FileSystem fs = FileSystem.get(new Configuration());
            Path pt = new Path("hdfs://192.168.0.1:9000/user/hduser/equinox-sanjose.20120119-netflow.txt");
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/local/hadoop/conf/core-site.xml"));
            conf.addResource(new Path("/usr/local/hadoop/conf/hdfs-site.xml"));
            // FileSystem fs = FileSystem.get(conf);
            FileSystem fs = pt.getFileSystem(conf);
            System.out.println(fs.getHomeDirectory());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));
            // BufferedReader in = new BufferedReader(
            //     new FileReader("/home/hduser/spark_scratchPad/equinox-sanjose.20120119-netflow.txt"));

            // Until stopped or connection broken continue reading
            while (!isStopped() && (userInput = in.readLine()) != null) {
                System.out.println("Received data '" + userInput + "'");
                store(userInput);
            }
            in.close();
            socket.close();

            // Restart in an attempt to connect again when server is active again
            restart("Trying to connect again");
        } catch (ConnectException ce) {
            // restart if could not connect to server
            restart("Could not connect", ce);
        } catch (Throwable t) {
            restart("Error receiving data", t);
        }
    }
From source file:ClassifierHD.java
License:Apache License
    public static void main(String[] args) throws Exception {
        // Six positional arguments are used below (args[0]..args[5]), so require at least six.
        if (args.length < 6) {
            System.out.println(
                    "Arguments: [model] [label index] [dictionnary] [document frequency] [postgres table] [hdfs dir] [job_id]");
            return;
        }
        String modelPath = args[0];
        String labelIndexPath = args[1];
        String dictionaryPath = args[2];
        String documentFrequencyPath = args[3];
        String tablename = args[4];
        String inputDir = args[5];

        Configuration configuration = new Configuration();

        // model is a matrix (wordId, labelId) => probability score
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);
        StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

        // labels is a map label => classId
        Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
        Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath));
        Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration,
                new Path(documentFrequencyPath));

        // analyzer used to extract word from tweet
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

        int labelCount = labels.size();
        int documentCount = documentFrequency.get(-1).intValue();

        System.out.println("Number of labels: " + labelCount);
        System.out.println("Number of documents in training set: " + documentCount);

        Connection conn = null;
        PreparedStatement pstmt = null;
        try {
            Class.forName("org.postgresql.Driver");
            conn = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres",
                    "dbwpsdkdl");
            conn.setAutoCommit(false);
            String sql = "INSERT INTO " + tablename
                    + " (id,gtime,wtime,target,num,link,body,rep) VALUES (?,?,?,?,?,?,?,?);";
            pstmt = conn.prepareStatement(sql);

            FileSystem fs = FileSystem.get(configuration);
            FileStatus[] status = fs.listStatus(new Path(inputDir));
            BufferedWriter bw = new BufferedWriter(
                    new OutputStreamWriter(fs.create(new Path(inputDir + "/rep.list"), true)));

            for (int i = 0; i < status.length; i++) {
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                if (new String(status[i].getPath().getName()).equals("rep.list")) {
                    continue;
                }
                int lv_HEAD = 1;
                int lv_cnt = 0;
                String lv_gtime = null;
                String lv_wtime = null;
                String lv_target = null;
                BigDecimal lv_num = null;
                String lv_link = null;
                String[] lv_args;
                String lv_line;
                StringBuilder lv_txt = new StringBuilder();
                while ((lv_line = br.readLine()) != null) {
                    if (lv_cnt < lv_HEAD) {
                        lv_args = lv_line.split(",");
                        lv_gtime = lv_args[0];
                        lv_wtime = lv_args[1];
                        lv_target = lv_args[2];
                        lv_num = new BigDecimal(lv_args[3]);
                        lv_link = lv_args[4];
                    } else {
                        lv_txt.append(lv_line + '\n');
                    }
                    lv_cnt++;
                }
                br.close();

                String id = status[i].getPath().getName();
                String message = lv_txt.toString();

                Multiset<String> words = ConcurrentHashMultiset.create();

                TokenStream ts = analyzer.tokenStream("text", new StringReader(message));
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                int wordCount = 0;
                while (ts.incrementToken()) {
                    if (termAtt.length() > 0) {
                        String word = ts.getAttribute(CharTermAttribute.class).toString();
                        Integer wordId = dictionary.get(word);
                        if (wordId != null) {
                            words.add(word);
                            wordCount++;
                        }
                    }
                }
                ts.end();
                ts.close();

                Vector vector = new RandomAccessSparseVector(10000);
                TFIDF tfidf = new TFIDF();
                for (Multiset.Entry<String> entry : words.entrySet()) {
                    String word = entry.getElement();
                    int count = entry.getCount();
                    Integer wordId = dictionary.get(word);
                    Long freq = documentFrequency.get(wordId);
                    double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount);
                    vector.setQuick(wordId, tfIdfValue);
                }
                Vector resultVector = classifier.classifyFull(vector);
                double bestScore = -Double.MAX_VALUE;
                int bestCategoryId = -1;
                for (Element element : resultVector.all()) {
                    int categoryId = element.index();
                    double score = element.get();
                    if (score > bestScore) {
                        bestScore = score;
                        bestCategoryId = categoryId;
                    }
                }
                // System.out.println(message);
                // System.out.println(" => " + lv_gtime + lv_wtime + lv_link + id + ":" + labels.get(bestCategoryId));
                pstmt.setString(1, id);
                pstmt.setString(2, lv_gtime);
                pstmt.setString(3, lv_wtime);
                pstmt.setString(4, lv_target);
                pstmt.setBigDecimal(5, lv_num);
                pstmt.setString(6, lv_link);
                pstmt.setString(7, message.substring(1, Math.min(50, message.length())));
                pstmt.setString(8, labels.get(bestCategoryId));
                pstmt.addBatch();
                bw.write(id + "\t" + labels.get(bestCategoryId) + "\n");
            }
            pstmt.executeBatch();
            // pstmt.clearParameters();
            pstmt.close();
            conn.commit();
            conn.close();
            bw.close();
        } catch (Exception e) {
            System.err.println(e.getClass().getName() + ": " + e.getMessage());
            System.exit(0);
        }
        analyzer.close();
    }
From source file:ColumnStorageBasicTest.java
License:Open Source License
    public void createAllSingleProject(FileSystem fs) throws Exception {
        Configuration conf = new Configuration();
        conf.setInt("dfs.replication", 1);

        FormatDataFile[] fd = new FormatDataFile[7];
        for (int i = 0; i < 7; i++) {
            fd[i] = new FormatDataFile(conf);
        }

        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        Head head = new Head();
        head.setFieldMap(fieldMap);
        fd[0].create(byteFileName, head);

        fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        head = new Head();
        head.setFieldMap(fieldMap);
        fd[1].create(shortFileName, head);

        fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
        head = new Head();
        head.setFieldMap(fieldMap);
        fd[2].create(intFileName, head);

        fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
        head = new Head();
        head.setFieldMap(fieldMap);
        fd[3].create(longFileName, head);

        fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
        head = new Head();
        head.setFieldMap(fieldMap);
        fd[4].create(floatFileName, head);

        fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
        head = new Head();
        head.setFieldMap(fieldMap);
        fd[5].create(doubleFileName, head);

        fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));
        head = new Head();
        head.setFieldMap(fieldMap);
        fd[6].create(stringFileName, head);

        long begin = System.currentTimeMillis();

        int count = 10;
        for (int i = 0; i < count; i++) {
            Record record = new Record(1);
            record.addValue(new FieldValue((byte) i, (short) 0));
            fd[0].addRecord(record);

            record = new Record(1);
            record.addValue(new FieldValue((short) i, (short) 1));
            fd[1].addRecord(record);

            record = new Record(1);
            record.addValue(new FieldValue((int) i, (short) 2));
            fd[2].addRecord(record);

            record = new Record(1);
            record.addValue(new FieldValue((long) i, (short) 3));
            fd[3].addRecord(record);

            record = new Record(1);
            record.addValue(new FieldValue((float) i, (short) 4));
            fd[4].addRecord(record);

            record = new Record(1);
            record.addValue(new FieldValue((double) i, (short) 5));
            fd[5].addRecord(record);

            record = new Record(1);
            record.addValue(new FieldValue("hello konten" + i, (short) 6));
            fd[6].addRecord(record);
        }

        for (int i = 0; i < 7; i++) {
            fd[i].close();
        }

        /*
        createProjectByte(fs);
        createProjectShort(fs);
        createProjectInt(fs);
        createProjectLong(fs);
        createProjectFloat(fs);
        createProjectDouble(fs);
        createProjectString(fs);
        */

        long end = System.currentTimeMillis();
        System.out.println("createAllProject delay:" + (end - begin) / 1000);
    }
From source file:ColumnStorageBasicTest.java
License:Open Source License
    public void createProjectByte(FileSystem fs) throws Exception {
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));

        Head head = new Head();
        head.setFieldMap(fieldMap);

        Configuration conf = new Configuration();
        conf.setInt("dfs.replication", 1);

        FormatDataFile fd = new FormatDataFile(conf);

        int count = 10;
        for (int i = 0; i < count; i++) {
            Record record = new Record(1);
            record.addValue(new FieldValue((byte) i, (short) 0));
            fd.addRecord(record);
        }
        fd.close();
    }