List of usage examples for org.apache.hadoop.conf.Configuration
public Configuration()
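
Before the per-project examples, here is a minimal, self-contained sketch of what the no-argument constructor gives you: a Configuration that loads the default resources (core-default.xml and core-site.xml) from the classpath and supports string and typed property access. The property names my.example.key and my.example.retries are hypothetical, used only for illustration.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSketch {
    public static void main(String[] args) {
        // new Configuration() picks up core-default.xml and core-site.xml
        // if they are present on the classpath.
        Configuration conf = new Configuration();

        // Set a property programmatically (overrides the loaded resources).
        conf.set("my.example.key", "hello");

        // Read it back; the second argument is the default if the key is unset.
        System.out.println(conf.get("my.example.key", "fallback"));

        // Typed accessors parse the stored string value.
        int retries = conf.getInt("my.example.retries", 3);
        System.out.println("retries = " + retries);
    }
}

Every example below starts from exactly this constructor call before handing the Configuration to FileSystem.get(...), a Job, or ToolRunner.run(...).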
From source file: ColumnStorageBasicTest.java
License: Open Source License

public void testConstructorFieldNoExist() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(path, true);

        createAllSingleProject(fs);
        createMultiProject(fs);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 10); // field index 10 does not exist

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);
        fail("should get exception");
    } catch (SEException.InvalidParameterException e) {
        // expected: the constructor rejects a nonexistent field index
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file: ColumnStorageBasicTest.java
License: Open Source License

public void testConstructorFieldInSameFile() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 7);
        idxs.add((short) 9); // fields 7 and 9 are stored in the same file

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        if (client.cp == null) {
            fail("cp null");
        }
        if (client.list.size() != 1) {
            fail("error list size:" + client.list.size());
        }
        if (!client.list.get(0).equals(multiFileNameString)) {
            fail("error filename:" + client.list.get(0));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file: ColumnStorageBasicTest.java
License: Open Source License

public void testConstructorFieldInDiffFile() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 0);
        idxs.add((short) 7);
        idxs.add((short) 4);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        if (client.cp == null) {
            fail("cp null");
        }
        if (client.fds.length != 3) {
            fail("error fds.len:" + client.fds.length);
        }
        for (int i = 0; i < client.fds.length; i++) {
            if (client.fds[i] == null) {
                fail("null fd:" + i);
            }
        }
        if (client.list.size() != 3) {
            fail("error list size:" + client.list.size());
        }
        if (!client.list.get(0).equals(byteFileName)) {
            fail("error filename:" + client.list.get(0));
        }
        if (!client.list.get(1).equals(multiFileNameString)) {
            fail("error filename:" + client.list.get(1));
        }
        if (!client.list.get(2).equals(floatFileName)) {
            fail("error filename:" + client.list.get(2));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file: ColumnStorageBasicTest.java
License: Open Source License

public void testGetRecordByLine() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 0);
        idxs.add((short) 7);
        idxs.add((short) 4);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        Record record = client.getRecordByLine(-1);
        if (record != null) {
            fail("should return null record 1");
        }
        record = client.getRecordByLine(10);
        if (record != null) {
            fail("should return null record 2");
        }

        for (int i = 0; i < 10; i++) {
            record = client.getRecordByLine(i);
            if (record == null) {
                fail("should not return null record");
            }
            if (record.fieldValues().size() != 5) {
                fail("error field num:" + record.fieldValues().size());
            }
            record.show();
            judgeNofixRecord(record, i);
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file: MaleUsersBelow7Years.java

public static void main(String args[]) throws Exception {
    Configuration configuration = new Configuration();
    Job job = new Job(configuration, "CountMaleUsersLessThan7");
    job.setJarByClass(MaleUsersBelow7Years.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reducer.class);
    job.setCombinerClass(Reducer.class);

    // set input and output formats; mapper input, reducer output
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));   // path to input file
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // path to output file

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: PostgresClassifier.java
License: Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 5) {
        System.out.println(
                "Arguments: [model] [label index] [dictionary] [document frequency] [input postgres table]");
        return;
    }
    String modelPath = args[0];
    String labelIndexPath = args[1];
    String dictionaryPath = args[2];
    String documentFrequencyPath = args[3];
    String tablename = args[4];

    Configuration configuration = new Configuration();

    // model is a matrix (wordId, labelId) => probability score
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);
    StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

    // labels is a map label => classId
    Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
    Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath));
    Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration,
            new Path(documentFrequencyPath));

    // analyzer used to extract words from tweets
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    int labelCount = labels.size();
    int documentCount = documentFrequency.get(-1).intValue();

    System.out.println("Number of labels: " + labelCount);
    System.out.println("Number of documents in training set: " + documentCount);

    Connection c = null;
    Statement stmt = null;
    Statement stmtU = null;
    try {
        Class.forName("org.postgresql.Driver");
        c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres",
                "dbwpsdkdl");
        c.setAutoCommit(false);
        System.out.println("Opened database successfully");

        stmt = c.createStatement();
        stmtU = c.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT * FROM " + tablename + " WHERE rep is null");

        while (rs.next()) {
            String seq = rs.getString("seq");
            //String rep = rs.getString("rep");
            String body = rs.getString("body");
            //String category = rep;
            String id = seq;
            String message = body;
            //System.out.println("Doc: " + id + "\t" + message);

            Multiset<String> words = ConcurrentHashMultiset.create();

            // extract words from the tweet
            TokenStream ts = analyzer.tokenStream("text", new StringReader(message));
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int wordCount = 0;
            while (ts.incrementToken()) {
                if (termAtt.length() > 0) {
                    String word = ts.getAttribute(CharTermAttribute.class).toString();
                    Integer wordId = dictionary.get(word);
                    // if the word is not in the dictionary, skip it
                    if (wordId != null) {
                        words.add(word);
                        wordCount++;
                    }
                }
            }
            // Mark : Modified
            ts.end();
            ts.close();

            // create vector wordId => weight using tfidf
            Vector vector = new RandomAccessSparseVector(10000);
            TFIDF tfidf = new TFIDF();
            for (Multiset.Entry<String> entry : words.entrySet()) {
                String word = entry.getElement();
                int count = entry.getCount();
                Integer wordId = dictionary.get(word);
                Long freq = documentFrequency.get(wordId);
                double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount);
                vector.setQuick(wordId, tfIdfValue);
            }

            // With the classifier, we get one score for each label.
            // The label with the highest score is the one the tweet
            // is most likely associated with.
            Vector resultVector = classifier.classifyFull(vector);
            double bestScore = -Double.MAX_VALUE;
            int bestCategoryId = -1;
            for (Element element : resultVector.all()) {
                int categoryId = element.index();
                double score = element.get();
                if (score > bestScore) {
                    bestScore = score;
                    bestCategoryId = categoryId;
                }
                //System.out.print("  " + labels.get(categoryId) + ": " + score);
            }
            //System.out.println(" => " + labels.get(bestCategoryId));

            stmtU.executeUpdate("UPDATE " + tablename + " SET rep = '" + labels.get(bestCategoryId)
                    + "' WHERE seq = " + id);
        }
        rs.close();
        stmt.close();
        stmtU.close();
        c.commit();
        c.close();
        analyzer.close();
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
        System.exit(0);
    }
}
From source file: TestCodec.java
License: Open Source License

public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(conf);

    DataOutputBuffer chunksWriteBuffer = new DataOutputBuffer();
    CompressionOutputStream compressionOutputStream = codec.createOutputStream(chunksWriteBuffer);

    DataInputBuffer chunkReadBuffer = new DataInputBuffer();
    CompressionInputStream compressionInputStream = codec.createInputStream(chunkReadBuffer);

    String str = "laksjldfkjalskdjfl;aksjdflkajsldkfjalksjdflkajlsdkfjlaksjdflka";
    compressionOutputStream.write(str.getBytes());
    compressionOutputStream.finish();

    byte[] data = chunksWriteBuffer.getData();
    System.out.println(str.length());
    System.out.println(chunksWriteBuffer.getLength());

    chunkReadBuffer.reset(data, chunksWriteBuffer.getLength());
    DataOutputBuffer dob = new DataOutputBuffer();
    IOUtils.copyBytes(compressionInputStream, dob, conf);
    // decode the round-tripped bytes before printing; printing the raw
    // byte[] would only show its object reference
    System.out.println(new String(dob.getData(), 0, dob.getLength()));
}
From source file: PrimeDivisor.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PrimeDivisor.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: HadoopWordCount.java
License: Open Source License

public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    Path inputPath = new Path(HadoopWordCount.class.getClassLoader().getResource("books").getPath());
    Path outputPath = new Path(OUTPUT_PATH);

    // set up the Hadoop job config, the input and output paths and formats
    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(TextInputFormat.class);
    jobConfig.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(jobConfig, outputPath);
    TextInputFormat.addInputPath(jobConfig, inputPath);

    // delete the output directory, if it already exists
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JetConfig cfg = new JetConfig();
    cfg.setInstanceConfig(new InstanceConfig()
            .setCooperativeThreadCount(Math.max(1, getRuntime().availableProcessors() / 2)));

    JetInstance jetInstance = Jet.newJetInstance(cfg);
    Jet.newJetInstance(cfg);

    try {
        System.out.print("\nCounting words from " + inputPath);
        long start = nanoTime();
        jetInstance.newJob(buildDag(jobConfig)).execute().get();
        System.out.print("Done in " + NANOSECONDS.toMillis(nanoTime() - start) + " milliseconds.");
        System.out.println("Output written to " + outputPath);
    } finally {
        Jet.shutdownAll();
    }
}
From source file: BMTTableLoader.java
License: Apache License

public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new BMTTableLoader(), args);
    System.exit(res);
}