List of usage examples for org.apache.hadoop.conf Configuration Configuration
public Configuration()
From source file:TestIFormatDataFile.java
License:Open Source License
/**
 * Test fixture setup: delegates to the parent class and then assigns a newly
 * constructed {@code Configuration} to {@code conf}.
 *
 * @throws Exception if the parent setup fails
 */
@Override
protected void setUp() throws Exception {
    super.setUp();
    conf = new Configuration();
}
From source file:Hw2Part1.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: <input file> <output directory>"); System.exit(2);//from w w w.j a v a2 s . c o m } // FileSystem hdfs = FileSystem.get(conf); String target = "hdfs://localhost:9000/"; FileSystem fs = FileSystem.get(URI.create(target), conf);//is diffrent Path outputpath = new Path(otherArgs[otherArgs.length - 1]); if (fs.exists(outputpath)) { fs.delete(outputpath, true); } Job job = Job.getInstance(conf, "Hw2Part1"); job.setJarByClass(Hw2Part1.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumCombiner.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(InfoWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(InfoWritable.class); // add the input paths as given by command line for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } // add the output path as given by the command line FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:ReadIndexFileTest.java
License:Open Source License
/**
 * Opens {@code /tmp/output/part-00000} as a {@code FormatDataFile} and shows the
 * records at line indexes 0 through 4999.
 *
 * <p>The file is closed in a {@code finally} block so that a failure while
 * fetching or showing a record does not leave it open.
 *
 * @throws Exception if opening, reading or closing the file fails
 */
static void readtest() throws Exception {
    String filename = "/tmp/output/part-00000";
    FormatDataFile fdf = new FormatDataFile(new Configuration());
    fdf.open(filename);
    try {
        for (int i = 0; i < 5000; i++) {
            Record rec = fdf.getRecordByLine(i);
            rec.show();
        }
    } finally {
        fdf.close();
    }
}
From source file:ReadIndexFileTest.java
License:Open Source License
/**
 * Opens {@code /tmp/output/part-00000} as a {@code FormatDataFile}, queries it by
 * two field values, prints how many records were returned and shows each of them.
 *
 * <p>The original never closed the file; it is now closed in a {@code finally}
 * block once it has been opened successfully.
 *
 * @throws Exception if opening, querying or closing the file fails
 */
static void readvaluetest() throws Exception {
    String filename = "/tmp/output/part-00000";
    FormatDataFile fdf = new FormatDataFile(new Configuration());
    fdf.open(filename);
    try {
        FieldValue[] values = new FieldValue[2];
        values[0] = new FieldValue((byte) 100, (short) 0);
        values[1] = new FieldValue((short) -4078, (short) 1);

        // NOTE(review): the second argument (100) is presumably a result limit - confirm
        // against FormatDataFile.getRecordByValue.
        Record[] recs = fdf.getRecordByValue(values, 100);
        System.out.println(recs.length);
        for (int i = 0; i < recs.length; i++) {
            recs[i].show();
        }
    } finally {
        fdf.close();
    }
}
From source file:Tmptest.java
License:Open Source License
public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); Path path = new Path("/se/tmp/628892613/part-00000"); String filename = path.toString(); IFormatDataFile ifd = new IFormatDataFile(conf); ifd.open(filename);// w w w. java 2 s . c o m ISegmentIndex segmentIndex = ifd.segIndex(); for (String str : ifd.fileInfo().head().getUdi().infos().values()) { System.out.println(str); } System.out.println(segmentIndex.getSegnum()); IRecord record = new IRecord(); ifd.next(record); record.show(); ifd.next().show(); ifd.next().show(); ifd.close(); }
From source file:TestFuseDFS.java
License:Apache License
static public void startStuff() { try {// w w w. j av a 2 s . c o m Configuration conf = new Configuration(); conf.setBoolean("dfs.permissions", false); cluster = new MiniDFSCluster(conf, 1, true, null); fileSys = (DistributedFileSystem) cluster.getFileSystem(); assertTrue(fileSys.getFileStatus(new Path("/")).isDir()); mount(mpoint, fileSys.getUri()); } catch (Exception e) { e.printStackTrace(); } }
From source file:WordCountD.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);/*from www .j av a2 s. com*/ } Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WordCountC.class); job.setMapperClass(TokenizerMapper.class); // Disable the combiner // job.setCombinerClass(IntSumReducer.class); // Setup the Partitioner job.setPartitionerClass(Letterpartitioner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:LookupPostingsCompressed.java
License:Apache License
/** * Runs this tool./* w w w . j av a 2 s. com*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(LookupPostingsCompressed.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX); String collectionPath = cmdline.getOptionValue(COLLECTION); if (collectionPath.endsWith(".gz")) { System.out.println("gzipped collection is not seekable: use compressed version!"); System.exit(-1); } Configuration config = new Configuration(); FileSystem fs = FileSystem.get(config); MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config); FSDataInputStream collection = fs.open(new Path(collectionPath)); BufferedReader d = new BufferedReader(new InputStreamReader(collection)); Text key = new Text(); PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>(); System.out.println("Looking up postings for the term \"starcross'd\""); key.set("starcross'd"); reader.get(key, value); BytesWritable postings = value.getRightElement(); ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes()); DataInputStream in = new DataInputStream(buffer); int OFFSET = 0; int count; while 
(in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); System.out.print("(" + OFFSET + ", " + count + ")"); collection.seek(OFFSET); System.out.println(d.readLine()); } OFFSET = 0; key.set("gold"); reader.get(key, value); postings = value.getRightElement(); buffer = new ByteArrayInputStream(postings.copyBytes()); in = new DataInputStream(buffer); System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", ["); while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); System.out.print("(" + OFFSET + ", " + count + ")"); //collection.seek(OFFSET); //System.out.println(d.readLine()); System.out.print(", "); } System.out.print("])\n"); Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry(); buffer.reset(); OFFSET = 0; while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); goldHist.increment(count); } System.out.println("histogram of tf values for gold"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } buffer.close(); //Silver key.set("silver"); reader.get(key, value); postings = value.getRightElement(); buffer = new ByteArrayInputStream(postings.copyBytes()); in = new DataInputStream(buffer); System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", ["); while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); System.out.print("(" + OFFSET + ", " + count + ")"); //collection.seek(OFFSET); //System.out.println(d.readLine()); System.out.print(", "); } System.out.print("])\n"); Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry(); buffer.reset(); OFFSET = 0; while (in.available() != 0) { OFFSET = OFFSET + WritableUtils.readVInt(in); count = WritableUtils.readVInt(in); 
silverHist.increment(count); } System.out.println("histogram of tf values for silver"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } buffer.close(); key.set("bronze"); Writable w = reader.get(key, value); if (w == null) { System.out.println("the term bronze does not appear in the collection"); } collection.close(); reader.close(); return 0; }
From source file:FormatStoragePerformanceTest.java
License:Open Source License
/**
 * Writes {@code count} identical comma-separated text lines to
 * {@code MR_input_text/testPerformanceReadText} (suffix {@code _var} and a longer
 * line when {@code var} is true) and reports progress and total elapsed seconds
 * through {@code output}.
 *
 * <p>The writer is closed in a {@code finally} block so the underlying output
 * stream is released even if a write fails part-way. Any exception is printed and
 * not propagated.
 *
 * @param count number of lines to write
 * @param var   whether to write the variant with the extra trailing text field
 */
static void doInitFile(int count, boolean var) {
    try {
        String textFile = "MR_input_text/testPerformanceReadText";
        if (var) {
            textFile += "_var";
        }

        Path path = new Path(textFile);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        OutputStream stream = new BufferedOutputStream(out);
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream));

        String value = null;
        if (var) {
            value = "111,22222,33333333,444444444444,5555555.5555,6666666666.666666,hello konten\n";
        } else {
            value = "111,22222,33333333,444444444444,5555555.5555,6666666666.666666\n";
        }

        long begin = System.currentTimeMillis();
        try {
            for (int i = 0; i < count; i++) {
                writer.write(value);

                // Progress line every ten million records (including record 0).
                if (i % 10000000 == 0) {
                    String string = "write " + i + " record, delay: "
                            + ((System.currentTimeMillis() - begin) / 1000) + " s \n";
                    output.write(string.getBytes());
                }
            }
        } finally {
            // Closing the writer flushes it and closes the wrapped streams.
            writer.close();
        }

        // Measured after close so the elapsed time includes the final flush.
        long end = System.currentTimeMillis();

        String string = "write " + count + " record over, delay: " + ((end - begin) / 1000) + " s \n";
        output.write(string.getBytes());
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(e.getMessage());
    }
}
From source file:FormatStoragePerformanceTest.java
License:Open Source License
static void doFormatReadRand(int count, boolean var) { try {//from w w w . j a va 2 s . c o m String fileName = "MR_input/testMassRecord"; if (var) { fileName += "_var"; } Configuration conf = new Configuration(); FormatDataFile fd = new FormatDataFile(conf); fd.open(fileName); long begin = System.currentTimeMillis(); for (int i = 0; i < count; i++) { int rand = (int) (Math.random() * count); Record record = fd.getRecordByLine(rand); if (record == null) { String string = "record no:" + rand + " return null"; output.write(string.getBytes()); } if (i % (1 * 10000) == 0) { String string = "read format rand " + i + " record, delay:" + ((System.currentTimeMillis() - begin) / 1000) + " s \n"; output.write(string.getBytes()); } } long end = System.currentTimeMillis(); String string = "Read Foramt Rand over, count:" + count + ", delay:" + (long) ((end - begin) / 1000) + " s"; output.write(string.getBytes()); System.out.println(string); } catch (IOException e) { e.printStackTrace(); System.out.println("get IOException:" + e.getMessage()); } catch (Exception e) { e.printStackTrace(); System.out.println("get exception:" + e.getMessage()); } }