Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.iflytek.spider.parse; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.commons.cli.Options; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.ArrayFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.VersionMismatchException; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.util.GenericOptionsParser; import com.iflytek.spider.util.SpiderConfiguration; /* The text conversion of page's content, stored using gzip compression. * @see Parse#getText() */ public final class ParseText implements Writable { public static final String DIR_NAME = "parse_text"; private final static byte VERSION = 2; public ParseText() { } private String text; public ParseText(String text) { this.text = text; } public void readFields(DataInput in) throws IOException { byte version = in.readByte(); switch (version) { case 1: text = WritableUtils.readCompressedString(in); break; case VERSION: text = Text.readString(in); break; default: throw new VersionMismatchException(VERSION, version); } } public final void write(DataOutput out) throws IOException { out.write(VERSION); Text.writeString(out, text); } public final static ParseText read(DataInput in) throws IOException { ParseText parseText = new ParseText(); parseText.readFields(in); return parseText; } // // Accessor methods // public String getText() { return text; } public boolean equals(Object o) { if (!(o instanceof ParseText)) return false; ParseText other = (ParseText) o; return this.text.equals(other.text); } public String toString() { return text; } public static void main(String argv[]) throws Exception { String usage = "ParseText (-local | -dfs <namenode:port>) recno segment"; if (argv.length < 3) { System.out.println("usage:" + usage); return; } Options opts = new Options(); Configuration conf = SpiderConfiguration.create(); GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv); String[] remainingArgs = parser.getRemainingArgs(); FileSystem fs = FileSystem.get(conf); try { int recno = Integer.parseInt(remainingArgs[0]); String segment = remainingArgs[1]; String filename = new Path(segment, ParseText.DIR_NAME).toString(); ParseText parseText = new ParseText(); ArrayFile.Reader parseTexts = new ArrayFile.Reader(fs, filename, conf); parseTexts.get(recno, parseText); System.out.println("Retrieved " + recno + " from file " + filename); System.out.println(parseText); parseTexts.close(); } finally { fs.close(); } } }