Java tutorial
/* TarToSeqFile.java - Convert tar files into Hadoop SequenceFiles. * * Copyright (C) 2008 Stuart Sierra * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You * may obtain a copy of the License at * http:www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.altlaw.hadoop; /* From ant.jar, http://ant.apache.org/ */ import org.apache.tools.bzip2.CBZip2InputStream; import org.apache.tools.tar.TarEntry; import org.apache.tools.tar.TarInputStream; /* From hadoop-*-core.jar, http://hadoop.apache.org/ * Developed with Hadoop 0.16.3. */ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.io.IOException; import java.io.StringWriter; import java.util.zip.GZIPInputStream; import java.util.ArrayList; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; //import java.io.File; //import java.io.FileInputStream; import java.io.InputStreamReader; import org.json.simple.JSONObject; import org.json.simple.JSONValue; /** * @author SHIBAO KOUICHIRO <shibacow@gmail.com> * Parase JSON add INFO * ????SMID??SequenceFile?? */ public class JsonReader { public JsonReader() { } private String getSMID(String stm) { String[] basenames = stm.split("/", -1); int sz = basenames.length; String basename = basenames[sz - 1]; String filename = basename.split("\\.", -1)[0]; return filename; } private void appendStringToOuput(String k, String v, SequenceFile.Writer output) throws IOException { Text key = new Text(k); BytesWritable value = new BytesWritable(v.getBytes()); output.append(key, value); } public void addSmidInJson(String src, String filename, SequenceFile.Writer output) throws Exception { InputStreamReader isr = new InputStreamReader(new FileInputStream(src)); BufferedReader br = new BufferedReader(isr); String line; StringBuffer strings = new StringBuffer(); while ((line = br.readLine()) != null) { JSONObject obj = (JSONObject) JSONValue.parse(line); obj.put("filename", filename); obj.put("video_id", this.getSMID(filename)); StringWriter out = new StringWriter(); obj.writeJSONString(out); String jsonText = out.toString(); this.appendStringToOuput(filename, jsonText, output); } br.close(); isr.close(); } }