Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.test; import static org.junit.Assert.assertEquals; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintStream; import java.util.Iterator; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.EvalFunc; import org.apache.pig.FuncSpec; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.builtin.COUNT; import org.apache.pig.builtin.PigStorage; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.io.FileLocalizer; import org.apache.pig.test.utils.TestHelper; import org.junit.Before; import org.junit.Test; public class TestLocal { private Log log = LogFactory.getLog(getClass()); private PigServer pig; @Before public void setUp() throws Exception { pig = new PigServer(Util.getLocalTestMode()); } @Test public void testBigGroupAll() throws Throwable { int LOOP_COUNT = 4 * 1024; File tmpFile = File.createTempFile(this.getClass().getName(), ".txt"); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); for (int i = 0; i < LOOP_COUNT; i++) { ps.println(i); } ps.close(); assertEquals(new Double(LOOP_COUNT), bigGroupAll(tmpFile)); tmpFile.delete(); } @Test public void testBigGroupAllWithNull() throws Throwable { int LOOP_COUNT = 4 * 1024; File tmpFile = File.createTempFile(this.getClass().getName(), ".txt"); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); long nonNullCnt = 0; for (int i = 0; i < LOOP_COUNT; i++) { if (i % 10 == 0) { ps.println(""); } else { ps.println(i); nonNullCnt++; } } ps.close(); assertEquals(new Double(nonNullCnt), bigGroupAll(tmpFile)); tmpFile.delete(); } public Double bigGroupAll(File tmpFile) throws Throwable { String query = "foreach (group (load '" + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "') all) generate " + COUNT.class.getName() + "($1) ;"; System.out.println(query); pig.registerQuery("asdf_id = " + query); Iterator<Tuple> it = pig.openIterator("asdf_id"); Tuple t = it.next(); return DataType.toDouble(t.get(0)); } static public class MyApply extends EvalFunc<DataBag> { String field0 = "Got"; public MyApply() { } public MyApply(String field0) { this.field0 = field0; } @Override public DataBag exec(Tuple input) throws IOException { DataBag output = BagFactory.getInstance().newDefaultBag(); for (Tuple t : DataType.toBag(input.get(0))) { Tuple newT = TupleFactory.getInstance().newTuple(2); newT.set(0, field0); newT.set(1, t.get(0).toString()); output.add(newT); } return output; } } static public class MyGroup extends EvalFunc<Tuple> { @Override public Tuple exec(Tuple input) throws IOException { try { Tuple output = TupleFactory.getInstance().newTuple(1); output.set(0, new String("g")); return output; } catch (ExecException ee) { IOException ioe = new IOException(ee.getMessage()); ioe.initCause(ee); throw ioe; } } } static public class MyStorage extends PigStorage { final static int COUNT = 10; int count = 0; boolean hasNulls = false; public void setNulls(boolean hasNulls) { this.hasNulls = hasNulls; } /** * */ public MyStorage() { // initialize delimiter to be "-" for output // since that is the delimiter in the tests below super("-"); } @Override public Tuple getNext() throws IOException { if (count < COUNT) { Tuple t = TupleFactory.getInstance().newTuple(Integer.toString(count++)); return t; } return null; } } @Test public void testStoreFunction() throws Throwable { File tmpFile = File.createTempFile("test", ".txt"); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); for (int i = 0; i < 10; i++) { ps.println(i + "\t" + i); } ps.close(); //Load, Execute and Store query String query = "foreach (load '" + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "') generate $0,$1;"; System.out.println(query); pig.registerQuery("asdf_id = " + query); try { pig.deleteFile("frog"); } catch (Exception e) { } pig.store("asdf_id", "frog", MyStorage.class.getName() + "()"); //verify query InputStream is = FileLocalizer.open("frog", pig.getPigContext()); BufferedReader br = new BufferedReader(new InputStreamReader(is)); String line; int i = 0; while ((line = br.readLine()) != null) { assertEquals(line, Integer.toString(i) + '-' + Integer.toString(i)); i++; } br.close(); try { pig.deleteFile("frog"); } catch (Exception e) { } tmpFile.delete(); } // This test: "testStoreFunction()" is equivalent to testStoreFunctionNoNulls() @Test public void testStoreFunctionNoNulls() throws Throwable { String[][] data = genDataSetFile1(10, false); storeFunction(data); } @Test public void testStoreFunctionWithNulls() throws Throwable { String[][] data = genDataSetFile1(10, true); storeFunction(data); } public void storeFunction(String[][] data) throws Throwable { File tmpFile = TestHelper.createTempFile(data); // Load, Execute and Store query String query = "foreach (load '" + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "') generate $0,$1;"; System.out.println(query); pig.registerQuery("asdf_id = " + query); try { pig.deleteFile("frog"); } catch (Exception e) { } pig.store("asdf_id", "frog", MyStorage.class.getName() + "()"); InputStream is = FileLocalizer.open("frog", pig.getPigContext()); BufferedReader br = new BufferedReader(new InputStreamReader(is)); String line; //verify query int i = 0; while ((line = br.readLine()) != null) { assertEquals(data[i][0] + '-' + data[i][1], line); i++; } br.close(); try { pig.deleteFile("frog"); } catch (Exception e) { } tmpFile.delete(); } @Test public void testQualifiedFunctions() throws Throwable { //create file File tmpFile = File.createTempFile("test", ".txt"); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); for (int i = 0; i < 1; i++) { ps.println(i); } ps.close(); // execute query String query = "foreach (group (load '" + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "' using " + MyStorage.class.getName() + "()) by " + MyGroup.class.getName() + "('all')) generate flatten(" + MyApply.class.getName() + "($1)) ;"; System.out.println(query); pig.registerQuery("asdf_id = " + query); //Verfiy query Iterator<Tuple> it = pig.openIterator("asdf_id"); Tuple t; int count = 0; while (it.hasNext()) { t = it.next(); assertEquals(t.get(0).toString(), "Got"); Integer.parseInt(t.get(1).toString()); count++; } assertEquals(MyStorage.COUNT, count); tmpFile.delete(); } @Test public void testQualifiedFunctionsWithNulls() throws Throwable { //create file File tmpFile = File.createTempFile("test", ".txt"); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); for (int i = 0; i < 1; i++) { if (i % 10 == 0) { ps.println(""); } else { ps.println(i); } } ps.close(); // execute query String query = "foreach (group (load '" + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "' using " + MyStorage.class.getName() + "()) by " + MyGroup.class.getName() + "('all')) generate flatten(" + MyApply.class.getName() + "($1)) ;"; System.out.println(query); pig.registerQuery("asdf_id = " + query); //Verfiy query Iterator it = pig.openIterator("asdf_id"); Tuple t; int count = 0; while (it.hasNext()) { t = (Tuple) it.next(); assertEquals(t.get(0).toString(), "Got"); Integer.parseInt(t.get(1).toString()); count++; } assertEquals(MyStorage.COUNT, count); tmpFile.delete(); } @Test public void testDefinedFunctions() throws Throwable { File tmpFile = File.createTempFile("test", ".txt"); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); for (int i = 0; i < 1; i++) { ps.println(i); } ps.close(); pig.registerFunction("foo", new FuncSpec(MyApply.class.getName() + "('foo')")); String query = "foreach (group (load '" + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "' using " + MyStorage.class.getName() + "()) by " + MyGroup.class.getName() + "('all')) generate flatten(foo($1)) ;"; System.out.println(query); pig.registerQuery("asdf_id = " + query); Iterator it = pig.openIterator("asdf_id"); tmpFile.delete(); Tuple t; int count = 0; while (it.hasNext()) { t = (Tuple) it.next(); assertEquals("foo", t.get(0).toString()); Integer.parseInt(t.get(1).toString()); count++; } assertEquals(count, MyStorage.COUNT); } // this test is equivalent to testDefinedFunctions() @Test public void testDefinedFunctionsNoNulls() throws Throwable { String[][] data = genDataSetFile1(10, false); definedFunctions(data); } @Test public void testDefinedFunctionsWithNulls() throws Throwable { String[][] data = genDataSetFile1(10, true); definedFunctions(data); } public void definedFunctions(String[][] data) throws Throwable { File tmpFile = TestHelper.createTempFile(data); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); for (int i = 0; i < 1; i++) { ps.println(i); } ps.close(); pig.registerFunction("foo", new FuncSpec(MyApply.class.getName() + "('foo')")); String query = "foreach (group (load '" + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "' using " + MyStorage.class.getName() + "()) by " + MyGroup.class.getName() + "('all')) generate flatten(foo($1)) ;"; System.out.println(query); pig.registerQuery("asdf_id = " + query); Iterator it = pig.openIterator("asdf_id"); tmpFile.delete(); Tuple t; int count = 0; while (it.hasNext()) { t = (Tuple) it.next(); assertEquals("foo", t.get(0).toString()); if (t.get(1).toString() != "") { Integer.parseInt(t.get(1).toString()); } count++; } assertEquals(count, MyStorage.COUNT); } // @Test // public void testPigServer() throws Throwable { // log.debug("creating pig server"); // PigContext pigContext = new PigContext(ExecType.MAPREDUCE, cluster.getProperties()); // PigServer pig = new PigServer(pigContext); // System.out.println("testing capacity"); // long capacity = pig.capacity(); // assertTrue(capacity > 0); // String sampleFileName = "/tmp/fileTest"; // if (!pig.existsFile(sampleFileName)) { // ElementDescriptor path = pigContext.getDfs().asElement(sampleFileName); // OutputStream os = path.create(); // os.write("Ben was here!".getBytes()); // os.close(); // } // long length = pig.fileSize(sampleFileName); // assertTrue(length > 0); // } /*** * For generating a sample dataset as * * no nulls: * $0 $1 * 0 9 * 1 1 * .... * 9 9 * * has nulls: * $0 $1 * 0 9 * 1 1 * 2 * 3 3 * 4 4 * 5 5 * 6 * 7 7 * * 9 9 * */ private String[][] genDataSetFile1(int dataLength, boolean hasNulls) throws IOException { String[][] data = new String[dataLength][]; if (hasNulls == true) { for (int i = 0; i < dataLength; i++) { data[i] = new String[2]; if (i == 2) { data[i][0] = ""; data[i][1] = new Integer(i).toString(); } else if (i == 6) { data[i][0] = new Integer(i).toString(); data[i][1] = ""; } else if (i == 8) { data[i][0] = ""; data[i][1] = ""; } else { data[i][0] = new Integer(i).toString(); data[i][1] = new Integer(i).toString(); } } } else { for (int i = 0; i < dataLength; i++) { data[i] = new String[2]; data[i][0] = new Integer(i).toString(); data[i][1] = new Integer(i).toString(); } } return data; } }