Java tutorial
/*
 * Copyright 2010 LinkedIn, Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.linkedin.mr_kluj;

import azkaban.common.jobs.AbstractJob;
import clojure.lang.IFn;
import clojure.lang.RT;
import clojure.lang.Symbol;
import clojure.lang.Var;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Properties;

/**
 * Job that evaluates a Clojure script and runs the Hadoop jobs it produces, one after another.
 *
 * <p>The script text is taken from the {@code li.clj.source} property when present. Otherwise
 * the {@code li.clj.source.file} property names where to load it from; that name is tried as a
 * classpath resource, then as a path handed to Hadoop's {@link FileSystem}, then as a local
 * {@link File}.
 *
 * <p>The script is wrapped with a {@code require} of {@code com.linkedin.mr-kluj.job} and a
 * trailing {@code (map job/starter the-jobs)} form, so the script is expected to define
 * {@code the-jobs}. Each function in the resulting sequence is invoked to obtain a {@link Job},
 * which is then run to completion before the next one is started.
 */
public class GenericClojureJob extends AbstractJob {

    /** Property / job-configuration key holding the Clojure script text. */
    public static final String LI_CLJ_SOURCE = "li.clj.source";

    /** Job-configuration key holding the zero-based position of the job within the script. */
    public static final String LI_CLJ_JOB_INDEX = "li.clj.job-index";

    /** Job-configuration key holding this job's properties serialized as XML. */
    public static final String LI_CLJ_PROPERTIES = "li.clj.properties";

    /** Property naming the location to load the script from when no inline source is given. */
    private static final String LI_CLJ_SOURCE_FILE = "li.clj.source.file";

    /**
     * Charset used when turning bytes into text. Spelled by name rather than via
     * {@code StandardCharsets} so the class keeps compiling on pre-Java-7 toolchains.
     */
    private static final String UTF_8 = "UTF-8";

    private final Properties props;

    /**
     * Creates the job with a private copy of the given properties.
     *
     * @param name  job name, passed through to the superclass constructor
     * @param props configuration; every entry reported by
     *              {@link Properties#stringPropertyNames()} is copied, so later changes to the
     *              caller's object are not seen by this job
     */
    public GenericClojureJob(String name, Properties props) {
        super(name);

        this.props = new Properties();
        for (String propKey : props.stringPropertyNames()) {
            this.props.setProperty(propKey, props.getProperty(propKey));
        }
    }

    /**
     * Resolves the script, evaluates it, and runs every job it yields in order.
     *
     * @throws RuntimeException if no script source is configured or can be found, if the script
     *                          cannot be read or evaluated, or if any job reports failure
     */
    public void run() {
        info("Starting " + getClass().getSimpleName());

        final String cljSource = resolveClojureSource();

        final String theActualFunction = String.format(
            "(require '[com.linkedin.mr-kluj.job :as job])\n\n" + "%s\n" + "(map job/starter the-jobs)\n",
            cljSource);

        info("--- Source: ---");
        info(theActualFunction);
        info(" --------- ");

        try {
            RT.var("clojure.core", "require").invoke(Symbol.intern("clojure.main"));

            Var.pushThreadBindings(
                RT.map(
                    RT.var("clojure.core", "*warn-on-reflection*"), RT.T,
                    RT.var("user", "*context*"), null,
                    RT.var("user", "*props*"), props));
            try {
                // The jobs are consumed inside this try so that any evaluation deferred until
                // iteration still happens while the bindings are in place.
                runJobs(theActualFunction, cljSource);
            } finally {
                // Every push must be matched by a pop, otherwise the bindings stay attached to
                // the calling thread after this method returns or throws.
                Var.popThreadBindings();
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code <java-command> clj-script-file key value key value}. The first argument
     * becomes {@code li.clj.source.file}; the remaining arguments are read as key/value pairs.
     * If the argument count is not odd, the usage line is printed and nothing is run.
     *
     * @param args script location followed by zero or more key/value pairs
     * @throws IOException declared for backward compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        final Properties props = new Properties();

        if (args.length % 2 != 1) {
            System.out.println("Usage: <java-command> clj-script-file key value key value");
            return;
        }

        props.setProperty(LI_CLJ_SOURCE_FILE, args[0]);
        for (int i = 1; i < args.length; i += 2) {
            props.setProperty(args[i], args[i + 1]);
        }

        new GenericClojureJob("scrap-job-Cheddar", props).run();
    }

    /**
     * Returns the script text, loading it from {@code li.clj.source.file} when
     * {@code li.clj.source} is not set. A loaded script is stored back under
     * {@code li.clj.source}.
     */
    private String resolveClojureSource() {
        final String inlineSource = props.getProperty(LI_CLJ_SOURCE);
        if (inlineSource != null) {
            return inlineSource;
        }

        final String resourceName = props.getProperty(LI_CLJ_SOURCE_FILE);
        if (resourceName == null) {
            throw new RuntimeException(
                "Must define either li.clj.source or li.clj.source.file on the Props object.");
        }

        final URL resource = locateResource(resourceName);
        final String cljSource;
        try {
            cljSource = readAsString(resource);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        props.setProperty(LI_CLJ_SOURCE, cljSource);
        return cljSource;
    }

    /**
     * Finds the script by trying, in order: the class loader, a Hadoop file-system path
     * (copied to a local temp file), and a plain local file.
     */
    private URL locateResource(String resourceName) {
        URL resource = getClass().getClassLoader().getResource(resourceName);

        if (resource == null) {
            // Perhaps it's a URL for a Hadoop-understood file-system
            try {
                resource = getScriptFromPath(new Configuration(), resourceName).toURI().toURL();
            } catch (Exception ignored) {
                // Deliberate best-effort: perhaps it wasn't, so fall through to the next lookup.
            }
        }

        if (resource == null) {
            // Maybe it's a file
            File theFile = new File(resourceName);
            if (theFile.exists()) {
                try {
                    resource = theFile.toURI().toURL();
                } catch (MalformedURLException e) {
                    throw new RuntimeException("WTF?", e);
                }
            }
        }

        if (resource == null) {
            throw new RuntimeException(
                String.format("Resource[%s] does not exist on the classpath.", resourceName));
        }
        return resource;
    }

    /** Reads the whole resource as UTF-8 text, closing the stream whether or not the read succeeds. */
    private static String readAsString(URL resource) throws IOException {
        final InputStream in = resource.openStream();
        try {
            return new String(getBytes(in), UTF_8);
        } finally {
            in.close();
        }
    }

    /**
     * Evaluates the wrapped script and runs each resulting job to completion, stopping at the
     * first one that reports failure.
     *
     * @param script    the full text handed to the Clojure compiler
     * @param cljSource the user's script, recorded in every job's configuration
     */
    private void runJobs(String script, String cljSource) throws Exception {
        // The compiler returns Object; the appended (map job/starter the-jobs) form is what
        // makes the result a sequence, and each element is invoked as a function below.
        @SuppressWarnings("unchecked")
        final Iterable<IFn> jobs = (Iterable<IFn>) clojure.lang.Compiler.load(
            new StringReader(script), "start-job-input", "clj-job");

        int count = 0;
        for (IFn ifn : jobs) {
            final Job job = (Job) ifn.invoke();

            job.getConfiguration().set(LI_CLJ_SOURCE, cljSource);
            job.getConfiguration().set(LI_CLJ_JOB_INDEX, String.valueOf(count));

            // storeToXML(os, comment) writes UTF-8, so the bytes must be decoded as UTF-8 and
            // not with the platform default charset.
            // NOTE(review): whoever reads LI_CLJ_PROPERTIES back should encode the string as
            // UTF-8 before parsing it; confirm against the consuming code.
            final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024 * 10);
            props.storeToXML(baos, null);
            job.getConfiguration().set(LI_CLJ_PROPERTIES, baos.toString(UTF_8));

            info(String.format("Starting job %s[%s]", job.getJobID(), job.getJobName()));

            final boolean jobCompleted = job.waitForCompletion(true);
            ++count;

            if (!jobCompleted) {
                throw new RuntimeException(String.format("Job[%s] failed for some reason.", job.getJobID()));
            }
        }
    }

    /**
     * Drains the stream into a byte array. The stream is not closed here; that is left to the
     * caller.
     *
     * @param in stream to read until end-of-stream
     * @return every byte read
     * @throws IOException if reading fails
     */
    private static byte[] getBytes(final InputStream in) throws IOException {
        final byte[] buffer = new byte[16 * 1024];
        final ByteArrayOutputStream cljBytes = new ByteArrayOutputStream();

        int numRead;
        // -1 is the end-of-stream sentinel; a zero-length read must not end the copy early.
        while ((numRead = in.read(buffer)) != -1) {
            cljBytes.write(buffer, 0, numRead);
        }

        return cljBytes.toByteArray();
    }

    /**
     * Copies the file at {@code path} from the file system Hadoop resolves for it into a local
     * temp file.
     *
     * @param conf Hadoop configuration used to resolve the file system
     * @param path location of the script, parsed as a {@link URI}
     * @return the local temp file holding the copy; it is registered for deletion at JVM exit
     * @throws Exception if the path is not a valid URI or the copy fails
     */
    static File getScriptFromPath(Configuration conf, String path) throws Exception {
        final FileSystem fs = FileSystem.get(new URI(path), conf);

        final File localCopyOfPath = File.createTempFile("resources", "tmp");
        // Registered before the copy so a failed copy does not leave the temp file behind.
        localCopyOfPath.deleteOnExit();

        fs.copyToLocalFile(new Path(path), new Path(localCopyOfPath.getAbsolutePath()));

        return localCopyOfPath;
    }
}