Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.graph; import java.io.IOException; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.aliyun.odps.Odps; import com.aliyun.odps.OdpsException; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; import com.aliyun.odps.conf.Configuration; import com.aliyun.odps.counter.Counters; import com.aliyun.odps.graph.job.JobRunner; import com.aliyun.odps.mapred.RunningJob; import com.aliyun.odps.mapred.conf.SessionState; import com.aliyun.odps.utils.ReflectionUtils; import com.aliyun.odps.utils.StringUtils; /** * GraphJob {@link JobConf}???? ODPS Graph . * * <p> * ODPS Graph <a * href="http://en.wikipedia.org/wiki/Bulk_synchronous_parallel" >BSP (Bulk * Synchronous Parallel)</a> ??????? * </p> * * <p> * ODPS Graph ? * <ol> * <li><b></b> * <ul> * <li> {@link Vertex}/{@link Edge}??? * <li> {@link GraphLoader} ?? * <li>? ID ? Worker * </ul> * <li><b>super step</b> * <ul> * <li>???? * {@linkplain Vertex#compute(ComputeContext, Iterable) compute} * <li> {@linkplain Vertex#compute(ComputeContext, Iterable) compute} * <ul> * <li>???? * <li>??1/?2????3/ * <li> {@link Aggregator} ?? * <li>?????? * <li>???? Worker ? * </ul> * </ul> * <li><b>??</b> * <ul> * <li>??? * <li> * <li>? {@link Aggregator} * {@linkplain Aggregator#terminate(WorkerContext, com.aliyun.odps.io.Writable) * terminate} true * </ul> * </ol> * </p> * * <p> * GraphJob ??? * </p> * * <p> * <b> ODPS Graph ? {@link JobConf}?</b> * </p> * * <p> * ODPS Graph * <ul> * <li>{@link #setWorkerComputerClass(Class)} * <li>{@link #setGraphLoaderClass(Class)} * <li>{@link #setVertexClass(Class)} * <li>{@link #setAggregatorClass(Class)} * <li>{@link #setAggregatorClass(Class...)} * <li>{@link #setPartitionerClass(Class)} * <li>{@link #setCombinerClass(Class)} * </ul> * {@link GraphLoader} {@link Vertex} ????? * </p> * * <p> * * <ul> * <li>{@link #addInput(TableInfo)} * <li>{@link #addInput(TableInfo, String[])} * <li>{@link #addOutput(TableInfo)} * <li>{@link #addOutput(TableInfo, boolean)} * <li>{@link #addOutput(TableInfo, String)} * <li>{@link #addOutput(TableInfo, String, boolean)} * </ul> * </p> * * <p> * ODPS ? * <ul> * <li>{@link #addCacheResources(String)}? jar -resources ? * <li>{@link #addCacheResourcesToClassPath(String)} ? jar -libjars * ? * </ul> * </p> * * <p> * ODPS Graph * <ul> * <li>{@link #setSplitSize(long)} ??? MB 256? Worker * <li>{@link #setRuntimePartitioning(boolean)} Worker ???? * true * <li>{@link #setMaxIteration(int)} ? * -1?<=0?? * </ul> * </p> * * <p> * <b>??? ODPS Graph ?</b> * <ul> * <li>{@link #run()} ????? * <li>{@link #submit()} ????? * <li>{@link #isComplete()} ????????? * <li>{@link #isSuccessful()} ?????? * <li>{@link #getCounters()} ?? * </ul> * </p> * * <p> * ?PageRank * * <pre> * {@code * public static void main(String[] args) throws IOException { * * GraphJob job = new GraphJob(); * * job.setGraphLoaderClass(PageRankGraphLoader.class); * job.setVertexClass(PageRankVertex.class); * job.addInput(new TableInfo(args[0])); * job.addOutput(new TableInfo(args[1])); * * job.setMaxIteration(30); * * job.run(); * } } * </pre> * * </blockquote> * </p> * * @see Vertex * @see GraphLoader * @see Aggregator * @see WorkerComputer */ public class GraphJob extends JobConf { private static final Log LOG = LogFactory.getLog(GraphJob.class); private RunningJob rJob = null; /** * ODPS Graph . */ public GraphJob() { super(); } /** * ODPS Graph ?? CLASSPATH odps-graph.xml ?. * * @param loadDefaults * ? CLASSPATH odps-graph.xml ? */ @Deprecated public GraphJob(boolean loadDefaults) { super(loadDefaults); } /** * ODPS Graph . * * @param conf * ?? * @param js * ???? */ @Deprecated public GraphJob(Configuration conf, JobState js) { super(conf, js); } /** * ODPS Graph . * * @param conf * ?? */ public GraphJob(Configuration conf) { super(conf); } /** * ODPS Graph . * * Configuration-format XML ?? * * <pre> * <configuration> * <property> * <name>com.mycomp.xxx</name> * <value>xxx</value> * </property> * ... ... * </configuration> * </pre> * * @param config * Configuration-format XML ? */ @Deprecated public GraphJob(String config) { super(config); } /** * ??. * * @return ?true?false * @throws IOException */ public boolean isComplete() throws IOException { ensureState(JobState.RUNNING); return rJob.isComplete(); } /** * ???. * * @return ?true?false * @throws IOException */ public boolean isSuccessful() throws IOException { ensureState(JobState.RUNNING); return rJob.isSuccessful(); } /** * Kill ? * * @throws IOException */ public void killJob() throws IOException { ensureState(JobState.RUNNING); rJob.killJob(); } /** * ???? ODPS Graph ??. * * <p> * ????{@link IOException}? {@link #run()} ?? {@link #run()} * * </p> * * <p> * ????? * * <pre> * GraphJob job = new GraphJob(); * ... //config job * job.submit(); * while (!job.isComplete()) { * Thread.sleep(4000); // do your work or sleep * } * if (job.isSuccessful()) { * System.out.println("Job Success!"); * } else { * System.err.println("Job Failed!"); * } * </pre> * * </p> * * @throws IOException * ?? */ public void submit() throws IOException { ensureState(JobState.DEFINE); try { parseArgs(); String runner = "com.aliyun.odps.graph.job.NetworkJobRunner"; if (SessionState.get().isLocalRun()) { runner = "com.aliyun.odps.graph.local.LocalGraphJobRunner"; } JobRunner jobrunner = null; try { Class<? extends JobRunner> clz = (Class<? extends JobRunner>) Class.forName(runner); jobrunner = ReflectionUtils.newInstance(clz, this); } catch (ClassNotFoundException e) { LOG.fatal("Internal error: currupted installation.", e); throw new RuntimeException(e); } rJob = jobrunner.submit(); } catch (OdpsException oe) { LOG.error(StringUtils.stringifyException(oe)); throw new IOException(oe.getMessage()); } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); throw new IOException(e.getMessage()); } state = JobState.RUNNING; } /** * ???? ODPS Graph ?. * * <p> * ?{@link IOException} * <ul> * <li>?? * <li>? * <li>?{@link #submit()}?? * </ul> * </p> * * <p> * ?main???console <br/> * ?catchconsole?0catch????console0<br/> * </p> * * ? * * <pre> * GraphJob job = new GraphJob(); * ... //config job * job.run(); * </pre> * * @throws IOException * ????? IOException * @see #submit() */ public void run() throws IOException { if (state == JobState.DEFINE) { submit(); } rJob.waitForCompletion(); if (!rJob.isSuccessful()) { throw new IOException("Job failed!"); } } /** * ?? Counters ?ODPS Graph ? Worker Counters. * * @return ? Counters ? * @throws IOException */ public Counters getCounters() throws IOException { return rJob.getCounters(); } private void parseArgs() { Properties prop = System.getProperties(); String runmode = prop.getProperty("odps.runner.mode"); if (runmode != null && runmode.length() != 0) { SessionState.get().setLocalRun(runmode.equalsIgnoreCase("local")); } String resources = prop.getProperty("odps.cache.resources"); if (resources != null && !resources.trim().isEmpty()) { this.addCacheResourcesToClassPath(resources); } String project = prop.getProperty("odps.project.name"); if (prop.getProperty("odps.access.id") != null) { String endpoint = prop.getProperty("odps.end.point"); String accessId = prop.getProperty("odps.access.id"); String accessKey = prop.getProperty("odps.access.key"); Account account = new AliyunAccount(accessId, accessKey); Odps odps = new Odps(account); odps.setDefaultProject(project); if (endpoint != null && endpoint.length() != 0) { odps.setEndpoint(endpoint); } String logViewHost = prop.getProperty("odps.logview.host"); if (logViewHost != null && logViewHost.length() != 0) { odps.setLogViewHost(logViewHost); } // set running cluster to new odps String runningCluster = SessionState.get().getOdps().instances().getDefaultRunningCluster(); odps.instances().setDefaultRunningCluster(runningCluster); SessionState.get().setOdps(odps); } // if in local mode and no odps in sessionState, fill it. else if (SessionState.get().isLocalRun() && SessionState.get().getOdps() == null) { Account account = new AliyunAccount("defaultId", "defaultKey"); Odps odps = new Odps(account); odps.setDefaultProject(project); SessionState.get().setOdps(odps); } } }