Java tutorial
/* * Copyright 2009-2013 by The Regents of the University of California * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * you may obtain a copy of the License from * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.uci.ics.pregelix.dataflow; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.dataflow.IOperatorNodePushable; import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider; import org.apache.hyracks.api.dataflow.value.RecordDescriptor; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.job.JobSpecification; import org.apache.hyracks.dataflow.common.comm.io.FrameDeserializer; import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor; import org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable; import org.apache.hyracks.hdfs.ContextFactory; import edu.uci.ics.pregelix.api.graph.Vertex; import edu.uci.ics.pregelix.api.io.VertexOutputFormat; import edu.uci.ics.pregelix.api.io.VertexWriter; import edu.uci.ics.pregelix.api.util.BspUtils; import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory; import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory; import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory; public class VertexFileWriteOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor { private static final long serialVersionUID = 1L; private final IConfigurationFactory confFactory; private final IRecordDescriptorFactory inputRdFactory; private final IRuntimeHookFactory preHookFactory; public VertexFileWriteOperatorDescriptor(JobSpecification spec, IConfigurationFactory confFactory, IRecordDescriptorFactory inputRdFactory, IRuntimeHookFactory preHookFactory) { super(spec, 1, 0); this.confFactory = confFactory; this.inputRdFactory = inputRdFactory; this.preHookFactory = preHookFactory; } @SuppressWarnings("rawtypes") @Override public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryInputSinkOperatorNodePushable() { private RecordDescriptor rd0; private FrameDeserializer frameDeserializer; private Configuration conf; private VertexWriter vertexWriter; private TaskAttemptContext context; private String TEMP_DIR = "_temporary"; private ClassLoader ctxCL; private ContextFactory ctxFactory = new ContextFactory(); @Override public void open() throws HyracksDataException { rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0) : inputRdFactory.createRecordDescriptor(ctx); frameDeserializer = new FrameDeserializer(rd0); ctxCL = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf = confFactory.createConfiguration(ctx); VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf); context = ctxFactory.createContext(conf, partition); context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader()); try { if (preHookFactory != null) { preHookFactory.createRuntimeHook().configure(ctx); } vertexWriter = outputFormat.createVertexWriter(context); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @SuppressWarnings("unchecked") @Override public void nextFrame(ByteBuffer frame) throws HyracksDataException { frameDeserializer.reset(frame); try { while (!frameDeserializer.done()) { Object[] tuple = frameDeserializer.deserializeRecord(); Vertex value = (Vertex) tuple[1]; vertexWriter.writeVertex(value); } } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @Override public void fail() throws HyracksDataException { Thread.currentThread().setContextClassLoader(ctxCL); } @Override public void close() throws HyracksDataException { try { vertexWriter.close(context); moveFilesToFinalPath(); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } private void moveFilesToFinalPath() throws HyracksDataException { try { JobContext job = ctxFactory.createJobContext(conf); Path outputPath = FileOutputFormat.getOutputPath(job); FileSystem dfs = FileSystem.get(conf); Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString()); FileStatus[] results = findPartitionPaths(outputPath, dfs); if (results.length >= 1) { /** * for Hadoop-0.20.2 */ renameFile(dfs, filePath, results); } else { /** * for Hadoop-0.23.1 */ int jobId = job.getJobID().getId(); outputPath = new Path( outputPath.toString() + File.separator + TEMP_DIR + File.separator + jobId); results = findPartitionPaths(outputPath, dfs); renameFile(dfs, filePath, results); } } catch (IOException e) { throw new HyracksDataException(e); } finally { Thread.currentThread().setContextClassLoader(ctxCL); } } private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException, IOException { FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().endsWith(TEMP_DIR) && dir.getName().indexOf(".crc") < 0; } }); Path tempDir = tempPaths[0].getPath(); FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0 && dir.getName().indexOf(".crc") < 0; } }); return results; } private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException, HyracksDataException, FileNotFoundException { Path srcDir = results[0].getPath(); if (!dfs.exists(srcDir)) { throw new HyracksDataException("file " + srcDir.toString() + " does not exist!"); } FileStatus[] srcFiles = dfs.listStatus(srcDir); Path srcFile = srcFiles[0].getPath(); dfs.delete(filePath, true); dfs.rename(srcFile, filePath); } }; } }