Java tutorial: reading Aerospike data with a Hadoop InputFormat
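The listing below walks through AerospikeInputFormat (package com.aerospike.hadoop.mapreduce), an InputFormat that lets Hadoop jobs read records from an Aerospike database. The class supports both the new org.apache.hadoop.mapreduce API and the old org.apache.hadoop.mapred API, and it creates one input split per Aerospike cluster node, so each map task reads from a single node.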
/*
 * Copyright 2014 Aerospike, Inc.
 *
 * Portions may be licensed to Aerospike, Inc. under one or more
 * contributor license agreements.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package com.aerospike.hadoop.mapreduce;

import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import com.aerospike.client.AerospikeClient;
import com.aerospike.client.AerospikeException;
import com.aerospike.client.Host;
import com.aerospike.client.cluster.Node;
import com.aerospike.client.policy.ClientPolicy;

/**
 * An {@link InputFormat} for data stored in an Aerospike database.
 */
public class AerospikeInputFormat
    extends InputFormat<AerospikeKey, AerospikeRecord>
    implements org.apache.hadoop.mapred.InputFormat<AerospikeKey, AerospikeRecord> {

    private static final Log log = LogFactory.getLog(AerospikeInputFormat.class);

    // ---------------- NEW API ----------------

    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // Delegate to the old API.
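        // The split computation is identical for both APIs: convert the
        // new-API Configuration into a JobConf and reuse the old-API
        // getSplits below.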
        Configuration cfg = context.getConfiguration();
        JobConf jobconf = AerospikeConfigUtil.asJobConf(cfg);
        return Arrays.asList((InputSplit[]) getSplits(jobconf, jobconf.getNumMapTasks()));
    }

    public RecordReader<AerospikeKey, AerospikeRecord>
        createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
        return new AerospikeRecordReader();
    }

    // ---------------- OLD API ----------------

    public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
        throws IOException {
        try {
            String oper = AerospikeConfigUtil.getInputOperation(job);
            String host = AerospikeConfigUtil.getInputHost(job);
            int port = AerospikeConfigUtil.getInputPort(job);
            String namespace = AerospikeConfigUtil.getInputNamespace(job);
            String setName = AerospikeConfigUtil.getInputSetName(job);
            String[] binNames = AerospikeConfigUtil.getInputBinNames(job);
            String numrangeBin = "";
            long numrangeBegin = 0;
            long numrangeEnd = 0;
            if (oper.equals("numrange")) {
                numrangeBin = AerospikeConfigUtil.getInputNumRangeBin(job);
                numrangeBegin = AerospikeConfigUtil.getInputNumRangeBegin(job);
                numrangeEnd = AerospikeConfigUtil.getInputNumRangeEnd(job);
            }
            log.info(String.format("using: %s %d %s %s", host, port, namespace, setName));

            AerospikeClient client =
                AerospikeClientSingleton.getInstance(new ClientPolicy(), host, port);
            Node[] nodes = client.getNodes();
            int nsplits = nodes.length;
            if (nsplits == 0) {
                throw new IOException("no Aerospike nodes found");
            }
            log.info(String.format("found %d nodes", nsplits));

            AerospikeSplit[] splits = new AerospikeSplit[nsplits];
            for (int ii = 0; ii < nsplits; ii++) {
                Node node = nodes[ii];
                String nodeName = node.getName();

                // We want to avoid 127.0.0.1 as a hostname
                // because this value will be transferred to a
                // different hadoop node to be processed.
                List<Host> aliases = getAliases(node.getHost());
                Host nodehost = aliases.get(0);
                if (aliases.size() > 1) {
                    for (Host a : aliases) {
                        if (!a.name.equals("127.0.0.1")) {
                            nodehost = a;
                            break;
                        }
                    }
                }

                splits[ii] = new AerospikeSplit(oper, nodeName,
                        nodehost.name, nodehost.port, namespace, setName,
                        binNames, numrangeBin, numrangeBegin, numrangeEnd);
                log.info("split: " + splits[ii]);
            }
            return splits;
        } catch (Exception ex) {
            throw new IOException("exception in getSplits", ex);
        }
    }

    public org.apache.hadoop.mapred.RecordReader<AerospikeKey, AerospikeRecord>
        getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job,
                        Reporter reporter) throws IOException {
        return new AerospikeRecordReader((AerospikeSplit) split);
    }

    private List<Host> getAliases(Host host) {
        InetAddress[] addresses;
        try {
            addresses = InetAddress.getAllByName(host.name);
        } catch (UnknownHostException uhe) {
            throw new AerospikeException.Connection("Invalid host: " + host);
        }
        if (addresses.length == 0) {
            throw new AerospikeException.Connection("Failed to find addresses for " + host);
        }

        // Add capacity for current address aliases plus IPV6 address and hostname.
        List<Host> aliases = new ArrayList<Host>(addresses.length + 2);
        for (InetAddress address : addresses) {
            aliases.add(new Host(address.getHostAddress(), host.tlsName, host.port));
        }
        return aliases;
    }
}

// Local Variables:
// mode: java
// c-basic-offset: 4
// tab-width: 4
// indent-tabs-mode: nil
// End:

// vim: softtabstop=4:shiftwidth=4:expandtab
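For context, here is a minimal sketch of an old-API MapReduce driver that plugs this input format in. The AerospikeConfigUtil setters (setInputOperation, setInputHost, and so on) are assumed to mirror the getInput* getters that getSplits reads above, and the "scan" operation string is inferred from the "numrange" check in the same method; verify both against AerospikeConfigUtil before relying on them.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

import com.aerospike.hadoop.mapreduce.AerospikeConfigUtil;
import com.aerospike.hadoop.mapreduce.AerospikeInputFormat;
import com.aerospike.hadoop.mapreduce.AerospikeKey;
import com.aerospike.hadoop.mapreduce.AerospikeRecord;

public class ExampleJob {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(ExampleJob.class);
        job.setJobName("aerospike-scan-example");

        // Scan every record in namespace "test", set "demo". These setters
        // are assumed counterparts of the getInput* getters used in
        // getSplits above; check AerospikeConfigUtil for the exact names.
        AerospikeConfigUtil.setInputOperation(job, "scan");
        AerospikeConfigUtil.setInputHost(job, "localhost");
        AerospikeConfigUtil.setInputPort(job, 3000);
        AerospikeConfigUtil.setInputNamespace(job, "test");
        AerospikeConfigUtil.setInputSetName(job, "demo");

        // One map task per Aerospike node; each mapper receives
        // (AerospikeKey, AerospikeRecord) pairs from its node's data.
        job.setInputFormat(AerospikeInputFormat.class);
        job.setOutputKeyClass(AerospikeKey.class);
        job.setOutputValueClass(AerospikeRecord.class);
        job.setNumReduceTasks(0); // map-only; the default IdentityMapper passes records through
        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        JobClient.runJob(job);
    }
}

Running this map-only job writes each node's records straight to the output path, which makes it a convenient smoke test that the splits and record reader are wired up correctly before adding a real mapper.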