List of usage examples for org.apache.hadoop.conf.Configuration.setIfUnset
public synchronized void setIfUnset(String name, String value)
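setIfUnset(name, value) assigns value to the property name only if the configuration does not already contain a value for that name (whether loaded from a resource file or assigned by an earlier set call); when a value is already present the call does nothing, unlike set(name, value), which always overwrites. Below is a minimal sketch of that contract; the property name "example.key" and its values are invented for illustration only.

import org.apache.hadoop.conf.Configuration;

public class SetIfUnsetExample {
  public static void main(String[] args) {
    // Start from an empty Configuration (no default resources loaded).
    Configuration conf = new Configuration(false);

    conf.setIfUnset("example.key", "first");  // "example.key" is unset, so it becomes "first"
    conf.setIfUnset("example.key", "second"); // already set, so this call changes nothing

    conf.set("example.key", "third");         // plain set() always overwrites

    System.out.println(conf.get("example.key")); // prints "third"
  }
}

The examples below all use the same pattern: setIfUnset supplies a sensible default (a job name, a bit precision, a filesystem implementation) while leaving any value the user has already configured untouched.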
From source file:com.mellanox.r4h.MiniDFSCluster.java
License:Apache License
/**
 * Modify the config and start up additional DataNodes. The info port for
 * DataNodes is guaranteed to use a free port.
 *
 * Data nodes can run with the name node in the mini cluster or
 * a real name node. For example, running with a real name node is useful
 * when running simulated data nodes with a real name node.
 * If minicluster's name node is null assume that the conf has been
 * set with the right address:port of the name node.
 *
 * @param conf
 *          the base configuration to use in starting the DataNodes. This
 *          will be modified as necessary.
 * @param numDataNodes
 *          Number of DataNodes to start; may be zero
 * @param manageDfsDirs
 *          if true, the data directories for DataNodes will be
 *          created and {@link #DFS_DATANODE_DATA_DIR_KEY} will be
 *          set in the conf
 * @param operation
 *          the operation with which to start the DataNodes. If null
 *          or StartupOption.FORMAT, then StartupOption.REGULAR will be used.
 * @param racks
 *          array of strings indicating the rack that each DataNode is on
 * @param hosts
 *          array of strings indicating the hostnames for each DataNode
 * @param simulatedCapacities
 *          array of capacities of the simulated data nodes
 * @param setupHostsFile
 *          add new nodes to dfs hosts files
 * @param checkDataNodeAddrConfig
 *          if true, only set DataNode port addresses if not already set in config
 * @param checkDataNodeHostConfig
 *          if true, only set DataNode hostname key if not already set in config
 * @param dnConfOverlays
 *          An array of {@link Configuration} objects that will overlay the
 *          global MiniDFSCluster Configuration for the corresponding DataNode.
 * @throws IllegalStateException
 *           if NameNode has been shutdown
 */
public synchronized void startDataNodes(Configuration conf, int numDataNodes, StorageType storageType,
    boolean manageDfsDirs, StartupOption operation, String[] racks, String[] hosts,
    long[] simulatedCapacities, boolean setupHostsFile, boolean checkDataNodeAddrConfig,
    boolean checkDataNodeHostConfig, Configuration[] dnConfOverlays) throws IOException {
  if (operation == StartupOption.RECOVER) {
    return;
  }
  if (checkDataNodeHostConfig) {
    conf.setIfUnset(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
  } else {
    conf.set(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
  }

  int curDatanodesNum = dataNodes.size();
  // for mincluster's the default initialDelay for BRs is 0
  if (conf.get(DFS_BLOCKREPORT_INITIAL_DELAY_KEY) == null) {
    conf.setLong(DFS_BLOCKREPORT_INITIAL_DELAY_KEY, 0);
  }
  // If minicluster's name node is null assume that the conf has been
  // set with the right address:port of the name node.
  //
  if (racks != null && numDataNodes > racks.length) {
    throw new IllegalArgumentException("The length of racks [" + racks.length
        + "] is less than the number of datanodes [" + numDataNodes + "].");
  }
  if (hosts != null && numDataNodes > hosts.length) {
    throw new IllegalArgumentException("The length of hosts [" + hosts.length
        + "] is less than the number of datanodes [" + numDataNodes + "].");
  }
  // Generate some hostnames if required
  if (racks != null && hosts == null) {
    hosts = new String[numDataNodes];
    for (int i = curDatanodesNum; i < curDatanodesNum + numDataNodes; i++) {
      hosts[i - curDatanodesNum] = "host" + i + ".foo.com";
    }
  }
  if (simulatedCapacities != null && numDataNodes > simulatedCapacities.length) {
    throw new IllegalArgumentException("The length of simulatedCapacities [" + simulatedCapacities.length
        + "] is less than the number of datanodes [" + numDataNodes + "].");
  }
  if (dnConfOverlays != null && numDataNodes > dnConfOverlays.length) {
    throw new IllegalArgumentException("The length of dnConfOverlays [" + dnConfOverlays.length
        + "] is less than the number of datanodes [" + numDataNodes + "].");
  }

  String[] dnArgs = (operation == null || operation != StartupOption.ROLLBACK) ? null
      : new String[] { operation.getName() };

  for (int i = curDatanodesNum; i < curDatanodesNum + numDataNodes; i++) {
    Configuration dnConf = new HdfsConfiguration(conf);
    if (dnConfOverlays != null) {
      dnConf.addResource(dnConfOverlays[i]);
    }
    // Set up datanode address
    setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
    if (manageDfsDirs) {
      String dirs = makeDataNodeDirs(i, storageType);
      dnConf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
      conf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
    }
    if (simulatedCapacities != null) {
      SimulatedFSDataset.setFactory(dnConf);
      dnConf.setLong(SimulatedFSDataset.CONFIG_PROPERTY_CAPACITY, simulatedCapacities[i - curDatanodesNum]);
    }
    LOG.info("Starting DataNode " + i + " with " + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY + ": "
        + dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
    if (hosts != null) {
      dnConf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, hosts[i - curDatanodesNum]);
      LOG.info("Starting DataNode " + i + " with hostname set to: "
          + dnConf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY));
    }
    if (racks != null) {
      String name = hosts[i - curDatanodesNum];
      LOG.info("Adding node with hostname : " + name + " to rack " + racks[i - curDatanodesNum]);
      StaticMapping.addNodeToRack(name, racks[i - curDatanodesNum]);
    }
    Configuration newconf = new HdfsConfiguration(dnConf); // save config
    if (hosts != null) {
      NetUtils.addStaticResolution(hosts[i - curDatanodesNum], "localhost");
    }

    SecureResources secureResources = null;
    if (UserGroupInformation.isSecurityEnabled() && conf.get(DFS_DATA_TRANSFER_PROTECTION_KEY) == null) {
      try {
        secureResources = SecureDataNodeStarter.getSecureResources(dnConf);
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
    final int maxRetriesOnSasl = conf.getInt(IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY,
        IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_DEFAULT);
    int numRetries = 0;
    DataNode dn = null;
    while (true) {
      try {
        dn = DataNode.instantiateDataNode(dnArgs, dnConf, secureResources);
        break;
      } catch (IOException e) {
        // Work around issue testing security where rapidly starting multiple
        // DataNodes using the same principal gets rejected by the KDC as a
        // replay attack.
        if (UserGroupInformation.isSecurityEnabled() && numRetries < maxRetriesOnSasl) {
          try {
            Thread.sleep(1000);
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            break;
          }
          ++numRetries;
          continue;
        }
        throw e;
      }
    }
    if (dn == null)
      throw new IOException("Cannot start DataNode in " + dnConf.get(DFS_DATANODE_DATA_DIR_KEY));
    // since the HDFS does things based on host|ip:port, we need to add the
    // mapping for the service to rackId
    String service = SecurityUtil.buildTokenService(dn.getXferAddress()).toString();
    if (racks != null) {
      LOG.info("Adding node with service : " + service + " to rack " + racks[i - curDatanodesNum]);
      StaticMapping.addNodeToRack(service, racks[i - curDatanodesNum]);
    }
    dn.runDatanodeDaemon();
    dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs, secureResources, dn.getIpcPort()));
  }
  curDatanodesNum += numDataNodes;
  this.numDataNodes += numDataNodes;
  waitActive();
}
From source file:com.mellanox.r4h.MiniDFSCluster.java
License:Apache License
protected void setupDatanodeAddress(Configuration conf, boolean setupHostsFile, boolean checkDataNodeAddrConfig)
    throws IOException {
  if (setupHostsFile) {
    String hostsFile = conf.get(DFS_HOSTS, "").trim();
    if (hostsFile.length() == 0) {
      throw new IOException("Parameter dfs.hosts is not setup in conf");
    }
    // Setup datanode in the include file, if it is defined in the conf
    String address = "127.0.0.1:" + NetUtils.getFreeSocketPort();
    if (checkDataNodeAddrConfig) {
      conf.setIfUnset(DFS_DATANODE_ADDRESS_KEY, address);
    } else {
      conf.set(DFS_DATANODE_ADDRESS_KEY, address);
    }
    addToFile(hostsFile, address);
    LOG.info("Adding datanode " + address + " to hosts file " + hostsFile);
  } else {
    if (checkDataNodeAddrConfig) {
      conf.setIfUnset(DFS_DATANODE_ADDRESS_KEY, "127.0.0.1:0");
    } else {
      conf.set(DFS_DATANODE_ADDRESS_KEY, "127.0.0.1:0");
    }
  }
  if (checkDataNodeAddrConfig) {
    conf.setIfUnset(DFS_DATANODE_HTTP_ADDRESS_KEY, "127.0.0.1:0");
    conf.setIfUnset(DFS_DATANODE_IPC_ADDRESS_KEY, "127.0.0.1:0");
  } else {
    conf.set(DFS_DATANODE_HTTP_ADDRESS_KEY, "127.0.0.1:0");
    conf.set(DFS_DATANODE_IPC_ADDRESS_KEY, "127.0.0.1:0");
  }
}
From source file:com.rim.logdriver.util.Cat.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf(); // Configuration processed by ToolRunner

  // If run by Oozie, then load the Oozie conf too
  if (System.getProperty("oozie.action.conf.xml") != null) {
    conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
  }

  FileSystem fs = FileSystem.get(conf);

  // The command line options
  List<Path> paths = new ArrayList<Path>();
  Path outputDir = null;

  // Load input files from the command line
  if (args.length < 2) {
    System.out.println("usage: [genericOptions] input [input ...] output");
    System.exit(1);
  }

  // Get the files we need from the command line.
  for (int i = 0; i < args.length - 1; i++) {
    for (FileStatus f : fs.globStatus(new Path(args[i]))) {
      paths.add(f.getPath());
    }
  }
  outputDir = new Path(args[args.length - 1]);

  Job job = new Job(conf);
  Configuration jobConf = job.getConfiguration();

  job.setJarByClass(Cat.class);
  jobConf.setIfUnset("mapred.job.name", "Cat Files");

  // To propagate credentials within Oozie
  if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
    jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
  }

  // Good output separators include things that are unsupported by XML. So we
  // just send the byte value of the character through. The restriction here
  // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
  // read by Pig which only deals with single byte separators.
  {
    String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
    byte[] bytes = outputSeparator.getBytes(UTF_8);
    if (bytes.length != 1) {
      LOG.error("The output separator must be a single byte in UTF-8.");
      return 1;
    }
    jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
  }

  job.setInputFormatClass(BoomInputFormat.class);
  job.setMapperClass(CatMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  for (Path path : paths) {
    BoomInputFormat.addInputPath(job, path);
  }

  // Run the job.
  if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
    return job.waitForCompletion(true) ? 0 : 1;
  } else {
    job.submit();
    return 0;
  }
}
From source file:com.rim.logdriver.util.FastSearch.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf(); // Configuration processed by ToolRunner

  // If run by Oozie, then load the Oozie conf too
  if (System.getProperty("oozie.action.conf.xml") != null) {
    conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
  }

  FileSystem fs = FileSystem.get(conf);

  // The command line options
  String searchString = null;
  List<Path> paths = new ArrayList<Path>();
  Path outputDir = null;

  // Load input files from the command line
  if (args.length < 3) {
    System.out.println("usage: [genericOptions] searchString input [input ...] output");
    System.exit(1);
  }

  // Get the files we need from the command line.
  searchString = args[0];
  for (int i = 1; i < args.length - 1; i++) {
    for (FileStatus f : fs.globStatus(new Path(args[i]))) {
      paths.add(f.getPath());
    }
  }
  outputDir = new Path(args[args.length - 1]);

  Job job = new Job(conf);
  Configuration jobConf = job.getConfiguration();

  job.setJarByClass(FastSearch.class);
  jobConf.setIfUnset("mapred.job.name", "Search Files");

  // To propagate credentials within Oozie
  if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
    jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
  }

  // Good output separators include things that are unsupported by XML. So we
  // just send the byte value of the character through. The restriction here
  // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
  // read by Pig which only deals with single byte separators.
  {
    String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
    byte[] bytes = outputSeparator.getBytes(UTF_8);
    if (bytes.length != 1) {
      LOG.error("The output separator must be a single byte in UTF-8.");
      return 1;
    }
    jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
  }

  jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

  job.setInputFormatClass(AvroBlockInputFormat.class);
  job.setMapperClass(SearchMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);

  // And set the output as usual
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  for (Path path : paths) {
    AvroBlockInputFormat.addInputPath(job, path);
  }

  // Run the job.
  if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
    return job.waitForCompletion(true) ? 0 : 1;
  } else {
    job.submit();
    return 0;
  }
}
From source file:com.rim.logdriver.util.Grep.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf(); // Configuration processed by ToolRunner

  // If run by Oozie, then load the Oozie conf too
  if (System.getProperty("oozie.action.conf.xml") != null) {
    conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
  }

  FileSystem fs = FileSystem.get(conf);

  // The command line options
  String regex = null;
  List<Path> paths = new ArrayList<Path>();
  Path outputDir = null;

  // Load input files from the command line
  if (args.length < 3) {
    System.out.println("usage: [genericOptions] regex input [input ...] output");
    System.exit(1);
  }

  // Get the files we need from the command line.
  regex = args[0];
  for (int i = 1; i < args.length - 1; i++) {
    for (FileStatus f : fs.globStatus(new Path(args[i]))) {
      paths.add(f.getPath());
    }
  }
  outputDir = new Path(args[args.length - 1]);

  Job job = new Job(conf);
  Configuration jobConf = job.getConfiguration();

  job.setJarByClass(Grep.class);
  jobConf.setIfUnset("mapred.job.name", "Grep Files");

  // To propagate credentials within Oozie
  if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
    jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
  }

  // Good output separators include things that are unsupported by XML. So we
  // just send the byte value of the character through. The restriction here
  // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
  // read by Pig which only deals with single byte separators.
  {
    String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
    byte[] bytes = outputSeparator.getBytes(UTF_8);
    if (bytes.length != 1) {
      LOG.error("The output separator must be a single byte in UTF-8.");
      return 1;
    }
    jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
  }

  jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

  job.setInputFormatClass(BoomInputFormat.class);
  job.setMapperClass(GrepMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);

  // And set the output as usual
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  for (Path path : paths) {
    BoomInputFormat.addInputPath(job, path);
  }

  // Run the job.
  if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
    return job.waitForCompletion(true) ? 0 : 1;
  } else {
    job.submit();
    return 0;
  }
}
From source file:com.rim.logdriver.util.MultiSearch.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf(); // Configuration processed by ToolRunner

  // If run by Oozie, then load the Oozie conf too
  if (System.getProperty("oozie.action.conf.xml") != null) {
    conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
  }

  FileSystem fs = FileSystem.get(conf);

  // The command line options
  String searchStringDir = null;
  List<Path> paths = new ArrayList<Path>();
  Path outputDir = null;

  // Load input files from the command line
  if (args.length < 3) {
    System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
    System.exit(1);
  }

  // Get the files we need from the command line.
  searchStringDir = args[0];
  // We are going to be reading all the files in this directory a lot. So
  // let's up the replication factor by a lot so that they're easy to read.
  for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
    fs.setReplication(f.getPath(), (short) 16);
  }
  for (int i = 1; i < args.length - 1; i++) {
    for (FileStatus f : fs.globStatus(new Path(args[i]))) {
      paths.add(f.getPath());
    }
  }
  outputDir = new Path(args[args.length - 1]);

  Job job = new Job(conf);
  Configuration jobConf = job.getConfiguration();

  job.setJarByClass(MultiSearch.class);
  jobConf.setIfUnset("mapred.job.name", "MultiSearch");

  // To propagate credentials within Oozie
  if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
    jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
  }

  // Good output separators include things that are unsupported by XML. So we
  // just send the byte value of the character through. The restriction here
  // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
  // read by Pig which only deals with single byte separators.
  {
    String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
    byte[] bytes = outputSeparator.getBytes(UTF_8);
    if (bytes.length != 1) {
      LOG.error("The output separator must be a single byte in UTF-8.");
      return 1;
    }
    jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
  }

  jobConf.set("logdriver.search.string.dir", searchStringDir);

  // This search is generally too fast to make good use of 128MB blocks, so
  // let's set the value to 256MB (if it's not set already)
  if (jobConf.get("mapred.max.split.size") == null) {
    jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
  }

  job.setInputFormatClass(AvroBlockInputFormat.class);
  job.setMapperClass(SearchMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  for (Path path : paths) {
    AvroBlockInputFormat.addInputPath(job, path);
  }

  // Run the job.
  if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
    return job.waitForCompletion(true) ? 0 : 1;
  } else {
    job.submit();
    return 0;
  }
}
From source file:com.rim.logdriver.util.Search.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf(); // Configuration processed by ToolRunner

  // If run by Oozie, then load the Oozie conf too
  if (System.getProperty("oozie.action.conf.xml") != null) {
    conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
  }

  FileSystem fs = FileSystem.get(conf);

  // The command line options
  String searchString = null;
  List<Path> paths = new ArrayList<Path>();
  Path outputDir = null;

  // Load input files from the command line
  if (args.length < 3) {
    System.out.println("usage: [genericOptions] searchString input [input ...] output");
    System.exit(1);
  }

  // Get the files we need from the command line.
  searchString = args[0];
  for (int i = 1; i < args.length - 1; i++) {
    for (FileStatus f : fs.globStatus(new Path(args[i]))) {
      paths.add(f.getPath());
    }
  }
  outputDir = new Path(args[args.length - 1]);

  Job job = new Job(conf);
  Configuration jobConf = job.getConfiguration();

  job.setJarByClass(Search.class);
  jobConf.setIfUnset("mapred.job.name", "Search Files");

  // To propagate credentials within Oozie
  if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
    jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
  }

  // Good output separators include things that are unsupported by XML. So we
  // just send the byte value of the character through. The restriction here
  // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
  // read by Pig which only deals with single byte separators.
  {
    String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
    byte[] bytes = outputSeparator.getBytes(UTF_8);
    if (bytes.length != 1) {
      LOG.error("The output separator must be a single byte in UTF-8.");
      return 1;
    }
    jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
  }

  jobConf.set("logdriver.search.string", searchString);

  job.setInputFormatClass(BoomInputFormat.class);
  job.setMapperClass(SearchMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);

  // And set the output as usual
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  for (Path path : paths) {
    BoomInputFormat.addInputPath(job, path);
  }

  // Run the job.
  if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
    return job.waitForCompletion(true) ? 0 : 1;
  } else {
    job.submit();
    return 0;
  }
}
From source file:com.spotify.ratatool.GcsConfiguration.java
License:Apache License
public static Configuration get() {
  Configuration conf = new Configuration();
  File cloudConfigPath;
  if (isWindows()) {
    cloudConfigPath = new File(getEnvironment().get("APPDATA"), "gcloud");
  } else {
    cloudConfigPath = new File(System.getProperty("user.home"), ".config/gcloud");
  }
  File credentialFilePath = new File(cloudConfigPath, "application_default_credentials.json");
  if (!credentialFilePath.exists()) {
    return conf;
  }
  try {
    JsonFactory jsonFactory = Utils.getDefaultJsonFactory();
    InputStream inputStream = new FileInputStream(credentialFilePath);
    JsonObjectParser parser = new JsonObjectParser(jsonFactory);
    GenericJson fileContents = parser.parseAndClose(inputStream, Charsets.UTF_8, GenericJson.class);
    String fileType = (String) fileContents.get("type");
    if ("authorized_user".equals(fileType)) {
      String clientId = (String) fileContents.get("client_id");
      String clientSecret = (String) fileContents.get("client_secret");
      if (clientId != null && clientSecret != null) {
        LOG.debug("Using GCP user credential from '{}'", credentialFilePath);
        conf.setIfUnset("fs.gs.impl", GoogleHadoopFileSystem.class.getName());
        conf.setIfUnset("fs.AbstractFileSystem.gs.impl", GoogleHadoopFS.class.getName());
        conf.setIfUnset(GoogleHadoopFileSystemBase.GCS_PROJECT_ID_KEY, defaultProject());
        conf.setIfUnset(GoogleHadoopFileSystemBase.GCS_WORKING_DIRECTORY_KEY, "/hadoop");
        conf.setIfUnset(HadoopCredentialConfiguration.BASE_KEY_PREFIX
            + HadoopCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX, "false");
        conf.setIfUnset(HadoopCredentialConfiguration.BASE_KEY_PREFIX
            + HadoopCredentialConfiguration.CLIENT_ID_SUFFIX, clientId);
        conf.setIfUnset(HadoopCredentialConfiguration.BASE_KEY_PREFIX
            + HadoopCredentialConfiguration.CLIENT_SECRET_SUFFIX, clientSecret);
      }
    }
  } catch (IOException e) {
    LOG.warn("Failed to load GCP user credential from '{}'", credentialFilePath);
  }
  return conf;
}
From source file:com.yahoo.labs.yamall.hadoop.Test.java
License:Open Source License
/**
 * Run the map/reduce job
 */
public final int run(final String[] args) throws Exception {
  startLogger(Level.INFO);

  Configuration conf = getConf();
  conf.set("yamall.vw_model", args[2]);
  conf.setIfUnset("yamall.bit_precision", "18");
  conf.setIfUnset("yamall.parser", "vw");

  // Print to screen all the options
  TreeMap<String, String> map = new TreeMap<String, String>();
  for (Map.Entry<String, String> entry : conf) {
    map.put(entry.getKey(), entry.getValue());
  }
  for (Map.Entry<String, String> entry : map.entrySet()) {
    System.out.printf("%s=%s\n", entry.getKey(), entry.getValue());
  }

  Job job = Job.getInstance(conf, "Yamall Test on MapReduce");
  job.setNumReduceTasks(1);
  job.setJarByClass(Test.class);
  job.setMapperClass(TestMapper.class);
  job.setMapOutputKeyClass(DoubleWritable.class);
  job.setReducerClass(TestReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(CompositeDoubleTextWritable.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  MultipleOutputs.addNamedOutput(job, "out", TextOutputFormat.class, NullWritable.class, Text.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.yahoo.labs.yamall.hadoop.Train.java
License:Open Source License
/**
 * Run the map/reduce job
 */
public final int run(final String[] args) throws Exception {
  startLogger(Level.INFO);

  Configuration conf = getConf();
  conf.set("yamall.output", args[1]);
  conf.setIfUnset("yamall.bit_precision", "18");
  conf.setIfUnset("yamall.parser", "vw");

  // Print to screen all the options
  TreeMap<String, String> map = new TreeMap<String, String>();
  for (Map.Entry<String, String> entry : conf) {
    map.put(entry.getKey(), entry.getValue());
  }
  for (Map.Entry<String, String> entry : map.entrySet()) {
    System.out.printf("%s=%s\n", entry.getKey(), entry.getValue());
  }

  Job job = Job.getInstance(conf, "Yamall Train on MapReduce");
  job.setNumReduceTasks(1); // important
  job.setJarByClass(Train.class);
  job.setMapperClass(TrainMapper.class);
  job.setMapOutputKeyClass(DoubleWritable.class);
  job.setMapOutputValueClass(InstanceOrHashMapWritable.class);
  job.setReducerClass(TrainReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}