List of usage examples for org.apache.hadoop.conf.Configuration#get
public String get(String name)

Get the value of the name property, or null if no such property exists.
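Before the full examples, a minimal sketch of the call itself. The property key my.app.greeting and its values are hypothetical, chosen only to illustrate the null return of get(name) and the fallback behaviour of the two-argument get(name, defaultValue) overload.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // No loaded resource defines this (hypothetical) key, so get(name) returns null.
        String missing = conf.get("my.app.greeting");
        System.out.println(missing); // null

        // The two-argument overload falls back to the supplied default.
        String withDefault = conf.get("my.app.greeting", "hello");
        System.out.println(withDefault); // hello

        // Once the key is set programmatically, get(name) returns the stored value.
        conf.set("my.app.greeting", "hi");
        System.out.println(conf.get("my.app.greeting")); // hi
    }
}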
From source file:ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // set filter
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }

            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}
From source file:ConfTest.java
License:Open Source License
public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.addResource(new Path("d:\\test\\a.xml"));
    System.out.println(conf.get("aaa"));
}
From source file:TestRawParascaleFileSystemBase.java
License:Apache License
protected void init() throws URISyntaxException, IOException {
    groupInformation = UserGroupInformation.createRemoteUser("hadoop");
    fs = getFileSystem(groupInformation);
    final Configuration conf = getConf();
    fs.initialize(new URI(conf.get(FS_DEFAULT_NAME)), getConf());
}
From source file:LungDriver.java
License:Creative Commons License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    String author = conf.get("com.marcolotz.author");
    String jobName = conf.get("mapreduce.job.name");
    String inputPath = conf.get("mapred.input.dir");
    String outputPath = conf.get("mapred.output.dir");

    System.out.println("\nApplication author: " + author + "\n");
    System.out.println("Configurations stored at: conf/lungConfiguration.xml");
    System.out.println("Input path: " + inputPath);
    System.out.println("Output path: " + outputPath);

    /* For non-standard operation (i.e. with arguments) */
    if (args.length != 0) {
        /* terminates the program if there is an incorrect input */
        if (processInputs(args, conf) != 0) {
            return 1;
        }
    }

    System.out.println("Bottom Threshold for nodules candidates detection:"
            + conf.getInt("com.marcolotz.grayNoduleCandidates.bottomThreshold", 110));
    System.out.println("Top Threshold for nodules candidates detection:"
            + conf.getInt("com.marcolotz.grayNoduleCandidates.topThreshold", 120));

    System.out.print("Cleaning output path: ");
    cleanOutputPath(conf, outputPath);

    System.out.print("Configuring the job " + jobName + ": ");

    /* Makes a new job */
    // The classic Job constructor is deprecated.
    Job job = Job.getInstance(conf);

    /*
     * This method sets the jar file in which each node will look for the
     * Mapper and Reducer classes.
     */
    job.setJarByClass(this.getClass());

    System.out.println("[DONE]\n");

    // Submits the job to the cluster
    System.out.println("Distributing the job:");
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:Importer.java
License:Open Source License
public static void copyFile(File file) throws Exception {
    // String TEST_PREFIX = "";
    File destFile = new File(outDir, file.getName() + ".seq");
    Path dest = new Path(destFile.getAbsolutePath());

    Configuration conf = new Configuration();
    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")),
            conf);
    CompressionCodec codec = new DefaultCodec();
    fileSys.mkdirs(dest.getParent());
    FSDataOutputStream outputStr = fileSys.create(dest);
    seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, codec);
    String filename = file.getName();
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    if (filename.endsWith(".bz2")) {
        in.read();
        in.read(); // snarf header
        in = new CBZip2InputStream(in);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));

    System.out.println("working on file " + file);
    int records = 0;
    long bytes = 0, bytes_since_status = 0;
    long startTime = System.currentTimeMillis();
    String s = null;
    Text content = new Text();
    while ((s = br.readLine()) != null) {
        if (s.startsWith("---END.OF.DOCUMENT---")) {
            Text name = new Text(hash(content));
            seqFileWriter.append(name, content);
            records++;
            content = new Text();
        } else {
            byte[] line_as_bytes = (s + " ").getBytes();
            for (byte b : line_as_bytes) {
                assert b < 128 : "found an unexpected high-bit set";
            }
            content.append(line_as_bytes, 0, line_as_bytes.length);
            bytes += line_as_bytes.length;
            /*
            bytes_since_status += line_as_bytes.length;
            if (bytes_since_status > 10 * 1024 * 1024) { // every 10 MB
                System.err.print('.');
                bytes_since_status = 0;
            }
            */
        }
    } // end while
    if (content.getLength() > 5) {
        Text name = new Text(hash(content));
        seqFileWriter.append(name, content);
        records++;
    }
    totalBytes += bytes;
    totalRecords += records;
    long time = (System.currentTimeMillis() - startTime) / 1000 + 1;
    long kbSec = bytes / 1024 / time;
    System.out.println(new java.util.Date());
    System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in " + time
            + " seconds (" + kbSec + " KB/sec).");
    in.close();
    seqFileWriter.close();
    outputStr.close();
}
From source file:MedianMaper.java
@Override
public void map(LongWritable Key, Text Value, Context context) throws IOException, InterruptedException {
    int YearWeek;
    double Price;
    Configuration conf = context.getConfiguration();
    String Cheapest_Carrier = conf.get("Cheapest_Carrier");
    Calendar Cal = Calendar.getInstance();
    FlightPriceParser FParser = new FlightPriceParser();
    if (!FParser.map(Value.toString())) {
        return;
    }
    // Compare carrier strings by value with equals(), not by reference with !=.
    if (!FParser.Carrier.equals(Cheapest_Carrier))
        return;
    Cal.set(FParser.Year, FParser.Month - 1, FParser.DayOfMonth);
    YearWeek = FParser.Year * 100 + Cal.get(Calendar.WEEK_OF_YEAR);
    context.write(new IntWritable(YearWeek), new IntWritable((int) Math.round(FParser.Price * 100)));
}
From source file:ConfigurationTest.java
License:Apache License
@Test
public void test() {
    Configuration conf = CosmosWebConfiguration.get();

    Assert.assertEquals("localhost", conf.get(CosmosWebConfiguration.ZOOKEEPERS));
    Assert.assertEquals("accumulo", conf.get(CosmosWebConfiguration.ACCUMULO_INSTANCE));
    Assert.assertEquals("root", conf.get(CosmosWebConfiguration.ACCUMULO_USER));
    Assert.assertEquals("secret", conf.get(CosmosWebConfiguration.ACCUMULO_PASSWORD));
}
From source file:HoopRemoteTask.java
License:Open Source License
/**
 *
 */
public static int countTerms(Configuration conf) {
    dbg("postProcess ()");

    int count = 0;

    String output = conf.get("mapred.output.dir");

    if (output != null) {
        if (output.isEmpty() == true)
            output = HoopLink.outputpath;
    } else
        output = HoopLink.outputpath;

    Path inFile = new Path(output + "/part-r-00000");

    FSDataInputStream in = null;

    @SuppressWarnings("unused")
    String thisLine = null;

    try {
        in = HoopRemoteTask.hdfs.open(inFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));

        while ((thisLine = reader.readLine()) != null) {
            count++;
        }

        in.close();
    } catch (IOException e) {
        e.printStackTrace();
        dbg("Error opening file in HDFS");
    }

    return (count);
}
From source file:HoopRemoteTask.java
License:Open Source License
/**
 *
 */
public static void postProcess(Configuration conf) {
    dbg("postProcess ()");

    if (HoopLink.nrshards == 1) {
        dbg("Only 1 shard needed, skipping post processing");
        return;
    }

    if (HoopLink.shardcreate.equals("mos") == true) {
        dbg("We shouldn't be pos-processing since the HoopShardedOutputFormat class already did this");
        return;
    }

    if (HoopLink.shardcreate.equals("hdfs") == true) {
        dbg("Starting shard post-process task ...");

        int termCount = countTerms(conf);

        String output = conf.get("mapred.output.dir");

        if (output != null) {
            if (output.isEmpty() == true)
                output = HoopLink.outputpath;
        } else
            output = HoopLink.outputpath;

        dbg("Post processing " + termCount + " items in: " + output);

        Path inFile = new Path(output + "/part-r-00000");
        Path outFile = null;

        FSDataInputStream in = null;
        FSDataOutputStream out = null;

        try {
            in = HoopRemoteTask.hdfs.open(inFile);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in));

            String thisLine;

            int count = 0;
            int split = Math.round(termCount / HoopLink.nrshards);
            int partition = 0;

            outFile = new Path(output + "/partition-" + partition + "-00000.txt");
            out = HoopRemoteTask.hdfs.create(outFile);

            if (out != null) {
                while ((thisLine = reader.readLine()) != null) {
                    StringBuffer formatted = new StringBuffer();
                    formatted.append(thisLine);
                    formatted.append("\n");

                    count++;

                    if (count > split) {
                        out.close();

                        partition++;
                        outFile = new Path(output + "/partition-" + partition + "-00000.txt");
                        out = HoopRemoteTask.hdfs.create(outFile);

                        split++;
                        count = 0;
                    }

                    byte[] utf8Bytes = formatted.toString().getBytes("UTF8");
                    // We get an additional 0 because of Java string encoding. leave it out!
                    out.write(utf8Bytes);
                }

                if (in != null)
                    in.close();
                if (out != null)
                    out.close();
            } else
                dbg("Error: unable to open output file");
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        dbg("Starting rudimentary sharding into " + HoopLink.nrshards);

        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    HoopStatistics stats = new HoopStatistics();
    String results = stats.printStatistics(null);
    dbg(results);
}
From source file:ParascaleFileSystem.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public void initialize(final URI uri, final Configuration conf) throws IOException {
    final URI rawUri;
    final RawParascaleFileSystem rawParascaleFileSystem;
    UserGroupInformation groupInformation;
    try {
        if (conf.get("hadoop.job.ugi") != null) {
            String username = new StringTokenizer(conf.get("hadoop.job.ugi"), ",").nextToken();
            groupInformation = UserGroupInformation.createRemoteUser(username);
        } else {
            groupInformation = UserGroupInformation.getCurrentUser();
        }
        rawParascaleFileSystem = new RawParascaleFileSystem(groupInformation);
        fs = conf.getBoolean(CRC_FILESYSTEM, false) ? new ChecksumFsWrapper(rawParascaleFileSystem)
                : rawParascaleFileSystem;
        rawUri = new URI(uri.getScheme(), uri.getAuthority(), null, null, null);
    } catch (final URISyntaxException e) {
        throw (IOException) new IOException().initCause(e);
    }
    // initialize with the raw URI - RawFS expects it without a path!
    fs.initialize(rawUri, conf);
    if (!rawParascaleFileSystem.isMountPointAbsolute()) {
        throw new IOException(
                "Mountpoint " + rawParascaleFileSystem.getMountPoint() + " is not an absolute path");
    }
    if (!rawParascaleFileSystem.mountPointExists()) {
        throw new IOException("WorkingDirectory does not exist - can not mount Parascale "
                + "filesystem at " + rawParascaleFileSystem.getMountPath());
    }
    if (!rawParascaleFileSystem.createHomeDirectory()) {
        throw new IOException("Can not create HomeDirectory");
    }
}