List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
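The examples below mostly call the single-Path overload, listStatus(Path). For orientation, here is a minimal, self-contained sketch of that common case; the directory /tmp/data and the cluster configuration are assumptions for illustration only, not taken from any of the source projects.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Loads core-site.xml/hdfs-site.xml from the classpath; assumes a reachable cluster.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical input directory, for illustration only.
        FileStatus[] statuses = fs.listStatus(new Path("/tmp/data"));
        for (FileStatus status : statuses) {
            // isDirectory() distinguishes sub-directories from plain files.
            System.out.println((status.isDirectory() ? "dir  " : "file ") + status.getPath()
                    + " (" + status.getLen() + " bytes)");
        }
    }
}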
From source file:co.nubetech.hiho.job.TestExportToOracleDb.java
License:Apache License
@Test
public void testAlterTableDMl() throws HIHOException, IOException {
    Configuration conf = mock(Configuration.class);
    Path path = mock(Path.class);
    FileStatus status1 = mock(FileStatus.class);
    Path path1 = mock(Path.class);
    when(path1.getName()).thenReturn("part-xxxxx");
    when(status1.getPath()).thenReturn(path1);
    FileStatus status2 = mock(FileStatus.class);
    Path path2 = mock(Path.class);
    when(path2.getName()).thenReturn("part-yyyyy");
    when(status2.getPath()).thenReturn(path2);
    FileSystem fs = mock(FileSystem.class);
    when(fs.listStatus(path)).thenReturn(new FileStatus[] { status1, status2 });
    when(path.getFileSystem(conf)).thenReturn(fs);
    when(conf.get(HIHOConf.EXTERNAL_TABLE_DML)).thenReturn(
            "create table age( i Number, n Varchar(20), a Number)organization external ( type oracle_loader default directory ext_dir access parameters (records delimited by newlinefields terminated by ','missing field values are null )location (/home/nube/:file.txt) reject' limit unlimited;");
    String dml = ExportToOracleDb.getAlterTableDML(path, conf);
    assertEquals(" ALTER TABLE age LOCATION ('part-xxxxx','part-yyyyy')", dml);
}
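The essence of the test above is that listStatus can be stubbed like any other FileSystem method, so layout-dependent logic is testable without a live HDFS. A minimal sketch of just that stubbing pattern (class name and path are illustrative, not from the source project):

import static org.mockito.Mockito.*;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusMockSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = mock(FileSystem.class);
        Path dir = new Path("/job/output"); // illustrative path
        FileStatus part = mock(FileStatus.class);
        when(part.getPath()).thenReturn(new Path(dir, "part-00000"));
        // Any code under test that lists this directory now sees one part file.
        when(fs.listStatus(dir)).thenReturn(new FileStatus[] { part });
        System.out.println(fs.listStatus(dir)[0].getPath().getName()); // part-00000
    }
}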
From source file:colossal.pipe.ColFile.java
License:Apache License
public boolean isObsolete(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        // this needs to be smart - we should encode in the file metadata the dependents and their dates used
        // so we can verify that any existing antecedent is not newer and declare victory...
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (!status.isDir()) {
                    if (format != Formats.AVRO_FORMAT || status.getPath().toString().endsWith(".avro")) {
                        return false; // may check for extension for other types
                    }
                } else {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        return false;
                    }
                }
            }
        }
        return true; // needs more work!
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
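The manual _logs/_temporary checks above can also be expressed with the listStatus(Path, PathFilter) overload, which applies the filter during listing. A short sketch under the same assumptions (the helper class and method names are illustrative):

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class FilteredListing {
    // Lists dir while skipping MapReduce bookkeeping entries, mirroring the checks above.
    static FileStatus[] listDataEntries(FileSystem fs, Path dir) throws IOException {
        PathFilter skipBookkeeping = new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String name = p.getName();
                return !name.equals("_logs") && !name.equals("_temporary");
            }
        };
        return fs.listStatus(dir, skipBookkeeping);
    }
}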
From source file:colossal.pipe.ColFile.java
License:Apache License
public void clearAndPrepareOutput(Configuration conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (status.isDir()) {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        throw new IllegalArgumentException(
                                "Trying to overwrite directory with child directories: " + path);
                    }
                }
            }
        } else {
            fs.mkdirs(dfsPath);
        }
        fs.delete(dfsPath, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
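Note the ordering above: the path is deleted unconditionally at the end, so after the call the output directory does not exist. That matches what FileOutputFormat expects, since it refuses to run if the output path already exists. A minimal sketch of the conventional "clobber output" helper (an assumption for comparison, not the library's or this project's API):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputDirs {
    // Remove any previous output so the job can recreate it.
    static void clearOutput(FileSystem fs, Path out) throws IOException {
        if (fs.exists(out)) {
            fs.delete(out, true); // recursive delete
        }
        // Deliberately no mkdirs(): FileOutputFormat fails if the path already exists.
    }
}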
From source file:ColumnStorage.ColumnProject.java
License:Open Source License
void loadColmnInfoFromHeadInfo(FileSystem fs, Path path) throws Exception {
    FileStatus[] status = fs.listStatus(path);
    if (status == null || status.length == 0) {
        return;
    }
    for (int i = 0; i < status.length; i++) {
        String fileName = status[i].getPath().toString();
        try {
            FormatDataFile fd = new FormatDataFile(conf);
            fd.open(fileName);
            ColumnInfo columnInfo = new ColumnInfo();
            columnInfo.idxs = fd.head.fieldMap().idxs();
            columnInfo.name = fileName;
            infos.add(columnInfo);
            fd.close();
        } catch (SEException.ErrorFileFormat e) {
            LOG.info("get error file format exception:" + e.getMessage() + ", file:" + fileName);
            continue;
        } catch (Exception e) {
            LOG.error("load column info fail:" + e.getMessage());
            throw e;
        }
    }
}
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
public boolean execute(Connection conn, OciTableRef table) {
    if (conn == null) {
        msg = "Connection object must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    Configuration conf = conn.getConf();
    if (table == null) {
        msg = "table must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    String tableName = table.getName();
    String column = table.getColumns();
    String seperator = table.getSeperator();
    String inputPath = table.getInputPath();
    String tmpOutPut = table.getImportTmpOutputPath();
    String skipBadLine = table.getSkipBadLine();
    String compressor = table.getCompressor();
    String rowkeyUnique = table.getRowKeyUnique();
    String algoColumn = table.getAlgoColumn();
    String rowkeyGenerator = table.getRowkeyGenerator();
    String rowkeyColumn = table.getRowkeyColumn();
    String callback = table.getCallback();
    if (StringUtils.isEmpty(tableName)) {
        msg = "No " + CommonConstants.TABLE_NAME
                + " specified. Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.TABLE_NAME, tableName);
    if (StringUtils.isEmpty(seperator)) {
        msg = "No " + CommonConstants.SEPARATOR
                + " specified. Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.SEPARATOR, seperator);
    // Make sure columns are specified
    String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ",");
    if (columns == null) {
        msg = "No " + CommonConstants.COLUMNS
                + " specified. Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.COLUMNS, column);
    // Disabled check that exactly one column is the row key:
    // int rowkeysFound = 0;
    // for (String col : columns) {
    //     if (col.equals(CommonConstants.ROW_KEY))
    //         rowkeysFound++;
    // }
    // if (rowkeysFound != 1) {
    //     msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY
    //             + ". Please check config, then again after refreshing cache";
    //     retMap.put(FAILED_REASON, msg);
    //     LOG.error(msg);
    //     throw new ConfigException(msg);
    // }
    if (columns.length < 2) {
        msg = "One or more columns in addition to the row key are required. "
                + "Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    // find the first column definition of the form family:qualifier
    String[] columnTmp = null;
    for (int i = 0; i < columns.length; i++) {
        columnTmp = columns[i].split(":");
        if (columnTmp != null && columnTmp.length == 2) {
            break;
        }
    }
    conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]);
    if (!StringUtils.isEmpty(skipBadLine)) {
        conf.set(CommonConstants.SKIPBADLINE, skipBadLine);
    }
    // compressor: fall back to the default when none is configured
    conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? DEFAULT_COMPRESSOR : compressor);
    conf.set(CommonConstants.ALGOCOLUMN, algoColumn);
    conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator);
    conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn);
    conf.set(CommonConstants.ROWKEYCALLBACK, callback);
    boolean ret = false;
    Counter failCounter = null;
    try {
        hbaseAdmin = new HBaseAdmin(conf);
        TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf);
        conf.set(CommonConstants.TABLE_NAME, tableName);
        String hdfs_url = conf.get(CommonConstants.HDFS_URL);
        FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
        FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
        if (fileStatusArr != null && fileStatusArr.length > 0) {
            if (fileStatusArr[0].isFile()) {
                ret = (Boolean) runJob(conf, tableName, inputPath, tmpOutPut)[0];
            }
            int inputPathNum = 0;
            for (FileStatus everyInputPath : fileStatusArr) {
                Path inputPathStr = everyInputPath.getPath();
                String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
                boolean retCode = (Boolean) runJob(conf, tableName, absoluteInputPathStr,
                        tmpOutPut + "/" + inputPathStr.getName())[0];
                if (retCode) {
                    // restore the raw separator (stored Base64-encoded in the configuration)
                    String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                    conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator)));
                    if (inputPathNum == fileStatusArr.length - 1) {
                        ret = true;
                    }
                    inputPathNum++;
                    continue;
                } else {
                    // one sub-path failed; stop processing the rest
                    ret = false;
                    inputPathNum++;
                    break;
                }
            }
        }
    } catch (Exception e) {
        msg = "job execute failed, nested exception is " + e;
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    if (!ret) {
        msg = "execute job failed, please check map/reduce log in jobtracker page";
        retMap.put(FAILED_REASON, msg);
        return false;
    }
    return true;
}
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    long inputLineNum = 0L;
    long badLineNum = 0L;
    long outputLineNum = 0L;
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    // Make sure columns are specified
    String columns = conf.get(CommonConstants.COLUMNS);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=...");
        System.exit(-1);
    }
    String seperator = conf.get(CommonConstants.SEPARATOR);
    if (StringUtils.isEmpty(seperator)) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
        seperator = CommonConstants.DEFAULT_SEPARATOR;
    }
    // Make sure one or more columns are specified
    if (columns.split(",").length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    // make sure tableName and columns are upper case, to be used by Phoenix
    columns = columns.toUpperCase();
    String notNeedLoadColumnsStr = conf.get(CommonConstants.NOTNEEDLOADCOLUMNS);
    String notNeedLoadColumns = null;
    if (!StringUtils.isEmpty(notNeedLoadColumnsStr)) {
        notNeedLoadColumns = notNeedLoadColumnsStr.toUpperCase();
        conf.set(CommonConstants.NOTNEEDLOADCOLUMNS, notNeedLoadColumns);
    }
    String writeTableConfigColumns = getWriteConfigColumn(columns, notNeedLoadColumns);
    hbaseAdmin = new HBaseAdmin(conf);
    String tableName = otherArgs[0].toUpperCase();
    String inputPath = otherArgs[1];
    String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT);
    conf.set(CommonConstants.TABLE_NAME, tableName);
    conf.set(CommonConstants.COLUMNS, columns);
    String pathStr = conf.get(CommonConstants.HDFS_URL) + inputPath;
    FileSystem fs = FileSystem.get(URI.create(conf.get(CommonConstants.HDFS_URL)), conf);
    FileStatus[] fileStatusArr = fs.listStatus(new Path(pathStr));
    if (fileStatusArr != null && fileStatusArr.length > 0) {
        TableConfiguration.getInstance().writeTableConfiguration(tableName, writeTableConfigColumns,
                seperator, conf);
        if (fileStatusArr[0].isFile()) {
            // input is a single file: run one job for it
            Object[] resObjs = runJob(conf, tableName, inputPath, tmpOutputPath);
            inputLineNum = (Long) resObjs[1];
            outputLineNum = (Long) resObjs[2];
            badLineNum = (Long) resObjs[3];
            LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum
                    + ",badLine:" + badLineNum + "}");
            boolean result = (Boolean) resObjs[0];
            if (result) {
                System.exit(0);
            }
            System.exit(-1);
        }
        for (FileStatus everyInputPath : fileStatusArr) {
            Path inputPathStr = everyInputPath.getPath();
            String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
            FileStatus[] subFileStatusArr = fs
                    .listStatus(new Path(conf.get(CommonConstants.HDFS_URL) + absoluteInputPathStr));
            // skip empty sub-directories: there is nothing for a job to load
            if (subFileStatusArr == null || subFileStatusArr.length == 0)
                continue;
            Object[] resObjs = runJob(conf, tableName, absoluteInputPathStr,
                    tmpOutputPath + "/" + inputPathStr.getName());
            boolean ret = (Boolean) resObjs[0];
            if (ret) {
                inputLineNum += (Long) resObjs[1];
                outputLineNum += (Long) resObjs[2];
                badLineNum += (Long) resObjs[3];
                // restore the raw separator (stored Base64-encoded in the configuration)
                String seperatorStr = conf.get(CommonConstants.SEPARATOR);
                conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(seperatorStr)));
                continue;
            } else {
                LOG.error("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum
                        + ",badLine:" + badLineNum + "}");
                System.exit(-1);
            }
        }
        LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum
                + ",badLine:" + badLineNum + "}");
    }
    LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum
            + ",badLine:" + badLineNum + "}");
    System.exit(0);
}
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
public boolean execute(Connection conn, OciTableRef table) {
    if (conn == null) {
        msg = "Connection object must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    Configuration conf = conn.getConf();
    if (table == null) {
        msg = "table must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    String tableName = table.getName();
    String column = table.getColumns();
    String seperator = table.getSeperator();
    String inputPath = table.getInputPath();
    String tmpOutPut = table.getImportTmpOutputPath();
    String skipBadLine = table.getSkipBadLine();
    String compressor = table.getCompressor();
    String rowkeyUnique = table.getRowKeyUnique();
    String algoColumn = table.getAlgoColumn();
    String rowkeyGenerator = table.getRowkeyGenerator();
    String rowkeyColumn = table.getRowkeyColumn();
    String callback = table.getCallback();
    if (StringUtils.isEmpty(tableName)) {
        msg = "No " + CommonConstants.TABLE_NAME
                + " specified. Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.TABLE_NAME, tableName);
    // if (StringUtils.isEmpty(seperator)) {
    //     msg = "No " + CommonConstants.SEPARATOR
    //             + " specified. Please check config, then try again after refreshing cache";
    //     retMap.put(FAILED_REASON, msg);
    //     LOG.error(msg);
    //     throw new ConfigException(msg);
    // }
    // conf.set(CommonConstants.SEPARATOR, seperator);
    if (StringUtils.isEmpty(seperator)) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
    }
    // Make sure columns are specified, split by ","
    String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ",");
    if (columns == null) {
        msg = "No " + CommonConstants.COLUMNS
                + " specified. Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.COLUMNS, column);
    if (StringUtils.isEmpty(rowkeyColumn) && StringUtils.isEmpty(algoColumn)) {
        msg = "No " + CommonConstants.ROW_KEY
                + " rule specified. Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.SEPARATOR, seperator);
    // Disabled check that exactly one column is the HBASE_ROW_KEY:
    // int rowkeysFound = 0;
    // for (String col : columns) {
    //     if (col.equals(CommonConstants.ROW_KEY))
    //         rowkeysFound++;
    // }
    // if (rowkeysFound != 1) {
    //     msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY
    //             + ". Please check config, then again after refreshing cache";
    //     retMap.put(FAILED_REASON, msg);
    //     LOG.error(msg);
    //     throw new ConfigException(msg);
    // }
    // besides the row key, at least one more column is required
    if (columns.length < 2) {
        msg = "One or more columns in addition to the row key are required. "
                + "Please check config, then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    // find the first column definition of the form family:qualifier
    String[] columnTmp = null;
    for (int i = 0; i < columns.length; i++) {
        columnTmp = columns[i].split(":");
        if (columnTmp != null && columnTmp.length == 2) {
            break;
        }
    }
    // use its family name as the single column family
    conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]);
    // whether to skip bad lines
    if (!StringUtils.isEmpty(skipBadLine)) {
        conf.set(CommonConstants.SKIPBADLINE, skipBadLine);
    }
    // compressor: fall back to the default when none is configured
    conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? DEFAULT_COMPRESSOR : compressor);
    conf.set(CommonConstants.ALGOCOLUMN, algoColumn);
    conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator);
    conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn);
    conf.set(CommonConstants.ROWKEYCALLBACK, callback);
    boolean ret = false;
    // Counter failCounter = null;
    try {
        hbaseAdmin = new HBaseAdmin(conf);
        TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf);
        // Job job = createSubmittableJob(conf, tableName, inputPath, tmpOutPut);
        // ret = job.waitForCompletion(true);
        // Counters counters = job.getCounters();
        // for (String groupName : counters.getGroupNames()) {
        //     failCounter = counters.findCounter(groupName, "NUM_FAILED_MAPS");
        //     if (failCounter != null) {
        //         break;
        //     }
        // }
        conf.set(CommonConstants.TABLE_NAME, tableName);
        String hdfs_url = conf.get(CommonConstants.HDFS_URL);
        FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
        FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
        if (fileStatusArr != null && fileStatusArr.length > 0) {
            if (fileStatusArr[0].isFile()) {
                ret = runJob(conf, tableName, inputPath, tmpOutPut);
            }
            int inputPathNum = 0;
            for (FileStatus everyInputPath : fileStatusArr) {
                Path inputPathStr = everyInputPath.getPath();
                String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
                boolean retCode = runJob(conf, tableName, absoluteInputPathStr,
                        tmpOutPut + "/" + inputPathStr.getName());
                if (retCode) {
                    // restore the raw separator (stored Base64-encoded in the configuration)
                    String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                    conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator)));
                    if (inputPathNum == fileStatusArr.length - 1) {
                        ret = true;
                    }
                    inputPathNum++;
                    continue;
                } else {
                    // one sub-path failed; stop processing the rest
                    ret = false;
                    inputPathNum++;
                    break;
                }
            }
        }
    } catch (Exception e) {
        msg = "job execute failed, nested exception is " + e;
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    boolean result = true;
    if (!ret) {
        msg = "execute job failed, please check map/reduce log in jobtracker page";
        retMap.put(FAILED_REASON, msg);
        result = false;
    }
    /* else {
        String[] params = new String[2];
        params[0] = tmpOutPut;
        params[1] = tableName;
        int retrunCode = -1;
        try {
            // bulkload complete
            retrunCode = ToolRunner.run(new LoadIncrementalHFiles(conf), params);
        } catch (Exception e) {
            msg = "job execute failed, nested exception is " + e;
            retMap.put(FAILED_REASON, msg);
            LOG.error(msg);
            throw new ClientRuntimeException(msg);
        }
        if (retrunCode != 0)
            result = false;
    } */
    return result;
}
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Map<String, String> map = getProperty();
    if (map == null || map.size() == 0) {
        System.err.println("Error: read conf file " + CONF_FILE + " occur error.");
        System.exit(0);
    }
    Configuration conf = Connection.getInstance().getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    // Make sure columns are specified
    String columns = conf.get(CommonConstants.COLUMNS);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=...");
        System.exit(-1);
    }
    String seperator = conf.get(CommonConstants.SEPARATOR);
    if (seperator == null) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
        seperator = CommonConstants.DEFAULT_SEPARATOR;
    }
    // Make sure one or more columns are specified
    if (columns.split(",").length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    // make sure tableName and columns are upper case, to be used by Phoenix
    columns = columns.toUpperCase();
    String tableName = otherArgs[0].toUpperCase();
    String inputPath = otherArgs[1];
    hbaseAdmin = new HBaseAdmin(conf);
    String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT);
    conf.set(CommonConstants.TABLE_NAME, tableName);
    conf.set(CommonConstants.COLUMNS, columns);
    String hdfs_url = conf.get(CommonConstants.HDFS_URL);
    FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
    FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
    if (fileStatusArr != null && fileStatusArr.length > 0) {
        TableConfiguration.getInstance().writeTableConfiguration(tableName, columns, seperator, conf);
        if (fileStatusArr[0].isFile()) {
            // input is a single file: run one job for it
            boolean result = runJob(conf, tableName, inputPath, tmpOutputPath);
            if (result) {
                System.exit(0);
            }
            System.exit(-1);
        }
        for (FileStatus everyInputPath : fileStatusArr) {
            Path inputPathStr = everyInputPath.getPath();
            String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
            FileStatus[] subFileStatusArr = fs.listStatus(new Path(hdfs_url + absoluteInputPathStr));
            // skip empty sub-directories: there is nothing for a job to load
            if (subFileStatusArr == null || subFileStatusArr.length == 0)
                continue;
            boolean ret = runJob(conf, tableName, absoluteInputPathStr,
                    tmpOutputPath + "/" + inputPathStr.getName());
            if (ret) {
                // restore the raw separator (stored Base64-encoded in the configuration)
                String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator)));
                continue;
            } else {
                System.exit(-1);
            }
        }
    }
    System.exit(0);
}
From source file:com.amintor.hdfs.client.kerberizedhdfsclient.KerberizedHDFSClient.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        Configuration conf = new Configuration();
        conf.addResource(new FileInputStream(HDFS_SITE_LOCATION));
        conf.addResource(new FileInputStream(CORE_SITE_LOCATION));
        String authType = conf.get("hadoop.security.authentication");
        System.out.println("Authentication Type:" + authType);
        if (authType.trim().equalsIgnoreCase("kerberos")) {
            // Login through UGI keytab
            UserGroupInformation.setConfiguration(conf);
            UserGroupInformation.loginUserFromKeytab("vijay", "/Users/vsingh/Software/vijay.keytab");
            FileSystem hdFS = FileSystem.get(conf);
            FileStatus[] listStatus = hdFS.listStatus(new Path(args[0]));
            for (FileStatus statusFile : listStatus) {
                System.out.print("Replication:" + statusFile.getReplication() + "\t");
                System.out.print("Owner:" + statusFile.getOwner() + "\t");
                System.out.print("Group:" + statusFile.getGroup() + "\t");
                System.out.println("Path:" + statusFile.getPath() + "\t");
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(KerberizedHDFSClient.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:com.architecting.ch07.MapReduceIndexerTool.java
License:Apache License
/** API for Java clients; visible for testing; may become a public API eventually */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }
    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;
    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);
    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.

    if (!waitForCompletion(job, true)) {
        return -1; // job failed
    }

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}
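When only files are needed, and recursively, FileSystem.listFiles(Path, boolean) returns a RemoteIterator of LocatedFileStatus and avoids materializing a whole FileStatus[] for large trees. A short sketch; the output path is an assumption for illustration:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class RecursiveListing {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // Illustrative output directory; listFiles(..., true) descends into sub-directories.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/job/output"), true);
        while (it.hasNext()) {
            LocatedFileStatus file = it.next();
            System.out.println(file.getPath() + " " + file.getLen());
        }
    }
}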