Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using the default path filter.
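
A minimal, self-contained sketch of calling this overload directly is shown below; the directory names and the default filesystem configuration are placeholder assumptions, not taken from any project on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumes fs.defaultFS is configured (e.g. via core-site.xml on the classpath).
        FileSystem fs = FileSystem.get(conf);

        // The Path[] overload lists each given path and concatenates the results
        // into a single FileStatus array, applying the default path filter.
        Path[] dirs = { new Path("/user/demo/input"), new Path("/user/demo/output") }; // placeholder paths
        FileStatus[] statuses = fs.listStatus(dirs);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + "\t" + status.getLen() + " bytes"
                    + (status.isDirectory() ? " (dir)" : ""));
        }
        fs.close();
    }
}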

Usage

From source file:co.nubetech.hiho.job.TestExportToOracleDb.java

License:Apache License

@Test
public void testAlterTableDMl() throws HIHOException, IOException {
    Configuration conf = mock(Configuration.class);
    Path path = mock(Path.class);
    FileStatus status1 = mock(FileStatus.class);
    Path path1 = mock(Path.class);
    when(path1.getName()).thenReturn("part-xxxxx");
    when(status1.getPath()).thenReturn(path1);
    FileStatus status2 = mock(FileStatus.class);
    Path path2 = mock(Path.class);
    when(path2.getName()).thenReturn("part-yyyyy");
    when(status2.getPath()).thenReturn(path2);
    FileSystem fs = mock(FileSystem.class);
    when(fs.listStatus(path)).thenReturn(new FileStatus[] { status1, status2 });
    when(path.getFileSystem(conf)).thenReturn(fs);
    when(conf.get(HIHOConf.EXTERNAL_TABLE_DML)).thenReturn(
            "create table age(  i   Number,  n   Varchar(20),  a   Number)organization external (  type  oracle_loader default directory ext_dir access parameters (records delimited  by newlinefields  terminated by ','missing field values are null )location  (/home/nube/:file.txt) reject' limit unlimited;");
    String dml = ExportToOracleDb.getAlterTableDML(path, conf);
    assertEquals(" ALTER TABLE age LOCATION ('part-xxxxx','part-yyyyy')", dml);
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public boolean isObsolete(Configuration conf) {
    Path dfsPath = new Path(path);
    try {
        FileSystem fs = dfsPath.getFileSystem(conf);
        // this needs to be smart - we should encode in the file metadata the dependents and their dates used
        // so we can verify that any existing antecedent is not newer and declare victory...
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (!status.isDir()) {
                    if (format != Formats.AVRO_FORMAT || status.getPath().toString().endsWith(".avro")) {
                        return false; // may check for extension for other types
                    }
                } else {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        return false;
                    }
                }
            }
        }
        return true; // needs more work!
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public void clearAndPrepareOutput(Configuration conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        if (fs.exists(dfsPath)) {
            FileStatus[] statuses = fs.listStatus(dfsPath);
            for (FileStatus status : statuses) {
                if (status.isDir()) {
                    if (!status.getPath().toString().endsWith("/_logs")
                            && !status.getPath().toString().endsWith("/_temporary")) {
                        throw new IllegalArgumentException(
                                "Trying to overwrite directory with child directories: " + path);
                    }
                }
            }
        } else {
            fs.mkdirs(dfsPath);
        }
        fs.delete(dfsPath, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:ColumnStorage.ColumnProject.java

License:Open Source License

void loadColmnInfoFromHeadInfo(FileSystem fs, Path path) throws Exception {
    FileStatus[] status = fs.listStatus(path);
    if (status == null || status.length == 0) {
        return;
    }

    for (int i = 0; i < status.length; i++) {
        String fileName = status[i].getPath().toString();
        try {
            FormatDataFile fd = new FormatDataFile(conf);
            fd.open(fileName);

            ColumnInfo columnInfo = new ColumnInfo();
            columnInfo.idxs = fd.head.fieldMap().idxs();
            columnInfo.name = fileName;

            infos.add(columnInfo);

            fd.close();
        } catch (SEException.ErrorFileFormat e) {
            LOG.info("get error file format exception:" + e.getMessage() + ", file:" + fileName);
            continue;
        } catch (Exception e) {
            LOG.error("load column info fail:" + e.getMessage());
            throw e;
        }
    }

}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java

License:Apache License

public boolean execute(Connection conn, OciTableRef table) {
    if (conn == null) {
        msg = "Connection object must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    Configuration conf = conn.getConf();
    if (table == null) {
        msg = "table must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    String tableName = table.getName();
    String column = table.getColumns();
    String seperator = table.getSeperator();
    String inputPath = table.getInputPath();
    String tmpOutPut = table.getImportTmpOutputPath();
    String skipBadLine = table.getSkipBadLine();
    String compressor = table.getCompressor();
    String rowkeyUnique = table.getRowKeyUnique();
    String algoColumn = table.getAlgoColumn();
    String rowkeyGenerator = table.getRowkeyGenerator();
    String rowkeyColumn = table.getRowkeyColumn();
    String callback = table.getCallback();

    if (StringUtils.isEmpty(tableName)) {
        msg = "No " + CommonConstants.TABLE_NAME
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.TABLE_NAME, tableName);

    if (StringUtils.isEmpty(seperator)) {
        msg = "No " + CommonConstants.SEPARATOR
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.SEPARATOR, seperator);

    // Make sure columns are specified
    String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ",");
    if (columns == null) {
        msg = "No " + CommonConstants.COLUMNS
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.COLUMNS, column);

    //      int rowkeysFound = 0;
    //      for (String col : columns) {
    //         if (col.equals(CommonConstants.ROW_KEY))
    //            rowkeysFound++;
    //      }
    //      if (rowkeysFound != 1) {
    //         msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY + ". Please check config,then again after refreshing cache";
    //         retMap.put(FAILED_REASON, msg);
    //         LOG.error(msg);
    //         throw new ConfigException(msg);
    //      }

    if (columns.length < 2) {
        msg = "One or more columns in addition to the row key are required. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }

    String[] columnTmp = null;
    for (int i = 0; i < columns.length; i++) {
        columnTmp = columns[i].split(":");
        if (columnTmp != null && columnTmp.length == 2) {
            break;
        }
    }
    conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]);
    if (!StringUtils.isEmpty(skipBadLine)) {
        conf.set(CommonConstants.SKIPBADLINE, skipBadLine);
    }
    // use the default compressor when none is configured
    conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? DEFAULT_COMPRESSOR : compressor);
    conf.set(CommonConstants.ALGOCOLUMN, algoColumn);
    conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator);
    conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn);
    conf.set(CommonConstants.ROWKEYCALLBACK, callback);

    boolean ret = false;
    Counter failCounter = null;
    try {
        hbaseAdmin = new HBaseAdmin(conf);
        TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf);
        conf.set(CommonConstants.TABLE_NAME, tableName);
        String hdfs_url = conf.get(CommonConstants.HDFS_URL);
        FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
        FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
        if (fileStatusArr != null && fileStatusArr.length > 0) {
            if (fileStatusArr[0].isFile()) {
                ret = (Boolean) runJob(conf, tableName, inputPath, tmpOutPut)[0];
            }
            int inputPathNum = 0;
            for (FileStatus everyInputPath : fileStatusArr) {
                Path inputPathStr = everyInputPath.getPath();
                String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
                boolean retCode = (Boolean) runJob(conf, tableName, absoluteInputPathStr,
                        tmpOutPut + "/" + inputPathStr.getName())[0];
                if (retCode) {
                    String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                    conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); // decode the Base64-encoded separator back to its original value
                    if (inputPathNum == fileStatusArr.length - 1) {
                        ret = true;
                    }
                    inputPathNum++;
                    continue;
                } else {
                    ret = false;
                    inputPathNum++;
                    break;
                }
            }
        }
    } catch (Exception e) {
        msg = "job execute failed,nested exception is " + e;
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    if (!ret) {
        msg = "execute job failed,please check map/reduce log in jobtracker page";
        retMap.put(FAILED_REASON, msg);
        return false;
    }
    return true;
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java

License:Apache License

/**
 * Main entry point.
 *
 * @param args  The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    long inputLineNum = 0L;
    long badLineNum = 0L;
    long outputLineNum = 0L;
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    // Make sure columns are specified
    String columns = conf.get(CommonConstants.COLUMNS);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=...");
        System.exit(-1);
    }
    String seperator = conf.get(CommonConstants.SEPARATOR);
    if (StringUtils.isEmpty(seperator)) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
        seperator = CommonConstants.DEFAULT_SEPARATOR;
    }
    // Make sure at least one column in addition to the row key is specified
    if (columns.split(",").length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    // Make sure tableName and columns are upper-case so they can be used by Phoenix.
    columns = columns.toUpperCase();
    String notNeedLoadColumnsStr = conf.get(CommonConstants.NOTNEEDLOADCOLUMNS);
    String notNeedLoadColumns = null;
    if (!StringUtils.isEmpty(notNeedLoadColumnsStr)) {
        notNeedLoadColumns = notNeedLoadColumnsStr.toUpperCase();
        conf.set(CommonConstants.NOTNEEDLOADCOLUMNS, notNeedLoadColumns);
    }

    String writeTableConfigColumns = getWriteConfigColumn(columns, notNeedLoadColumns);
    hbaseAdmin = new HBaseAdmin(conf);
    String tableName = otherArgs[0].toUpperCase();
    String inputPath = otherArgs[1];
    String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT);
    conf.set(CommonConstants.TABLE_NAME, tableName);
    conf.set(CommonConstants.COLUMNS, columns);
    String pathStr = conf.get(CommonConstants.HDFS_URL) + inputPath;
    FileSystem fs = FileSystem.get(URI.create(conf.get(CommonConstants.HDFS_URL)), conf);
    FileStatus[] fileStatusArr = fs.listStatus(new Path(pathStr));
    if (fileStatusArr != null && fileStatusArr.length > 0) {
        TableConfiguration.getInstance().writeTableConfiguration(tableName, writeTableConfigColumns, seperator,
                conf);
        if (fileStatusArr[0].isFile()) {
            Object[] resObjs = runJob(conf, tableName, inputPath, tmpOutputPath);
            inputLineNum = (Long) resObjs[1];
            outputLineNum = (Long) resObjs[2];
            badLineNum = (Long) resObjs[3];
            LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:"
                    + badLineNum + "}");
            boolean result = (Boolean) resObjs[0];
            if (result) {
                System.exit(0);
            }
            System.exit(-1);
        }
        for (FileStatus everyInputPath : fileStatusArr) {
            Path inputPathStr = everyInputPath.getPath();
            String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
            FileStatus[] subFileStatusArr = fs
                    .listStatus(new Path(conf.get(CommonConstants.HDFS_URL) + absoluteInputPathStr));
            if (subFileStatusArr == null || subFileStatusArr.length == 0) // empty directory: skip the job
                continue;
            Object[] resObjs = runJob(conf, tableName, absoluteInputPathStr,
                    tmpOutputPath + "/" + inputPathStr.getName());
            boolean ret = (Boolean) resObjs[0];
            if (ret) {
                inputLineNum += (Long) resObjs[1];
                outputLineNum += (Long) resObjs[2];
                badLineNum += (Long) resObjs[3];
                String seperatorStr = conf.get(CommonConstants.SEPARATOR);
                conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(seperatorStr))); // decode the Base64-encoded separator back to its original value
                continue;
            } else {
                LOG.error("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum
                        + ",badLine:" + badLineNum + "}");
                System.exit(-1);
            }
        }
        LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:"
                + badLineNum + "}");
    }
    LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:"
            + badLineNum + "}");
    System.exit(0);
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

public boolean execute(Connection conn, OciTableRef table) {
    if (conn == null) {
        msg = "Connection object must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    Configuration conf = conn.getConf();
    if (table == null) {
        msg = "table must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    String tableName = table.getName();
    String column = table.getColumns();
    String seperator = table.getSeperator();
    String inputPath = table.getInputPath();
    String tmpOutPut = table.getImportTmpOutputPath();
    String skipBadLine = table.getSkipBadLine();
    String compressor = table.getCompressor();
    String rowkeyUnique = table.getRowKeyUnique();
    String algoColumn = table.getAlgoColumn();
    String rowkeyGenerator = table.getRowkeyGenerator();
    String rowkeyColumn = table.getRowkeyColumn();
    String callback = table.getCallback();

    if (StringUtils.isEmpty(tableName)) {
        msg = "No " + CommonConstants.TABLE_NAME
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.TABLE_NAME, tableName);

    //      if(StringUtils.isEmpty(seperator)){
    //         msg = "No " + CommonConstants.SEPARATOR + " specified. Please check config,then try again after refreshing cache";
    //         retMap.put(FAILED_REASON, msg);
    //         LOG.error(msg);
    //         throw new ConfigException(msg);
    //      }
    //      conf.set(CommonConstants.SEPARATOR, seperator);

    if (StringUtils.isEmpty(seperator)) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
    }

    // Make sure columns are specified, separated by ","
    String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ",");
    if (columns == null) {
        msg = "No " + CommonConstants.COLUMNS
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.COLUMNS, column);

    if (StringUtils.isEmpty(rowkeyColumn) && StringUtils.isEmpty(algoColumn)) {
        msg = "No " + CommonConstants.ROW_KEY
                + " rule specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.SEPARATOR, seperator);

    //      int rowkeysFound = 0;
    //      for (String col : columns) {
    //         if (col.equals(CommonConstants.ROW_KEY))
    //            rowkeysFound++;
    //      }
    //      if (rowkeysFound != 1) {
    //         msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY + ". Please check config,then again after refreshing cache";
    //         retMap.put(FAILED_REASON, msg);
    //         LOG.error(msg);
    //         throw new ConfigException(msg);
    //      }

    // at least one column in addition to HBASE_ROW_KEY is required
    if (columns.length < 2) {
        msg = "One or more columns in addition to the row key are required. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }

    //":"
    String[] columnTmp = null;
    for (int i = 0; i < columns.length; i++) {
        columnTmp = columns[i].split(":");
        if (columnTmp != null && columnTmp.length == 2) {
            break;
        }
    }

    // use that column's family as the single column family
    conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]);

    // pass the skip-bad-line flag through if it is set
    if (!StringUtils.isEmpty(skipBadLine)) {
        conf.set(CommonConstants.SKIPBADLINE, skipBadLine);
    }
    // use the default compressor when none is configured
    conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? DEFAULT_COMPRESSOR : compressor);
    conf.set(CommonConstants.ALGOCOLUMN, algoColumn);
    conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator);
    conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn);
    conf.set(CommonConstants.ROWKEYCALLBACK, callback);

    boolean ret = false;
    //      Counter failCounter = null;
    try {
        hbaseAdmin = new HBaseAdmin(conf);
        TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf);
        //         Job job = createSubmittableJob(conf, tableName, inputPath, tmpOutPut);
        //         //job
        //         ret = job.waitForCompletion(true);
        //         Counters counters = job.getCounters();
        //         for (String groupName : counters.getGroupNames()) {
        //            failCounter = counters.findCounter(groupName, "NUM_FAILED_MAPS");
        //            if(failCounter != null){
        //               break;
        //            }
        //         }
        conf.set(CommonConstants.TABLE_NAME, tableName);
        String hdfs_url = conf.get(CommonConstants.HDFS_URL);
        FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
        FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
        if (fileStatusArr != null && fileStatusArr.length > 0) {
            if (fileStatusArr[0].isFile()) {
                ret = runJob(conf, tableName, inputPath, tmpOutPut);
            }
            int inputPathNum = 0;
            for (FileStatus everyInputPath : fileStatusArr) {
                Path inputPathStr = everyInputPath.getPath();
                String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
                boolean retCode = runJob(conf, tableName, absoluteInputPathStr,
                        tmpOutPut + "/" + inputPathStr.getName());
                if (retCode) {
                    String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                    conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); // decode the Base64-encoded separator back to its original value
                    if (inputPathNum == fileStatusArr.length - 1) {
                        ret = true;
                    }
                    inputPathNum++;
                    continue;
                } else {
                    ret = false;
                    inputPathNum++;
                    break;
                }
            }
        }

    } catch (Exception e) {
        msg = "job execute failed,nested exception is " + e;
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    boolean result = true;
    if (!ret) {
        msg = "execute job failed,please check map/reduce log in jobtracker page";
        retMap.put(FAILED_REASON, msg);
        result = false;
    }
    /*
    else {
     String[] params = new String[2];
     params[0] = tmpOutPut;
     params[1] = tableName;
     int retrunCode = -1;
     try {
    //bulkload complete
    retrunCode = ToolRunner.run(new LoadIncrementalHFiles(conf),
          params);
     } catch (Exception e) {
    msg = "job execute failed,nested exception is " + e;
    retMap.put(FAILED_REASON, msg);
    LOG.error(msg);
    throw new ClientRuntimeException(msg);
     }
     if(retrunCode != 0) result = false;
    }
    */
    return result;
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Main entry point.
 *
 * @param args  The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Map<String, String> map = getProperty();
    if (map == null || map.size() == 0) {
        System.err.println("Error: read conf file " + CONF_FILE + " occur error.");
        System.exit(0);
    }
    Configuration conf = Connection.getInstance().getConf();

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    // Make sure columns are specified
    String columns = conf.get(CommonConstants.COLUMNS);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=...");
        System.exit(-1);
    }
    String seperator = conf.get(CommonConstants.SEPARATOR);
    if (seperator == null) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
        seperator = CommonConstants.DEFAULT_SEPARATOR;
    }
    // Make sure at least one column in addition to the row key is specified
    if (columns.split(",").length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    // Make sure tableName and columns are upper-case so they can be used by Phoenix.
    columns = columns.toUpperCase();
    String tableName = otherArgs[0].toUpperCase();
    String inputPath = otherArgs[1];

    hbaseAdmin = new HBaseAdmin(conf);
    String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT);
    conf.set(CommonConstants.TABLE_NAME, tableName);
    conf.set(CommonConstants.COLUMNS, columns);
    String hdfs_url = conf.get(CommonConstants.HDFS_URL);
    FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
    FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
    if (fileStatusArr != null && fileStatusArr.length > 0) {
        TableConfiguration.getInstance().writeTableConfiguration(tableName, columns, seperator, conf);
        if (fileStatusArr[0].isFile()) { // input path is a single file: run one job
            boolean result = runJob(conf, tableName, inputPath, tmpOutputPath);
            if (result) {
                System.exit(0);
            }
            System.exit(-1);
        }
        for (FileStatus everyInputPath : fileStatusArr) { // input path is a directory: run one job per entry
            Path inputPathStr = everyInputPath.getPath();
            String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
            FileStatus[] subFileStatusArr = fs.listStatus(new Path(hdfs_url + absoluteInputPathStr));
            if (subFileStatusArr == null || subFileStatusArr.length == 0) // empty directory: skip the job
                continue;
            boolean ret = runJob(conf, tableName, absoluteInputPathStr,
                    tmpOutputPath + "/" + inputPathStr.getName());
            if (ret) {
                String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); // decode the Base64-encoded separator back to its original value
                continue;
            } else
                System.exit(-1);

        }
    }
    System.exit(0);
}

From source file:com.amintor.hdfs.client.kerberizedhdfsclient.KerberizedHDFSClient.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {

    try {
        Configuration conf = new Configuration();
        conf.addResource(new FileInputStream(HDFS_SITE_LOCATION));
        conf.addResource(new FileInputStream(CORE_SITE_LOCATION));
        String authType = conf.get("hadoop.security.authentication");
        System.out.println("Authentication Type:" + authType);
        if (authType.trim().equalsIgnoreCase("kerberos")) {
            // Login through UGI keytab
            UserGroupInformation.setConfiguration(conf);
            UserGroupInformation.loginUserFromKeytab("vijay", "/Users/vsingh/Software/vijay.keytab");
            FileSystem hdFS = FileSystem.get(conf);
            FileStatus[] listStatus = hdFS.listStatus(new Path(args[0]));
            for (FileStatus statusFile : listStatus) {
                System.out.print("Replication:" + statusFile.getReplication() + "\t");
                System.out.print("Owner:" + statusFile.getOwner() + "\t");
                System.out.print("Group:" + statusFile.getGroup() + "\t");
                System.out.println("Path:" + statusFile.getPath() + "\t");
            }

        }
    } catch (IOException ex) {
        Logger.getLogger(KerberizedHDFSClient.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.architecting.ch07.MapReduceIndexerTool.java

License:Apache License

/** API for Java clients; visible for testing; may become a public API eventually */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }

    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure 
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;

    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.
    if (!waitForCompletion(job, true)) {
        return -1;// job failed
    }

    // -------------------------------------------------------------------------------------------------------------------------------------

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}