Example usage for org.apache.hadoop.fs FileSystem deleteOnExit

List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit

Introduction

This page lists example usages of org.apache.hadoop.fs.FileSystem.deleteOnExit collected from open-source projects.

Prototype

public boolean deleteOnExit(Path f) throws IOException

Document

deleteOnExit adds a path to a cache of files that are deleted when the FileSystem is closed or the JVM exits.
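
Before the project examples below, here is a minimal, self-contained sketch of the typical pattern: register a temporary path with deleteOnExit so that the FileSystem removes it when it is closed or when the JVM shuts down. The class name, scratch path, and default Configuration are illustrative assumptions, not taken from any of the projects on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Default file system from the configuration (local unless fs.defaultFS points at HDFS)
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical scratch directory, used only for this illustration
        Path tempDir = new Path("/tmp/deleteOnExit-demo-" + System.currentTimeMillis());
        fs.mkdirs(tempDir);

        // Mark the path for deferred cleanup: it is removed when fs.close() runs
        // (or at JVM shutdown), not at the moment of this call
        fs.deleteOnExit(tempDir);

        // ... write intermediate data under tempDir here ...

        fs.close(); // tempDir is deleted as part of closing the FileSystem
    }
}

Note that deleteOnExit only defers deletion; code that needs an immediate, checked removal calls fs.delete(path, true) directly, as several of the examples below do alongside deleteOnExit.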

Usage

From source file: edu.umn.cs.spatialHadoop.operations.DistributedJoin.java

License: Open Source License

/**
 * Repartition a file to match the partitioning of the other file.
 * @param files Input files to partition
 * @param file_to_repartition index in {@code files} of the file to repartition
 * @param params operation parameters
 * @throws IOException
 * @throws InterruptedException
 */
protected static void repartitionStep(final Path[] files, int file_to_repartition, OperationsParams params)
        throws IOException, InterruptedException {

    // Do the repartition step
    long t1 = System.currentTimeMillis();

    // Repartition the smaller file
    Path partitioned_file;
    FileSystem fs = files[file_to_repartition].getFileSystem(params);
    do {
        partitioned_file = new Path(
                files[file_to_repartition].getName() + ".repartitioned_" + (int) (Math.random() * 1000000));
    } while (fs.exists(partitioned_file));

    // Get the cells to use for repartitioning
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, files[1 - file_to_repartition]);
    CellInfo[] cells = SpatialSite.cellsOf(fs, files[1 - file_to_repartition]);

    // Repartition the file to match the other file
    boolean isReplicated = gindex.isReplicated();
    boolean isCompact = gindex.isCompact();
    String sindex;
    if (isReplicated && !isCompact)
        sindex = "grid";
    else if (isReplicated && isCompact)
        sindex = "r+tree";
    else if (!isReplicated && isCompact)
        sindex = "rtree";
    else
        throw new RuntimeException("Unknown index at: " + files[1 - file_to_repartition]);

    params.set("sindex", sindex);

    if (isGeneralRepartitionMode) {
        // Repartition the smaller file with heuristics cells info (general
        // indexing)
        Repartition.repartitionMapReduce(files[file_to_repartition], partitioned_file, null, params);
    } else {
        // Repartition the smaller file on the larger file (specific
        // indexing)
        Repartition.repartitionMapReduce(files[file_to_repartition], partitioned_file, cells, params);
    }

    long t2 = System.currentTimeMillis();
    System.out.println("Repartition time " + (t2 - t1) + " millis");

    // Continue with the join step
    if (fs.exists(partitioned_file)) {
        // An output file might not exist if the two files are disjoint

        // Replace the smaller file with its repartitioned copy
        files[file_to_repartition] = partitioned_file;

        // Delete temporary repartitioned file upon exit
        fs.deleteOnExit(partitioned_file);
    }
}

From source file: eu.edisonproject.classification.tfidf.mapreduce.TermWordFrequency.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    Job job = Job.getInstance(jobconf);
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);
    Path dictionary = new Path(args[0]);
    Path dictionaryHdfs = dictionary;

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = localDocs;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        dictionaryHdfs = new Path(dictionary.getName());
        if (!fs.exists(dictionaryHdfs)) {
            fs.copyFromLocalFile(dictionary, dictionaryHdfs);
        }
        hdfsDocs = new Path(localDocs.getName());
        fs.mkdirs(hdfsDocs);
        fs.deleteOnExit(hdfsDocs);

        File[] stats = new File(localDocs.toString()).listFiles();

        for (File stat : stats) {
            Path filePath = new Path(stat.getAbsolutePath());
            if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
                Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
                fs.copyFromLocalFile(filePath, dest);
            }
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, dictionaryHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //        job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dictionaryHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);

    job.setMapperClass(TermWordFrequencyMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);

}

From source file: eu.edisonproject.classification.tfidf.mapreduce.WordFrequencyInDocDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    //        itemset = new LinkedList<String>();
    //        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(args[2])));
    //        String line;
    //        while ((line = br.readLine()) != null) {
    //            String[] components = line.split("/");
    //            itemset.add(components[0]);
    //        }
    Configuration conf = getConf();

    Job job = Job.getInstance(conf);
    job.setJarByClass(WordFrequencyInDocDriver.class);
    job.setJobName("Word Frequency In Doc Driver");

    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true);
    Path in = new Path(args[0]);
    Path inHdfs = in;

    Path dictionaryLocal = new Path(args[2]);
    Path dictionaryHDFS = dictionaryLocal;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);

        dictionaryHDFS = new Path(dictionaryLocal.getName());
        if (!fs.exists(dictionaryHDFS)) {
            fs.copyFromLocalFile(dictionaryLocal, dictionaryHDFS);
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus dictionaryStatus = fs.getFileStatus(dictionaryHDFS);
    dictionaryHDFS = dictionaryStatus.getPath();
    job.addCacheFile(dictionaryHDFS.toUri());

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(WordFrequencyInDocMapper.class);
    AvroJob.setInputKeySchema(job, Document.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(WordFrequencyInDocReducer.class);
    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file: eu.edisonproject.training.tfidf.mapreduce.TermWordFrequency.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();

    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);
    Path in = new Path(args[0]);
    Path inHdfs = in;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);
        FileStatus inHdfsStatus = fs.getFileStatus(inHdfs);
        //            Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Copied: {0} to: {1}", new Object[]{in.toUri(), inHdfsStatus.getPath().toUri()});
    }

    Job job = Job.getInstance(jobconf);
    Path stopwordsLocal = new Path(args[3]);
    stopwords = new Path(stopwordsLocal.getName());
    fs.delete(stopwords, true);
    fs.copyFromLocalFile(stopwordsLocal, stopwords);
    fs.deleteOnExit(stopwords);

    FileStatus stopwordsStatus = fs.getFileStatus(stopwords);
    stopwords = stopwordsStatus.getPath();
    job.addCacheFile(stopwords.toUri());

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = new Path(localDocs.getName());
    fs.mkdirs(hdfsDocs);
    hdfsDocs = fs.getFileStatus(hdfsDocs).getPath();
    fs.delete(hdfsDocs, true);
    //        FileStatus[] stats = fs.listStatus(localDocs);
    File[] stats = new File(localDocs.toString()).listFiles();

    for (File stat : stats) {
        //        for (FileStatus stat : stats) {
        Path filePath = new Path(stat.getAbsolutePath());
        if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
            Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
            fs.copyFromLocalFile(filePath, dest);
        }
    }

    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //         job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, inHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);
    Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Num. of lines: {0}",
            NLineInputFormat.getNumLinesPerSplit(job));

    job.setMapperClass(TermWordFrequencyMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);

}

From source file: hydrograph.server.debug.lingual.LingualSchemaCreatorTest.java

License: Apache License

@AfterClass
public static void cleanUp() {
    System.gc();
    Configuration configuration = new Configuration();
    FileSystem fileSystem = null;

    try {
        fileSystem = FileSystem.get(configuration);
        Path deletingFilePath = new Path("testData/MetaData/");
        if (!fileSystem.exists(deletingFilePath)) {
            throw new PathNotFoundException(deletingFilePath.toString());
        } else {

            boolean isDeleted = fileSystem.delete(deletingFilePath, true);
            if (isDeleted) {
                fileSystem.deleteOnExit(deletingFilePath);
            }
        }
        fileSystem.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file: hydrograph.server.service.HydrographService.java

License: Apache License

private void start() {
    int portNumber = Constants.DEFAULT_PORT_NUMBER;
    try {
        portNumber = Integer
                .parseInt(ServiceUtilities.getServiceConfigResourceBundle().getString(Constants.PORT_ID));
        LOG.debug("Port number '" + portNumber + "' fetched from properties file");
    } catch (Exception e) {
        LOG.error("Error fetching port number. Defaulting to " + Constants.DEFAULT_PORT_NUMBER, e);
    }

    /**
     * Setting Port number to the server
     */
    Spark.port(portNumber);

    /**
     * Creating Websocket on Server for Execution tracking service.
     */
    webSocket("/executionTracking", ExecutionTrackingWebsocketHandler.class);

    Spark.post("/getConnectionStatus", new Route() {
        @SuppressWarnings({ "unchecked", "rawtypes", "unused" })
        @Override
        public Object handle(Request request, Response response)
                throws InstantiationException, IllegalAccessException, ClassNotFoundException, JSONException {
            LOG.info("************************getConnectionStatus endpoint - started************************");
            LOG.info("+++ Start: " + new Timestamp((new Date()).getTime()));
            ObjectMapper objectMapper = new ObjectMapper();
            String requestParameters = request.queryParams(Constants.REQUEST_PARAMETERS), dbClassName = null,
                    objectAsString = null;
            JSONObject requestParameterValues = new JSONObject(requestParameters);
            Map metadataProperties = extractingJsonObjects(requestParameterValues);
            String dbTypeToTest = metadataProperties
                    .getOrDefault(Constants.dbType,
                            new ParamsCannotBeNullOrEmpty(Constants.dbType + " Cannot be null or empty"))
                    .toString();
            switch (dbTypeToTest.toLowerCase()) {
            case Constants.ORACLE:
                try {
                    if (ServiceUtilities.getConnectionStatus(metadataProperties,
                            Constants.ORACLE_JDBC_CLASSNAME, Constants.QUERY_TO_TEST)) {
                        LOG.trace("Connection Successful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To Oracle database is Successful");
                    } else {
                        LOG.trace("Connection UnSuccessful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To Oracle database UnSuccessful");
                    }
                } catch (Exception e) {
                    LOG.error("Connection fails with exception : " + e);
                    objectAsString = e.getLocalizedMessage();
                }
                break;
            case Constants.MYSQL:
                try {
                    if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.MYSQL_JDBC_CLASSNAME,
                            Constants.QUERY_TO_TEST)) {
                        LOG.trace("Connection Successful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To MySQL database is Successful");
                    } else {
                        LOG.trace("Connection UnSuccessful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To MySQL database UnSuccessful");
                    }
                } catch (Exception e) {
                    LOG.error("Connection fails with exception : " + e);
                    objectAsString = e.getLocalizedMessage();
                }
                break;

            case Constants.REDSHIFT:
                try {
                    if (ServiceUtilities.getConnectionStatus(metadataProperties,
                            Constants.REDSHIFT_JDBC_CLASSNAME, Constants.DEFAULT_REDRESHIFT_QUERY_TO_TEST)) {
                        LOG.trace("Connection Successful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To Redshift database is Successful");
                    } else {
                        LOG.trace("Connection UnSuccessful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To Redshift database UnSuccessful");
                    }
                } catch (Exception e) {
                    LOG.error("Connection fails with exception : " + e);
                    objectAsString = e.getLocalizedMessage();
                }
                break;
            case Constants.TERADATA:
                try {
                    if (ServiceUtilities.getConnectionStatus(metadataProperties,
                            Constants.TERADATA_JDBC_CLASSNAME, Constants.QUERY_TO_TEST_TERADATA)) {
                        LOG.trace("Connection Successful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To Teradata database is Successful");
                    } else {
                        LOG.trace("Connection UnSuccessful");
                        objectAsString = objectMapper
                                .writeValueAsString("Connection To Teradata database UnSuccessful");
                    }
                } catch (Exception e) {
                    LOG.error("Connection fails with exception : " + e);
                    objectAsString = e.getLocalizedMessage();
                }
                break;
            }
            return objectAsString;
        }

        @SuppressWarnings({ "unchecked", "rawtypes" })
        private Map extractingJsonObjects(JSONObject requestParameterValues) throws JSONException {

            String dbType = null, userId = null, password = null, host = null, port = null, sid = null,
                    driverType = null, query = null, tableName = null, database = null;
            Map metadataProperties = new HashMap();
            if (!requestParameterValues.isNull(Constants.dbType)) {
                dbType = requestParameterValues.getString(Constants.dbType);
                metadataProperties.put(Constants.dbType, dbType);
            }
            if (!requestParameterValues.isNull(Constants.USERNAME)) {
                userId = requestParameterValues.getString(Constants.USERNAME);
                metadataProperties.put(Constants.USERNAME, userId);
            }
            if (!requestParameterValues.isNull(Constants.SERVICE_PWD)) {
                password = requestParameterValues.getString(Constants.SERVICE_PWD);
                metadataProperties.put(Constants.SERVICE_PWD, password);
            }
            if (!requestParameterValues.isNull(Constants.HOST_NAME)) {
                host = requestParameterValues.getString(Constants.HOST_NAME);
                metadataProperties.put(Constants.HOST_NAME, host);
            }
            if (!requestParameterValues.isNull(Constants.PORT_NUMBER)) {
                port = requestParameterValues.getString(Constants.PORT_NUMBER);
                metadataProperties.put(Constants.PORT_NUMBER, port);
            } else {
                if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("mysql")) {
                    port = Constants.MYSQL_DEFAULT_PORT;
                    metadataProperties.put(Constants.PORT_NUMBER, port);

                } else if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("oracle")) {
                    port = Constants.ORACLE_DEFAULT_PORT;
                    metadataProperties.put(Constants.PORT_NUMBER, port);
                }
                LOG.info("Connecting " + dbType + " port is not provided using default port : " + port);
            }
            if (!requestParameterValues.isNull(Constants.SID)) {
                sid = requestParameterValues.getString(Constants.SID);
                metadataProperties.put(Constants.SID, sid);
            }
            if (!requestParameterValues.isNull(Constants.DRIVER_TYPE)) {
                driverType = requestParameterValues.getString(Constants.DRIVER_TYPE);
                metadataProperties.put(Constants.DRIVER_TYPE, driverType);
            }
            if (!requestParameterValues.isNull(Constants.QUERY)) {
                query = requestParameterValues.getString(Constants.QUERY);
                metadataProperties.put(Constants.QUERY, query);
            }
            if (!requestParameterValues.isNull(Constants.TABLENAME)) {
                tableName = requestParameterValues.getString(Constants.TABLENAME);
                metadataProperties.put(Constants.TABLENAME, tableName);
            }
            if (!requestParameterValues.isNull(Constants.DATABASE_NAME)) {
                database = requestParameterValues.getString(Constants.DATABASE_NAME);
                metadataProperties.put(Constants.DATABASE_NAME, database);
            }

            LOG.info("Fetched request parameters are: " + Constants.dbType + " => " + dbType + " "
                    + Constants.USERNAME + " => " + userId + " " + Constants.HOST_NAME + " => " + host + " "
                    + Constants.PORT_NUMBER + " => " + port + " " + Constants.SID + " => " + sid + " "
                    + Constants.DRIVER_TYPE + " => " + driverType + " " + Constants.QUERY + " => " + query + " "
                    + Constants.TABLENAME + " => " + tableName + " " + Constants.DATABASE_NAME + " => "
                    + database + " ");
            return metadataProperties;
        }
    });

    Spark.post("readFromMetastore", new Route() {

        @Override
        public Object handle(Request request, Response response)
                throws ParamsCannotBeNullOrEmpty, ClassNotFoundException, IllegalAccessException, JSONException,
                JsonProcessingException, TableOrQueryParamNotFound, SQLException, InstantiationException {
            LOG.info("************************readFromMetastore endpoint - started************************");
            LOG.info("+++ Start: " + new Timestamp((new Date()).getTime()));
            ObjectMapper objectMapper = new ObjectMapper();
            String requestParameters = request.queryParams(Constants.REQUEST_PARAMETERS), objectAsString = null,
                    dbClassName = null;
            JSONObject requestParameterValues = new JSONObject(requestParameters);
            // Method to extracting request parameter details from input
            // json.
            Map metadataProperties = extractingJsonObjects(requestParameterValues);

            String dbType = metadataProperties
                    .getOrDefault(Constants.dbType,
                            new ParamsCannotBeNullOrEmpty(Constants.dbType + " Cannot be null or empty"))
                    .toString();
            LOG.info("Retrieving schema for " + dbType + " Database.");
            try {
                switch (dbType.toLowerCase()) {
                case Constants.ORACLE:
                    dbClassName = Constants.oracle;
                    OracleMetadataStrategy oracleMetadataHelper = (OracleMetadataStrategy) Class
                            .forName(dbClassName).newInstance();
                    oracleMetadataHelper.setConnection(metadataProperties);
                    objectAsString = objectMapper
                            .writeValueAsString(oracleMetadataHelper.fillComponentSchema(metadataProperties));
                    LOG.trace("Schema json for oracle : " + objectAsString);
                    LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
                    break;
                case Constants.HIVE:
                    dbClassName = Constants.hive;
                    HiveMetadataStrategy hiveMetadataHelper = (HiveMetadataStrategy) Class.forName(dbClassName)
                            .newInstance();
                    hiveMetadataHelper.setConnection(metadataProperties);
                    objectAsString = objectMapper
                            .writeValueAsString(hiveMetadataHelper.fillComponentSchema(metadataProperties));
                    LOG.trace("Schema json for hive : " + objectAsString);
                    LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
                    break;
                case Constants.REDSHIFT:
                    dbClassName = Constants.redshift;
                    RedshiftMetadataStrategy redShiftMetadataHelper = (RedshiftMetadataStrategy) Class
                            .forName(dbClassName).newInstance();
                    redShiftMetadataHelper.setConnection(metadataProperties);
                    objectAsString = objectMapper
                            .writeValueAsString(redShiftMetadataHelper.fillComponentSchema(metadataProperties));
                    LOG.trace("Schema json for redshift : " + objectAsString);
                    LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
                    break;
                case Constants.MYSQL:
                    dbClassName = Constants.mysql;
                    MysqlMetadataStrategy mysqlMetadataHelper = (MysqlMetadataStrategy) Class
                            .forName(dbClassName).newInstance();
                    mysqlMetadataHelper.setConnection(metadataProperties);
                    objectAsString = objectMapper
                            .writeValueAsString(mysqlMetadataHelper.fillComponentSchema(metadataProperties));
                    LOG.trace("Schema json for mysql : " + objectAsString);
                    LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
                    break;
                case Constants.TERADATA:
                    dbClassName = Constants.teradata;
                    TeradataMetadataStrategy teradataMetadataHelper = (TeradataMetadataStrategy) Class
                            .forName(dbClassName).newInstance();
                    teradataMetadataHelper.setConnection(metadataProperties);
                    objectAsString = objectMapper
                            .writeValueAsString(teradataMetadataHelper.fillComponentSchema(metadataProperties));
                    LOG.trace("Schema json for teradata : " + objectAsString);
                    LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
                    break;
                }
            } catch (Exception e) {
                LOG.error("Metadata read for database  '" + dbType + "' not completed.");
                LOG.error("Exception : " + e);
                response.status(400);
                return "Metadata read for database '" + dbType + "' not completed.";
            }
            LOG.info("Class Name used for " + dbType + " Is : " + dbClassName);
            LOG.debug("Json for " + dbType + " : " + objectAsString);
            return objectAsString;
        }

        @SuppressWarnings({ "unchecked", "rawtypes" })
        private Map extractingJsonObjects(JSONObject requestParameterValues) throws JSONException {

            String dbType = null, userId = null, password = null, host = null, port = null, sid = null,
                    driverType = null, query = null, tableName = null, database = null;
            Map metadataProperties = new HashMap();
            if (!requestParameterValues.isNull(Constants.dbType)) {
                dbType = requestParameterValues.getString(Constants.dbType);
                metadataProperties.put(Constants.dbType, dbType);
            }
            if (!requestParameterValues.isNull(Constants.USERNAME)) {
                userId = requestParameterValues.getString(Constants.USERNAME);
                metadataProperties.put(Constants.USERNAME, userId);
            }
            if (!requestParameterValues.isNull(Constants.SERVICE_PWD)) {
                password = requestParameterValues.getString(Constants.SERVICE_PWD);
                metadataProperties.put(Constants.SERVICE_PWD, password);
            }
            if (!requestParameterValues.isNull(Constants.HOST_NAME)) {
                host = requestParameterValues.getString(Constants.HOST_NAME);
                metadataProperties.put(Constants.HOST_NAME, host);
            }
            if (!requestParameterValues.isNull(Constants.PORT_NUMBER)) {
                port = requestParameterValues.getString(Constants.PORT_NUMBER);
                metadataProperties.put(Constants.PORT_NUMBER, port);
            } else {
                if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("mysql")) {
                    port = Constants.MYSQL_DEFAULT_PORT;
                    metadataProperties.put(Constants.PORT_NUMBER, port);

                } else if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("oracle")) {
                    port = Constants.ORACLE_DEFAULT_PORT;
                    metadataProperties.put(Constants.PORT_NUMBER, port);
                }
                LOG.info("Connecting " + dbType + " port is not provided using default port : " + port);
            }
            if (!requestParameterValues.isNull(Constants.SID)) {
                sid = requestParameterValues.getString(Constants.SID);
                metadataProperties.put(Constants.SID, sid);
            }
            if (!requestParameterValues.isNull(Constants.DRIVER_TYPE)) {
                driverType = requestParameterValues.getString(Constants.DRIVER_TYPE);
                metadataProperties.put(Constants.DRIVER_TYPE, driverType);
            }
            if (!requestParameterValues.isNull(Constants.QUERY)) {
                query = requestParameterValues.getString(Constants.QUERY);
                metadataProperties.put(Constants.QUERY, query);
            }
            if (!requestParameterValues.isNull(Constants.TABLENAME)) {
                tableName = requestParameterValues.getString(Constants.TABLENAME);
                metadataProperties.put(Constants.TABLENAME, tableName);
            }
            if (!requestParameterValues.isNull(Constants.DATABASE_NAME)) {
                database = requestParameterValues.getString(Constants.DATABASE_NAME);
                metadataProperties.put(Constants.DATABASE_NAME, database);
            }

            LOG.info("Fetched request parameters are: " + Constants.dbType + " => " + dbType + " "
                    + Constants.USERNAME + " => " + userId + " " + Constants.HOST_NAME + " => " + host + " "
                    + Constants.PORT_NUMBER + " => " + port + " " + Constants.SID + " => " + sid + " "
                    + Constants.DRIVER_TYPE + " => " + driverType + " " + Constants.QUERY + " => " + query + " "
                    + Constants.TABLENAME + " => " + tableName + " " + Constants.DATABASE_NAME + " => "
                    + database + " ");
            return metadataProperties;
        }
    });

    Spark.post("/read", new Route() {
        @Override
        public Object handle(Request request, Response response) {
            LOG.info("************************read endpoint - started************************");
            LOG.info("+++ Start: " + new Timestamp((new Date()).getTime()));
            String jobId = request.queryParams(Constants.JOB_ID);
            String componentId = request.queryParams(Constants.COMPONENT_ID);
            String socketId = request.queryParams(Constants.SOCKET_ID);
            String basePath = request.queryParams(Constants.BASE_PATH);

            // String host = request.queryParams(Constants.HOST);
            String userID = request.queryParams(Constants.USER_ID);
            String password = request.queryParams(Constants.SERVICE_PWD);

            double sizeOfData = Double.parseDouble(request.queryParams(Constants.FILE_SIZE)) * 1024 * 1024;
            LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}, DataSize:{}",
                    basePath, jobId, componentId, socketId, userID, sizeOfData);

            String batchID = jobId + "_" + componentId + "_" + socketId;
            String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle()
                    .getString(Constants.TEMP_LOCATION_PATH);
            String filePath = tempLocationPath + "/" + batchID + ".csv";
            try {
                readFileFromHDFS(basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, sizeOfData,
                        filePath, userID, password);
                LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
            } catch (Exception e) {
                LOG.error("Error in reading debug files", e);
                return "error";
            }
            return filePath;
        }

        /**
         * This method will read the HDFS file, fetch the records from it
         * and write its records to a local file on edge node with size <=
         * {@code sizeOfData} passed in parameter.
         *
         * @param hdfsFilePath   path of HDFS file from where records to be read
         * @param sizeOfData     defines the size of data (in bytes) to be read from
         *                       HDFS file
         * @param remoteFileName after reading the data of {@code sizeOfData} bytes
         *                       from HDFS file, it will be written to local file on
         *                       edge node with file name {@code remoteFileName}
         * @param userId
         * @param password
         */
        private void readFileFromHDFS(String hdfsFilePath, double sizeOfData, String remoteFileName,
                String userId, String password) {
            try {
                Path path = new Path(hdfsFilePath);
                LOG.debug("Reading Debug file:" + hdfsFilePath);
                Configuration conf = new Configuration();

                // load hdfs-site.xml and core-site.xml
                String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle()
                        .getString(Constants.HDFS_SITE_CONFIG_PATH);
                String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle()
                        .getString(Constants.CORE_SITE_CONFIG_PATH);
                LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath);
                conf.addResource(new Path(hdfsConfigPath));
                LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath);
                conf.addResource(new Path(coreSiteConfigPath));

                KerberosUtilities kerberosUtilities = new KerberosUtilities();
                // apply kerberos token
                kerberosUtilities.applyKerberosToken(userId, password, conf);

                listAndWriteFiles(remoteFileName, path, conf, sizeOfData);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * This method will list all files for {@code path}, read all files
         * and writes its data to a local file on edge node with size <=
         * {@code sizeOfData} passed in parameter.
         *
         * @param remoteFileName
         * @param path
         * @param conf
         * @param sizeOfData
         * @throws IOException
         */
        private void listAndWriteFiles(String remoteFileName, Path path, Configuration conf, double sizeOfData)
                throws IOException {
            FileSystem fs = FileSystem.get(conf);
            FileStatus[] status = fs.listStatus(path);
            File remoteFile = new File(remoteFileName);

            OutputStream os = new FileOutputStream(remoteFileName);
            try {

                int numOfBytes = 0;
                for (int i = 0; i < status.length; i++) {
                    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                    String line = "";
                    line = br.readLine();
                    if (line != null) {
                        // header will only get fetch from first part file
                        // and it
                        // will skip header from remaining files
                        if (numOfBytes == 0) {
                            os.write((line + "\n").toString().getBytes());
                            numOfBytes += line.toString().length();
                        }
                        while ((line = br.readLine()) != null) {
                            numOfBytes += line.toString().length();
                            // line = br.readLine();
                            if (numOfBytes <= sizeOfData) {
                                os.write((line + "\n").toString().getBytes());
                            } else {
                                break;
                            }
                        }
                    }
                    br.close();
                    remoteFile.setReadable(true, false);
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            } finally {
                os.close();
                fs.close();
            }
        }

    });

    Spark.post("/delete", new Route() {
        @Override
        public Object handle(Request request, Response response) {
            LOG.info("************************delete endpoint - started************************");
            LOG.info("+++ Start: " + new Timestamp((new Date()).getTime()));
            response.type("text/json");
            String jobId = request.queryParams(Constants.JOB_ID);
            String basePath = request.queryParams(Constants.BASE_PATH);
            String componentId = request.queryParams(Constants.COMPONENT_ID);
            String socketId = request.queryParams(Constants.SOCKET_ID);
            String userID = request.queryParams(Constants.USER_ID);
            String password = request.queryParams(Constants.SERVICE_PWD);

            LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}", basePath, jobId,
                    componentId, socketId, userID);

            try {
                removeDebugFiles(basePath, jobId, componentId, socketId, userID, password);
                LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
            } catch (Exception e) {
                LOG.error("Error in deleting debug files", e);
            }
            return "error";
        }

        private void removeDebugFiles(String basePath, String jobId, String componentId, String socketId,
                String userID, String password) {
            try {
                // DebugFilesReader debugFilesReader = new
                // DebugFilesReader(basePath, jobId, componentId, socketId,
                // userID,
                // password);
                delete(basePath, jobId, componentId, socketId, userID, password);
            } catch (Exception e) {
                LOG.error("Error while deleting the debug file", e);
                throw new RuntimeException(e);
            }
        }

        /**
         * Deletes the jobId directory
         *
         * @param password
         * @param userID
         * @param socketId
         * @param componentId
         * @param jobId
         * @param basePath
         * @throws IOException
         */
        public void delete(String basePath, String jobId, String componentId, String socketId, String userID,
                String password) throws IOException {
            LOG.trace("Entering method delete()");
            String deletePath = basePath + "/debug/" + jobId;
            Configuration configuration = new Configuration();
            FileSystem fileSystem = FileSystem.get(configuration);
            Path deletingFilePath = new Path(deletePath);
            if (!fileSystem.exists(deletingFilePath)) {
                throw new PathNotFoundException(deletingFilePath.toString());
            } else {
                // Delete file
                fileSystem.delete(deletingFilePath, true);
                LOG.info("Deleted path : " + deletePath);
            }
            fileSystem.close();
        }
    });

    Spark.post("/deleteLocalDebugFile", new Route() {
        @Override
        public Object handle(Request request, Response response) {
            String error = "";
            LOG.info("+++ Start: " + new Timestamp((new Date()).getTime()));
            LOG.info("************************deleteLocalDebugFile endpoint - started************************");
            try {
                String jobId = request.queryParams(Constants.JOB_ID);
                String componentId = request.queryParams(Constants.COMPONENT_ID);
                String socketId = request.queryParams(Constants.SOCKET_ID);
                String batchID = jobId + "_" + componentId + "_" + socketId;
                String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle()
                        .getString(Constants.TEMP_LOCATION_PATH);

                LOG.info("Job Id: {}, Component Id: {}, Socket ID: {}, TemporaryPath: {}", jobId, componentId,
                        socketId, tempLocationPath);
                LOG.debug("File to be deleted: " + tempLocationPath + "/" + batchID + ".csv");
                File file = new File(tempLocationPath + "/" + batchID + ".csv");
                file.delete();
                LOG.trace("Local debug file deleted successfully.");
                return "Success";
            } catch (Exception e) {
                LOG.error("Error in deleting local debug file.", e);
                error = e.getMessage();
            }
            LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
            return "Local file delete failed. Error: " + error;
        }
    });

    // TODO : Keep this for test
    Spark.post("/post", new Route() {

        @Override
        public Object handle(Request request, Response response) {
            LOG.info("****TEST SPARK POST STARTED**********");
            response.type("text/json");
            return "calling post...";
        }
    });

    // TODO : Keep this for test
    Spark.get("/test", new Route() {

        @Override
        public Object handle(Request request, Response response) {
            LOG.info("****TEST SPARK GET STARTED**********");
            response.type("text/json");
            response.status(200);
            response.body("Test successful!");
            return "Test successful!";
        }
    });

    Spark.post("/filter", new Route() {
        @Override
        public Object handle(Request request, Response response) {

            LOG.info("************************filter - started************************");
            LOG.info("+++ Start: " + new Timestamp((new Date()).getTime()));

            Gson gson = new Gson();
            String json = request.queryParams(Constants.REQUEST_PARAMETERS);
            RemoteFilterJson remoteFilterJson = gson.fromJson(json, RemoteFilterJson.class);

            String jobId = remoteFilterJson.getJobDetails().getUniqueJobID();
            String componentId = remoteFilterJson.getJobDetails().getComponentID();
            String socketId = remoteFilterJson.getJobDetails().getComponentSocketID();
            String basePath = remoteFilterJson.getJobDetails().getBasepath();
            String username = remoteFilterJson.getJobDetails().getUsername();
            String password = remoteFilterJson.getJobDetails().getService_pwd();
            double outputFileSizeInMB = remoteFilterJson.getFileSize();
            double sizeOfDataInByte = outputFileSizeInMB * 1024 * 1024;

            String condition = parseSQLQueryToLingualQuery(remoteFilterJson);

            LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}, DataSize:{}",
                    basePath, jobId, componentId, socketId, username, sizeOfDataInByte);

            String batchID = jobId + "_" + componentId + "_" + socketId;

            String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle()
                    .getString(Constants.TEMP_LOCATION_PATH);

            String filePath = tempLocationPath + "/" + batchID + ".csv";
            String UUID = generateUUID();
            String uniqueId = batchID + "_" + UUID;
            String linugalMetaDataPath = basePath + "/filter/" + UUID;

            String fieldNames[] = getHeader(basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId,
                    username, password);
            try {
                HashMap<String, Type> fieldNameAndDatatype = getFieldNameAndType(remoteFilterJson);
                Type[] fieldTypes = getFieldTypeFromMap(fieldNames, fieldNameAndDatatype);
                Configuration conf = getConfiguration(username, password);

                new LingualFilter().filterData(linugalMetaDataPath, uniqueId,
                        basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, sizeOfDataInByte,
                        filePath, condition, fieldNames, fieldTypes, conf);

                LOG.info("debug output path : " + filePath);
                LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime()));
            } catch (Exception e) {
                LOG.error("Error in reading debug files", e);
                return "error";
            } finally {
                try {
                    System.gc();
                    deleteLingualResult(linugalMetaDataPath);
                } catch (Exception e) {
                    LOG.error("Error in deleting lingual result", e);
                    return "Error in deleting lingual result: " + e.getMessage();
                }
            }

            return filePath;
        }

        private Type[] getFieldTypeFromMap(String[] fieldNames, HashMap<String, Type> fieldNameAndDatatype) {
            Type[] type = new Type[fieldNameAndDatatype.size()];
            int i = 0;
            for (String eachFieldName : fieldNames) {
                type[i++] = fieldNameAndDatatype.get(eachFieldName);
            }
            return type;
        }

        private String[] getHeader(String path, String username, String password) {
            String[] header = readFile(path, username, password);
            return header;
        }

        private String[] readFile(String hdfsFilePath, String username, String password) {
            String[] header = null;
            try {
                Path path = new Path(hdfsFilePath);
                LOG.debug("Reading Debug file:" + hdfsFilePath);
                Configuration conf = getConfiguration(username, password);

                header = getHeaderArray(path, conf);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            return header;
        }

        private Path filterOutSuccessFile(FileStatus[] fileStatus) {
            for (FileStatus status : fileStatus) {
                if (status.getPath().getName().toUpperCase().contains("_SUCCESS"))
                    continue;
                else
                    return status.getPath();
            }
            return null;
        }

        private String[] getHeaderArray(Path path, Configuration conf) throws IOException {
            FileSystem fs = FileSystem.get(conf);
            FileStatus[] status = fs.listStatus(path);
            String line = "";
            try {
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(fs.open(filterOutSuccessFile(status))));

                line = br.readLine();
                br.close();

            } catch (Exception e) {
                throw new RuntimeException(e);
            } finally {
                fs.close();
            }
            return line.split(",");
        }

        private Configuration getConfiguration(String userId, String password)
                throws LoginException, IOException {
            Configuration conf = new Configuration();

            // load hdfs-site.xml and core-site.xml
            String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle()
                    .getString(Constants.HDFS_SITE_CONFIG_PATH);
            String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle()
                    .getString(Constants.CORE_SITE_CONFIG_PATH);
            LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath);
            conf.addResource(new Path(hdfsConfigPath));
            LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath);
            conf.addResource(new Path(coreSiteConfigPath));

            KerberosUtilities kerberosUtilities = new KerberosUtilities();
            // apply kerberos token
            kerberosUtilities.applyKerberosToken(userId, password, conf);
            return conf;
        }

        private void deleteLingualResult(String deletePath) throws IOException {
            Configuration configuration = new Configuration();
            FileSystem fileSystem = FileSystem.get(configuration);
            Path deletingFilePath = new Path(deletePath);

            if (!fileSystem.exists(deletingFilePath)) {
                throw new PathNotFoundException(deletingFilePath.toString());
            } else {
                boolean isDeleted = fileSystem.delete(deletingFilePath, true);
                if (isDeleted) {
                    fileSystem.deleteOnExit(deletingFilePath);
                }
                LOG.info("Deleted path : " + deletePath);
            }

            fileSystem.close();
        }

        private String generateUUID() {
            return String.valueOf(UUID.randomUUID());
        }

        private String parseSQLQueryToLingualQuery(RemoteFilterJson remoteFilterJson) {
            ANTLRInputStream stream = new ANTLRInputStream(remoteFilterJson.getCondition());
            QueryParserLexer lexer = new QueryParserLexer(stream);
            CommonTokenStream tokenStream = new CommonTokenStream(lexer);
            QueryParserParser parser = new QueryParserParser(tokenStream);
            parser.removeErrorListeners();
            LingualQueryCreator customVisitor = new LingualQueryCreator(remoteFilterJson.getSchema());
            String condition = customVisitor.visit(parser.eval());
            return condition;
        }

        private HashMap<String, Type> getFieldNameAndType(RemoteFilterJson remoteFilterJson)
                throws ClassNotFoundException {
            HashMap<String, Type> fieldDataTypeMap = new HashMap<>();
            Type type;
            for (int i = 0; i < remoteFilterJson.getSchema().size(); i++) {
                Class clazz = Class.forName(remoteFilterJson.getSchema().get(i).getDataTypeValue());
                if (clazz.getSimpleName().toString().equalsIgnoreCase("Date")) {
                    type = new SQLTimestampCoercibleType();
                } else {
                    type = clazz;
                }
                fieldDataTypeMap.put(remoteFilterJson.getSchema().get(i).getFieldName(), type);
            }
            return fieldDataTypeMap;
        }

    });
}

From source file: io.seqware.pipeline.plugins.sanity.checks.HDFS_Check.java

License: Open Source License

@Override
public boolean check(QueryRunner qRunner, Metadata metadataWS) throws SQLException {
    FileSystem fileSystem = null;

    HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
    if (settings.isEmpty()) {
        return false;
    } else if (!settings.containsKey("FS.DEFAULTFS") || !settings.containsKey("FS.HDFS.IMPL")) {
        return false;
    } else if (!settings.containsKey("HBASE.ZOOKEEPER.QUORUM")
            || !settings.containsKey("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT")
            || !settings.containsKey("HBASE.MASTER") || !settings.containsKey("MAPRED.JOB.TRACKER")) {
        return false;
    }

    try {
        Configuration conf = new Configuration();

        conf.set("hbase.zookeeper.quorum", settings.get("HBASE.ZOOKEEPER.QUORUM"));
        conf.set("hbase.zookeeper.property.clientPort", settings.get("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT"));
        conf.set("hbase.master", settings.get("HBASE.MASTER"));
        conf.set("mapred.job.tracker", settings.get("MAPRED.JOB.TRACKER"));
        conf.set("fs.default.name", settings.get("FS.DEFAULTFS"));
        conf.set("fs.defaultfs", settings.get("FS.DEFAULTFS"));
        conf.set("fs.hdfs.impl", settings.get("FS.HDFS.IMPL"));
        fileSystem = FileSystem.get(conf);
        Path path = new Path("test");
        fileSystem.mkdirs(path);
        fileSystem.deleteOnExit(path);

    } catch (IOException ex) {
        System.err.println("Error connecting to hdfs" + ex.getMessage());
        return false;
    } finally {
        try {
            if (fileSystem != null) {
                fileSystem.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(HDFS_Check.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return true;
}

From source file: org.apache.accumulo.core.client.mock.MockTableOperationsTest.java

License: Apache License

private ImportTestFilesAndData prepareTestFiles() throws Throwable {
    Configuration defaultConf = new Configuration();
    Path tempFile = new Path("target/accumulo-test/import/sample.rf");
    Path failures = new Path("target/accumulo-test/failures/");
    FileSystem fs = FileSystem.get(new URI("file:///"), defaultConf);
    fs.deleteOnExit(tempFile);
    fs.deleteOnExit(failures);
    fs.delete(failures, true);
    fs.delete(tempFile, true);
    fs.mkdirs(failures);
    fs.mkdirs(tempFile.getParent());
    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
            .forFile(tempFile.toString(), fs, defaultConf)
            .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
    writer.startDefaultLocalityGroup();
    List<Pair<Key, Value>> keyVals = new ArrayList<>();
    for (int i = 0; i < 5; i++) {
        keyVals.add(new Pair<>(new Key("a" + i, "b" + i, "c" + i, new ColumnVisibility(""), 1000l + i),
                new Value(Integer.toString(i).getBytes())));
    }
    for (Pair<Key, Value> keyVal : keyVals) {
        writer.append(keyVal.getFirst(), keyVal.getSecond());
    }
    writer.close();
    ImportTestFilesAndData files = new ImportTestFilesAndData();
    files.failurePath = failures;
    files.importPath = tempFile.getParent();
    files.keyVals = keyVals;
    return files;
}

From source file: org.apache.beam.sdk.extensions.sorter.ExternalSorter.java

License: Apache License

/**
 * Initializes the hadoop sorter. Does some local file system setup, and is somewhat expensive
 * (~20 ms on local machine). Only executed when necessary.
 */
private void initHadoopSorter() throws IOException {
    if (!initialized) {
        tempDir = new Path(options.getTempLocation(), "tmp" + UUID.randomUUID().toString());
        paths = new Path[] { new Path(tempDir, "test.seq") };

        JobConf conf = new JobConf();
        // Sets directory for intermediate files created during merge of merge sort
        conf.set("io.seqfile.local.dir", tempDir.toUri().getPath());

        writer = SequenceFile.createWriter(conf, Writer.valueClass(BytesWritable.class),
                Writer.keyClass(BytesWritable.class), Writer.file(paths[0]),
                Writer.compression(CompressionType.NONE));

        FileSystem fs = FileSystem.getLocal(conf);
        // Directory has to exist for Hadoop to recognize it as deletable on exit
        fs.mkdirs(tempDir);
        fs.deleteOnExit(tempDir);

        sorter = new SequenceFile.Sorter(fs, new BytesWritable.Comparator(), BytesWritable.class,
                BytesWritable.class, conf);
        sorter.setMemory(options.getMemoryMB() * 1024 * 1024);

        initialized = true;
    }
}

From source file: org.apache.carbondata.core.datastorage.store.impl.FileFactory.java

License: Apache License

/**
 * for creating a new Lock file and if it is successfully created
 * then in case of abrupt shutdown then the stream to that file will be closed.
 *
 * @param filePath
 * @param fileType
 * @return
 * @throws IOException
 */
public static boolean createNewLockFile(String filePath, FileType fileType) throws IOException {
    filePath = filePath.replace("\\", "/");
    switch (fileType) {
    case HDFS:
    case VIEWFS:
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(configuration);
        if (fs.createNewFile(path)) {
            fs.deleteOnExit(path);
            return true;
        }
        return false;
    case LOCAL:
    default:
        File file = new File(filePath);
        return file.createNewFile();
    }
}