List of usage examples for org.apache.hadoop.fs FileSystem deleteOnExit
Set deleteOnExit: FileSystem.deleteOnExit(Path) marks an existing path for automatic deletion when the FileSystem is closed (and all open FileSystem instances are closed when the JVM shuts down).
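Before the project examples, here is a minimal, self-contained sketch of the usual pattern (not taken from any of the listed sources; the class name and scratch path are made up for illustration): create a temporary path, mark it with deleteOnExit, and let Hadoop remove it when the FileSystem is closed or the JVM shuts down.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical scratch location; any writable path works
        Path tmp = new Path("/tmp/scratch-" + System.currentTimeMillis());
        fs.mkdirs(tmp); // the path must exist for deleteOnExit to register it
        boolean marked = fs.deleteOnExit(tmp); // true if the path was marked for deletion
        System.out.println("Marked for cleanup: " + marked);
        // ... write intermediate data under tmp ...
        fs.close(); // marked paths are deleted when the FileSystem closes (or at JVM shutdown)
    }
}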
From source file:edu.umn.cs.spatialHadoop.operations.DistributedJoin.java
License:Open Source License
/**
 * Repartition a file to match the partitioning of the other file.
 * @param files Input files to partition
 * @param file_to_repartition
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
protected static void repartitionStep(final Path[] files, int file_to_repartition, OperationsParams params)
        throws IOException, InterruptedException {
    // Do the repartition step
    long t1 = System.currentTimeMillis();

    // Repartition the smaller file
    Path partitioned_file;
    FileSystem fs = files[file_to_repartition].getFileSystem(params);
    do {
        partitioned_file = new Path(
                files[file_to_repartition].getName() + ".repartitioned_" + (int) (Math.random() * 1000000));
    } while (fs.exists(partitioned_file));

    // Get the cells to use for repartitioning
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, files[1 - file_to_repartition]);
    CellInfo[] cells = SpatialSite.cellsOf(fs, files[1 - file_to_repartition]);

    // Repartition the file to match the other file
    boolean isReplicated = gindex.isReplicated();
    boolean isCompact = gindex.isCompact();
    String sindex;
    if (isReplicated && !isCompact)
        sindex = "grid";
    else if (isReplicated && isCompact)
        sindex = "r+tree";
    else if (!isReplicated && isCompact)
        sindex = "rtree";
    else
        throw new RuntimeException("Unknown index at: " + files[1 - file_to_repartition]);
    params.set("sindex", sindex);

    if (isGeneralRepartitionMode) {
        // Repartition the smaller file with heuristics cells info (general indexing)
        Repartition.repartitionMapReduce(files[file_to_repartition], partitioned_file, null, params);
    } else {
        // Repartition the smaller file on the larger file (specific indexing)
        Repartition.repartitionMapReduce(files[file_to_repartition], partitioned_file, cells, params);
    }
    long t2 = System.currentTimeMillis();
    System.out.println("Repartition time " + (t2 - t1) + " millis");

    // Continue with the join step
    if (fs.exists(partitioned_file)) {
        // An output file might not exist if the two files are disjoint
        // Replace the smaller file with its repartitioned copy
        files[file_to_repartition] = partitioned_file;
        // Delete temporary repartitioned file upon exit
        fs.deleteOnExit(partitioned_file);
    }
}
From source file:eu.edisonproject.classification.tfidf.mapreduce.TermWordFrequency.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    Job job = Job.getInstance(jobconf);
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);

    Path dictionary = new Path(args[0]);
    Path dictionaryHdfs = dictionary;

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = localDocs;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        dictionaryHdfs = new Path(dictionary.getName());
        if (!fs.exists(dictionaryHdfs)) {
            fs.copyFromLocalFile(dictionary, dictionaryHdfs);
        }

        hdfsDocs = new Path(localDocs.getName());
        fs.mkdirs(hdfsDocs);
        fs.deleteOnExit(hdfsDocs);

        File[] stats = new File(localDocs.toString()).listFiles();
        for (File stat : stats) {
            Path filePath = new Path(stat.getAbsolutePath());
            if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
                Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
                fs.copyFromLocalFile(filePath, dest);
            }
        }

        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());
    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, dictionaryHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dictionaryHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);

    job.setMapperClass(TermWordFrequencyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.classification.tfidf.mapreduce.WordFrequencyInDocDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // itemset = new LinkedList<String>();
    // BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(args[2])));
    // String line;
    // while ((line = br.readLine()) != null) {
    //     String[] components = line.split("/");
    //     itemset.add(components[0]);
    // }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordFrequencyInDocDriver.class);
    job.setJobName("Word Frequency In Doc Driver");

    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true);

    Path in = new Path(args[0]);
    Path inHdfs = in;

    Path dictionaryLocal = new Path(args[2]);
    Path dictionaryHDFS = dictionaryLocal;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);

        dictionaryHDFS = new Path(dictionaryLocal.getName());
        if (!fs.exists(dictionaryHDFS)) {
            fs.copyFromLocalFile(dictionaryLocal, dictionaryHDFS);
        }

        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus dictionaryStatus = fs.getFileStatus(dictionaryHDFS);
    dictionaryHDFS = dictionaryStatus.getPath();
    job.addCacheFile(dictionaryHDFS.toUri());

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(WordFrequencyInDocMapper.class);
    AvroJob.setInputKeySchema(job, Document.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(WordFrequencyInDocReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.training.tfidf.mapreduce.TermWordFrequency.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);

    Path in = new Path(args[0]);
    Path inHdfs = in;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);
        FileStatus inHdfsStatus = fs.getFileStatus(inHdfs);
        // Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Copied: {0} to: {1}", new Object[]{in.toUri(), inHdfsStatus.getPath().toUri()});
    }

    Job job = Job.getInstance(jobconf);

    Path stopwordsLocal = new Path(args[3]);
    stopwords = new Path(stopwordsLocal.getName());
    fs.delete(stopwords, true);
    fs.copyFromLocalFile(stopwordsLocal, stopwords);
    fs.deleteOnExit(stopwords);
    FileStatus stopwordsStatus = fs.getFileStatus(stopwords);
    stopwords = stopwordsStatus.getPath();
    job.addCacheFile(stopwords.toUri());

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = new Path(localDocs.getName());
    fs.mkdirs(hdfsDocs);
    hdfsDocs = fs.getFileStatus(hdfsDocs).getPath();
    fs.delete(hdfsDocs, true);
    // FileStatus[] stats = fs.listStatus(localDocs);
    File[] stats = new File(localDocs.toString()).listFiles();

    for (File stat : stats) {
        // for (FileStatus stat : stats) {
        Path filePath = new Path(stat.getAbsolutePath());
        if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
            Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
            fs.copyFromLocalFile(filePath, dest);
        }
    }
    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, inHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);
    Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Num. of lines: {0}",
            NLineInputFormat.getNumLinesPerSplit(job));

    job.setMapperClass(TermWordFrequencyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:hydrograph.server.debug.lingual.LingualSchemaCreatorTest.java
License:Apache License
@AfterClass
public static void cleanUp() {
    System.gc();
    Configuration configuration = new Configuration();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        Path deletingFilePath = new Path("testData/MetaData/");
        if (!fileSystem.exists(deletingFilePath)) {
            throw new PathNotFoundException(deletingFilePath.toString());
        } else {
            boolean isDeleted = fileSystem.delete(deletingFilePath, true);
            if (isDeleted) {
                fileSystem.deleteOnExit(deletingFilePath);
            }
        }
        fileSystem.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:hydrograph.server.service.HydrographService.java
License:Apache License
private void start() { int portNumber = Constants.DEFAULT_PORT_NUMBER; try { portNumber = Integer .parseInt(ServiceUtilities.getServiceConfigResourceBundle().getString(Constants.PORT_ID)); LOG.debug("Port number '" + portNumber + "' fetched from properties file"); } catch (Exception e) { LOG.error("Error fetching port number. Defaulting to " + Constants.DEFAULT_PORT_NUMBER, e); } /** * Setting Port number to the server */ Spark.port(portNumber); /** * Creating Websocket on Server for Execution tracking service. */ webSocket("/executionTracking", ExecutionTrackingWebsocketHandler.class); Spark.post("/getConnectionStatus", new Route() { @SuppressWarnings({ "unchecked", "rawtypes", "unused" }) @Override public Object handle(Request request, Response response) throws InstantiationException, IllegalAccessException, ClassNotFoundException, JSONException { LOG.info("************************getConnectionStatus endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); ObjectMapper objectMapper = new ObjectMapper(); String requestParameters = request.queryParams(Constants.REQUEST_PARAMETERS), dbClassName = null, objectAsString = null; JSONObject requestParameterValues = new JSONObject(requestParameters); Map metadataProperties = extractingJsonObjects(requestParameterValues); String dbTypeToTest = metadataProperties .getOrDefault(Constants.dbType, new ParamsCannotBeNullOrEmpty(Constants.dbType + " Cannot be null or empty")) .toString(); switch (dbTypeToTest.toLowerCase()) { case Constants.ORACLE: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.ORACLE_JDBC_CLASSNAME, Constants.QUERY_TO_TEST)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To Oracle database is Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To Oracle database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; case Constants.MYSQL: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.MYSQL_JDBC_CLASSNAME, Constants.QUERY_TO_TEST)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To MySQL database is Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To MySQL database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; case Constants.REDSHIFT: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.REDSHIFT_JDBC_CLASSNAME, Constants.DEFAULT_REDRESHIFT_QUERY_TO_TEST)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To Redshift database is Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To Redshift database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; case Constants.TERADATA: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.TERADATA_JDBC_CLASSNAME, Constants.QUERY_TO_TEST_TERADATA)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To Teradata 
database is Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To Teradata database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; } return objectAsString; } @SuppressWarnings({ "unchecked", "rawtypes" }) private Map extractingJsonObjects(JSONObject requestParameterValues) throws JSONException { String dbType = null, userId = null, password = null, host = null, port = null, sid = null, driverType = null, query = null, tableName = null, database = null; Map metadataProperties = new HashMap(); if (!requestParameterValues.isNull(Constants.dbType)) { dbType = requestParameterValues.getString(Constants.dbType); metadataProperties.put(Constants.dbType, dbType); } if (!requestParameterValues.isNull(Constants.USERNAME)) { userId = requestParameterValues.getString(Constants.USERNAME); metadataProperties.put(Constants.USERNAME, userId); } if (!requestParameterValues.isNull(Constants.SERVICE_PWD)) { password = requestParameterValues.getString(Constants.SERVICE_PWD); metadataProperties.put(Constants.SERVICE_PWD, password); } if (!requestParameterValues.isNull(Constants.HOST_NAME)) { host = requestParameterValues.getString(Constants.HOST_NAME); metadataProperties.put(Constants.HOST_NAME, host); } if (!requestParameterValues.isNull(Constants.PORT_NUMBER)) { port = requestParameterValues.getString(Constants.PORT_NUMBER); metadataProperties.put(Constants.PORT_NUMBER, port); } else { if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("mysql")) { port = Constants.MYSQL_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } else if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("oracle")) { port = Constants.ORACLE_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } LOG.info("Connecting " + dbType + " port is not provided using default port : " + port); } if (!requestParameterValues.isNull(Constants.SID)) { sid = requestParameterValues.getString(Constants.SID); metadataProperties.put(Constants.SID, sid); } if (!requestParameterValues.isNull(Constants.DRIVER_TYPE)) { driverType = requestParameterValues.getString(Constants.DRIVER_TYPE); metadataProperties.put(Constants.DRIVER_TYPE, driverType); } if (!requestParameterValues.isNull(Constants.QUERY)) { query = requestParameterValues.getString(Constants.QUERY); metadataProperties.put(Constants.QUERY, query); } if (!requestParameterValues.isNull(Constants.TABLENAME)) { tableName = requestParameterValues.getString(Constants.TABLENAME); metadataProperties.put(Constants.TABLENAME, tableName); } if (!requestParameterValues.isNull(Constants.DATABASE_NAME)) { database = requestParameterValues.getString(Constants.DATABASE_NAME); metadataProperties.put(Constants.DATABASE_NAME, database); } LOG.info("Fetched request parameters are: " + Constants.dbType + " => " + dbType + " " + Constants.USERNAME + " => " + userId + " " + Constants.HOST_NAME + " => " + host + " " + Constants.PORT_NUMBER + " => " + port + " " + Constants.SID + " => " + sid + " " + Constants.DRIVER_TYPE + " => " + driverType + " " + Constants.QUERY + " => " + query + " " + Constants.TABLENAME + " => " + tableName + " " + Constants.DATABASE_NAME + " => " + database + " "); return metadataProperties; } }); Spark.post("readFromMetastore", new Route() { @Override public Object handle(Request request, Response response) throws ParamsCannotBeNullOrEmpty, 
ClassNotFoundException, IllegalAccessException, JSONException, JsonProcessingException, TableOrQueryParamNotFound, SQLException, InstantiationException { LOG.info("************************readFromMetastore endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); ObjectMapper objectMapper = new ObjectMapper(); String requestParameters = request.queryParams(Constants.REQUEST_PARAMETERS), objectAsString = null, dbClassName = null; JSONObject requestParameterValues = new JSONObject(requestParameters); // Method to extracting request parameter details from input // json. Map metadataProperties = extractingJsonObjects(requestParameterValues); String dbType = metadataProperties .getOrDefault(Constants.dbType, new ParamsCannotBeNullOrEmpty(Constants.dbType + " Cannot be null or empty")) .toString(); LOG.info("Retrieving schema for " + dbType + " Database."); try { switch (dbType.toLowerCase()) { case Constants.ORACLE: dbClassName = Constants.oracle; OracleMetadataStrategy oracleMetadataHelper = (OracleMetadataStrategy) Class .forName(dbClassName).newInstance(); oracleMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(oracleMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for oracle : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.HIVE: dbClassName = Constants.hive; HiveMetadataStrategy hiveMetadataHelper = (HiveMetadataStrategy) Class.forName(dbClassName) .newInstance(); hiveMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(hiveMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for hive : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.REDSHIFT: dbClassName = Constants.redshift; RedshiftMetadataStrategy redShiftMetadataHelper = (RedshiftMetadataStrategy) Class .forName(dbClassName).newInstance(); redShiftMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(redShiftMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for redshift : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.MYSQL: dbClassName = Constants.mysql; MysqlMetadataStrategy mysqlMetadataHelper = (MysqlMetadataStrategy) Class .forName(dbClassName).newInstance(); mysqlMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(mysqlMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for mysql : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.TERADATA: dbClassName = Constants.teradata; TeradataMetadataStrategy teradataMetadataHelper = (TeradataMetadataStrategy) Class .forName(dbClassName).newInstance(); teradataMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(teradataMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for teradata : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; } } catch (Exception e) { LOG.error("Metadata read for database '" + dbType + "' not completed."); LOG.error("Exception : " + e); response.status(400); return "Metadata read for database '" + dbType + "' not completed."; } LOG.info("Class Name used for " + 
dbType + " Is : " + dbClassName); LOG.debug("Json for " + dbType + " : " + objectAsString); return objectAsString; } @SuppressWarnings({ "unchecked", "rawtypes" }) private Map extractingJsonObjects(JSONObject requestParameterValues) throws JSONException { String dbType = null, userId = null, password = null, host = null, port = null, sid = null, driverType = null, query = null, tableName = null, database = null; Map metadataProperties = new HashMap(); if (!requestParameterValues.isNull(Constants.dbType)) { dbType = requestParameterValues.getString(Constants.dbType); metadataProperties.put(Constants.dbType, dbType); } if (!requestParameterValues.isNull(Constants.USERNAME)) { userId = requestParameterValues.getString(Constants.USERNAME); metadataProperties.put(Constants.USERNAME, userId); } if (!requestParameterValues.isNull(Constants.SERVICE_PWD)) { password = requestParameterValues.getString(Constants.SERVICE_PWD); metadataProperties.put(Constants.SERVICE_PWD, password); } if (!requestParameterValues.isNull(Constants.HOST_NAME)) { host = requestParameterValues.getString(Constants.HOST_NAME); metadataProperties.put(Constants.HOST_NAME, host); } if (!requestParameterValues.isNull(Constants.PORT_NUMBER)) { port = requestParameterValues.getString(Constants.PORT_NUMBER); metadataProperties.put(Constants.PORT_NUMBER, port); } else { if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("mysql")) { port = Constants.MYSQL_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } else if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("oracle")) { port = Constants.ORACLE_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } LOG.info("Connecting " + dbType + " port is not provided using default port : " + port); } if (!requestParameterValues.isNull(Constants.SID)) { sid = requestParameterValues.getString(Constants.SID); metadataProperties.put(Constants.SID, sid); } if (!requestParameterValues.isNull(Constants.DRIVER_TYPE)) { driverType = requestParameterValues.getString(Constants.DRIVER_TYPE); metadataProperties.put(Constants.DRIVER_TYPE, driverType); } if (!requestParameterValues.isNull(Constants.QUERY)) { query = requestParameterValues.getString(Constants.QUERY); metadataProperties.put(Constants.QUERY, query); } if (!requestParameterValues.isNull(Constants.TABLENAME)) { tableName = requestParameterValues.getString(Constants.TABLENAME); metadataProperties.put(Constants.TABLENAME, tableName); } if (!requestParameterValues.isNull(Constants.DATABASE_NAME)) { database = requestParameterValues.getString(Constants.DATABASE_NAME); metadataProperties.put(Constants.DATABASE_NAME, database); } LOG.info("Fetched request parameters are: " + Constants.dbType + " => " + dbType + " " + Constants.USERNAME + " => " + userId + " " + Constants.HOST_NAME + " => " + host + " " + Constants.PORT_NUMBER + " => " + port + " " + Constants.SID + " => " + sid + " " + Constants.DRIVER_TYPE + " => " + driverType + " " + Constants.QUERY + " => " + query + " " + Constants.TABLENAME + " => " + tableName + " " + Constants.DATABASE_NAME + " => " + database + " "); return metadataProperties; } }); Spark.post("/read", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("************************read endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); String jobId = request.queryParams(Constants.JOB_ID); String componentId = request.queryParams(Constants.COMPONENT_ID); 
String socketId = request.queryParams(Constants.SOCKET_ID); String basePath = request.queryParams(Constants.BASE_PATH); // String host = request.queryParams(Constants.HOST); String userID = request.queryParams(Constants.USER_ID); String password = request.queryParams(Constants.SERVICE_PWD); double sizeOfData = Double.parseDouble(request.queryParams(Constants.FILE_SIZE)) * 1024 * 1024; LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}, DataSize:{}", basePath, jobId, componentId, socketId, userID, sizeOfData); String batchID = jobId + "_" + componentId + "_" + socketId; String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.TEMP_LOCATION_PATH); String filePath = tempLocationPath + "/" + batchID + ".csv"; try { readFileFromHDFS(basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, sizeOfData, filePath, userID, password); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); } catch (Exception e) { LOG.error("Error in reading debug files", e); return "error"; } return filePath; } /** * This method will read the HDFS file, fetch the records from it * and write its records to a local file on edge node with size <= * {@code sizeOfData} passed in parameter. * * @param hdfsFilePath path of HDFS file from where records to be read * @param sizeOfData defines the size of data (in bytes) to be read from * HDFS file * @param remoteFileName after reading the data of {@code sizeOfData} bytes * from HDFS file, it will be written to local file on * edge node with file name {@code remoteFileName} * @param userId * @param password */ private void readFileFromHDFS(String hdfsFilePath, double sizeOfData, String remoteFileName, String userId, String password) { try { Path path = new Path(hdfsFilePath); LOG.debug("Reading Debug file:" + hdfsFilePath); Configuration conf = new Configuration(); // load hdfs-site.xml and core-site.xml String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.HDFS_SITE_CONFIG_PATH); String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.CORE_SITE_CONFIG_PATH); LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath); conf.addResource(new Path(hdfsConfigPath)); LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath); conf.addResource(new Path(coreSiteConfigPath)); KerberosUtilities kerberosUtilities = new KerberosUtilities(); // apply kerberos token kerberosUtilities.applyKerberosToken(userId, password, conf); listAndWriteFiles(remoteFileName, path, conf, sizeOfData); } catch (Exception e) { throw new RuntimeException(e); } } /** * This method will list all files for {@code path}, read all files * and writes its data to a local file on edge node with size <= * {@code sizeOfData} passed in parameter. 
* * @param remoteFileName * @param path * @param conf * @param sizeOfData * @throws IOException */ private void listAndWriteFiles(String remoteFileName, Path path, Configuration conf, double sizeOfData) throws IOException { FileSystem fs = FileSystem.get(conf); FileStatus[] status = fs.listStatus(path); File remoteFile = new File(remoteFileName); OutputStream os = new FileOutputStream(remoteFileName); try { int numOfBytes = 0; for (int i = 0; i < status.length; i++) { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath()))); String line = ""; line = br.readLine(); if (line != null) { // header will only get fetch from first part file // and it // will skip header from remaining files if (numOfBytes == 0) { os.write((line + "\n").toString().getBytes()); numOfBytes += line.toString().length(); } while ((line = br.readLine()) != null) { numOfBytes += line.toString().length(); // line = br.readLine(); if (numOfBytes <= sizeOfData) { os.write((line + "\n").toString().getBytes()); } else { break; } } } br.close(); remoteFile.setReadable(true, false); } } catch (Exception e) { throw new RuntimeException(e); } finally { os.close(); fs.close(); } } }); Spark.post("/delete", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("************************delete endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); response.type("text/json"); String jobId = request.queryParams(Constants.JOB_ID); String basePath = request.queryParams(Constants.BASE_PATH); String componentId = request.queryParams(Constants.COMPONENT_ID); String socketId = request.queryParams(Constants.SOCKET_ID); String userID = request.queryParams(Constants.USER_ID); String password = request.queryParams(Constants.SERVICE_PWD); LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}", basePath, jobId, componentId, socketId, userID); try { removeDebugFiles(basePath, jobId, componentId, socketId, userID, password); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); } catch (Exception e) { LOG.error("Error in deleting debug files", e); } return "error"; } private void removeDebugFiles(String basePath, String jobId, String componentId, String socketId, String userID, String password) { try { // DebugFilesReader debugFilesReader = new // DebugFilesReader(basePath, jobId, componentId, socketId, // userID, // password); delete(basePath, jobId, componentId, socketId, userID, password); } catch (Exception e) { LOG.error("Error while deleting the debug file", e); throw new RuntimeException(e); } } /** * Deletes the jobId directory * * @param password * @param userID * @param socketId * @param componentId * @param jobId * @param basePath * @throws IOException */ public void delete(String basePath, String jobId, String componentId, String socketId, String userID, String password) throws IOException { LOG.trace("Entering method delete()"); String deletePath = basePath + "/debug/" + jobId; Configuration configuration = new Configuration(); FileSystem fileSystem = FileSystem.get(configuration); Path deletingFilePath = new Path(deletePath); if (!fileSystem.exists(deletingFilePath)) { throw new PathNotFoundException(deletingFilePath.toString()); } else { // Delete file fileSystem.delete(deletingFilePath, true); LOG.info("Deleted path : " + deletePath); } fileSystem.close(); } }); Spark.post("/deleteLocalDebugFile", new Route() { @Override public Object handle(Request request, Response 
response) { String error = ""; LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); LOG.info("************************deleteLocalDebugFile endpoint - started************************"); try { String jobId = request.queryParams(Constants.JOB_ID); String componentId = request.queryParams(Constants.COMPONENT_ID); String socketId = request.queryParams(Constants.SOCKET_ID); String batchID = jobId + "_" + componentId + "_" + socketId; String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.TEMP_LOCATION_PATH); LOG.info("Job Id: {}, Component Id: {}, Socket ID: {}, TemporaryPath: {}", jobId, componentId, socketId, tempLocationPath); LOG.debug("File to be deleted: " + tempLocationPath + "/" + batchID + ".csv"); File file = new File(tempLocationPath + "/" + batchID + ".csv"); file.delete(); LOG.trace("Local debug file deleted successfully."); return "Success"; } catch (Exception e) { LOG.error("Error in deleting local debug file.", e); error = e.getMessage(); } LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); return "Local file delete failed. Error: " + error; } }); // TODO : Keep this for test Spark.post("/post", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("****TEST SPARK POST STARTED**********"); response.type("text/json"); return "calling post..."; } }); // TODO : Keep this for test Spark.get("/test", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("****TEST SPARK GET STARTED**********"); response.type("text/json"); response.status(200); response.body("Test successful!"); return "Test successful!"; } }); Spark.post("/filter", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("************************filter - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); Gson gson = new Gson(); String json = request.queryParams(Constants.REQUEST_PARAMETERS); RemoteFilterJson remoteFilterJson = gson.fromJson(json, RemoteFilterJson.class); String jobId = remoteFilterJson.getJobDetails().getUniqueJobID(); String componentId = remoteFilterJson.getJobDetails().getComponentID(); String socketId = remoteFilterJson.getJobDetails().getComponentSocketID(); String basePath = remoteFilterJson.getJobDetails().getBasepath(); String username = remoteFilterJson.getJobDetails().getUsername(); String password = remoteFilterJson.getJobDetails().getService_pwd(); double outputFileSizeInMB = remoteFilterJson.getFileSize(); double sizeOfDataInByte = outputFileSizeInMB * 1024 * 1024; String condition = parseSQLQueryToLingualQuery(remoteFilterJson); LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}, DataSize:{}", basePath, jobId, componentId, socketId, username, sizeOfDataInByte); String batchID = jobId + "_" + componentId + "_" + socketId; String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.TEMP_LOCATION_PATH); String filePath = tempLocationPath + "/" + batchID + ".csv"; String UUID = generateUUID(); String uniqueId = batchID + "_" + UUID; String linugalMetaDataPath = basePath + "/filter/" + UUID; String fieldNames[] = getHeader(basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, username, password); try { HashMap<String, Type> fieldNameAndDatatype = getFieldNameAndType(remoteFilterJson); Type[] fieldTypes = getFieldTypeFromMap(fieldNames, fieldNameAndDatatype); Configuration conf = 
getConfiguration(username, password); new LingualFilter().filterData(linugalMetaDataPath, uniqueId, basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, sizeOfDataInByte, filePath, condition, fieldNames, fieldTypes, conf); LOG.info("debug output path : " + filePath); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); } catch (Exception e) { LOG.error("Error in reading debug files", e); return "error"; } finally { try { System.gc(); deleteLingualResult(linugalMetaDataPath); } catch (Exception e) { LOG.error("Error in deleting lingual result", e); return "Error in deleting lingual result: " + e.getMessage(); } } return filePath; } private Type[] getFieldTypeFromMap(String[] fieldNames, HashMap<String, Type> fieldNameAndDatatype) { Type[] type = new Type[fieldNameAndDatatype.size()]; int i = 0; for (String eachFieldName : fieldNames) { type[i++] = fieldNameAndDatatype.get(eachFieldName); } return type; } private String[] getHeader(String path, String username, String password) { String[] header = readFile(path, username, password); return header; } private String[] readFile(String hdfsFilePath, String username, String password) { String[] header = null; try { Path path = new Path(hdfsFilePath); LOG.debug("Reading Debug file:" + hdfsFilePath); Configuration conf = getConfiguration(username, password); header = getHeaderArray(path, conf); } catch (Exception e) { throw new RuntimeException(e); } return header; } private Path filterOutSuccessFile(FileStatus[] fileStatus) { for (FileStatus status : fileStatus) { if (status.getPath().getName().toUpperCase().contains("_SUCCESS")) continue; else return status.getPath(); } return null; } private String[] getHeaderArray(Path path, Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); FileStatus[] status = fs.listStatus(path); String line = ""; try { BufferedReader br = new BufferedReader( new InputStreamReader(fs.open(filterOutSuccessFile(status)))); line = br.readLine(); br.close(); } catch (Exception e) { throw new RuntimeException(e); } finally { fs.close(); } return line.split(","); } private Configuration getConfiguration(String userId, String password) throws LoginException, IOException { Configuration conf = new Configuration(); // load hdfs-site.xml and core-site.xml String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.HDFS_SITE_CONFIG_PATH); String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.CORE_SITE_CONFIG_PATH); LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath); conf.addResource(new Path(hdfsConfigPath)); LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath); conf.addResource(new Path(coreSiteConfigPath)); KerberosUtilities kerberosUtilities = new KerberosUtilities(); // apply kerberos token kerberosUtilities.applyKerberosToken(userId, password, conf); return conf; } private void deleteLingualResult(String deletePath) throws IOException { Configuration configuration = new Configuration(); FileSystem fileSystem = FileSystem.get(configuration); Path deletingFilePath = new Path(deletePath); if (!fileSystem.exists(deletingFilePath)) { throw new PathNotFoundException(deletingFilePath.toString()); } else { boolean isDeleted = fileSystem.delete(deletingFilePath, true); if (isDeleted) { fileSystem.deleteOnExit(deletingFilePath); } LOG.info("Deleted path : " + deletePath); } fileSystem.close(); } private String generateUUID() { return String.valueOf(UUID.randomUUID()); } private String 
parseSQLQueryToLingualQuery(RemoteFilterJson remoteFilterJson) { ANTLRInputStream stream = new ANTLRInputStream(remoteFilterJson.getCondition()); QueryParserLexer lexer = new QueryParserLexer(stream); CommonTokenStream tokenStream = new CommonTokenStream(lexer); QueryParserParser parser = new QueryParserParser(tokenStream); parser.removeErrorListeners(); LingualQueryCreator customVisitor = new LingualQueryCreator(remoteFilterJson.getSchema()); String condition = customVisitor.visit(parser.eval()); return condition; } private HashMap<String, Type> getFieldNameAndType(RemoteFilterJson remoteFilterJson) throws ClassNotFoundException { HashMap<String, Type> fieldDataTypeMap = new HashMap<>(); Type type; for (int i = 0; i < remoteFilterJson.getSchema().size(); i++) { Class clazz = Class.forName(remoteFilterJson.getSchema().get(i).getDataTypeValue()); if (clazz.getSimpleName().toString().equalsIgnoreCase("Date")) { type = new SQLTimestampCoercibleType(); } else { type = clazz; } fieldDataTypeMap.put(remoteFilterJson.getSchema().get(i).getFieldName(), type); } return fieldDataTypeMap; } }); }
From source file:io.seqware.pipeline.plugins.sanity.checks.HDFS_Check.java
License:Open Source License
@Override
public boolean check(QueryRunner qRunner, Metadata metadataWS) throws SQLException {
    FileSystem fileSystem = null;
    HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
    if (settings.isEmpty()) {
        return false;
    } else if (!settings.containsKey("FS.DEFAULTFS") || !settings.containsKey("FS.HDFS.IMPL")) {
        return false;
    } else if (!settings.containsKey("HBASE.ZOOKEEPER.QUORUM")
            || !settings.containsKey("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT")
            || !settings.containsKey("HBASE.MASTER") || !settings.containsKey("MAPRED.JOB.TRACKER")) {
        return false;
    }
    try {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", settings.get("HBASE.ZOOKEEPER.QUORUM"));
        conf.set("hbase.zookeeper.property.clientPort", settings.get("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT"));
        conf.set("hbase.master", settings.get("HBASE.MASTER"));
        conf.set("mapred.job.tracker", settings.get("MAPRED.JOB.TRACKER"));
        conf.set("fs.default.name", settings.get("FS.DEFAULTFS"));
        conf.set("fs.defaultfs", settings.get("FS.DEFAULTFS"));
        conf.set("fs.hdfs.impl", settings.get("FS.HDFS.IMPL"));

        fileSystem = FileSystem.get(conf);
        Path path = new Path("test");
        fileSystem.mkdirs(path);
        fileSystem.deleteOnExit(path);
    } catch (IOException ex) {
        System.err.println("Error connecting to hdfs" + ex.getMessage());
        return false;
    } finally {
        try {
            if (fileSystem != null) {
                fileSystem.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(HDFS_Check.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return true;
}
From source file:org.apache.accumulo.core.client.mock.MockTableOperationsTest.java
License:Apache License
private ImportTestFilesAndData prepareTestFiles() throws Throwable {
    Configuration defaultConf = new Configuration();
    Path tempFile = new Path("target/accumulo-test/import/sample.rf");
    Path failures = new Path("target/accumulo-test/failures/");
    FileSystem fs = FileSystem.get(new URI("file:///"), defaultConf);
    fs.deleteOnExit(tempFile);
    fs.deleteOnExit(failures);
    fs.delete(failures, true);
    fs.delete(tempFile, true);
    fs.mkdirs(failures);
    fs.mkdirs(tempFile.getParent());
    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
            .forFile(tempFile.toString(), fs, defaultConf)
            .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
    writer.startDefaultLocalityGroup();
    List<Pair<Key, Value>> keyVals = new ArrayList<>();
    for (int i = 0; i < 5; i++) {
        keyVals.add(new Pair<>(new Key("a" + i, "b" + i, "c" + i, new ColumnVisibility(""), 1000l + i),
                new Value(Integer.toString(i).getBytes())));
    }
    for (Pair<Key, Value> keyVal : keyVals) {
        writer.append(keyVal.getFirst(), keyVal.getSecond());
    }
    writer.close();
    ImportTestFilesAndData files = new ImportTestFilesAndData();
    files.failurePath = failures;
    files.importPath = tempFile.getParent();
    files.keyVals = keyVals;
    return files;
}
From source file:org.apache.beam.sdk.extensions.sorter.ExternalSorter.java
License:Apache License
/**
 * Initializes the hadoop sorter. Does some local file system setup, and is somewhat expensive
 * (~20 ms on local machine). Only executed when necessary.
 */
private void initHadoopSorter() throws IOException {
    if (!initialized) {
        tempDir = new Path(options.getTempLocation(), "tmp" + UUID.randomUUID().toString());
        paths = new Path[] { new Path(tempDir, "test.seq") };

        JobConf conf = new JobConf();
        // Sets directory for intermediate files created during merge of merge sort
        conf.set("io.seqfile.local.dir", tempDir.toUri().getPath());

        writer = SequenceFile.createWriter(conf, Writer.valueClass(BytesWritable.class),
                Writer.keyClass(BytesWritable.class), Writer.file(paths[0]),
                Writer.compression(CompressionType.NONE));

        FileSystem fs = FileSystem.getLocal(conf);
        // Directory has to exist for Hadoop to recognize it as deletable on exit
        fs.mkdirs(tempDir);
        fs.deleteOnExit(tempDir);

        sorter = new SequenceFile.Sorter(fs, new BytesWritable.Comparator(), BytesWritable.class,
                BytesWritable.class, conf);
        sorter.setMemory(options.getMemoryMB() * 1024 * 1024);

        initialized = true;
    }
}
From source file:org.apache.carbondata.core.datastorage.store.impl.FileFactory.java
License:Apache License
/**
 * Creates a new lock file; if it is successfully created, the path is registered for
 * deletion so that it is cleaned up even after an abrupt shutdown.
 *
 * @param filePath
 * @param fileType
 * @return
 * @throws IOException
 */
public static boolean createNewLockFile(String filePath, FileType fileType) throws IOException {
    filePath = filePath.replace("\\", "/");
    switch (fileType) {
    case HDFS:
    case VIEWFS:
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(configuration);
        if (fs.createNewFile(path)) {
            fs.deleteOnExit(path);
            return true;
        }
        return false;
    case LOCAL:
    default:
        File file = new File(filePath);
        return file.createNewFile();
    }
}