List of usage examples for org.apache.hadoop.fs.FileSystem#close()
@Override public void close() throws IOException
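Note on close(): FileSystem.get(conf) normally returns a shared, cached instance per URI scheme and user, so calling close() on it also closes that instance for every other caller in the same JVM (caching can be disabled per scheme, e.g. fs.hdfs.impl.disable.cache). The examples below close the handle because each runs as a short-lived tool or cleanup step. When an independently closeable handle is wanted, a minimal sketch (paths and configuration values are placeholders) looks like this:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CloseExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // newInstance() bypasses the FileSystem cache, so close() here
        // cannot invalidate handles obtained via FileSystem.get() elsewhere.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            System.out.println("exists: " + fs.exists(new Path("/tmp/example.txt")));
        } // FileSystem implements Closeable, so try-with-resources calls close()
    }
}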
From source file:Hdfs_Operations.HdfsReaderToLocal.java
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("HdfsReader [hdfs input path] [local output path]");
        return 1;
    }
    Path inputPath = new Path(args[0]);
    String localOutputPath = args[1];
    Configuration conf = getConf();
    System.out.println("configured filesystem = " + conf.get(FS_PARAM_NAME));

    FileSystem fs = FileSystem.get(conf);
    InputStream is = fs.open(inputPath);
    OutputStream os = new BufferedOutputStream(new FileOutputStream(localOutputPath));
    IOUtils.copyBytes(is, os, conf);
    /*
    // Optionally echo the copied file to stdout:
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(inputPath)));
    String line;
    System.out.println("$$$$$$$$$$$$$$$$$$$$$-> Results");
    while ((line = br.readLine()) != null) {
        System.out.println(line);
    }
    br.close();
    */
    is.close();
    os.close();
    fs.close();
    return 0;
}
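The copy above can also be expressed with FileSystem.copyToLocalFile, and try-with-resources guarantees the handle is closed even when the copy fails. A sketch under the same assumptions (args[0] is an HDFS path, args[1] a local path):

Path inputPath = new Path(args[0]);
Path localOutputPath = new Path(args[1]);
try (FileSystem fs = FileSystem.get(getConf())) {
    // copyToLocalFile opens, copies, and closes the underlying streams itself
    fs.copyToLocalFile(inputPath, localOutputPath);
}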
From source file:Hdfs_Operations.HdfsRemove.java
@Override
public int run(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    Path path = new Path(args[0]);
    if (fs.exists(path)) {
        fs.delete(path, true); // delete file, true for recursive
    }
    fs.close();
    return 0;
}
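For HDFS, FileSystem.delete returns false rather than throwing when the path does not exist, so the exists() pre-check is optional; inspecting the return value gives the same information in a single call. A variant of the same run() method, sketched with try-with-resources so close() also runs when delete() throws:

@Override
public int run(String[] args) throws Exception {
    Path path = new Path(args[0]);
    try (FileSystem fs = FileSystem.get(getConf())) {
        boolean deleted = fs.delete(path, true); // recursive; false if the path was absent
        System.out.println(path + (deleted ? " deleted" : " not found"));
    }
    return 0;
}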
From source file:hibench.DataPaths.java
License:Apache License
public static void checkHdfsFile(Path path, boolean mkdir) throws IOException {
    FileSystem fs = path.getFileSystem(new Configuration());
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    if (mkdir) {
        fs.mkdirs(path);
    }
    fs.close();
}
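Path.getFileSystem(conf) usually hands back the process-wide cached FileSystem, so closing it inside a small utility like checkHdfsFile can invalidate handles held by other code in the same JVM. A defensive variant, sketched with an uncached instance:

public static void checkHdfsFile(Path path, boolean mkdir) throws IOException {
    Configuration conf = new Configuration();
    // Uncached instance: closing it does not disturb FileSystem.get() users elsewhere.
    try (FileSystem fs = FileSystem.newInstance(path.toUri(), conf)) {
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        if (mkdir) {
            fs.mkdirs(path);
        }
    }
}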
From source file:hibench.DataPaths.java
License:Apache License
public static void moveFilesToParent(Path src) throws IOException {
    FileSystem fs = src.getFileSystem(new Configuration());
    Path parent = src.getParent();
    FileStatus[] flist = fs.listStatus(src);
    for (FileStatus file : flist) {
        if (null != file) {
            fs.rename(file.getPath(), new Path(parent, file.getPath().getName()));
        }
    }
    fs.delete(src, true);
    fs.close();
}
From source file:hibench.DataPaths.java
License:Apache License
public void cleanWorkDir() throws IOException {
    FileSystem fs = working.getFileSystem(new Configuration());
    fs.delete(working, true);
    fs.close();
}
From source file:hibench.DataPaths.java
License:Apache License
public void cleanTempFiles(Path file) throws IOException {
    Path ftemp = new Path(file, "_logs");
    FileSystem fs = ftemp.getFileSystem(new Configuration());
    fs.delete(ftemp, true);
    fs.close();
}
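Each DataPaths helper above builds a fresh Configuration, obtains a FileSystem, and closes it again. When several of them run back to back, it can be cheaper, and kinder to the shared cache, to open one handle, use it for all the operations, and close it once. A sketch of that shape (cleanAll and its parameters are hypothetical, standing in for the helpers above):

public void cleanAll(Path working, Path output) throws IOException {
    try (FileSystem fs = FileSystem.newInstance(new Configuration())) {
        fs.delete(working, true);                    // was cleanWorkDir()
        fs.delete(new Path(output, "_logs"), true);  // was cleanTempFiles(output)
    } // a single close() for the whole batch
}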
From source file:hws.core.JobClient.java
License:Apache License
public void run(String[] args) throws Exception {
    //final String command = args[0];
    //final int n = Integer.valueOf(args[1]);
    //final Path jarPath = new Path(args[2]);
    Options options = new Options();
    /*options.addOption(OptionBuilder.withLongOpt("jar")
            .withDescription("Jar path")
            .hasArg()
            .withArgName("JarPath")
            .create());
    options.addOption(OptionBuilder.withLongOpt("scheduler")
            .withDescription("Scheduler class name")
            .hasArg()
            .withArgName("ClassName")
            .create());
    */
    options.addOption(OptionBuilder.withLongOpt("zk-servers")
            .withDescription("List of the ZooKeeper servers").hasArgs().withArgName("zkAddrs").create("zks"));
    //options.addOption("l", "list", false, "list modules");
    options.addOption(OptionBuilder.withLongOpt("load").withDescription("load new modules").hasArgs()
            .withArgName("XMLFiles").create());
    /*options.addOption(OptionBuilder.withLongOpt("remove")
            .withDescription("remove modules")
            .hasArgs()
            .withArgName("ModuleNames")
            .create("rm"));
    */
    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);
    //Path jarPath = null;
    //String schedulerClassName = null;
    String[] xmlFileNames = null;
    //String[] moduleNames = null;
    String zksArgs = "";
    String[] zkServers = null;
    if (cmd.hasOption("zks")) {
        zksArgs = "-zks";
        zkServers = cmd.getOptionValues("zks");
        for (String zks : zkServers) {
            zksArgs += " " + zks;
        }
    }
    //Logger setup
    //FSDataOutputStream writer = FileSystem.get(conf).create(new Path("hdfs:///hws/apps/"+appIdStr+"/logs/jobClient.log"));
    //Logger.addOutputStream(writer);
    /*if (cmd.hasOption("l")) {
        LOG.warn("Argument --list (-l) is not supported yet.");
    }
    if (cmd.hasOption("jar")) {
        jarPath = new Path(cmd.getOptionValue("jar"));
    }
    if (cmd.hasOption("scheduler")) {
        schedulerClassName = cmd.getOptionValue("scheduler");
    }*/
    if (cmd.hasOption("load")) {
        xmlFileNames = cmd.getOptionValues("load");
    }
    /*else if (cmd.hasOption("rm")) {
        moduleNames = cmd.getOptionValues("rm");
    }*/
    //LOG.info("Jar-Path " + jarPath);
    if (xmlFileNames != null) {
        String paths = "";
        for (String path : xmlFileNames) {
            paths += path + "; ";
        }
        LOG.info("Load XMLs: " + paths);
    }
    /*if (moduleNames != null) {
        String modules = "";
        for (String module : moduleNames) {
            modules += module + "; ";
        }
        LOG.info("remove: " + modules);
    }*/
    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();
    System.out.println("LOG Path: " + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    ZkClient zk = new ZkClient(zkServers[0]); //TODO select a ZooKeeper server
    if (!zk.exists("/hadoop-watershed")) {
        zk.createPersistent("/hadoop-watershed", "");
    }
    zk.createPersistent("/hadoop-watershed/" + appId.toString(), "");
    FileSystem fs = FileSystem.get(conf);
    LOG.info("Collecting files to upload");
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString()));
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString() + "/logs"));
    ModulePipeline modulePipeline = ModulePipeline.fromXMLFiles(xmlFileNames);
    LOG.info("Uploading files to HDFS");
    for (String path : modulePipeline.files()) {
        uploadFile(fs, new File(path), appId);
    }
    LOG.info("Upload finished");
    String modulePipelineJson = Json.dumps(modulePipeline);
    String modulePipelineBase64 = Base64.encodeBase64String(StringUtils.getBytesUtf8(modulePipelineJson))
            .replaceAll("\\s", "");
    LOG.info("ModulePipeline: " + modulePipelineJson);
    //LOG.info("ModulePipeline: " + modulePipelineBase64);
    amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M"
            + " hws.core.JobMaster" + " -aid " + appId.toString() + " --load " + modulePipelineBase64 + " "
            + zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
            + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));
    // Setup jar for ApplicationMaster
    //LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    //setupAppMasterJar(jarPath, appMasterJar);
    //amContainer.setLocalResources(Collections.singletonMap("hws.jar", appMasterJar));
    LOG.info("Listing files for YARN-Watershed");
    RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
    Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
    LOG.info("Files setup as resource");
    while (filesIterator.hasNext()) {
        LocatedFileStatus fileStatus = filesIterator.next();
        // Setup jar for ApplicationMaster
        LocalResource containerJar = Records.newRecord(LocalResource.class);
        ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
        resources.put(fileStatus.getPath().getName(), containerJar);
    }
    LOG.info("container resource setup");
    amContainer.setLocalResources(resources);
    fs.close(); // closing FileSystem interface
    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    ContainerUtils.setupContainerEnv(appMasterEnv, conf);
    amContainer.setEnvironment(appMasterEnv);
    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);
    // Finally, set up the ApplicationSubmissionContext for the application
    //ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("Hadoop-Watershed"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue
    // Submit application
    LOG.info("Submitting application " + appId);
    yarnClient.submitApplication(appContext);
    LOG.info("Waiting for containers to finish");
    zk.waitUntilExists("/hadoop-watershed/" + appId.toString() + "/done", TimeUnit.MILLISECONDS, 250);
    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        Thread.sleep(100);
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }
    System.out.println("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());
    System.out.println("deleting " + appId.toString() + " znode");
    zk.deleteRecursive("/hadoop-watershed/" + appId.toString()); //TODO remove app folder from ZooKeeper
}
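In the JobClient above, fs.close() is only reached when every mkdirs, upload, and listing succeeds; if any of them throws, the handle is never closed. A sketch of the HDFS portion rewritten with try-with-resources, reusing the names from the original (uploadFile, ContainerUtils, and the hdfs:///hws paths are this project's own):

Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
try (FileSystem fs = FileSystem.get(conf)) {
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString()));
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString() + "/logs"));
    for (String path : modulePipeline.files()) {
        uploadFile(fs, new File(path), appId);
    }
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        LocalResource jar = Records.newRecord(LocalResource.class);
        ContainerUtils.setupContainerJar(fs, status.getPath(), jar);
        resources.put(status.getPath().getName(), jar);
    }
} // close() runs whether or not the uploads succeeded
amContainer.setLocalResources(resources);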
From source file:hws.core.JobMaster.java
License:Apache License
public void onContainersAllocated(List<Container> containers) {
    FileSystem fs = null;
    try {
        fs = FileSystem.get(getConfiguration());
    } catch (IOException e) {
        Logger.severe(e.toString());
    }
    for (Container container : containers) {
        try {
            //PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("/home/yarn/rcor/yarn/app-master-log.out")));
            Logger.info("Selecting instance to container: " + container.getId().toString());
            // given the container, choose the instance whose input data is closest to that container
            InstanceInfo instanceInfo = null;
            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                currentModuleIndex++;
            }
            if (currentModuleIndex < modulePipeline.size()) {
                instanceInfo = instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                        .build();
            } else {
                break;
            }
            String instanceInfoBase64 = Base64
                    .encodeBase64String(StringUtils.getBytesUtf8(Json.dumps(instanceInfo)))
                    .replaceAll("\\s", "");
            // Launch container by creating a ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
            ctx.setCommands(Collections.singletonList(
                    "$JAVA_HOME/bin/java -Xmx256M hws.core.InstanceDriver --load " + instanceInfoBase64
                            + " -aid " + this.appIdStr + " -cid " + container.getId().toString() + " "
                            + this.zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
                            + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));
            Logger.info("Listing YARN-Watershed files for app-id: " + this.appIdStr);
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
            Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
            Logger.info("Setup YARN-Watershed files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }
            Logger.info("Listing application files for app-id: " + this.appIdStr);
            files = fs.listFiles(new Path("hdfs:///hws/apps/" + this.appIdStr + "/"), false);
            Logger.info("Setup application files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }
            Logger.info("container resource setup");
            ctx.setLocalResources(resources);
            Logger.info("Environment setup");
            // Setup CLASSPATH for the container
            Map<String, String> containerEnv = new HashMap<String, String>();
            ContainerUtils.setupContainerEnv(containerEnv, getConfiguration());
            ctx.setEnvironment(containerEnv);
            Logger.info("Starting containers");
            Logger.info("[AM] Launching container " + container.getId());
            nmClient.startContainer(container, ctx);
            Logger.info("Container started!");
            /*String znode = "/hadoop-watershed/"+this.appIdStr+"/"+instanceInfo.filterInfo().name()+"/"+instanceInfo.instanceId();
            out.println("Saving instance znode: "+znode);
            out.flush();
            zk.createPersistent(znode, "");
            zk.createPersistent(znode+"/host", container.getNodeId().getHost());
            out.println("saved location: "+container.getNodeId().getHost());
            out.flush();
            */
            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                Logger.info("Starting via ZooKeeper filter: " + instanceInfo.filterInfo().name());
                zk.createPersistent("/hadoop-watershed/" + this.appIdStr + "/"
                        + instanceInfo.filterInfo().name() + "/start", "");
            }
            //out.close();
        } catch (Exception e) {
            Logger.severe("[AM] Error launching container " + container.getId() + " " + e);
        }
    }
    try {
        if (fs != null) { // guard against the case where the initial FileSystem.get failed
            fs.close();
        }
    } catch (IOException e) {
        Logger.severe(e.toString());
    }
}
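If the initial FileSystem.get fails, fs stays null and an unguarded close() at the end of the callback would throw a NullPointerException. Hadoop ships a null-safe helper for exactly this kind of best-effort cleanup; a sketch of the method's tail using it:

// org.apache.hadoop.io.IOUtils
// closeStream() ignores null and swallows the IOException, which is acceptable
// for best-effort cleanup at the end of a callback like onContainersAllocated.
IOUtils.closeStream(fs);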
From source file:hydrograph.server.debug.lingual.LingualSchemaCreatorTest.java
License:Apache License
@AfterClass
public static void cleanUp() {
    System.gc();
    Configuration configuration = new Configuration();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        Path deletingFilePath = new Path("testData/MetaData/");
        if (!fileSystem.exists(deletingFilePath)) {
            throw new PathNotFoundException(deletingFilePath.toString());
        } else {
            boolean isDeleted = fileSystem.delete(deletingFilePath, true);
            if (isDeleted) {
                fileSystem.deleteOnExit(deletingFilePath);
            }
        }
        fileSystem.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
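Registering deleteOnExit for a path that has just been deleted is redundant: deleteOnExit only removes paths that still exist when close() runs. A leaner sketch of the same cleanup (the semantics differ slightly in that it reports any failed delete, not just a missing path):

@AfterClass
public static void cleanUp() throws IOException {
    Path metaDataPath = new Path("testData/MetaData/");
    try (FileSystem fs = FileSystem.get(new Configuration())) {
        if (!fs.delete(metaDataPath, true)) {
            throw new PathNotFoundException(metaDataPath.toString());
        }
    } // close() also processes any deleteOnExit entries registered on this instance
}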
From source file:hydrograph.server.service.HydrographService.java
License:Apache License
private void start() { int portNumber = Constants.DEFAULT_PORT_NUMBER; try {// w ww .j a v a 2 s.c o m portNumber = Integer .parseInt(ServiceUtilities.getServiceConfigResourceBundle().getString(Constants.PORT_ID)); LOG.debug("Port number '" + portNumber + "' fetched from properties file"); } catch (Exception e) { LOG.error("Error fetching port number. Defaulting to " + Constants.DEFAULT_PORT_NUMBER, e); } /** * Setting Port number to the server */ Spark.port(portNumber); /** * Creating Websocket on Server for Execution tracking service. */ webSocket("/executionTracking", ExecutionTrackingWebsocketHandler.class); Spark.post("/getConnectionStatus", new Route() { @SuppressWarnings({ "unchecked", "rawtypes", "unused" }) @Override public Object handle(Request request, Response response) throws InstantiationException, IllegalAccessException, ClassNotFoundException, JSONException { LOG.info("************************getConnectionStatus endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); ObjectMapper objectMapper = new ObjectMapper(); String requestParameters = request.queryParams(Constants.REQUEST_PARAMETERS), dbClassName = null, objectAsString = null; JSONObject requestParameterValues = new JSONObject(requestParameters); Map metadataProperties = extractingJsonObjects(requestParameterValues); String dbTypeToTest = metadataProperties .getOrDefault(Constants.dbType, new ParamsCannotBeNullOrEmpty(Constants.dbType + " Cannot be null or empty")) .toString(); switch (dbTypeToTest.toLowerCase()) { case Constants.ORACLE: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.ORACLE_JDBC_CLASSNAME, Constants.QUERY_TO_TEST)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To Oracle database is Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To Oracle database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; case Constants.MYSQL: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.MYSQL_JDBC_CLASSNAME, Constants.QUERY_TO_TEST)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To MySQL database is Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To MySQL database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; case Constants.REDSHIFT: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.REDSHIFT_JDBC_CLASSNAME, Constants.DEFAULT_REDRESHIFT_QUERY_TO_TEST)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To Redshift database is Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To Redshift database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; case Constants.TERADATA: try { if (ServiceUtilities.getConnectionStatus(metadataProperties, Constants.TERADATA_JDBC_CLASSNAME, Constants.QUERY_TO_TEST_TERADATA)) { LOG.trace("Connection Successful"); objectAsString = objectMapper .writeValueAsString("Connection To Teradata database is 
Successful"); } else { LOG.trace("Connection UnSuccessful"); objectAsString = objectMapper .writeValueAsString("Connection To Teradata database UnSuccessful"); } } catch (Exception e) { LOG.error("Connection fails with exception : " + e); objectAsString = e.getLocalizedMessage(); } break; } return objectAsString; } @SuppressWarnings({ "unchecked", "rawtypes" }) private Map extractingJsonObjects(JSONObject requestParameterValues) throws JSONException { String dbType = null, userId = null, password = null, host = null, port = null, sid = null, driverType = null, query = null, tableName = null, database = null; Map metadataProperties = new HashMap(); if (!requestParameterValues.isNull(Constants.dbType)) { dbType = requestParameterValues.getString(Constants.dbType); metadataProperties.put(Constants.dbType, dbType); } if (!requestParameterValues.isNull(Constants.USERNAME)) { userId = requestParameterValues.getString(Constants.USERNAME); metadataProperties.put(Constants.USERNAME, userId); } if (!requestParameterValues.isNull(Constants.SERVICE_PWD)) { password = requestParameterValues.getString(Constants.SERVICE_PWD); metadataProperties.put(Constants.SERVICE_PWD, password); } if (!requestParameterValues.isNull(Constants.HOST_NAME)) { host = requestParameterValues.getString(Constants.HOST_NAME); metadataProperties.put(Constants.HOST_NAME, host); } if (!requestParameterValues.isNull(Constants.PORT_NUMBER)) { port = requestParameterValues.getString(Constants.PORT_NUMBER); metadataProperties.put(Constants.PORT_NUMBER, port); } else { if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("mysql")) { port = Constants.MYSQL_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } else if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("oracle")) { port = Constants.ORACLE_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } LOG.info("Connecting " + dbType + " port is not provided using default port : " + port); } if (!requestParameterValues.isNull(Constants.SID)) { sid = requestParameterValues.getString(Constants.SID); metadataProperties.put(Constants.SID, sid); } if (!requestParameterValues.isNull(Constants.DRIVER_TYPE)) { driverType = requestParameterValues.getString(Constants.DRIVER_TYPE); metadataProperties.put(Constants.DRIVER_TYPE, driverType); } if (!requestParameterValues.isNull(Constants.QUERY)) { query = requestParameterValues.getString(Constants.QUERY); metadataProperties.put(Constants.QUERY, query); } if (!requestParameterValues.isNull(Constants.TABLENAME)) { tableName = requestParameterValues.getString(Constants.TABLENAME); metadataProperties.put(Constants.TABLENAME, tableName); } if (!requestParameterValues.isNull(Constants.DATABASE_NAME)) { database = requestParameterValues.getString(Constants.DATABASE_NAME); metadataProperties.put(Constants.DATABASE_NAME, database); } LOG.info("Fetched request parameters are: " + Constants.dbType + " => " + dbType + " " + Constants.USERNAME + " => " + userId + " " + Constants.HOST_NAME + " => " + host + " " + Constants.PORT_NUMBER + " => " + port + " " + Constants.SID + " => " + sid + " " + Constants.DRIVER_TYPE + " => " + driverType + " " + Constants.QUERY + " => " + query + " " + Constants.TABLENAME + " => " + tableName + " " + Constants.DATABASE_NAME + " => " + database + " "); return metadataProperties; } }); Spark.post("readFromMetastore", new Route() { @Override public Object handle(Request request, Response response) throws ParamsCannotBeNullOrEmpty, ClassNotFoundException, 
IllegalAccessException, JSONException, JsonProcessingException, TableOrQueryParamNotFound, SQLException, InstantiationException { LOG.info("************************readFromMetastore endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); ObjectMapper objectMapper = new ObjectMapper(); String requestParameters = request.queryParams(Constants.REQUEST_PARAMETERS), objectAsString = null, dbClassName = null; JSONObject requestParameterValues = new JSONObject(requestParameters); // Method to extracting request parameter details from input // json. Map metadataProperties = extractingJsonObjects(requestParameterValues); String dbType = metadataProperties .getOrDefault(Constants.dbType, new ParamsCannotBeNullOrEmpty(Constants.dbType + " Cannot be null or empty")) .toString(); LOG.info("Retrieving schema for " + dbType + " Database."); try { switch (dbType.toLowerCase()) { case Constants.ORACLE: dbClassName = Constants.oracle; OracleMetadataStrategy oracleMetadataHelper = (OracleMetadataStrategy) Class .forName(dbClassName).newInstance(); oracleMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(oracleMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for oracle : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.HIVE: dbClassName = Constants.hive; HiveMetadataStrategy hiveMetadataHelper = (HiveMetadataStrategy) Class.forName(dbClassName) .newInstance(); hiveMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(hiveMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for hive : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.REDSHIFT: dbClassName = Constants.redshift; RedshiftMetadataStrategy redShiftMetadataHelper = (RedshiftMetadataStrategy) Class .forName(dbClassName).newInstance(); redShiftMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(redShiftMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for redshift : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.MYSQL: dbClassName = Constants.mysql; MysqlMetadataStrategy mysqlMetadataHelper = (MysqlMetadataStrategy) Class .forName(dbClassName).newInstance(); mysqlMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(mysqlMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for mysql : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; case Constants.TERADATA: dbClassName = Constants.teradata; TeradataMetadataStrategy teradataMetadataHelper = (TeradataMetadataStrategy) Class .forName(dbClassName).newInstance(); teradataMetadataHelper.setConnection(metadataProperties); objectAsString = objectMapper .writeValueAsString(teradataMetadataHelper.fillComponentSchema(metadataProperties)); LOG.trace("Schema json for teradata : " + objectAsString); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); break; } } catch (Exception e) { LOG.error("Metadata read for database '" + dbType + "' not completed."); LOG.error("Exception : " + e); response.status(400); return "Metadata read for database '" + dbType + "' not completed."; } LOG.info("Class Name used for " + dbType + " Is : " + 
dbClassName); LOG.debug("Json for " + dbType + " : " + objectAsString); return objectAsString; } @SuppressWarnings({ "unchecked", "rawtypes" }) private Map extractingJsonObjects(JSONObject requestParameterValues) throws JSONException { String dbType = null, userId = null, password = null, host = null, port = null, sid = null, driverType = null, query = null, tableName = null, database = null; Map metadataProperties = new HashMap(); if (!requestParameterValues.isNull(Constants.dbType)) { dbType = requestParameterValues.getString(Constants.dbType); metadataProperties.put(Constants.dbType, dbType); } if (!requestParameterValues.isNull(Constants.USERNAME)) { userId = requestParameterValues.getString(Constants.USERNAME); metadataProperties.put(Constants.USERNAME, userId); } if (!requestParameterValues.isNull(Constants.SERVICE_PWD)) { password = requestParameterValues.getString(Constants.SERVICE_PWD); metadataProperties.put(Constants.SERVICE_PWD, password); } if (!requestParameterValues.isNull(Constants.HOST_NAME)) { host = requestParameterValues.getString(Constants.HOST_NAME); metadataProperties.put(Constants.HOST_NAME, host); } if (!requestParameterValues.isNull(Constants.PORT_NUMBER)) { port = requestParameterValues.getString(Constants.PORT_NUMBER); metadataProperties.put(Constants.PORT_NUMBER, port); } else { if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("mysql")) { port = Constants.MYSQL_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } else if (metadataProperties.get(Constants.dbType).toString().equalsIgnoreCase("oracle")) { port = Constants.ORACLE_DEFAULT_PORT; metadataProperties.put(Constants.PORT_NUMBER, port); } LOG.info("Connecting " + dbType + " port is not provided using default port : " + port); } if (!requestParameterValues.isNull(Constants.SID)) { sid = requestParameterValues.getString(Constants.SID); metadataProperties.put(Constants.SID, sid); } if (!requestParameterValues.isNull(Constants.DRIVER_TYPE)) { driverType = requestParameterValues.getString(Constants.DRIVER_TYPE); metadataProperties.put(Constants.DRIVER_TYPE, driverType); } if (!requestParameterValues.isNull(Constants.QUERY)) { query = requestParameterValues.getString(Constants.QUERY); metadataProperties.put(Constants.QUERY, query); } if (!requestParameterValues.isNull(Constants.TABLENAME)) { tableName = requestParameterValues.getString(Constants.TABLENAME); metadataProperties.put(Constants.TABLENAME, tableName); } if (!requestParameterValues.isNull(Constants.DATABASE_NAME)) { database = requestParameterValues.getString(Constants.DATABASE_NAME); metadataProperties.put(Constants.DATABASE_NAME, database); } LOG.info("Fetched request parameters are: " + Constants.dbType + " => " + dbType + " " + Constants.USERNAME + " => " + userId + " " + Constants.HOST_NAME + " => " + host + " " + Constants.PORT_NUMBER + " => " + port + " " + Constants.SID + " => " + sid + " " + Constants.DRIVER_TYPE + " => " + driverType + " " + Constants.QUERY + " => " + query + " " + Constants.TABLENAME + " => " + tableName + " " + Constants.DATABASE_NAME + " => " + database + " "); return metadataProperties; } }); Spark.post("/read", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("************************read endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); String jobId = request.queryParams(Constants.JOB_ID); String componentId = request.queryParams(Constants.COMPONENT_ID); String socketId = 
request.queryParams(Constants.SOCKET_ID); String basePath = request.queryParams(Constants.BASE_PATH); // String host = request.queryParams(Constants.HOST); String userID = request.queryParams(Constants.USER_ID); String password = request.queryParams(Constants.SERVICE_PWD); double sizeOfData = Double.parseDouble(request.queryParams(Constants.FILE_SIZE)) * 1024 * 1024; LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}, DataSize:{}", basePath, jobId, componentId, socketId, userID, sizeOfData); String batchID = jobId + "_" + componentId + "_" + socketId; String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.TEMP_LOCATION_PATH); String filePath = tempLocationPath + "/" + batchID + ".csv"; try { readFileFromHDFS(basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, sizeOfData, filePath, userID, password); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); } catch (Exception e) { LOG.error("Error in reading debug files", e); return "error"; } return filePath; } /** * This method will read the HDFS file, fetch the records from it * and write its records to a local file on edge node with size <= * {@code sizeOfData} passed in parameter. * * @param hdfsFilePath path of HDFS file from where records to be read * @param sizeOfData defines the size of data (in bytes) to be read from * HDFS file * @param remoteFileName after reading the data of {@code sizeOfData} bytes * from HDFS file, it will be written to local file on * edge node with file name {@code remoteFileName} * @param userId * @param password */ private void readFileFromHDFS(String hdfsFilePath, double sizeOfData, String remoteFileName, String userId, String password) { try { Path path = new Path(hdfsFilePath); LOG.debug("Reading Debug file:" + hdfsFilePath); Configuration conf = new Configuration(); // load hdfs-site.xml and core-site.xml String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.HDFS_SITE_CONFIG_PATH); String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.CORE_SITE_CONFIG_PATH); LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath); conf.addResource(new Path(hdfsConfigPath)); LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath); conf.addResource(new Path(coreSiteConfigPath)); KerberosUtilities kerberosUtilities = new KerberosUtilities(); // apply kerberos token kerberosUtilities.applyKerberosToken(userId, password, conf); listAndWriteFiles(remoteFileName, path, conf, sizeOfData); } catch (Exception e) { throw new RuntimeException(e); } } /** * This method will list all files for {@code path}, read all files * and writes its data to a local file on edge node with size <= * {@code sizeOfData} passed in parameter. 
* * @param remoteFileName * @param path * @param conf * @param sizeOfData * @throws IOException */ private void listAndWriteFiles(String remoteFileName, Path path, Configuration conf, double sizeOfData) throws IOException { FileSystem fs = FileSystem.get(conf); FileStatus[] status = fs.listStatus(path); File remoteFile = new File(remoteFileName); OutputStream os = new FileOutputStream(remoteFileName); try { int numOfBytes = 0; for (int i = 0; i < status.length; i++) { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath()))); String line = ""; line = br.readLine(); if (line != null) { // header will only get fetch from first part file // and it // will skip header from remaining files if (numOfBytes == 0) { os.write((line + "\n").toString().getBytes()); numOfBytes += line.toString().length(); } while ((line = br.readLine()) != null) { numOfBytes += line.toString().length(); // line = br.readLine(); if (numOfBytes <= sizeOfData) { os.write((line + "\n").toString().getBytes()); } else { break; } } } br.close(); remoteFile.setReadable(true, false); } } catch (Exception e) { throw new RuntimeException(e); } finally { os.close(); fs.close(); } } }); Spark.post("/delete", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("************************delete endpoint - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); response.type("text/json"); String jobId = request.queryParams(Constants.JOB_ID); String basePath = request.queryParams(Constants.BASE_PATH); String componentId = request.queryParams(Constants.COMPONENT_ID); String socketId = request.queryParams(Constants.SOCKET_ID); String userID = request.queryParams(Constants.USER_ID); String password = request.queryParams(Constants.SERVICE_PWD); LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}", basePath, jobId, componentId, socketId, userID); try { removeDebugFiles(basePath, jobId, componentId, socketId, userID, password); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); } catch (Exception e) { LOG.error("Error in deleting debug files", e); } return "error"; } private void removeDebugFiles(String basePath, String jobId, String componentId, String socketId, String userID, String password) { try { // DebugFilesReader debugFilesReader = new // DebugFilesReader(basePath, jobId, componentId, socketId, // userID, // password); delete(basePath, jobId, componentId, socketId, userID, password); } catch (Exception e) { LOG.error("Error while deleting the debug file", e); throw new RuntimeException(e); } } /** * Deletes the jobId directory * * @param password * @param userID * @param socketId * @param componentId * @param jobId * @param basePath * @throws IOException */ public void delete(String basePath, String jobId, String componentId, String socketId, String userID, String password) throws IOException { LOG.trace("Entering method delete()"); String deletePath = basePath + "/debug/" + jobId; Configuration configuration = new Configuration(); FileSystem fileSystem = FileSystem.get(configuration); Path deletingFilePath = new Path(deletePath); if (!fileSystem.exists(deletingFilePath)) { throw new PathNotFoundException(deletingFilePath.toString()); } else { // Delete file fileSystem.delete(deletingFilePath, true); LOG.info("Deleted path : " + deletePath); } fileSystem.close(); } }); Spark.post("/deleteLocalDebugFile", new Route() { @Override public Object handle(Request request, Response 
response) { String error = ""; LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); LOG.info("************************deleteLocalDebugFile endpoint - started************************"); try { String jobId = request.queryParams(Constants.JOB_ID); String componentId = request.queryParams(Constants.COMPONENT_ID); String socketId = request.queryParams(Constants.SOCKET_ID); String batchID = jobId + "_" + componentId + "_" + socketId; String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.TEMP_LOCATION_PATH); LOG.info("Job Id: {}, Component Id: {}, Socket ID: {}, TemporaryPath: {}", jobId, componentId, socketId, tempLocationPath); LOG.debug("File to be deleted: " + tempLocationPath + "/" + batchID + ".csv"); File file = new File(tempLocationPath + "/" + batchID + ".csv"); file.delete(); LOG.trace("Local debug file deleted successfully."); return "Success"; } catch (Exception e) { LOG.error("Error in deleting local debug file.", e); error = e.getMessage(); } LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); return "Local file delete failed. Error: " + error; } }); // TODO : Keep this for test Spark.post("/post", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("****TEST SPARK POST STARTED**********"); response.type("text/json"); return "calling post..."; } }); // TODO : Keep this for test Spark.get("/test", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("****TEST SPARK GET STARTED**********"); response.type("text/json"); response.status(200); response.body("Test successful!"); return "Test successful!"; } }); Spark.post("/filter", new Route() { @Override public Object handle(Request request, Response response) { LOG.info("************************filter - started************************"); LOG.info("+++ Start: " + new Timestamp((new Date()).getTime())); Gson gson = new Gson(); String json = request.queryParams(Constants.REQUEST_PARAMETERS); RemoteFilterJson remoteFilterJson = gson.fromJson(json, RemoteFilterJson.class); String jobId = remoteFilterJson.getJobDetails().getUniqueJobID(); String componentId = remoteFilterJson.getJobDetails().getComponentID(); String socketId = remoteFilterJson.getJobDetails().getComponentSocketID(); String basePath = remoteFilterJson.getJobDetails().getBasepath(); String username = remoteFilterJson.getJobDetails().getUsername(); String password = remoteFilterJson.getJobDetails().getService_pwd(); double outputFileSizeInMB = remoteFilterJson.getFileSize(); double sizeOfDataInByte = outputFileSizeInMB * 1024 * 1024; String condition = parseSQLQueryToLingualQuery(remoteFilterJson); LOG.info("Base Path: {}, Job Id: {}, Component Id: {}, Socket ID: {}, User ID:{}, DataSize:{}", basePath, jobId, componentId, socketId, username, sizeOfDataInByte); String batchID = jobId + "_" + componentId + "_" + socketId; String tempLocationPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.TEMP_LOCATION_PATH); String filePath = tempLocationPath + "/" + batchID + ".csv"; String UUID = generateUUID(); String uniqueId = batchID + "_" + UUID; String linugalMetaDataPath = basePath + "/filter/" + UUID; String fieldNames[] = getHeader(basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, username, password); try { HashMap<String, Type> fieldNameAndDatatype = getFieldNameAndType(remoteFilterJson); Type[] fieldTypes = getFieldTypeFromMap(fieldNames, fieldNameAndDatatype); Configuration conf = 
getConfiguration(username, password); new LingualFilter().filterData(linugalMetaDataPath, uniqueId, basePath + "/debug/" + jobId + "/" + componentId + "_" + socketId, sizeOfDataInByte, filePath, condition, fieldNames, fieldTypes, conf); LOG.info("debug output path : " + filePath); LOG.info("+++ Stop: " + new Timestamp((new Date()).getTime())); } catch (Exception e) { LOG.error("Error in reading debug files", e); return "error"; } finally { try { System.gc(); deleteLingualResult(linugalMetaDataPath); } catch (Exception e) { LOG.error("Error in deleting lingual result", e); return "Error in deleting lingual result: " + e.getMessage(); } } return filePath; } private Type[] getFieldTypeFromMap(String[] fieldNames, HashMap<String, Type> fieldNameAndDatatype) { Type[] type = new Type[fieldNameAndDatatype.size()]; int i = 0; for (String eachFieldName : fieldNames) { type[i++] = fieldNameAndDatatype.get(eachFieldName); } return type; } private String[] getHeader(String path, String username, String password) { String[] header = readFile(path, username, password); return header; } private String[] readFile(String hdfsFilePath, String username, String password) { String[] header = null; try { Path path = new Path(hdfsFilePath); LOG.debug("Reading Debug file:" + hdfsFilePath); Configuration conf = getConfiguration(username, password); header = getHeaderArray(path, conf); } catch (Exception e) { throw new RuntimeException(e); } return header; } private Path filterOutSuccessFile(FileStatus[] fileStatus) { for (FileStatus status : fileStatus) { if (status.getPath().getName().toUpperCase().contains("_SUCCESS")) continue; else return status.getPath(); } return null; } private String[] getHeaderArray(Path path, Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); FileStatus[] status = fs.listStatus(path); String line = ""; try { BufferedReader br = new BufferedReader( new InputStreamReader(fs.open(filterOutSuccessFile(status)))); line = br.readLine(); br.close(); } catch (Exception e) { throw new RuntimeException(e); } finally { fs.close(); } return line.split(","); } private Configuration getConfiguration(String userId, String password) throws LoginException, IOException { Configuration conf = new Configuration(); // load hdfs-site.xml and core-site.xml String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.HDFS_SITE_CONFIG_PATH); String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.CORE_SITE_CONFIG_PATH); LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath); conf.addResource(new Path(hdfsConfigPath)); LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath); conf.addResource(new Path(coreSiteConfigPath)); KerberosUtilities kerberosUtilities = new KerberosUtilities(); // apply kerberos token kerberosUtilities.applyKerberosToken(userId, password, conf); return conf; } private void deleteLingualResult(String deletePath) throws IOException { Configuration configuration = new Configuration(); FileSystem fileSystem = FileSystem.get(configuration); Path deletingFilePath = new Path(deletePath); if (!fileSystem.exists(deletingFilePath)) { throw new PathNotFoundException(deletingFilePath.toString()); } else { boolean isDeleted = fileSystem.delete(deletingFilePath, true); if (isDeleted) { fileSystem.deleteOnExit(deletingFilePath); } LOG.info("Deleted path : " + deletePath); } fileSystem.close(); } private String generateUUID() { return String.valueOf(UUID.randomUUID()); } private String 
parseSQLQueryToLingualQuery(RemoteFilterJson remoteFilterJson) { ANTLRInputStream stream = new ANTLRInputStream(remoteFilterJson.getCondition()); QueryParserLexer lexer = new QueryParserLexer(stream); CommonTokenStream tokenStream = new CommonTokenStream(lexer); QueryParserParser parser = new QueryParserParser(tokenStream); parser.removeErrorListeners(); LingualQueryCreator customVisitor = new LingualQueryCreator(remoteFilterJson.getSchema()); String condition = customVisitor.visit(parser.eval()); return condition; } private HashMap<String, Type> getFieldNameAndType(RemoteFilterJson remoteFilterJson) throws ClassNotFoundException { HashMap<String, Type> fieldDataTypeMap = new HashMap<>(); Type type; for (int i = 0; i < remoteFilterJson.getSchema().size(); i++) { Class clazz = Class.forName(remoteFilterJson.getSchema().get(i).getDataTypeValue()); if (clazz.getSimpleName().toString().equalsIgnoreCase("Date")) { type = new SQLTimestampCoercibleType(); } else { type = clazz; } fieldDataTypeMap.put(remoteFilterJson.getSchema().get(i).getFieldName(), type); } return fieldDataTypeMap; } }); }
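HydrographService is a long-running HTTP service, and several of its routes call FileSystem.get(conf) and then fs.close(). Because get() normally returns the JVM-wide cached instance, one request's close() can invalidate the handle another in-flight request is still using, typically surfacing as "Filesystem closed" IOExceptions. A per-request handle avoids this; a sketch with a hypothetical helper (disabling the cache via fs.hdfs.impl.disable.cache is the other common workaround):

private void deletePath(Configuration conf, String deletePath) throws IOException {
    // newInstance() returns a private, uncached handle; closing it cannot
    // affect FileSystem.get() handles used by other concurrent requests.
    try (FileSystem fs = FileSystem.newInstance(conf)) {
        Path target = new Path(deletePath);
        if (!fs.exists(target)) {
            throw new PathNotFoundException(target.toString());
        }
        fs.delete(target, true);
    }
}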