List of usage examples for org.apache.hadoop.fs FileSystem close
@Override public void close() throws IOException
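A minimal, self-contained sketch of the pattern the examples below share (the class name and the input-path argument are illustrative, not taken from any of the listed projects): obtain a FileSystem, use it, and close it when done. Note that FileSystem.get() normally returns a cached instance shared across the JVM, so closing it also closes it for other callers; FileSystem.newInstance() returns a private instance that is safe to close, and because FileSystem implements java.io.Closeable it also works with try-with-resources.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // newInstance() returns a non-cached FileSystem, so closing it here
        // cannot affect other code that shares the cached FileSystem.get() instance.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            Path path = new Path(args[0]); // illustrative input path
            if (fs.exists(path)) {
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(path), StandardCharsets.UTF_8))) {
                    System.out.println(reader.readLine());
                }
            }
        } // fs.close() is called automatically by try-with-resources
    }
}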
From source file:idgs.execution.IdgsLoadTask.java
License:Open Source License
@Override
protected int execute(DriverContext ctx) {
    FileSystem fs = work.getFileSystem();
    String dataFile = work.getDataFile();
    Path dataPath = new Path(dataFile);
    InsertOperator op = new InsertOperator(work.getStoreConfig());
    int batchSize = work.getBatchSize();
    List<String> buffer = new ArrayList<String>();
    FSDataInputStream in = null;
    LineReader reader = null;
    try {
        in = fs.open(dataPath);
        reader = new LineReader(in);
        Text line = new Text();
        int res = 1;
        while (res > 0) {
            res = reader.readLine(line);
            if (line.toString().trim().isEmpty()) {
                continue;
            }
            buffer.add(line.toString());
            if (buffer.size() == batchSize) {
                for (String row : buffer) {
                    try {
                        op.parseRow(row);
                        op.process();
                    } catch (IdgsException e) {
                        LOG.error("Error when insert data " + row + ", caused by " + e.getMessage() + ".", e);
                        return 2;
                    }
                }
                buffer.clear();
            }
        }
    } catch (IOException e) {
        LOG.error("Cannot open file " + dataFile + ".", e);
        return 1;
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            fs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return 0;
}
From source file:io.amient.yarn1.YarnContainerContext.java
License:Open Source License
private Map<String, LocalResource> prepareLocalResources() throws IOException {
    Map<String, LocalResource> localResources = Maps.newHashMap();
    FileSystem distFs = FileSystem.get(yarnConfig);
    prepareLocalResourceFile(localResources, jarName, jarName, distFs);
    prepareLocalResourceFile(localResources, "yarn1.configuration",
            jarName.replace(".jar", ".configuration"), distFs);
    distFs.close();
    return localResources;
}
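Because distFs above comes from FileSystem.get(yarnConfig), it is normally the JVM-wide cached instance, and distFs.close() also closes it for any other code sharing that cache entry. A variant sketch (the same fields and helper methods as in the example above are assumed), using FileSystem.newInstance so the close only releases a private instance:

private Map<String, LocalResource> prepareLocalResources() throws IOException {
    Map<String, LocalResource> localResources = Maps.newHashMap();
    // newInstance() bypasses the FileSystem cache, so close() below only
    // releases this private connection, not the shared cached instance.
    FileSystem distFs = FileSystem.newInstance(yarnConfig);
    try {
        prepareLocalResourceFile(localResources, jarName, jarName, distFs);
        prepareLocalResourceFile(localResources, "yarn1.configuration",
                jarName.replace(".jar", ".configuration"), distFs);
    } finally {
        distFs.close();
    }
    return localResources;
}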
From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java
License:Apache License
/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.
 */
protected YarnClusterClient deployInternal() throws Exception {
    isReadyForDeployment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    final YarnClient yarnClient = getYarnClient();

    // ------------------ Check if the specified queue exists --------------------
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        // check only if there are queues configured in yarn and for this session.
        if (queues.size() > 0 && this.yarnQueue != null) {
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. "
                        + "Available queues: " + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // Create application via yarnClient
    final YarnClientApplication yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    Map<String, String> jobSystemProperties = new HashMap<>(2);

    // Certificates are materialized locally so DFSClient can be set to null
    // LocalResources are not used by Flink, so set it null
    HopsUtils.copyUserKafkaCerts(services.getUserCerts(), project, username,
            services.getSettings().getHopsworksTmpCertDir(), services.getSettings().getHdfsTmpCertDir(),
            JobType.FLINK, null, null, jobSystemProperties,
            services.getSettings().getFlinkKafkaCertDir(), appResponse.getApplicationId().toString());

    StringBuilder tmpBuilder = new StringBuilder();
    for (Map.Entry<String, String> prop : jobSystemProperties.entrySet()) {
        String option = YarnRunner.escapeForShell("-D" + prop.getKey() + "=" + prop.getValue());
        javaOptions.add(option);
        addHopsworksParam(option);
        tmpBuilder.append(prop.getKey()).append("=").append(prop.getValue()).append("@@");
    }
    dynamicPropertiesEncoded += tmpBuilder.toString();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------
    Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    // ------------------ Set default file system scheme -------------------------
    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because its not overriding the
    // getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is "
                + "using the default Hadoop configuration values. The Flink YARN "
                + "client needs to store its files in a distributed file system");
    }

    // ------ Check if the YARN ClusterClient has the requested resources ---

    // the yarnMinAllocationMB specifies the smallest possible container
    // allocation size. all allocations below this value are automatically
    // set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible "
                + "YARN Container size. The value of 'yarn.scheduler.minimum-allocation-mb'"
                + " is " + yarnMinAllocationMB + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will"
                + " account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the "
            + "'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager"
                + " available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: "
                + jobManagerMemoryMb + "MB. " + NOTE);
    }
    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, "
            + "but maybe not all TaskManagers are connecting from the beginning "
            + "because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink "
            + "YARN client needs to wait until the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);
    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: "
                + freeClusterMem.containerLimit + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the JobManager (" + jobManagerMemoryMb
                + "MB) is more than " + "the largest possible YARN container: "
                + freeClusterMem.containerLimit + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.
    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, "
                    + "the following NodeManagers are available: " + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j);

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }
    flinkConfiguration.setString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, zkNamespace);

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        paths.add(remotePath);
        localResources.put(shipFile.getName(), shipResources);
        classPathBuilder.append(shipFile.getName());
        if (shipFile.isDirectory()) {
            // add directories to the classpath
            classPathBuilder.append(File.separator).append("*");
        }
        classPathBuilder.append(File.pathSeparator);
        envShipFileList.append(remotePath).append(",");
    }

    ////////////////////////////////////////////////////////////////////////////
    /*
     * Add Hops LocalResources paths here
     */
    //Add it to localResources
    for (Entry<String, LocalResource> entry : hopsworksResources.entrySet()) {
        localResources.put(entry.getKey(), entry.getValue());
        //Append name to classPathBuilder
        classPathBuilder.append(entry.getKey());
        classPathBuilder.append(File.pathSeparator);
    }
    ////////////////////////////////////////////////////////////////////////////

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);
    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");

    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    // setup security tokens
    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);

    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this "
                    + "command to further investigate the issue:\n" + "yarn logs -applicationId " + appId);
            //break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info("Deployment took more than 60 seconds. Please check if the "
                        + "requested resources are available in the YARN cluster");
            }
        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id for user to cancel themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }

    String host = report.getHost();
    int port = report.getRpcPort();

    // Correctly initialize the Flink config
    flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host);
    flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port);

    // the Flink cluster is deployed in YARN. Represent cluster
    return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true);
}
From source file:io.hops.hopsworks.common.jobs.flink.YarnClusterClient.java
License:Apache License
/**
 * Shuts down the Yarn application
 */
public void shutdownCluster() {
    if (hasBeenShutDown.getAndSet(true)) {
        return;
    }

    if (!isConnected) {
        throw new IllegalStateException("The cluster has been not been connected to the ApplicationMaster.");
    }

    try {
        Runtime.getRuntime().removeShutdownHook(clientShutdownHook);
    } catch (IllegalStateException e) {
        // we are already in the shutdown hook
    }

    LOG.info("Sending shutdown request to the Application Master");
    try {
        Future<Object> response = Patterns.ask(applicationClient.get(),
                new YarnMessages.LocalStopYarnSession(getApplicationStatus(),
                        "Flink YARN Client requested shutdown"),
                new Timeout(akkaDuration));
        Await.ready(response, akkaDuration);
    } catch (Exception e) {
        LOG.warn("Error while stopping YARN cluster.", e);
    }

    try {
        File propertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(flinkConfig);
        if (propertiesFile.isFile()) {
            if (propertiesFile.delete()) {
                LOG.info("Deleted Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString());
            } else {
                LOG.warn("Couldn't delete Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString());
            }
        }
    } catch (Exception e) {
        LOG.warn("Exception while deleting the JobManager address file", e);
    }

    if (sessionFilesDir != null) {
        LOG.info("Deleting files in " + sessionFilesDir);
        try {
            FileSystem shutFS = FileSystem.get(hadoopConfig);
            shutFS.delete(sessionFilesDir, true); // delete conf and jar file.
            shutFS.close();
        } catch (IOException e) {
            LOG.error("Could not delete the Flink jar and configuration files in HDFS..", e);
        }
    } else {
        LOG.warn("Session file directory not set. Not deleting session files");
    }

    try {
        pollingRunner.stopRunner();
        pollingRunner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Shutdown of the polling runner was interrupted", e);
        Thread.currentThread().interrupt();
    }

    try {
        ApplicationReport appReport = yarnClient.getApplicationReport(appId);

        LOG.info("Application " + appId + " finished with state " + appReport.getYarnApplicationState()
                + " and final state " + appReport.getFinalApplicationStatus() + " at " + appReport.getFinishTime());

        if (appReport.getYarnApplicationState() == YarnApplicationState.FAILED
                || appReport.getYarnApplicationState() == YarnApplicationState.KILLED) {
            LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics());
            LOG.warn("If log aggregation is activated in the Hadoop cluster, we recommend to retrieve "
                    + "the full application log using this command:\n" + "\tyarn logs -appReport "
                    + appReport.getApplicationId() + "\n"
                    + "(It sometimes takes a few seconds until the logs are aggregated)");
        }
    } catch (Exception e) {
        LOG.warn("Couldn't get final report", e);
    }

    LOG.info("YARN Client is shutting down");
    yarnClient.stop(); // actorRunner is using the yarnClient.
    yarnClient = null; // set null to clearly see if somebody wants to access it afterwards.
}
From source file:io.seqware.pipeline.plugins.sanity.checks.HDFS_Check.java
License:Open Source License
@Override
public boolean check(QueryRunner qRunner, Metadata metadataWS) throws SQLException {
    FileSystem fileSystem = null;
    HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
    if (settings.isEmpty()) {
        return false;
    } else if (!settings.containsKey("FS.DEFAULTFS") || !settings.containsKey("FS.HDFS.IMPL")) {
        return false;
    } else if (!settings.containsKey("HBASE.ZOOKEEPER.QUORUM")
            || !settings.containsKey("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT")
            || !settings.containsKey("HBASE.MASTER") || !settings.containsKey("MAPRED.JOB.TRACKER")) {
        return false;
    }

    try {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", settings.get("HBASE.ZOOKEEPER.QUORUM"));
        conf.set("hbase.zookeeper.property.clientPort", settings.get("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT"));
        conf.set("hbase.master", settings.get("HBASE.MASTER"));
        conf.set("mapred.job.tracker", settings.get("MAPRED.JOB.TRACKER"));
        conf.set("fs.default.name", settings.get("FS.DEFAULTFS"));
        conf.set("fs.defaultfs", settings.get("FS.DEFAULTFS"));
        conf.set("fs.hdfs.impl", settings.get("FS.HDFS.IMPL"));

        fileSystem = FileSystem.get(conf);
        Path path = new Path("test");
        fileSystem.mkdirs(path);
        fileSystem.deleteOnExit(path);
    } catch (IOException ex) {
        System.err.println("Error connecting to hdfs" + ex.getMessage());
        return false;
    } finally {
        try {
            if (fileSystem != null) {
                fileSystem.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(HDFS_Check.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return true;
}
From source file:it.tizianofagni.sparkboost.DataUtils.java
License:Apache License
/**
 * Write a text file on Hadoop file system by using standard Hadoop API.
 *
 * @param outputPath The file to be written.
 * @param content    The content to put in the file.
 */
public static void saveHadoopTextFile(String outputPath, String content) {
    try {
        Configuration configuration = new Configuration();
        Path file = new Path(outputPath);
        Path parentFile = file.getParent();
        FileSystem hdfs = FileSystem.get(file.toUri(), configuration);
        if (parentFile != null)
            hdfs.mkdirs(parentFile);
        OutputStream os = hdfs.create(file, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        br.write(content);
        br.close();
        hdfs.close();
    } catch (Exception e) {
        throw new RuntimeException("Writing Hadoop text file", e);
    }
}
From source file:net.broomie.JpWordCounter.java
License:Apache License
/**
 * This method is implement for creating the dfdb with MapReduce.
 * @param conf Specify the conf object, which is hadoop Configuration.
 * @param dfdb Specify the dfdb directory path on HDFS.
 * @return Return `true' if success, return `false' if fail.
 * @throws IOException Exception for a input file IO.
 * @throws InterruptedException Exception for return waitForCompletion().
 * @throws ClassNotFoundException Exception for Mapper and Reduce class.
 * @throws URISyntaxException Exception for new URI().
 * The dfdb means `document frequency'.
 */
private boolean runCreateDFDB(Configuration conf, String dfdb)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    String reducerNum = conf.get(WORD_COUNTER_REDUCER_NUM);
    Job job = new Job(conf);
    job.setJarByClass(JpWordCounter.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileSystem fs = FileSystem.get(new URI(dfdb), conf);
    FileStatus[] status = fs.listStatus(new Path(dfdb));
    if (status != null) {
        fs.delete(new Path(dfdb), true);
    }
    fs.close();
    FileOutputFormat.setOutputPath(job, new Path(dfdb));
    job.setMapperClass(DFMapper.class);
    job.setReducerClass(TokenizeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));
    boolean rv = job.waitForCompletion(true);
    if (rv) {
        Counters counters = job.getCounters();
        long inputNum = counters.findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
                .getValue();
        FileSystem hdfs = FileSystem.get(conf);
        String numLinePath = conf.get(PROP_LINE_NUM);
        FSDataOutputStream stream = hdfs.create(new Path(numLinePath));
        stream.writeUTF(String.valueOf((int) inputNum));
        stream.close();
    }
    return rv;
}
From source file:net.broomie.WordCoCounter.java
License:Apache License
/**
 * This method is implement for creating the dfdb with MapReduce.
 * @param conf Specify the conf object, which is hadoop Configuration.
 * @param dfdb Specify the dfdb directory path on HDFS.
 * @return Return `true' if success, return `false' if fail.
 * @throws IOException Exception for a input file IO.
 * @throws InterruptedException Exception for return waitForCompletion().
 * @throws ClassNotFoundException Exception for Mapper and Reduce class.
 * @throws URISyntaxException Exception for new URI().
 * The dfdb means `document frequency'.
 */
private boolean runWordCount(Configuration conf, String dfdb)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    String reducerNum = conf.get(WORD_CO_COUNTER_REDUCER_NUM);
    Job job = new Job(conf);
    job.setJarByClass(WordCoCounter.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileSystem fs = FileSystem.get(new URI(dfdb), conf);
    FileStatus[] status = fs.listStatus(new Path(dfdb));
    if (status != null) {
        fs.delete(new Path(dfdb), true);
    }
    fs.close();
    FileOutputFormat.setOutputPath(job, new Path(dfdb));
    //job.setMapperClass(TokenizeMapper.class);
    job.setMapperClass(DFMapper.class);
    job.setReducerClass(TokenizeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //job.setNumReduceTasks(Integer.valueOf(reducerNum));
    job.setNumReduceTasks(Integer.valueOf(8));
    boolean rv = job.waitForCompletion(true);
    if (rv) {
        Counters counters = job.getCounters();
        long inputNum = counters.findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
                .getValue();
        FileSystem hdfs = FileSystem.get(conf);
        String numLinePath = conf.get(PROP_LINE_NUM);
        FSDataOutputStream stream = hdfs.create(new Path(numLinePath));
        stream.writeUTF(String.valueOf((int) inputNum));
        stream.close();
    }
    return rv;
}
From source file:net.java.jatextmining.JaCoOccurrence.java
License:Apache License
/**
 * Tne implementation for start counting the co-occurrence words.
 * @param conf Specify the Hadoop Configuration object.
 * @param cache Specify the distributed cache file path.
 * @return If success return true, not success return false.
 * @throws IOException Exception for IO.
 * @throws URISyntaxException Exception for distributed cache file path.
 * @throws InterruptedException Exception for threads, waitForComletion().
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runJaCoOccurrence(Configuration conf, String cache)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String reducerNum = conf.get("jatextmining.JaWordCounterReducerNum");
    conf.setBoolean("df", true);
    Job job = new Job(conf);
    job.setJarByClass(JaCoOccurrence.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(cache));
    FileSystem fs = FileSystem.get(new URI(cache), conf);
    FileStatus[] status = fs.listStatus(new Path(cache));
    if (status != null) {
        fs.delete(new Path(cache), true);
    }
    fs.close();
    job.setMapperClass(CoOccurrenceMapper.class);
    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));
    boolean rv = job.waitForCompletion(true);
    if (rv) {
        writeDocNumFile(conf, job);
    }
    return rv;
}
From source file:net.java.jatextmining.JaWordCounter.java
License:Apache License
/**
 * Creating the DF database from Japanese documents.
 * @param conf Specify the Hadoop Configuration object.
 * @param dfdb Specify the saving path for DF database.
 * @return If success return true, it not success return false.
 * @throws IOException Exception for IO.
 * @throws URISyntaxException Exception for DF database URI.
 * @throws InterruptedException Exception for waitForCompletion().
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runCreateDFDB(Configuration conf, String dfdb)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String reducerNum = conf.get("jatextmining.JaWordCounterReducerNum");
    Job job = new Job(conf);
    job.setJarByClass(JaWordCounter.class);
    TextInputFormat.addInputPath(job, new Path(dfIn));
    FileOutputFormat.setOutputPath(job, new Path(dfdb));
    FileSystem fs = FileSystem.get(new URI(dfdb), conf);
    FileStatus[] status = fs.listStatus(new Path(dfdb));
    if (status != null) {
        fs.delete(new Path(dfdb), true);
    }
    fs.close();
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setNumReduceTasks(Integer.valueOf(reducerNum));
    boolean rv = job.waitForCompletion(true);
    if (rv) {
        Counters counters = job.getCounters();
        long docNum = counters.findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
                .getValue();
        FileSystem hdfs = FileSystem.get(conf);
        String docNumPath = conf.get("jatextmining.docNum");
        FSDataOutputStream stream = hdfs.create(new Path(docNumPath));
        stream.writeUTF(String.valueOf((int) docNum));
        stream.close();
    }
    return rv;
}