List of usage examples for org.apache.hadoop.conf.Configuration#addResource
public void addResource(Configuration conf)
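Configuration.addResource has several overloads besides the one shown above: a classpath resource name, a Path, a URL, an InputStream, and another Configuration. Resources are evaluated in the order they are added, with later resources overriding earlier values unless a property is marked final. A minimal sketch of the common overloads (all file paths and property names below are placeholders, not taken from the examples that follow):

import java.io.FileInputStream;
import java.io.FileNotFoundException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class AddResourceSketch {
    public static void main(String[] args) throws FileNotFoundException {
        Configuration conf = new Configuration();

        // Classpath resource name: resolved against the classpath.
        conf.addResource("my-site.xml"); // placeholder resource name

        // Filesystem path: loaded directly from the given location.
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")); // placeholder path

        // Input stream: useful when the XML does not live on the classpath;
        // the stream is parsed (and closed) when properties are first loaded.
        conf.addResource(new FileInputStream("/tmp/job-conf.xml")); // placeholder path

        // Another Configuration: merges its properties into this one.
        Configuration overrides = new Configuration(false);
        overrides.set("my.custom.key", "value"); // placeholder property
        conf.addResource(overrides);

        // Reading a property triggers loading of all added resources.
        System.out.println(conf.get("my.custom.key"));
    }
}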
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test
public void testContains() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    assertTrue(HadoopConfUtil.contains(jobConf, Constants.USER_CONF_KEY_HADOOP2));
    assertFalse(HadoopConfUtil.contains(jobConf, Constants.USER_CONF_KEY));
}
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test
public void testGetUserNameInConf() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    String userName = HadoopConfUtil.getUserNameInConf(jobConf);
    assertEquals(userName, "user");
}
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test
public void testGetQueueName() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    String queueName = HadoopConfUtil.getQueueName(jobConf);
    assertEquals(queueName, "default");
}
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test(expected = IllegalArgumentException.class)
public void checkUserNameAlwaysSet() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    // unset the user name to confirm an exception is thrown
    jobConf.set(Constants.USER_CONF_KEY_HADOOP2, "");
    jobConf.set(Constants.USER_CONF_KEY, "");
    // test the hRaven user name setting
    String hRavenUserName = HadoopConfUtil.getUserNameInConf(jobConf);
    assertNull(hRavenUserName);
}
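All four tests above follow the same pattern: feed a job's serialized configuration XML to the InputStream overload, then query it through HadoopConfUtil. A standalone sketch of that pattern (the file path and property key are placeholders):

import java.io.FileInputStream;
import java.io.FileNotFoundException;

import org.apache.hadoop.conf.Configuration;

public class LoadJobConf {
    public static void main(String[] args) throws FileNotFoundException {
        Configuration jobConf = new Configuration();
        // The stream is parsed when properties are first read, and Configuration
        // closes it after parsing.
        jobConf.addResource(new FileInputStream("job_conf.xml")); // placeholder path
        System.out.println(jobConf.get("mapreduce.job.user.name")); // placeholder key
    }
}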
From source file:com.uber.hoodie.common.util.ParquetUtils.java
License:Apache License
/**
 * Read the row key list matching the given filter from the given parquet file.
 * If the filter is empty, this returns all row keys.
 *
 * @param configuration configuration used to build the fs object
 * @param filePath      the parquet file path
 * @param filter        record keys filter
 * @return the set of row keys from the file matching the filter
 */
public static Set<String> filterParquetRowKeys(Configuration configuration, Path filePath, Set<String> filter) {
    Optional<RecordKeysFilterFunction> filterFunction = Optional.empty();
    if (CollectionUtils.isNotEmpty(filter)) {
        filterFunction = Optional.of(new RecordKeysFilterFunction(filter));
    }
    // Copy the configuration and merge in the resolved filesystem's own settings.
    Configuration conf = new Configuration(configuration);
    conf.addResource(getFs(filePath.toString(), conf).getConf());
    Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
    AvroReadSupport.setAvroReadSchema(conf, readSchema);
    AvroReadSupport.setRequestedProjection(conf, readSchema);
    Set<String> rowKeys = new HashSet<>();
    try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
        Object obj = reader.read();
        while (obj != null) {
            if (obj instanceof GenericRecord) {
                String recordKey = ((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                if (!filterFunction.isPresent() || filterFunction.get().apply(recordKey)) {
                    rowKeys.add(recordKey);
                }
            }
            obj = reader.read();
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
    }
    return rowKeys;
}
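A hypothetical call site for filterParquetRowKeys (the file path and record keys are invented for illustration); note that passing an empty filter returns every row key in the file:

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import com.uber.hoodie.common.util.ParquetUtils;

public class FilterRowKeysExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Invented record keys and file path, purely for illustration.
        Set<String> candidateKeys = new HashSet<>();
        candidateKeys.add("key-001");
        candidateKeys.add("key-002");
        Path parquetFile = new Path("/data/hoodie/partition/file.parquet");

        // Returns the subset of candidateKeys actually present in the file.
        Set<String> present = ParquetUtils.filterParquetRowKeys(conf, parquetFile, candidateKeys);

        // An empty filter returns every row key in the file.
        Set<String> all = ParquetUtils.filterParquetRowKeys(conf, parquetFile, new HashSet<>());

        System.out.println(present.size() + " of " + candidateKeys.size()
                + " keys found; " + all.size() + " keys total");
    }
}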
From source file:com.verizon.Main.java
public static void main(String[] args) throws Exception {
    String warehouseLocation = "file:" + System.getProperty("user.dir") + "/spark-warehouse";
    SparkSession spark = SparkSession.builder().appName("Verizon").config("spark.master", "local[2]")
            .config("spark.sql.warehouse.dir", warehouseLocation).enableHiveSupport().getOrCreate();

    Configuration configuration = new Configuration();
    configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/core-site.xml"));
    configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/hdfs-site.xml"));
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);

    SQLContext context = new SQLContext(spark);
    String schemaString = " Device,Title,ReviewText,SubmissionTime,UserNickname";
    // spark.read().textFile(schemaString)
    Dataset<Row> df = spark.read().csv("hdfs://localhost:9000/data.csv");
    // df.show();
    // df.printSchema();
    df = df.select("_c2");

    Path file = new Path("hdfs://localhost:9000/tempFile.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    df.write().csv("hdfs://localhost:9000/tempFile.txt");

    JavaRDD<String> lines = spark.read().textFile("hdfs://localhost:9000/tempFile.txt").javaRDD();
    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String s) {
            return Arrays.asList(SPACE.split(s)).iterator();
        }
    });
    JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            s = s.replaceAll("[^a-zA-Z0-9]+", "");
            s = s.toLowerCase().trim();
            return new Tuple2<>(s, 1);
        }
    });
    JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    JavaPairRDD<Integer, String> frequencies = counts
            .mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(Tuple2<String, Integer> s) {
                    return new Tuple2<Integer, String>(s._2, s._1);
                }
            });
    frequencies = frequencies.sortByKey(false);
    JavaPairRDD<String, Integer> result = frequencies
            .mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(Tuple2<Integer, String> s) throws Exception {
                    return new Tuple2<String, Integer>(s._2, s._1);
                }
            });
    // JavaPairRDD<Integer, String> sortedByFreq = sort(frequencies, "descending");

    file = new Path("hdfs://localhost:9000/allresult.csv");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    // FileUtils.deleteDirectory(new File("allresult.csv"));
    result.saveAsTextFile("hdfs://localhost:9000/allresult.csv");

    List<Tuple2<String, Integer>> output = result.take(250);
    ExportToHive hiveExport = new ExportToHive();
    String rows = "";
    for (Tuple2<String, Integer> tuple : output) {
        String date = new Date().toString();
        String keyword = tuple._1();
        Integer count = tuple._2();
        // System.out.println(keyword + "," + count);
        rows += date + "," + "Samsung Galaxy s7," + keyword + "," + count + System.lineSeparator();
    }
    // System.out.println(rows);

    /*
    file = new Path("hdfs://localhost:9000/result.csv");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    br.write(rows);
    br.close();
    */
    hdfs.close();
    FileUtils.deleteQuietly(new File("result.csv"));
    FileUtils.writeStringToFile(new File("result.csv"), rows);
    hiveExport.writeToHive(spark);
    ExportDataToServer exportServer = new ExportDataToServer();
    exportServer.sendDataToRESTService(rows);
    spark.stop();
}
From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java
License:Apache License
private Configuration getDefaultConfig() {
    String hadoopHome = System.getenv("HADOOP_HOME");
    Configuration conf = new Configuration();
    if (hadoopHome != null) {
        java.nio.file.Path coreSitePath = FileSystems.getDefault().getPath(hadoopHome,
                "etc/hadoop/core-site.xml");
        java.nio.file.Path hdfsSitePath = FileSystems.getDefault().getPath(hadoopHome,
                "etc/hadoop/hdfs-site.xml");
        conf.addResource(new Path(coreSitePath.toAbsolutePath().toString()));
        conf.addResource(new Path(hdfsSitePath.toAbsolutePath().toString()));
    }
    return conf;
}
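The effect of adding those two site files is that a later FileSystem.get(conf) resolves the cluster's fs.defaultFS from core-site.xml instead of defaulting to the local filesystem. A minimal standalone sketch of the same pattern, assuming HADOOP_HOME points at a standard etc/hadoop layout:

import java.io.IOException;
import java.nio.file.FileSystems;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopHomeConfigDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        String hadoopHome = System.getenv("HADOOP_HOME");
        if (hadoopHome != null) {
            // Merge the cluster's site files so fs.defaultFS and HDFS settings apply.
            conf.addResource(new Path(FileSystems.getDefault()
                    .getPath(hadoopHome, "etc/hadoop/core-site.xml").toAbsolutePath().toString()));
            conf.addResource(new Path(FileSystems.getDefault()
                    .getPath(hadoopHome, "etc/hadoop/hdfs-site.xml").toAbsolutePath().toString()));
        }
        // Without the resources above, this would be the local filesystem.
        FileSystem fs = FileSystem.get(conf);
        System.out.println("Default filesystem: " + fs.getUri());
    }
}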
From source file:common.DataNode.java
License:Apache License
/**
 * This method starts the data node with the specified conf.
 *
 * @param conf - the configuration;
 *     if conf's CONFIG_PROPERTY_SIMULATED property is set,
 *     then a simulated storage based data node is created.
 * @param dataDirs - only for a non-simulated storage data node
 * @throws IOException
 */
void startDataNode(Configuration conf, AbstractList<File> dataDirs, DatanodeProtocol namenode)
        throws IOException {
    // use configured nameserver & interface to get local hostname
    if (conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY) != null) {
        machineName = conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY);
    }
    if (machineName == null) {
        machineName = DNS.getDefaultHost(conf.get("dfs.datanode.dns.interface", "default"),
                conf.get("dfs.datanode.dns.nameserver", "default"));
    }
    this.nameNodeAddr = NameNode.getAddress(conf);

    this.socketTimeout = conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, HdfsConstants.READ_TIMEOUT);
    this.socketWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout", HdfsConstants.WRITE_TIMEOUT);
    /* Based on results on different platforms, we might need to set the default
     * to false on some of them. */
    this.transferToAllowed = conf.getBoolean("dfs.datanode.transferTo.allowed", true);
    this.writePacketSize = conf.getInt(DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY,
            DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT);

    InetSocketAddress socAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.address", "0.0.0.0:50010"));
    int tmpPort = socAddr.getPort();
    storage = new DataStorage();
    // construct registration
    this.dnRegistration = new DatanodeRegistration(machineName + ":" + tmpPort);

    // connect to name node
    this.namenode = namenode;

    // get version and id info from the name-node
    NamespaceInfo nsInfo = handshake();
    StartupOption startOpt = getStartupOption(conf);
    assert startOpt != null : "Startup option must be set.";

    boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false);
    if (simulatedFSDataset) {
        setNewStorageID(dnRegistration);
        dnRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION;
        dnRegistration.storageInfo.namespaceID = nsInfo.namespaceID;
        // it would have been better to pass storage as a parameter to
        // the constructor below - need to augment ReflectionUtils used below.
        conf.set(DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY, dnRegistration.getStorageID());
        try {
            // Equivalent of the following (can't do it directly because
            // SimulatedFSDataset lives in the test dir):
            //   this.data = new SimulatedFSDataset(conf);
            this.data = (FSDatasetInterface) ReflectionUtils.newInstance(
                    Class.forName("org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset"), conf);
        } catch (ClassNotFoundException e) {
            throw new IOException(StringUtils.stringifyException(e));
        }
    } else { // real storage
        // read storage info, lock data dirs and transition fs state if necessary
        storage.recoverTransitionRead(nsInfo, dataDirs, startOpt);
        // adjust
        this.dnRegistration.setStorageInfo(storage);
        // initialize data node internal structure
        this.data = new FSDataset(storage, conf);
    }

    // find free port
    ServerSocket ss = (socketWriteTimeout > 0) ? ServerSocketChannel.open().socket() : new ServerSocket();
    Server.bind(ss, socAddr, 0);
    ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE);
    // adjust machine name with the actual port
    tmpPort = ss.getLocalPort();
    selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(), tmpPort);
    this.dnRegistration.setName(machineName + ":" + tmpPort);
    LOG.info("Opened info server at " + tmpPort);

    this.threadGroup = new ThreadGroup("dataXceiverServer");
    this.dataXceiverServer = new Daemon(threadGroup, new DataXceiverServer(ss, conf, this));
    this.threadGroup.setDaemon(true); // auto destroy when empty

    this.blockReportInterval = conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
    this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay", BLOCKREPORT_INITIAL_DELAY) * 1000L;
    if (this.initialBlockReportDelay >= blockReportInterval) {
        this.initialBlockReportDelay = 0;
        LOG.info("dfs.blockreport.initialDelay is greater than "
                + "dfs.blockreport.intervalMsec. Setting initial delay to 0 msec:");
    }
    this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;

    // initialize periodic block scanner
    String reason = null;
    if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) {
        reason = "verification is turned off by configuration";
    } else if (!(data instanceof FSDataset)) {
        reason = "verification is supported only with FSDataset";
    }
    if (reason == null) {
        blockScanner = new DataBlockScanner(this, (FSDataset) data, conf);
    } else {
        LOG.info("Periodic Block Verification is disabled because " + reason + ".");
    }

    // create a servlet to serve full-file content
    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get("dfs.datanode.http.address", "0.0.0.0:50075"));
    String infoHost = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.infoServer = new HttpServer("datanode", infoHost, tmpInfoPort, tmpInfoPort == 0, conf);
    if (conf.getBoolean("dfs.https.enable", false)) {
        boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
        InetSocketAddress secInfoSocAddr = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 0));
        Configuration sslConf = new HdfsConfiguration(false);
        // load the SSL server keystore settings as an additional resource
        sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
        this.infoServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
    }
    this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class);
    this.infoServer.addInternalServlet(null, "/getFileChecksum/*", FileChecksumServlets.GetServlet.class);
    this.infoServer.setAttribute("datanode.blockScanner", blockScanner);
    this.infoServer.setAttribute("datanode.conf", conf);
    this.infoServer.addServlet(null, "/blockScannerReport", DataBlockScanner.Servlet.class);
    this.infoServer.start();
    // adjust info port
    this.dnRegistration.setInfoPort(this.infoServer.getPort());
    myMetrics = new DataNodeMetrics(conf, dnRegistration.getName());

    // set service-level authorization security policy
    if (conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
        ServiceAuthorizationManager.refresh(conf, new HDFSPolicyProvider());
    }

    // init ipc server
    InetSocketAddress ipcAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.ipc.address"));
    ipcServer = RPC.getServer(DataNode.class, this, ipcAddr.getHostName(), ipcAddr.getPort(),
            conf.getInt("dfs.datanode.handler.count", 3), false, conf);
    ipcServer.start();
    dnRegistration.setIpcPort(ipcServer.getListenerAddress().getPort());

    LOG.info("dnRegistration = " + dnRegistration);

    plugins = conf.getInstances("dfs.datanode.plugins", ServicePlugin.class);
    for (ServicePlugin p : plugins) {
        try {
            p.start(this);
            LOG.info("Started plug-in " + p);
        } catch (Throwable t) {
            LOG.warn("ServicePlugin " + p + " could not be started", t);
        }
    }
}
From source file:common.NameNode.java
License:Apache License
private void startHttpServer(Configuration conf) throws IOException {
    InetSocketAddress infoSocAddr = getHttpServerAddress(conf);
    String infoHost = infoSocAddr.getHostName();
    int infoPort = infoSocAddr.getPort();
    this.httpServer = new HttpServer("hdfs", infoHost, infoPort, infoPort == 0, conf);
    if (conf.getBoolean("dfs.https.enable", false)) {
        boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
        InetSocketAddress secInfoSocAddr = NetUtils
                .createSocketAddr(conf.get(DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY, infoHost + ":" + 0));
        Configuration sslConf = new HdfsConfiguration(false);
        sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
        this.httpServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
        // assume same ssl port for all datanodes
        InetSocketAddress datanodeSslPort = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 50475));
        this.httpServer.setAttribute("datanode.https.port", datanodeSslPort.getPort());
    }
    this.httpServer.setAttribute("name.node", this);
    this.httpServer.setAttribute("name.node.address", getNameNodeAddress());
    this.httpServer.setAttribute("name.system.image", getFSImage());
    this.httpServer.setAttribute("name.conf", conf);
    this.httpServer.addInternalServlet("getDelegationToken", DelegationTokenServlet.PATH_SPEC,
            DelegationTokenServlet.class);
    this.httpServer.addInternalServlet("fsck", "/fsck", FsckServlet.class);
    this.httpServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    this.httpServer.addInternalServlet("listPaths", "/listPaths/*", ListPathsServlet.class);
    this.httpServer.addInternalServlet("data", "/data/*", FileDataServlet.class);
    this.httpServer.addInternalServlet("checksum", "/fileChecksum/*",
            FileChecksumServlets.RedirectServlet.class);
    this.httpServer.addInternalServlet("contentSummary", "/contentSummary/*", ContentSummaryServlet.class);
    this.httpServer.start();

    // The web-server port can be ephemeral... ensure we have the correct info
    infoPort = this.httpServer.getPort();
    this.httpAddress = new InetSocketAddress(infoHost, infoPort);
    setHttpServerAddress(conf);
    LOG.info(getRole() + " Web-server up at: " + httpAddress);
}
From source file:cosmos.mapred.MediawikiIngestJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (1 != args.length) {
        System.err.println("Usage: input.xml,input.xml,input.xml...");
        return 1;
    }
    String inputFiles = args[0];

    Configuration conf = getConf();
    System.out.println("path " + conf.get("fs.default.name"));
    conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml"));
    System.out.println("path " + conf.get("fs.default.name"));

    Job job = new Job(conf, "Mediawiki Ingest");
    job.setJarByClass(MediawikiIngestJob.class);

    String tablename = "sortswiki";
    String zookeepers = "localhost:2181";
    String instanceName = "accumulo";
    String user = "root";
    PasswordToken passwd = new PasswordToken("secret");

    FileInputFormat.setInputPaths(job, inputFiles);
    job.setMapperClass(MediawikiMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    BatchWriterConfig bwConfig = new BatchWriterConfig();

    job.setInputFormatClass(MediawikiInputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
    AccumuloOutputFormat.setConnectorInfo(job, user, passwd);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tablename);

    return job.waitForCompletion(true) ? 0 : 1;
}
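One detail worth knowing about the three addResource(Path) calls above: later resources override earlier values for the same key, unless the earlier definition is marked final. A small sketch of that precedence rule using the Configuration overload (the property names are invented for illustration):

import org.apache.hadoop.conf.Configuration;

public class ResourceOrderDemo {
    public static void main(String[] args) {
        Configuration base = new Configuration(false);
        base.set("example.key", "from-base"); // placeholder property

        Configuration override = new Configuration(false);
        override.set("example.key", "from-override");

        Configuration conf = new Configuration(false);
        conf.addResource(base);
        conf.addResource(override); // added later, so its value wins

        System.out.println(conf.get("example.key")); // prints "from-override"
    }
}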