Example usage for org.apache.hadoop.conf Configuration addResource

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.addResource, drawn from open-source projects.

Prototype

public void addResource(Configuration conf)

Configuration.addResource is overloaded; besides the Configuration overload shown above, the examples below also use the addResource(InputStream in), addResource(Path file), and addResource(String name) overloads.

Document

Add a configuration resource.
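
Below is a minimal, self-contained sketch of the Configuration overload shown in the prototype, which merges the properties of one Configuration into another. The property key "example.buffer.size" is purely illustrative, not a real Hadoop key.

import org.apache.hadoop.conf.Configuration;

public class AddResourceDemo {
    public static void main(String[] args) {
        // Base configuration, loaded with the default resources
        // (core-default.xml and core-site.xml, if present on the classpath).
        Configuration base = new Configuration();

        // A second configuration carrying extra properties; loadDefaults=false
        // so it holds only what is explicitly set on it.
        Configuration extra = new Configuration(false);
        extra.set("example.buffer.size", "8192"); // illustrative key

        // Merge the second configuration in as an additional resource; its
        // properties become visible through the base configuration.
        base.addResource(extra);

        System.out.println(base.get("example.buffer.size")); // prints: 8192
    }
}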

Usage

From source file:com.twitter.hraven.util.TestHadoopConfUtil.java

License:Apache License

@Test
public void testContains() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    assertTrue(HadoopConfUtil.contains(jobConf, Constants.USER_CONF_KEY_HADOOP2));
    assertFalse(HadoopConfUtil.contains(jobConf, Constants.USER_CONF_KEY));
}

From source file:com.twitter.hraven.util.TestHadoopConfUtil.java

License:Apache License

@Test
public void testGetUserNameInConf() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    String userName = HadoopConfUtil.getUserNameInConf(jobConf);
    assertEquals("user", userName);
}

From source file:com.twitter.hraven.util.TestHadoopConfUtil.java

License:Apache License

@Test
public void testGetQueueName() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    String queueName = HadoopConfUtil.getQueueName(jobConf);
    assertEquals("default", queueName);
}

From source file:com.twitter.hraven.util.TestHadoopConfUtil.java

License:Apache License

@Test(expected = IllegalArgumentException.class)
public void checkUserNameAlwaysSet() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";

    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));

    // unset the user name to confirm exception thrown
    jobConf.set(Constants.USER_CONF_KEY_HADOOP2, "");
    jobConf.set(Constants.USER_CONF_KEY, "");
    // test the hraven user name setting
    String hRavenUserName = HadoopConfUtil.getUserNameInConf(jobConf);
    assertNull(hRavenUserName);
}

From source file:com.uber.hoodie.common.util.ParquetUtils.java

License:Apache License

/**
 * Read the rowKey list matching the given filter from the given parquet file. If the filter is empty,
 * then this will return all the rowkeys.
 *
 * @param configuration         configuration used to build the fs object
 * @param filePath              the parquet file path
 * @param filter                record keys filter
 * @return Set                  set of row keys matching the given filter
 */
public static Set<String> filterParquetRowKeys(Configuration configuration, Path filePath, Set<String> filter) {
    Optional<RecordKeysFilterFunction> filterFunction = Optional.empty();
    if (CollectionUtils.isNotEmpty(filter)) {
        filterFunction = Optional.of(new RecordKeysFilterFunction(filter));
    }
    Configuration conf = new Configuration(configuration);
    conf.addResource(getFs(filePath.toString(), conf).getConf());
    Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
    AvroReadSupport.setAvroReadSchema(conf, readSchema);
    AvroReadSupport.setRequestedProjection(conf, readSchema);
    Set<String> rowKeys = new HashSet<>();
    try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
        Object obj = reader.read();
        while (obj != null) {
            if (obj instanceof GenericRecord) {
                String recordKey = ((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                if (!filterFunction.isPresent() || filterFunction.get().apply(recordKey)) {
                    rowKeys.add(recordKey);
                }
            }
            obj = reader.read();
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
    }
    return rowKeys;
}

From source file:com.verizon.Main.java

public static void main(String[] args) throws Exception {

    String warehouseLocation = "file:" + System.getProperty("user.dir") + "/spark-warehouse";

    SparkSession spark = SparkSession.builder().appName("Verizon").config("spark.master", "local[2]")
            .config("spark.sql.warehouse.dir", warehouseLocation).enableHiveSupport().getOrCreate();

    Configuration configuration = new Configuration();
    // HADOOP_INSTALL is an environment variable, so read it with getenv
    // rather than getProperty.
    configuration.addResource(new Path(System.getenv("HADOOP_INSTALL") + "/conf/core-site.xml"));
    configuration.addResource(new Path(System.getenv("HADOOP_INSTALL") + "/conf/hdfs-site.xml"));
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);

    // Column names in data.csv, for reference: Device, Title, ReviewText,
    // SubmissionTime, UserNickname
    Dataset<Row> df = spark.read().csv("hdfs://localhost:9000/data.csv");
    //df.show();
    //df.printSchema();
    df = df.select("_c2"); // keep only the ReviewText column

    Path file = new Path("hdfs://localhost:9000/tempFile.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }

    df.write().csv("hdfs://localhost:9000/tempFile.txt");

    // SPACE is a Pattern field defined elsewhere in this class,
    // e.g. private static final Pattern SPACE = Pattern.compile(" ");
    JavaRDD<String> lines = spark.read().textFile("hdfs://localhost:9000/tempFile.txt").javaRDD();
    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String s) {
            return Arrays.asList(SPACE.split(s)).iterator();
        }
    });

    JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            s = s.replaceAll("[^a-zA-Z0-9]+", "");
            s = s.toLowerCase().trim();
            return new Tuple2<>(s, 1);
        }
    });

    JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    JavaPairRDD<Integer, String> frequencies = counts
            .mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(Tuple2<String, Integer> s) {
                    return new Tuple2<Integer, String>(s._2, s._1);
                }
            });

    frequencies = frequencies.sortByKey(false);

    JavaPairRDD<String, Integer> result = frequencies
            .mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(Tuple2<Integer, String> s) throws Exception {
                    return new Tuple2<String, Integer>(s._2, s._1);
                }

            });

    //JavaPairRDD<Integer,String> sortedByFreq = sort(frequencies, "descending"); 
    file = new Path("hdfs://localhost:9000/allresult.csv");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }

    //FileUtils.deleteDirectory(new File("allresult.csv"));

    result.saveAsTextFile("hdfs://localhost:9000/allresult.csv");

    List<Tuple2<String, Integer>> output = result.take(250);

    ExportToHive hiveExport = new ExportToHive();
    String rows = "";
    for (Tuple2<String, Integer> tuple : output) {
        String date = new Date().toString();
        String keyword = tuple._1();
        Integer count = tuple._2();
        //System.out.println( keyword+ "," +count);
        rows += date + "," + "Samsung Galaxy s7," + keyword + "," + count + System.lineSeparator();

    }
    //System.out.println(rows);
    /*
    file = new Path("hdfs://localhost:9000/result.csv");
            
    if ( hdfs.exists( file )) { hdfs.delete( file, true ); } 
    OutputStream os = hdfs.create(file);
    BufferedWriter br = new BufferedWriter( new OutputStreamWriter( os, "UTF-8" ) );
    br.write(rows);
    br.close();
    */
    hdfs.close();

    FileUtils.deleteQuietly(new File("result.csv"));
    FileUtils.writeStringToFile(new File("result.csv"), rows);

    hiveExport.writeToHive(spark);
    ExportDataToServer exportServer = new ExportDataToServer();
    exportServer.sendDataToRESTService(rows);
    spark.stop();
}

From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java

License:Apache License

private Configuration getDefaultConfig() {
    String hadoopHome = System.getenv("HADOOP_HOME");
    Configuration conf = new Configuration();
    if (hadoopHome != null) {
        java.nio.file.Path coreSitePath = FileSystems.getDefault().getPath(hadoopHome,
                "etc/hadoop/core-site.xml");
        java.nio.file.Path hdfsSitePath = FileSystems.getDefault().getPath(hadoopHome,
                "etc/hadoop/hdfs-site.xml");
        conf.addResource(new Path(coreSitePath.toAbsolutePath().toString()));
        conf.addResource(new Path(hdfsSitePath.toAbsolutePath().toString()));
    }
    return conf;
}

From source file:common.DataNode.java

License:Apache License

/**
 * This method starts the data node with the specified conf.
 *
 * @param conf - the configuration
 *  if conf's CONFIG_PROPERTY_SIMULATED property is set
 *  then a simulated storage based data node is created.
 * 
 * @param dataDirs - only for a non-simulated storage data node
 * @throws IOException
 */
void startDataNode(Configuration conf, AbstractList<File> dataDirs, DatanodeProtocol namenode)
        throws IOException {
    // use configured nameserver & interface to get local hostname
    if (conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY) != null) {
        machineName = conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY);
    }
    if (machineName == null) {
        machineName = DNS.getDefaultHost(conf.get("dfs.datanode.dns.interface", "default"),
                conf.get("dfs.datanode.dns.nameserver", "default"));
    }
    this.nameNodeAddr = NameNode.getAddress(conf);

    this.socketTimeout = conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, HdfsConstants.READ_TIMEOUT);
    this.socketWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout", HdfsConstants.WRITE_TIMEOUT);
    /* Based on results on different platforms, we might need set the default 
     * to false on some of them. */
    this.transferToAllowed = conf.getBoolean("dfs.datanode.transferTo.allowed", true);
    this.writePacketSize = conf.getInt(DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY,
            DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT);
    InetSocketAddress socAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.address", "0.0.0.0:50010"));
    int tmpPort = socAddr.getPort();
    storage = new DataStorage();
    // construct registration
    this.dnRegistration = new DatanodeRegistration(machineName + ":" + tmpPort);

    // connect to name node
    this.namenode = namenode;

    // get version and id info from the name-node
    NamespaceInfo nsInfo = handshake();
    StartupOption startOpt = getStartupOption(conf);
    assert startOpt != null : "Startup option must be set.";

    boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false);
    if (simulatedFSDataset) {
        setNewStorageID(dnRegistration);
        dnRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION;
        dnRegistration.storageInfo.namespaceID = nsInfo.namespaceID;
        // it would have been better to pass storage as a parameter to
        // constructor below - need to augment ReflectionUtils used below.
        conf.set(DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY, dnRegistration.getStorageID());
        try {
            //Equivalent of following (can't do because Simulated is in test dir)
            //  this.data = new SimulatedFSDataset(conf);
            this.data = (FSDatasetInterface) ReflectionUtils.newInstance(
                    Class.forName("org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset"), conf);
        } catch (ClassNotFoundException e) {
            throw new IOException(StringUtils.stringifyException(e));
        }
    } else { // real storage
        // read storage info, lock data dirs and transition fs state if necessary
        storage.recoverTransitionRead(nsInfo, dataDirs, startOpt);
        // adjust
        this.dnRegistration.setStorageInfo(storage);
        // initialize data node internal structure
        this.data = new FSDataset(storage, conf);
    }

    // find free port
    ServerSocket ss = (socketWriteTimeout > 0) ? ServerSocketChannel.open().socket() : new ServerSocket();
    Server.bind(ss, socAddr, 0);
    ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE);
    // adjust machine name with the actual port
    tmpPort = ss.getLocalPort();
    selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(), tmpPort);
    this.dnRegistration.setName(machineName + ":" + tmpPort);
    LOG.info("Opened info server at " + tmpPort);

    this.threadGroup = new ThreadGroup("dataXceiverServer");
    this.dataXceiverServer = new Daemon(threadGroup, new DataXceiverServer(ss, conf, this));
    this.threadGroup.setDaemon(true); // auto destroy when empty

    this.blockReportInterval = conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
    this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay", BLOCKREPORT_INITIAL_DELAY)
            * 1000L;
    if (this.initialBlockReportDelay >= blockReportInterval) {
        this.initialBlockReportDelay = 0;
        LOG.info("dfs.blockreport.initialDelay is greater than " + "dfs.blockreport.intervalMsec."
                + " Setting initial delay to 0 msec:");
    }
    this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;

    //initialize periodic block scanner
    String reason = null;
    if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) {
        reason = "verification is turned off by configuration";
    } else if (!(data instanceof FSDataset)) {
        reason = "verifcation is supported only with FSDataset";
    }
    if (reason == null) {
        blockScanner = new DataBlockScanner(this, (FSDataset) data, conf);
    } else {
        LOG.info("Periodic Block Verification is disabled because " + reason + ".");
    }

    //create a servlet to serve full-file content
    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get("dfs.datanode.http.address", "0.0.0.0:50075"));
    String infoHost = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.infoServer = new HttpServer("datanode", infoHost, tmpInfoPort, tmpInfoPort == 0, conf);
    if (conf.getBoolean("dfs.https.enable", false)) {
        boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
        InetSocketAddress secInfoSocAddr = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 0));
        Configuration sslConf = new HdfsConfiguration(false);
        sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
        this.infoServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
    }
    this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class);
    this.infoServer.addInternalServlet(null, "/getFileChecksum/*", FileChecksumServlets.GetServlet.class);
    this.infoServer.setAttribute("datanode.blockScanner", blockScanner);
    this.infoServer.setAttribute("datanode.conf", conf);
    this.infoServer.addServlet(null, "/blockScannerReport", DataBlockScanner.Servlet.class);
    this.infoServer.start();
    // adjust info port
    this.dnRegistration.setInfoPort(this.infoServer.getPort());
    myMetrics = new DataNodeMetrics(conf, dnRegistration.getName());

    // set service-level authorization security policy
    if (conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
        ServiceAuthorizationManager.refresh(conf, new HDFSPolicyProvider());
    }

    //init ipc server
    InetSocketAddress ipcAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.ipc.address"));
    ipcServer = RPC.getServer(DataNode.class, this, ipcAddr.getHostName(), ipcAddr.getPort(),
            conf.getInt("dfs.datanode.handler.count", 3), false, conf);
    ipcServer.start();
    dnRegistration.setIpcPort(ipcServer.getListenerAddress().getPort());

    LOG.info("dnRegistration = " + dnRegistration);

    plugins = conf.getInstances("dfs.datanode.plugins", ServicePlugin.class);
    for (ServicePlugin p : plugins) {
        try {
            p.start(this);
            LOG.info("Started plug-in " + p);
        } catch (Throwable t) {
            LOG.warn("ServicePlugin " + p + " could not be started", t);
        }
    }
}

From source file:common.NameNode.java

License:Apache License

private void startHttpServer(Configuration conf) throws IOException {
    InetSocketAddress infoSocAddr = getHttpServerAddress(conf);
    String infoHost = infoSocAddr.getHostName();
    int infoPort = infoSocAddr.getPort();
    this.httpServer = new HttpServer("hdfs", infoHost, infoPort, infoPort == 0, conf);
    if (conf.getBoolean("dfs.https.enable", false)) {
        boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
        InetSocketAddress secInfoSocAddr = NetUtils
                .createSocketAddr(conf.get(DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY, infoHost + ":" + 0));
        Configuration sslConf = new HdfsConfiguration(false);
        sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
        this.httpServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
        // assume same ssl port for all datanodes
        InetSocketAddress datanodeSslPort = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 50475));
        this.httpServer.setAttribute("datanode.https.port", datanodeSslPort.getPort());
    }
    this.httpServer.setAttribute("name.node", this);
    this.httpServer.setAttribute("name.node.address", getNameNodeAddress());
    this.httpServer.setAttribute("name.system.image", getFSImage());
    this.httpServer.setAttribute("name.conf", conf);
    this.httpServer.addInternalServlet("getDelegationToken", DelegationTokenServlet.PATH_SPEC,
            DelegationTokenServlet.class);
    this.httpServer.addInternalServlet("fsck", "/fsck", FsckServlet.class);
    this.httpServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    this.httpServer.addInternalServlet("listPaths", "/listPaths/*", ListPathsServlet.class);
    this.httpServer.addInternalServlet("data", "/data/*", FileDataServlet.class);
    this.httpServer.addInternalServlet("checksum", "/fileChecksum/*",
            FileChecksumServlets.RedirectServlet.class);
    this.httpServer.addInternalServlet("contentSummary", "/contentSummary/*", ContentSummaryServlet.class);
    this.httpServer.start();

    // The web-server port can be ephemeral... ensure we have the correct info
    infoPort = this.httpServer.getPort();
    this.httpAddress = new InetSocketAddress(infoHost, infoPort);
    setHttpServerAddress(conf);
    LOG.info(getRole() + " Web-server up at: " + httpAddress);
}

From source file:cosmos.mapred.MediawikiIngestJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (1 != args.length) {
        System.err.println("Usage: input.xml,input.xml,input.xml...");
        return 1;
    }

    String inputFiles = args[0];

    Configuration conf = getConf();
    System.out.println("path " + conf.get("fs.default.name"));
    conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/core-site.xml"));

    conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml"));

    System.out.println("path " + conf.get("fs.default.name"));
    //System.exit(1);
    Job job = new Job(conf, "Mediawiki Ingest");

    job.setJarByClass(MediawikiIngestJob.class);

    String tablename = "sortswiki";
    String zookeepers = "localhost:2181";
    String instanceName = "accumulo";
    String user = "root";
    PasswordToken passwd = new PasswordToken("secret");

    FileInputFormat.setInputPaths(job, inputFiles);

    job.setMapperClass(MediawikiMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    BatchWriterConfig bwConfig = new BatchWriterConfig();

    job.setInputFormatClass(MediawikiInputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
    AccumuloOutputFormat.setConnectorInfo(job, user, passwd);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tablename);

    return job.waitForCompletion(true) ? 0 : 1;
}