List of usage examples for org.apache.hadoop.conf.Configuration#addResource
public void addResource(Configuration conf)
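Configuration.addResource has several overloads besides the one shown above: a classpath resource name, a Path, a URL, an InputStream, and another Configuration. Resources are evaluated in the order they are added, with later resources overriding earlier values unless a property is marked final. A minimal sketch of the common overloads (all file paths and property names below are placeholders, not taken from the examples that follow):

import java.io.FileInputStream;
import java.io.FileNotFoundException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class AddResourceSketch {
    public static void main(String[] args) throws FileNotFoundException {
        Configuration conf = new Configuration();

        // Classpath resource name: resolved against the classpath.
        conf.addResource("my-site.xml"); // placeholder resource name

        // Filesystem path: loaded directly from the given location.
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")); // placeholder path

        // Input stream: useful when the XML does not live on the classpath;
        // the stream is parsed (and closed) when properties are first loaded.
        conf.addResource(new FileInputStream("/tmp/job-conf.xml")); // placeholder path

        // Another Configuration: merges its properties into this one.
        Configuration overrides = new Configuration(false);
        overrides.set("my.custom.key", "value"); // placeholder property
        conf.addResource(overrides);

        // Reading a property triggers loading of all added resources.
        System.out.println(conf.get("my.custom.key"));
    }
}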
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test
public void testContains() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    assertTrue(HadoopConfUtil.contains(jobConf, Constants.USER_CONF_KEY_HADOOP2));
    assertFalse(HadoopConfUtil.contains(jobConf, Constants.USER_CONF_KEY));
}
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test
public void testGetUserNameInConf() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    String userName = HadoopConfUtil.getUserNameInConf(jobConf);
    assertEquals(userName, "user");
}
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test
public void testGetQueueName() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    String queueName = HadoopConfUtil.getQueueName(jobConf);
    assertEquals(queueName, "default");
}
From source file:com.twitter.hraven.util.TestHadoopConfUtil.java
License:Apache License
@Test(expected = IllegalArgumentException.class)
public void checkUserNameAlwaysSet() throws FileNotFoundException {
    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    Configuration jobConf = new Configuration();
    jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME));
    // unset the user name to confirm an exception is thrown
    jobConf.set(Constants.USER_CONF_KEY_HADOOP2, "");
    jobConf.set(Constants.USER_CONF_KEY, "");
    // test the hRaven user name setting
    String hRavenUserName = HadoopConfUtil.getUserNameInConf(jobConf);
    assertNull(hRavenUserName);
}
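All four tests above follow the same pattern: feed a job's serialized configuration XML to the InputStream overload, then query it through HadoopConfUtil. A standalone sketch of that pattern (the file path and property key are placeholders):

import java.io.FileInputStream;
import java.io.FileNotFoundException;

import org.apache.hadoop.conf.Configuration;

public class LoadJobConf {
    public static void main(String[] args) throws FileNotFoundException {
        Configuration jobConf = new Configuration();
        // The stream is parsed when properties are first read, and Configuration
        // closes it after parsing.
        jobConf.addResource(new FileInputStream("job_conf.xml")); // placeholder path
        System.out.println(jobConf.get("mapreduce.job.user.name")); // placeholder key
    }
}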
From source file:com.uber.hoodie.common.util.ParquetUtils.java
License:Apache License
/**
 * Read the row key list matching the given filter from the given parquet file.
 * If the filter is empty, this returns all row keys.
 *
 * @param configuration configuration used to build the fs object
 * @param filePath      the parquet file path
 * @param filter        record keys filter
 * @return the set of row keys from the file matching the filter
 */
public static Set<String> filterParquetRowKeys(Configuration configuration, Path filePath, Set<String> filter) {
    Optional<RecordKeysFilterFunction> filterFunction = Optional.empty();
    if (CollectionUtils.isNotEmpty(filter)) {
        filterFunction = Optional.of(new RecordKeysFilterFunction(filter));
    }
    // Copy the configuration and merge in the resolved filesystem's own settings.
    Configuration conf = new Configuration(configuration);
    conf.addResource(getFs(filePath.toString(), conf).getConf());
    Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
    AvroReadSupport.setAvroReadSchema(conf, readSchema);
    AvroReadSupport.setRequestedProjection(conf, readSchema);
    Set<String> rowKeys = new HashSet<>();
    try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
        Object obj = reader.read();
        while (obj != null) {
            if (obj instanceof GenericRecord) {
                String recordKey = ((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                if (!filterFunction.isPresent() || filterFunction.get().apply(recordKey)) {
                    rowKeys.add(recordKey);
                }
            }
            obj = reader.read();
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
    }
    return rowKeys;
}
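A hypothetical call site for filterParquetRowKeys (the file path and record keys are invented for illustration); note that passing an empty filter returns every row key in the file:

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import com.uber.hoodie.common.util.ParquetUtils;

public class FilterRowKeysExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Invented record keys and file path, purely for illustration.
        Set<String> candidateKeys = new HashSet<>();
        candidateKeys.add("key-001");
        candidateKeys.add("key-002");
        Path parquetFile = new Path("/data/hoodie/partition/file.parquet");

        // Returns the subset of candidateKeys actually present in the file.
        Set<String> present = ParquetUtils.filterParquetRowKeys(conf, parquetFile, candidateKeys);

        // An empty filter returns every row key in the file.
        Set<String> all = ParquetUtils.filterParquetRowKeys(conf, parquetFile, new HashSet<>());

        System.out.println(present.size() + " of " + candidateKeys.size()
                + " keys found; " + all.size() + " keys total");
    }
}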
From source file:com.verizon.Main.java
public static void main(String[] args) throws Exception {
    String warehouseLocation = "file:" + System.getProperty("user.dir") + "/spark-warehouse";
    SparkSession spark = SparkSession.builder().appName("Verizon").config("spark.master", "local[2]")
            .config("spark.sql.warehouse.dir", warehouseLocation).enableHiveSupport().getOrCreate();

    Configuration configuration = new Configuration();
    configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/core-site.xml"));
    configuration.addResource(new Path(System.getProperty("HADOOP_INSTALL") + "/conf/hdfs-site.xml"));
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);

    SQLContext context = new SQLContext(spark);
    String schemaString = " Device,Title,ReviewText,SubmissionTime,UserNickname";
    // spark.read().textFile(schemaString)
    Dataset<Row> df = spark.read().csv("hdfs://localhost:9000/data.csv");
    // df.show();
    // df.printSchema();
    df = df.select("_c2");

    Path file = new Path("hdfs://localhost:9000/tempFile.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    df.write().csv("hdfs://localhost:9000/tempFile.txt");

    JavaRDD<String> lines = spark.read().textFile("hdfs://localhost:9000/tempFile.txt").javaRDD();
    JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String s) {
            return Arrays.asList(SPACE.split(s)).iterator();
        }
    });
    JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            s = s.replaceAll("[^a-zA-Z0-9]+", "");
            s = s.toLowerCase().trim();
            return new Tuple2<>(s, 1);
        }
    });
    JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    JavaPairRDD<Integer, String> frequencies = counts
            .mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(Tuple2<String, Integer> s) {
                    return new Tuple2<Integer, String>(s._2, s._1);
                }
            });
    frequencies = frequencies.sortByKey(false);
    JavaPairRDD<String, Integer> result = frequencies
            .mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(Tuple2<Integer, String> s) throws Exception {
                    return new Tuple2<String, Integer>(s._2, s._1);
                }
            });
    // JavaPairRDD<Integer, String> sortedByFreq = sort(frequencies, "descending");

    file = new Path("hdfs://localhost:9000/allresult.csv");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    // FileUtils.deleteDirectory(new File("allresult.csv"));
    result.saveAsTextFile("hdfs://localhost:9000/allresult.csv");

    List<Tuple2<String, Integer>> output = result.take(250);
    ExportToHive hiveExport = new ExportToHive();
    String rows = "";
    for (Tuple2<String, Integer> tuple : output) {
        String date = new Date().toString();
        String keyword = tuple._1();
        Integer count = tuple._2();
        // System.out.println(keyword + "," + count);
        rows += date + "," + "Samsung Galaxy s7," + keyword + "," + count + System.lineSeparator();
    }
    // System.out.println(rows);

    /*
    file = new Path("hdfs://localhost:9000/result.csv");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    br.write(rows);
    br.close();
    */
    hdfs.close();
    FileUtils.deleteQuietly(new File("result.csv"));
    FileUtils.writeStringToFile(new File("result.csv"), rows);
    hiveExport.writeToHive(spark);
    ExportDataToServer exportServer = new ExportDataToServer();
    exportServer.sendDataToRESTService(rows);
    spark.stop();
}
From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java
License:Apache License
private Configuration getDefaultConfig() {
    String hadoopHome = System.getenv("HADOOP_HOME");
    Configuration conf = new Configuration();
    if (hadoopHome != null) {
        java.nio.file.Path coreSitePath = FileSystems.getDefault().getPath(hadoopHome,
                "etc/hadoop/core-site.xml");
        java.nio.file.Path hdfsSitePath = FileSystems.getDefault().getPath(hadoopHome,
                "etc/hadoop/hdfs-site.xml");
        conf.addResource(new Path(coreSitePath.toAbsolutePath().toString()));
        conf.addResource(new Path(hdfsSitePath.toAbsolutePath().toString()));
    }
    return conf;
}
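The effect of adding those two site files is that a later FileSystem.get(conf) resolves the cluster's fs.defaultFS from core-site.xml instead of defaulting to the local filesystem. A minimal standalone sketch of the same pattern, assuming HADOOP_HOME points at a standard etc/hadoop layout:

import java.io.IOException;
import java.nio.file.FileSystems;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopHomeConfigDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        String hadoopHome = System.getenv("HADOOP_HOME");
        if (hadoopHome != null) {
            // Merge the cluster's site files so fs.defaultFS and HDFS settings apply.
            conf.addResource(new Path(FileSystems.getDefault()
                    .getPath(hadoopHome, "etc/hadoop/core-site.xml").toAbsolutePath().toString()));
            conf.addResource(new Path(FileSystems.getDefault()
                    .getPath(hadoopHome, "etc/hadoop/hdfs-site.xml").toAbsolutePath().toString()));
        }
        // Without the resources above, this would be the local filesystem.
        FileSystem fs = FileSystem.get(conf);
        System.out.println("Default filesystem: " + fs.getUri());
    }
}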
From source file:common.DataNode.java
License:Apache License
/**
 * This method starts the data node with the specified conf.
 *
 * @param conf - the configuration;
 *     if conf's CONFIG_PROPERTY_SIMULATED property is set,
 *     then a simulated storage based data node is created.
 * @param dataDirs - only for a non-simulated storage data node
 * @throws IOException
 */
void startDataNode(Configuration conf, AbstractList<File> dataDirs, DatanodeProtocol namenode)
        throws IOException {
    // use configured nameserver & interface to get local hostname
    if (conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY) != null) {
        machineName = conf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY);
    }
    if (machineName == null) {
        machineName = DNS.getDefaultHost(conf.get("dfs.datanode.dns.interface", "default"),
                conf.get("dfs.datanode.dns.nameserver", "default"));
    }
    this.nameNodeAddr = NameNode.getAddress(conf);

    this.socketTimeout = conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, HdfsConstants.READ_TIMEOUT);
    this.socketWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout", HdfsConstants.WRITE_TIMEOUT);
    /* Based on results on different platforms, we might need to set the default
     * to false on some of them. */
    this.transferToAllowed = conf.getBoolean("dfs.datanode.transferTo.allowed", true);
    this.writePacketSize = conf.getInt(DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY,
            DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT);

    InetSocketAddress socAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.address", "0.0.0.0:50010"));
    int tmpPort = socAddr.getPort();
    storage = new DataStorage();
    // construct registration
    this.dnRegistration = new DatanodeRegistration(machineName + ":" + tmpPort);

    // connect to name node
    this.namenode = namenode;

    // get version and id info from the name-node
    NamespaceInfo nsInfo = handshake();
    StartupOption startOpt = getStartupOption(conf);
    assert startOpt != null : "Startup option must be set.";

    boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false);
    if (simulatedFSDataset) {
        setNewStorageID(dnRegistration);
        dnRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION;
        dnRegistration.storageInfo.namespaceID = nsInfo.namespaceID;
        // it would have been better to pass storage as a parameter to
        // the constructor below - need to augment ReflectionUtils used below.
        conf.set(DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY, dnRegistration.getStorageID());
        try {
            // Equivalent of the following (can't do it directly because
            // SimulatedFSDataset lives in the test dir):
            //   this.data = new SimulatedFSDataset(conf);
            this.data = (FSDatasetInterface) ReflectionUtils.newInstance(
                    Class.forName("org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset"), conf);
        } catch (ClassNotFoundException e) {
            throw new IOException(StringUtils.stringifyException(e));
        }
    } else { // real storage
        // read storage info, lock data dirs and transition fs state if necessary
        storage.recoverTransitionRead(nsInfo, dataDirs, startOpt);
        // adjust
        this.dnRegistration.setStorageInfo(storage);
        // initialize data node internal structure
        this.data = new FSDataset(storage, conf);
    }

    // find free port
    ServerSocket ss = (socketWriteTimeout > 0) ? ServerSocketChannel.open().socket() : new ServerSocket();
    Server.bind(ss, socAddr, 0);
    ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE);
    // adjust machine name with the actual port
    tmpPort = ss.getLocalPort();
    selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(), tmpPort);
    this.dnRegistration.setName(machineName + ":" + tmpPort);
    LOG.info("Opened info server at " + tmpPort);

    this.threadGroup = new ThreadGroup("dataXceiverServer");
    this.dataXceiverServer = new Daemon(threadGroup, new DataXceiverServer(ss, conf, this));
    this.threadGroup.setDaemon(true); // auto destroy when empty

    this.blockReportInterval = conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
    this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay", BLOCKREPORT_INITIAL_DELAY) * 1000L;
    if (this.initialBlockReportDelay >= blockReportInterval) {
        this.initialBlockReportDelay = 0;
        LOG.info("dfs.blockreport.initialDelay is greater than "
                + "dfs.blockreport.intervalMsec. Setting initial delay to 0 msec:");
    }
    this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;

    // initialize periodic block scanner
    String reason = null;
    if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) {
        reason = "verification is turned off by configuration";
    } else if (!(data instanceof FSDataset)) {
        reason = "verification is supported only with FSDataset";
    }
    if (reason == null) {
        blockScanner = new DataBlockScanner(this, (FSDataset) data, conf);
    } else {
        LOG.info("Periodic Block Verification is disabled because " + reason + ".");
    }

    // create a servlet to serve full-file content
    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get("dfs.datanode.http.address", "0.0.0.0:50075"));
    String infoHost = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.infoServer = new HttpServer("datanode", infoHost, tmpInfoPort, tmpInfoPort == 0, conf);
    if (conf.getBoolean("dfs.https.enable", false)) {
        boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
        InetSocketAddress secInfoSocAddr = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 0));
        Configuration sslConf = new HdfsConfiguration(false);
        // load the SSL server keystore settings as an additional resource
        sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
        this.infoServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
    }
    this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class);
    this.infoServer.addInternalServlet(null, "/getFileChecksum/*", FileChecksumServlets.GetServlet.class);
    this.infoServer.setAttribute("datanode.blockScanner", blockScanner);
    this.infoServer.setAttribute("datanode.conf", conf);
    this.infoServer.addServlet(null, "/blockScannerReport", DataBlockScanner.Servlet.class);
    this.infoServer.start();
    // adjust info port
    this.dnRegistration.setInfoPort(this.infoServer.getPort());
    myMetrics = new DataNodeMetrics(conf, dnRegistration.getName());

    // set service-level authorization security policy
    if (conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
        ServiceAuthorizationManager.refresh(conf, new HDFSPolicyProvider());
    }

    // init ipc server
    InetSocketAddress ipcAddr = NetUtils.createSocketAddr(conf.get("dfs.datanode.ipc.address"));
    ipcServer = RPC.getServer(DataNode.class, this, ipcAddr.getHostName(), ipcAddr.getPort(),
            conf.getInt("dfs.datanode.handler.count", 3), false, conf);
    ipcServer.start();
    dnRegistration.setIpcPort(ipcServer.getListenerAddress().getPort());

    LOG.info("dnRegistration = " + dnRegistration);

    plugins = conf.getInstances("dfs.datanode.plugins", ServicePlugin.class);
    for (ServicePlugin p : plugins) {
        try {
            p.start(this);
            LOG.info("Started plug-in " + p);
        } catch (Throwable t) {
            LOG.warn("ServicePlugin " + p + " could not be started", t);
        }
    }
}
From source file:common.NameNode.java
License:Apache License
private void startHttpServer(Configuration conf) throws IOException {
    InetSocketAddress infoSocAddr = getHttpServerAddress(conf);
    String infoHost = infoSocAddr.getHostName();
    int infoPort = infoSocAddr.getPort();
    this.httpServer = new HttpServer("hdfs", infoHost, infoPort, infoPort == 0, conf);
    if (conf.getBoolean("dfs.https.enable", false)) {
        boolean needClientAuth = conf.getBoolean(DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY,
                DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT);
        InetSocketAddress secInfoSocAddr = NetUtils
                .createSocketAddr(conf.get(DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY, infoHost + ":" + 0));
        Configuration sslConf = new HdfsConfiguration(false);
        sslConf.addResource(conf.get("dfs.https.server.keystore.resource", "ssl-server.xml"));
        this.httpServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
        // assume same ssl port for all datanodes
        InetSocketAddress datanodeSslPort = NetUtils
                .createSocketAddr(conf.get("dfs.datanode.https.address", infoHost + ":" + 50475));
        this.httpServer.setAttribute("datanode.https.port", datanodeSslPort.getPort());
    }
    this.httpServer.setAttribute("name.node", this);
    this.httpServer.setAttribute("name.node.address", getNameNodeAddress());
    this.httpServer.setAttribute("name.system.image", getFSImage());
    this.httpServer.setAttribute("name.conf", conf);
    this.httpServer.addInternalServlet("getDelegationToken", DelegationTokenServlet.PATH_SPEC,
            DelegationTokenServlet.class);
    this.httpServer.addInternalServlet("fsck", "/fsck", FsckServlet.class);
    this.httpServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    this.httpServer.addInternalServlet("listPaths", "/listPaths/*", ListPathsServlet.class);
    this.httpServer.addInternalServlet("data", "/data/*", FileDataServlet.class);
    this.httpServer.addInternalServlet("checksum", "/fileChecksum/*",
            FileChecksumServlets.RedirectServlet.class);
    this.httpServer.addInternalServlet("contentSummary", "/contentSummary/*", ContentSummaryServlet.class);
    this.httpServer.start();

    // The web-server port can be ephemeral... ensure we have the correct info
    infoPort = this.httpServer.getPort();
    this.httpAddress = new InetSocketAddress(infoHost, infoPort);
    setHttpServerAddress(conf);
    LOG.info(getRole() + " Web-server up at: " + httpAddress);
}
From source file:cosmos.mapred.MediawikiIngestJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (1 != args.length) {
        System.err.println("Usage: input.xml,input.xml,input.xml...");
        return 1;
    }
    String inputFiles = args[0];

    Configuration conf = getConf();
    System.out.println("path " + conf.get("fs.default.name"));
    conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml"));
    System.out.println("path " + conf.get("fs.default.name"));

    Job job = new Job(conf, "Mediawiki Ingest");
    job.setJarByClass(MediawikiIngestJob.class);

    String tablename = "sortswiki";
    String zookeepers = "localhost:2181";
    String instanceName = "accumulo";
    String user = "root";
    PasswordToken passwd = new PasswordToken("secret");

    FileInputFormat.setInputPaths(job, inputFiles);
    job.setMapperClass(MediawikiMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    BatchWriterConfig bwConfig = new BatchWriterConfig();

    job.setInputFormatClass(MediawikiInputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
    AccumuloOutputFormat.setConnectorInfo(job, user, passwd);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tablename);

    return job.waitForCompletion(true) ? 0 : 1;
}
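One detail worth knowing about the three addResource(Path) calls above: later resources override earlier values for the same key, unless the earlier definition is marked final. A small sketch of that precedence rule using the Configuration overload (the property names are invented for illustration):

import org.apache.hadoop.conf.Configuration;

public class ResourceOrderDemo {
    public static void main(String[] args) {
        Configuration base = new Configuration(false);
        base.set("example.key", "from-base"); // placeholder property

        Configuration override = new Configuration(false);
        override.set("example.key", "from-override");

        Configuration conf = new Configuration(false);
        conf.addResource(base);
        conf.addResource(override); // added later, so its value wins

        System.out.println(conf.get("example.key")); // prints "from-override"
    }
}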