List of usage examples for org.apache.hadoop.fs.FileSystem#create
public FSDataOutputStream create(Path f) throws IOException
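Before the project-specific examples below, here is a minimal sketch of the call itself. It assumes a Hadoop configuration on the classpath; the class name and the path "/tmp/example.txt" are placeholders, not taken from any of the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        // Configuration is read from core-site.xml / hdfs-site.xml on the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Placeholder path; create() returns an FSDataOutputStream and overwrites an existing file by default.
        Path file = new Path("/tmp/example.txt");
        try (FSDataOutputStream out = fs.create(file)) {
            out.writeBytes("hello\n");
        }
    }
}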
From source file:com.cloudera.livy.test.apps.FailingApp.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Missing output path.");
    }
    String output = args[0];
    FileSystem fs = FileSystem.get(new Configuration());
    Path out = new Path(output);
    fs.create(out).close();
    throw new IllegalStateException("This app always fails.");
}
From source file:com.cloudera.llama.server.TestMiniLlama.java
License:Apache License
private void testMiniLlama(Configuration conf, boolean writeHdfsConf) throws Exception {
    File confFile = null;
    MiniLlama server = new MiniLlama(conf);
    final NotificationEndPoint callbackServer = new NotificationEndPoint();
    try {
        callbackServer.setConf(createCallbackConfiguration());
        callbackServer.start();
        Assert.assertNotNull(server.getConf().get(LlamaAM.CORE_QUEUES_KEY));
        if (writeHdfsConf) {
            File confDir = new File("target", UUID.randomUUID().toString());
            confDir.mkdirs();
            confFile = new File(confDir, "minidfs-site.xml").getAbsoluteFile();
            server.setWriteHadoopConfig(confFile.getAbsolutePath());
        }
        server.start();
        if (writeHdfsConf) {
            Assert.assertTrue(confFile.exists());
        }
        Assert.assertNotSame(0, server.getAddressPort());
        TTransport transport = new TSocket(server.getAddressHost(), server.getAddressPort());
        transport.open();
        TProtocol protocol = new TBinaryProtocol(transport);
        LlamaAMService.Client client = new LlamaAMService.Client(protocol);

        TLlamaAMRegisterRequest trReq = new TLlamaAMRegisterRequest();
        trReq.setVersion(TLlamaServiceVersion.V1);
        trReq.setClient_id(TypeUtils.toTUniqueId(UUID.randomUUID()));
        TNetworkAddress tAddress = new TNetworkAddress();
        tAddress.setHostname(callbackServer.getAddressHost());
        tAddress.setPort(callbackServer.getAddressPort());
        trReq.setNotification_callback_service(tAddress);

        // register
        TLlamaAMRegisterResponse trRes = client.Register(trReq);
        Assert.assertEquals(TStatusCode.OK, trRes.getStatus().getStatus_code());

        // getNodes
        TLlamaAMGetNodesRequest tgnReq = new TLlamaAMGetNodesRequest();
        tgnReq.setVersion(TLlamaServiceVersion.V1);
        tgnReq.setAm_handle(trRes.getAm_handle());
        TLlamaAMGetNodesResponse tgnRes = client.GetNodes(tgnReq);
        Assert.assertEquals(TStatusCode.OK, tgnRes.getStatus().getStatus_code());
        Assert.assertEquals(new HashSet<String>(server.getDataNodes()), new HashSet<String>(tgnRes.getNodes()));

        reserveExpandRelease(trRes, server, client, callbackServer, 1, 74);
        reserveExpandRelease(trRes, server, client, callbackServer, 1, 0);
        reserveExpandRelease(trRes, server, client, callbackServer, 2, 74);
        reserveExpandRelease(trRes, server, client, callbackServer, 1, 0);

        // test MiniHDFS
        FileSystem fs = FileSystem.get(server.getConf());
        Assert.assertTrue(fs.getUri().getScheme().equals("hdfs"));
        fs.listStatus(new Path("/"));
        OutputStream os = fs.create(new Path("/test.txt"));
        os.write(0);
        os.close();

        // unregister
        TLlamaAMUnregisterRequest turReq = new TLlamaAMUnregisterRequest();
        turReq.setVersion(TLlamaServiceVersion.V1);
        turReq.setAm_handle(trRes.getAm_handle());
        TLlamaAMUnregisterResponse turRes = client.Unregister(turReq);
        Assert.assertEquals(TStatusCode.OK, turRes.getStatus().getStatus_code());
    } finally {
        server.stop();
        callbackServer.stop();
    }
}
From source file:com.cloudera.nav.plugin.client.writer.MetadataWriterFactory.java
License:Apache License
private OutputStream createHdfsStream(PluginConfigurations config) {
    try {
        FileSystem fs = FileSystem.get(config.getHadoopConfigurations());
        Path path = new Path(getFilePath(config.getMetadataParentUriString()));
        if (fs.exists(path)) {
            return fs.append(path);
        }
        // TODO block sizes, replication counts etc
        return fs.create(path);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.cloudera.nav.sdk.client.writer.MetadataWriterFactory.java
License:Apache License
private OutputStream createHdfsStream() {
    try {
        FileSystem fs = FileSystem.get(config.getHadoopConfigurations());
        Path path = new Path(getFilePath(config.getMetadataParentUriString()));
        if (fs.exists(path)) {
            return fs.append(path);
        }
        // TODO block sizes, replication counts etc
        return fs.create(path);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.cloudera.oryx.ml.MLUpdate.java
License:Open Source License
private Pair<Path, Double> buildAndEval(int i, List<List<?>> hyperParameterCombos,
        JavaSparkContext sparkContext, JavaRDD<M> newData, JavaRDD<M> pastData, Path candidatesPath) {
    // % = cycle through combinations if needed
    List<?> hyperParameters = hyperParameterCombos.get(i % hyperParameterCombos.size());
    Path candidatePath = new Path(candidatesPath, Integer.toString(i));
    log.info("Building candidate {} with params {}", i, hyperParameters);
    Pair<JavaRDD<M>, JavaRDD<M>> trainTestData = splitTrainTest(newData, pastData);
    JavaRDD<M> allTrainData = trainTestData.getFirst();
    JavaRDD<M> testData = trainTestData.getSecond();
    Double eval = Double.NaN;
    if (empty(allTrainData)) {
        log.info("No train data to build a model");
    } else {
        PMML model = buildModel(sparkContext, allTrainData, hyperParameters, candidatePath);
        if (model == null) {
            log.info("Unable to build a model");
        } else {
            Path modelPath = new Path(candidatePath, MODEL_FILE_NAME);
            log.info("Writing model to {}", modelPath);
            try {
                FileSystem fs = FileSystem.get(candidatePath.toUri(), sparkContext.hadoopConfiguration());
                fs.mkdirs(candidatePath);
                try (OutputStream out = fs.create(modelPath)) {
                    PMMLUtils.write(model, out);
                }
            } catch (IOException ioe) {
                throw new IllegalStateException(ioe);
            }
            if (empty(testData)) {
                log.info("No test data available to evaluate model");
            } else {
                log.info("Evaluating model");
                eval = evaluate(sparkContext, model, candidatePath, testData, allTrainData);
            }
        }
    }
    log.info("Model eval for params {}: {} ({})", hyperParameters, eval, candidatePath);
    return new Pair<>(candidatePath, eval);
}
From source file:com.cloudera.recordbreaker.learnstructure.LearnStructure.java
License:Open Source License
/** */
public void inferRecordFormat(FileSystem fs, Path p, FileSystem fs2, Path schemaFile, Path parseTreeFile,
        Path jsonDataFile, Path avroDataFile, boolean verbose, int maxLines) throws IOException {
    // Store parse errors and results
    List<Integer> unparseableLineNos = new ArrayList<Integer>();
    List<String> unparseableStrs = new ArrayList<String>();
    List<Integer> parseableLineNos = new ArrayList<Integer>();
    List<List<Token.AbstractToken>> allChunks = new ArrayList<List<Token.AbstractToken>>();

    //
    // Transform the text into a list of "chunks". A single chunk corresponds to a line of text.
    // A chunk is a list of Tokens.
    //
    long startRead = System.currentTimeMillis();
    BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
    try {
        String s = in.readLine();
        int lineno = 0;
        while (s != null) {
            if (maxLines >= 0 && lineno >= maxLines) {
                break;
            }
            List<Token.AbstractToken> chunkToks = Tokenizer.tokenize(s);
            if (chunkToks != null) {
                allChunks.add(chunkToks);
                parseableLineNos.add(lineno);
            } else {
                unparseableStrs.add(s);
                unparseableLineNos.add(lineno);
            }
            s = in.readLine();
            lineno++;
        }
    } finally {
        in.close();
    }

    //
    // Infer type structure from the tokenized chunks
    //
    long start = System.currentTimeMillis();
    InferredType typeTree = TypeInference.infer(allChunks);
    long end = System.currentTimeMillis();
    double loadTime = (start - startRead) / 1000.0;
    double inferTime = (end - start) / 1000.0;
    double totalTime = (end - startRead) / 1000.0;
    if (verbose) {
        System.err.println("Number of chunks: " + allChunks.size());
        System.err.println("Elapsed load time: " + loadTime);
        System.err.println("Elapsed inference time: " + inferTime);
        System.err.println("Total execution time: " + totalTime);
    }

    //
    // The existing type tree is now correct, but could probably be more succinct.
    // We can now improve/rewrite it.
    //

    //
    // Should every top-level type be ARRAY, so as to allow repeated log lines?
    // Or does the Avro format allow an implicit top-level repeating structure?
    //

    //
    // Dump the results. We emit:
    // 1) A JSON/Avro schema
    // 2) A serialized parser program that can consume data and emit Avro files using the given schema
    //
    Schema s = typeTree.getAvroSchema();
    if (schemaFile != null) {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs2.create(schemaFile)));
        try {
            out.write(s.toString(true));
        } finally {
            out.close();
        }
    }
    if (parseTreeFile != null) {
        DataOutputStream outd = new DataOutputStream(new BufferedOutputStream(fs2.create(parseTreeFile)));
        try {
            typeTree.write(outd);
        } finally {
            outd.close();
        }
    }

    //
    // Apply the typetree's parser.
    //
    if (jsonDataFile != null) {
        Schema schema = typeTree.getAvroSchema();
        GenericDatumWriter jsonGDWriter = new GenericDatumWriter(schema);
        BufferedOutputStream outJson = new BufferedOutputStream(fs2.create(jsonDataFile));
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, outJson);
        try {
            in = new BufferedReader(new InputStreamReader(fs.open(p)));
            try {
                String str = in.readLine();
                while (str != null) {
                    GenericContainer gct = typeTree.parse(str);
                    if (gct != null) {
                        jsonGDWriter.write(gct, encoder);
                    }
                    str = in.readLine();
                }
            } finally {
                in.close();
            }
        } finally {
            encoder.flush();
            outJson.close();
        }
    }
    if (avroDataFile != null) {
        int numGoodParses = 0;
        int lineno = 0;
        Schema schema = typeTree.getAvroSchema();
        GenericDatumWriter gdWriter = new GenericDatumWriter(schema);
        DataFileWriter outData = new DataFileWriter(gdWriter);
        outData = outData.create(schema, fs2.create(avroDataFile));
        try {
            in = new BufferedReader(new InputStreamReader(fs.open(p)));
            try {
                String str = in.readLine();
                while (str != null) {
                    GenericContainer gct = typeTree.parse(str);
                    if (gct != null) {
                        numGoodParses++;
                        outData.append(gct);
                    } else {
                        if (verbose) {
                            System.err.println("unparsed line: '" + str + "'");
                        }
                    }
                    str = in.readLine();
                    lineno++;
                }
            } finally {
                in.close();
            }
        } finally {
            outData.close();
        }
        if (verbose) {
            System.err.println();
            System.err.println("Total # input lines: " + lineno);
            System.err.println("Total # lines parsed correctly: " + numGoodParses);
        }
    }
}
From source file:com.cloudera.recordservice.examples.terasort.TeraOutputFormat.java
License:Apache License
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}
From source file:com.cloudera.RenameTest.java
License:Apache License
public static void testFileSystemRename(URI uri) throws Exception {
    FileSystem fs = FileSystem.get(uri, new Configuration());
    Path testDir = new Path(new Path(uri), "testdir");
    System.out.println("mkdir " + testDir);
    fs.mkdirs(testDir);
    Path testFile = new Path(new Path(uri), "testfile");
    System.out.println("create " + testFile);
    FSDataOutputStream fos = fs.create(testFile);
    fos.close();
    System.out.println("rename " + testFile + " -> " + testDir);
    fs.rename(testFile, testDir);
}
From source file:com.cloudera.sparkwordcount.ipWordCount.java
License:Apache License
public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().set("spark.dynamicAllocation.initialExecutors", "5").setAppName("Spark Count"));
    // sc.addJar("");
    // final Logger logger = Logger.getLogger("org");
    // logger.setLevel(Level.INFO);
    final int threshold = Integer.parseInt(args[1]);
    JavaRDD<String> stringJavaRDD = sc.textFile(args[0]);
    JavaRDD<String> filteredRDD = stringJavaRDD.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String value) throws Exception {
            if (value.contains("TIME_STAMP")) {
                return false;
            }
            RdrRaw line = RdrParser.parseRdr(value);
            if (line == null) {
                System.out.println("can't pars rdr");
                return false;
            }
            String url = line.dstHost;
            if (url.trim().isEmpty()) {
                return false;
            }
            // System.out.println(url);
            return true;
        }
    });
    JavaPairRDD<RdrRaw, Integer> countsIp = filteredRDD.mapToPair(new PairFunction<String, RdrRaw, Integer>() {
        @Override
        public Tuple2<RdrRaw, Integer> call(String s) throws Exception {
            RdrRaw rdrRaw = RdrParser.parseRdr(s);
            return new Tuple2<RdrRaw, Integer>(rdrRaw, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
            return i1 + i2;
        }
    });
    // filter out words with less than threshold occurrences
    JavaPairRDD<RdrRaw, Integer> filtered = countsIp.filter(new Function<Tuple2<RdrRaw, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<RdrRaw, Integer> rdrRawIntegerTuple2) throws Exception {
            return rdrRawIntegerTuple2._2() > threshold;
        }
    });
    JavaPairRDD<Integer, RdrRaw> finalPair = filtered
            .mapToPair(new PairFunction<Tuple2<RdrRaw, Integer>, Integer, RdrRaw>() {
                @Override
                public Tuple2<Integer, RdrRaw> call(Tuple2<RdrRaw, Integer> item) throws Exception {
                    return item.swap();
                }
            }).sortByKey(false);
    List<Tuple2<Integer, RdrRaw>> collect = finalPair.take(10);
    StringBuilder msgBody = new StringBuilder();
    for (Tuple2<Integer, RdrRaw> rdrInTuple2 : collect) {
        RdrRaw rdrRaw = rdrInTuple2._2();
        Integer count = rdrInTuple2._1();
        msgBody.append(rdrRaw.dstHost)
                // .append(rdrRaw.dstParam)
                .append(" found [").append(count).append("]\n");
    }
    Configuration conf = new Configuration();
    try {
        Path p = new Path(args[2]);
        FileSystem fs = FileSystem.get(new Configuration());
        boolean exists = fs.exists(p);
        if (exists) {
            fs.delete(p, true);
        }
        FileSystem hdfs = FileSystem.get(conf);
        FSDataOutputStream out = hdfs.create(p);
        ByteArrayInputStream in = new ByteArrayInputStream(msgBody.toString().getBytes());
        byte buffer[] = new byte[256];
        int bytesRead = 0;
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }
        p = new Path(args[2] + "_all");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        finalPair.saveAsTextFile(args[2] + "_all");
    } catch (IOException e) {
        e.printStackTrace();
    }
    sc.stop();
    /*
    Properties props = new Properties();
    props.put("mail.smtps.host", "smtp.gmail.com");
    props.put("mail.smtps.auth", "true");
    Session session = Session.getDefaultInstance(props, null);
    System.out.println("try send email");
    try {
        Message msg = new MimeMessage(session);
        msg.setFrom(new InternetAddress("spark@hadoop.com", "Spark Generated Message"));
        msg.addRecipient(Message.RecipientType.TO,
                new InternetAddress("fesswoodwork@gmail.com", "Spark Responder"));
        msg.setSubject("Spark task finished");
        msg.setText(msgBody.toString());
        SMTPTransport t = (SMTPTransport) session.getTransport("smtps");
        t.connect("smtp.gmail.com", "fesswoodwork", "9610792adc");
        t.sendMessage(msg, msg.getAllRecipients());
        Transport.send(msg);
    } catch (AddressException e) {
        e.printStackTrace();
        System.out.println("AddressException " + e.getMessage());
    } catch (MessagingException e) {
        e.printStackTrace();
        System.out.println("MessagingException " + e.getMessage());
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        System.out.println("UnsupportedEncodingException " + e.getMessage());
    }
    System.out.println("sending successfully ends");
    */
    /*
    // split each document into words
    JavaRDD<String> tokenized = stringJavaRDD.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(s.split(" "));
        }
    });
    // count the occurrence of each word
    JavaPairRDD<String, Integer> counts = tokenized.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    // filter out words with less than threshold occurrences
    JavaPairRDD<String, Integer> filtered = counts.filter(new Function<Tuple2<String, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<String, Integer> tup) {
            return tup._2() >= threshold;
        }
    });
    // count characters
    JavaPairRDD<Character, Integer> charCounts = filtered.flatMap(
            new FlatMapFunction<Tuple2<String, Integer>, Character>() {
                @Override
                public Iterable<Character> call(Tuple2<String, Integer> s) {
                    Collection<Character> chars = new ArrayList<Character>(s._1().length());
                    for (char c : s._1().toCharArray()) {
                        chars.add(c);
                    }
                    return chars;
                }
            }).mapToPair(new PairFunction<Character, Character, Integer>() {
                @Override
                public Tuple2<Character, Integer> call(Character c) {
                    return new Tuple2<Character, Integer>(c, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });
    System.out.println(charCounts.collect());
    */
}
From source file:com.cloudera.sqoop.manager.DirectMySQLExportTest.java
License:Apache License
/**
 * Test an authenticated export using mysqlimport.
 */
public void testAuthExport() throws IOException, SQLException {
    SqoopOptions options = new SqoopOptions(MySQLAuthTest.AUTH_CONNECT_STRING, getTableName());
    options.setUsername(MySQLAuthTest.AUTH_TEST_USER);
    options.setPassword(MySQLAuthTest.AUTH_TEST_PASS);
    manager = new DirectMySQLManager(options);

    Connection connection = null;
    Statement st = null;
    String tableName = getTableName();
    try {
        connection = manager.getConnection();
        connection.setAutoCommit(false);
        st = connection.createStatement();

        // create a target database table.
        st.executeUpdate("DROP TABLE IF EXISTS " + tableName);
        st.executeUpdate("CREATE TABLE " + tableName + " (" + "id INT NOT NULL PRIMARY KEY, "
                + "msg VARCHAR(24) NOT NULL)");
        connection.commit();

        // Write a file containing a record to export.
        Path tablePath = getTablePath();
        Path filePath = new Path(tablePath, "datafile");
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "file:///");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(tablePath);
        OutputStream os = fs.create(filePath);
        BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
        w.write(getRecordLine(0));
        w.write(getRecordLine(1));
        w.write(getRecordLine(2));
        w.close();
        os.close();

        // run the export and verify that the results are good.
        runExport(getArgv(true, 10, 10, "--username", MySQLAuthTest.AUTH_TEST_USER, "--password",
                MySQLAuthTest.AUTH_TEST_PASS, "--connect", MySQLAuthTest.AUTH_CONNECT_STRING));
        verifyExport(3, connection);
    } catch (SQLException sqlE) {
        LOG.error("Encountered SQL Exception: " + sqlE);
        sqlE.printStackTrace();
        fail("SQLException when accessing target table. " + sqlE);
    } finally {
        try {
            if (null != st) {
                st.close();
            }
            if (null != connection) {
                connection.close();
            }
        } catch (SQLException sqlE) {
            LOG.warn("Got SQLException when closing connection: " + sqlE);
        }
    }
}