Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

This page lists usage examples for the org.apache.hadoop.fs.FileSystem.create method.

Prototype

public FSDataOutputStream create(Path f) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
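
Before the collected examples, here is a minimal sketch of the basic pattern (the path and the written bytes are hypothetical; it assumes the default Configuration resolves to a reachable file system). Note that create(Path f) overwrites an existing file by default and creates any missing parent directories.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        // Resolve the file system named by fs.defaultFS (the local FS if nothing is configured).
        FileSystem fs = FileSystem.get(new Configuration());
        Path out = new Path("/tmp/example.txt"); // hypothetical path
        // create(Path) overwrites an existing file and creates missing parent directories.
        try (FSDataOutputStream os = fs.create(out)) {
            os.writeBytes("hello\n");
        }
    }
}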

Usage

From source file:com.cloudera.livy.test.apps.FailingApp.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Missing output path.");
    }
    String output = args[0];

    FileSystem fs = FileSystem.get(new Configuration());
    Path out = new Path(output);
    fs.create(out).close();

    throw new IllegalStateException("This app always fails.");
}

From source file:com.cloudera.llama.server.TestMiniLlama.java

License:Apache License

private void testMiniLlama(Configuration conf, boolean writeHdfsConf) throws Exception {
    File confFile = null;
    MiniLlama server = new MiniLlama(conf);
    final NotificationEndPoint callbackServer = new NotificationEndPoint();
    try {
        callbackServer.setConf(createCallbackConfiguration());
        callbackServer.start();
        Assert.assertNotNull(server.getConf().get(LlamaAM.CORE_QUEUES_KEY));
        if (writeHdfsConf) {
            File confDir = new File("target", UUID.randomUUID().toString());
            confDir.mkdirs();
            confFile = new File(confDir, "minidfs-site.xml").getAbsoluteFile();
            server.setWriteHadoopConfig(confFile.getAbsolutePath());
        }
        server.start();

        if (writeHdfsConf) {
            Assert.assertTrue(confFile.exists());
        }
        Assert.assertNotSame(0, server.getAddressPort());
        TTransport transport = new TSocket(server.getAddressHost(), server.getAddressPort());
        transport.open();
        TProtocol protocol = new TBinaryProtocol(transport);
        LlamaAMService.Client client = new LlamaAMService.Client(protocol);

        TLlamaAMRegisterRequest trReq = new TLlamaAMRegisterRequest();
        trReq.setVersion(TLlamaServiceVersion.V1);
        trReq.setClient_id(TypeUtils.toTUniqueId(UUID.randomUUID()));
        TNetworkAddress tAddress = new TNetworkAddress();
        tAddress.setHostname(callbackServer.getAddressHost());
        tAddress.setPort(callbackServer.getAddressPort());
        trReq.setNotification_callback_service(tAddress);

        //register
        TLlamaAMRegisterResponse trRes = client.Register(trReq);
        Assert.assertEquals(TStatusCode.OK, trRes.getStatus().getStatus_code());

        //getNodes
        TLlamaAMGetNodesRequest tgnReq = new TLlamaAMGetNodesRequest();
        tgnReq.setVersion(TLlamaServiceVersion.V1);
        tgnReq.setAm_handle(trRes.getAm_handle());
        TLlamaAMGetNodesResponse tgnRes = client.GetNodes(tgnReq);
        Assert.assertEquals(TStatusCode.OK, tgnRes.getStatus().getStatus_code());
        Assert.assertEquals(new HashSet<String>(server.getDataNodes()), new HashSet<String>(tgnRes.getNodes()));

        reserveExpandRelease(trRes, server, client, callbackServer, 1, 74);
        reserveExpandRelease(trRes, server, client, callbackServer, 1, 0);
        reserveExpandRelease(trRes, server, client, callbackServer, 2, 74);
        reserveExpandRelease(trRes, server, client, callbackServer, 1, 0);

        //test MiniHDFS
        FileSystem fs = FileSystem.get(server.getConf());
        Assert.assertTrue(fs.getUri().getScheme().equals("hdfs"));
        fs.listStatus(new Path("/"));
        OutputStream os = fs.create(new Path("/test.txt"));
        os.write(0);
        os.close();

        //unregister
        TLlamaAMUnregisterRequest turReq = new TLlamaAMUnregisterRequest();
        turReq.setVersion(TLlamaServiceVersion.V1);
        turReq.setAm_handle(trRes.getAm_handle());
        TLlamaAMUnregisterResponse turRes = client.Unregister(turReq);
        Assert.assertEquals(TStatusCode.OK, turRes.getStatus().getStatus_code());
    } finally {
        server.stop();
        callbackServer.stop();
    }
}

From source file:com.cloudera.nav.plugin.client.writer.MetadataWriterFactory.java

License:Apache License

private OutputStream createHdfsStream(PluginConfigurations config) {
    try {
        FileSystem fs = FileSystem.get(config.getHadoopConfigurations());
        Path path = new Path(getFilePath(config.getMetadataParentUriString()));
        if (fs.exists(path)) {
            return fs.append(path);
        }
        // TODO block sizes, replication counts etc
        return fs.create(path);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.cloudera.nav.sdk.client.writer.MetadataWriterFactory.java

License:Apache License

private OutputStream createHdfsStream() {
    try {
        FileSystem fs = FileSystem.get(config.getHadoopConfigurations());
        Path path = new Path(getFilePath(config.getMetadataParentUriString()));
        if (fs.exists(path)) {
            return fs.append(path);
        }
        // TODO block sizes, replication counts etc
        return fs.create(path);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.cloudera.oryx.ml.MLUpdate.java

License:Open Source License

private Pair<Path, Double> buildAndEval(int i, List<List<?>> hyperParameterCombos,
        JavaSparkContext sparkContext, JavaRDD<M> newData, JavaRDD<M> pastData, Path candidatesPath) {
    // % = cycle through combinations if needed
    List<?> hyperParameters = hyperParameterCombos.get(i % hyperParameterCombos.size());
    Path candidatePath = new Path(candidatesPath, Integer.toString(i));
    log.info("Building candidate {} with params {}", i, hyperParameters);

    Pair<JavaRDD<M>, JavaRDD<M>> trainTestData = splitTrainTest(newData, pastData);
    JavaRDD<M> allTrainData = trainTestData.getFirst();
    JavaRDD<M> testData = trainTestData.getSecond();

    Double eval = Double.NaN;
    if (empty(allTrainData)) {
        log.info("No train data to build a model");
    } else {
        PMML model = buildModel(sparkContext, allTrainData, hyperParameters, candidatePath);
        if (model == null) {
            log.info("Unable to build a model");
        } else {
            Path modelPath = new Path(candidatePath, MODEL_FILE_NAME);
            log.info("Writing model to {}", modelPath);
            try {
                FileSystem fs = FileSystem.get(candidatePath.toUri(), sparkContext.hadoopConfiguration());
                fs.mkdirs(candidatePath);
                try (OutputStream out = fs.create(modelPath)) {
                    PMMLUtils.write(model, out);
                }
            } catch (IOException ioe) {
                throw new IllegalStateException(ioe);
            }
            if (empty(testData)) {
                log.info("No test data available to evaluate model");
            } else {
                log.info("Evaluating model");
                eval = evaluate(sparkContext, model, candidatePath, testData, allTrainData);
            }
        }
    }

    log.info("Model eval for params {}: {} ({})", hyperParameters, eval, candidatePath);
    return new Pair<>(candidatePath, eval);
}

From source file:com.cloudera.recordbreaker.learnstructure.LearnStructure.java

License:Open Source License

public void inferRecordFormat(FileSystem fs, Path p, FileSystem fs2, Path schemaFile, Path parseTreeFile,
        Path jsonDataFile, Path avroDataFile, boolean verbose, int maxLines) throws IOException {
    // Store parse errors and results
    List<Integer> unparseableLineNos = new ArrayList<Integer>();
    List<String> unparseableStrs = new ArrayList<String>();
    List<Integer> parseableLineNos = new ArrayList<Integer>();
    List<List<Token.AbstractToken>> allChunks = new ArrayList<List<Token.AbstractToken>>();

    //
    // Transform the text into a list of "chunks".  A single chunk corresponds to a line of text.  A chunk is a list of Tokens.
    //
    long startRead = System.currentTimeMillis();
    BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
    try {
        String s = in.readLine();
        int lineno = 0;
        while (s != null) {
            if (maxLines >= 0 && lineno >= maxLines) {
                break;
            }
            List<Token.AbstractToken> chunkToks = Tokenizer.tokenize(s);
            if (chunkToks != null) {
                allChunks.add(chunkToks);
                parseableLineNos.add(lineno);
            } else {
                unparseableStrs.add(s);
                unparseableLineNos.add(lineno);
            }
            s = in.readLine();
            lineno++;
        }
    } finally {
        in.close();
    }

    //
    // Infer type structure from the tokenized chunks
    //
    long start = System.currentTimeMillis();
    InferredType typeTree = TypeInference.infer(allChunks);
    long end = System.currentTimeMillis();
    double loadTime = (start - startRead) / 1000.0;
    double inferTime = (end - start) / 1000.0;
    double totalTime = (end - startRead) / 1000.0;
    if (verbose) {
        System.err.println("Number of chunks: " + allChunks.size());
        System.err.println("Elapsed load time: " + loadTime);
        System.err.println("Elapsed inference time: " + inferTime);
        System.err.println("Total execution time: " + totalTime);
    }

    //
    // The existing type tree is now correct, but could probably be more succinct.
    // We can now improve/rewrite it.
    //

    //
    // Should every top-level type be ARRAY, so as to allow repeated log lines?
    // Or does the Avro format allow an implicit top-level repeating structure?
    //

    //
    // Dump the results.  We emit:
    // 1) A JSON/Avro schema
    // 2) A serialized parser program that can consume data and emit Avro files using the given schema
    //
    Schema s = typeTree.getAvroSchema();
    if (schemaFile != null) {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs2.create(schemaFile)));
        try {
            out.write(s.toString(true));
        } finally {
            out.close();
        }
    }
    if (parseTreeFile != null) {
        DataOutputStream outd = new DataOutputStream(new BufferedOutputStream(fs2.create(parseTreeFile)));
        try {
            typeTree.write(outd);
        } finally {
            outd.close();
        }
    }

    //
    // Apply the typetree's parser.
    //
    if (jsonDataFile != null) {
        Schema schema = typeTree.getAvroSchema();
        GenericDatumWriter jsonGDWriter = new GenericDatumWriter(schema);
        BufferedOutputStream outJson = new BufferedOutputStream(fs2.create(jsonDataFile));
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, outJson);
        try {
            in = new BufferedReader(new InputStreamReader(fs.open(p)));
            try {
                String str = in.readLine();
                while (str != null) {
                    GenericContainer gct = typeTree.parse(str);

                    if (gct != null) {
                        jsonGDWriter.write(gct, encoder);
                    }
                    str = in.readLine();
                }
            } finally {
                in.close();
            }
        } finally {
            encoder.flush();
            outJson.close();
        }
    }

    if (avroDataFile != null) {
        int numGoodParses = 0;
        int lineno = 0;
        Schema schema = typeTree.getAvroSchema();

        GenericDatumWriter gdWriter = new GenericDatumWriter(schema);
        DataFileWriter outData = new DataFileWriter(gdWriter);
        outData = outData.create(schema, fs2.create(avroDataFile));

        try {
            in = new BufferedReader(new InputStreamReader(fs.open(p)));
            try {
                String str = in.readLine();
                while (str != null) {
                    GenericContainer gct = typeTree.parse(str);
                    if (gct != null) {
                        numGoodParses++;
                        outData.append(gct);
                    } else {
                        if (verbose) {
                            System.err.println("unparsed line: '" + str + "'");
                        }
                    }
                    str = in.readLine();
                    lineno++;
                }
            } finally {
                in.close();
            }
        } finally {
            outData.close();
        }
        if (verbose) {
            System.err.println();
            System.err.println("Total # input lines: " + lineno);
            System.err.println("Total # lines parsed correctly: " + numGoodParses);
        }
    }
}

From source file:com.cloudera.recordservice.examples.terasort.TeraOutputFormat.java

License:Apache License

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}

From source file:com.cloudera.RenameTest.java

License:Apache License

public static void testFileSystemRename(URI uri) throws Exception {
    FileSystem fs = FileSystem.get(uri, new Configuration());
    Path testDir = new Path(new Path(uri), "testdir");
    System.out.println("mkdir " + testDir);
    fs.mkdirs(testDir);
    Path testFile = new Path(new Path(uri), "testfile");
    System.out.println("create " + testFile);
    FSDataOutputStream fos = fs.create(testFile);
    fos.close();
    System.out.println("rename " + testFile + " -> " + testDir);
    fs.rename(testFile, testDir);
}

From source file:com.cloudera.sparkwordcount.ipWordCount.java

License:Apache License

public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().set("spark.dynamicAllocation.initialExecutors", "5").setAppName("Spark Count"));
    // sc.addJar("");
    //   final Logger logger = Logger.getLogger("org");
    // logger.setLevel(Level.INFO);
    final int threshold = Integer.parseInt(args[1]);
    JavaRDD<String> stringJavaRDD = sc.textFile(args[0]);
    JavaRDD<String> filteredRDD = stringJavaRDD.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String value) throws Exception {
            if (value.contains("TIME_STAMP")) {
                return false;
            }
            RdrRaw line = RdrParser.parseRdr(value);
            if (line == null) {
                System.out.println("can't pars rdr");
                return false;
            }
            String url = line.dstHost;
            if (url.trim().isEmpty()) {
                return false;
            }
            //System.out.println(url);
            return true;
        }
    });
    JavaPairRDD<RdrRaw, Integer> countsIp = filteredRDD.mapToPair(new PairFunction<String, RdrRaw, Integer>() {
        @Override
        public Tuple2<RdrRaw, Integer> call(String s) throws Exception {
            RdrRaw rdrRaw = RdrParser.parseRdr(s);
            return new Tuple2<RdrRaw, Integer>(rdrRaw, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) throws Exception {
            return i1 + i2;
        }
    });

    // filter out words with less than threshold occurrences
    JavaPairRDD<RdrRaw, Integer> filtered = countsIp.filter(new Function<Tuple2<RdrRaw, Integer>, Boolean>() {
        @Override
        public Boolean call(Tuple2<RdrRaw, Integer> rdrRawIntegerTuple2) throws Exception {
            return rdrRawIntegerTuple2._2() > threshold;
        }
    });
    JavaPairRDD<Integer, RdrRaw> finalPair = filtered
            .mapToPair(new PairFunction<Tuple2<RdrRaw, Integer>, Integer, RdrRaw>() {
                @Override
                public Tuple2<Integer, RdrRaw> call(Tuple2<RdrRaw, Integer> item) throws Exception {
                    return item.swap();
                }
            }).sortByKey(false);
    //
    List<Tuple2<Integer, RdrRaw>> collect = finalPair.take(10);
    StringBuilder msgBody = new StringBuilder();
    for (Tuple2<Integer, RdrRaw> rdrInTuple2 : collect) {
        RdrRaw rdrRaw = rdrInTuple2._2();
        Integer count = rdrInTuple2._1();
        msgBody.append(rdrRaw.dstHost)
                // .append(rdrRaw.dstParam)
                .append(" found [").append(count).append("]\n");
    }
    Configuration conf = new Configuration();
    try {
        Path p = new Path(args[2]);
        FileSystem fs = FileSystem.get(new Configuration());
        boolean exists = fs.exists(p);
        if (exists) {
            fs.delete(p, true);
        }
        FileSystem hdfs = FileSystem.get(conf);
        FSDataOutputStream out = hdfs.create(p);
        ByteArrayInputStream in = new ByteArrayInputStream(msgBody.toString().getBytes());
        byte buffer[] = new byte[256];
        int bytesRead = 0;
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }
        // close the stream so the buffered bytes are flushed to HDFS
        out.close();
        p = new Path(args[2] + "_all");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        finalPair.saveAsTextFile(args[2] + "_all");
    } catch (IOException e) {
        e.printStackTrace();
    }

    sc.stop();
    /* Properties props = new Properties();
     props.put("mail.smtps.host","smtp.gmail.com");
     props.put("mail.smtps.auth", "true");
     Session session = Session.getDefaultInstance(props, null);
            
     System.out.println("try send email");
     try {
    Message msg = new MimeMessage(session);
    msg.setFrom(new InternetAddress("spark@hadoop.com", "Spark Generated Message"));
    msg.addRecipient(Message.RecipientType.TO,
            new InternetAddress("fesswoodwork@gmail.com", "Spark Responder"));
    msg.setSubject("Spark task finished");
    msg.setText(msgBody.toString());
    SMTPTransport t =
            (SMTPTransport)session.getTransport("smtps");
    t.connect("smtp.gmail.com", "fesswoodwork", "9610792adc");
    t.sendMessage(msg, msg.getAllRecipients());
    Transport.send(msg);
            
     } catch (AddressException e) {
        e.printStackTrace();
    System.out.println("AddressException "+e.getMessage());
     } catch (MessagingException e) {
    e.printStackTrace();
    System.out.println("MessagingException " + e.getMessage());
     } catch (UnsupportedEncodingException e) {
    e.printStackTrace();
    System.out.println("UnsupportedEncodingException " + e.getMessage());
     }
     System.out.println("sending successfully ends");*/

    /*      // split each document into words
          JavaRDD<String> tokenized = stringJavaRDD.flatMap(
        new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String s) {
                return Arrays.asList(s.split(" "));
            }
        }
          );
            
          // count the occurrence of each word
          JavaPairRDD<String, Integer> counts = tokenized.mapToPair(
        new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
                return new Tuple2<String, Integer>(s, 1);
            }
        }
          ).reduceByKey(
        new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
            }
        }
          );
            
          // filter out words with less than threshold occurrences
          JavaPairRDD<String, Integer> filtered = counts.filter(
        new Function<Tuple2<String, Integer>, Boolean>() {
            @Override
            public Boolean call(Tuple2<String, Integer> tup) {
                return tup._2() >= threshold;
            }
        }
          );
            
          // count characters
          JavaPairRDD<Character, Integer> charCounts = filtered.flatMap(
        new FlatMapFunction<Tuple2<String, Integer>, Character>() {
            @Override
            public Iterable<Character> call(Tuple2<String, Integer> s) {
                Collection<Character> chars = new ArrayList<Character>(s._1().length());
                for (char c : s._1().toCharArray()) {
                    chars.add(c);
                }
                return chars;
            }
        }
          ).mapToPair(
        new PairFunction<Character, Character, Integer>() {
            @Override
            public Tuple2<Character, Integer> call(Character c) {
                return new Tuple2<Character, Integer>(c, 1);
            }
        }
          ).reduceByKey(
        new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
            }
        }
          );
            
          System.out.println(charCounts.collect());
          */

}

From source file:com.cloudera.sqoop.manager.DirectMySQLExportTest.java

License:Apache License

/**
 * Test an authenticated export using mysqlimport.
 */
public void testAuthExport() throws IOException, SQLException {
    SqoopOptions options = new SqoopOptions(MySQLAuthTest.AUTH_CONNECT_STRING, getTableName());
    options.setUsername(MySQLAuthTest.AUTH_TEST_USER);
    options.setPassword(MySQLAuthTest.AUTH_TEST_PASS);

    manager = new DirectMySQLManager(options);

    Connection connection = null;
    Statement st = null;

    String tableName = getTableName();

    try {
        connection = manager.getConnection();
        connection.setAutoCommit(false);
        st = connection.createStatement();

        // create a target database table.
        st.executeUpdate("DROP TABLE IF EXISTS " + tableName);
        st.executeUpdate("CREATE TABLE " + tableName + " (" + "id INT NOT NULL PRIMARY KEY, "
                + "msg VARCHAR(24) NOT NULL)");
        connection.commit();

        // Write a file containing a record to export.
        Path tablePath = getTablePath();
        Path filePath = new Path(tablePath, "datafile");
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "file:///");

        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(tablePath);
        OutputStream os = fs.create(filePath);
        BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
        w.write(getRecordLine(0));
        w.write(getRecordLine(1));
        w.write(getRecordLine(2));
        w.close();
        os.close();

        // run the export and verify that the results are good.
        runExport(getArgv(true, 10, 10, "--username", MySQLAuthTest.AUTH_TEST_USER, "--password",
                MySQLAuthTest.AUTH_TEST_PASS, "--connect", MySQLAuthTest.AUTH_CONNECT_STRING));
        verifyExport(3, connection);
    } catch (SQLException sqlE) {
        LOG.error("Encountered SQL Exception: " + sqlE);
        sqlE.printStackTrace();
        fail("SQLException when accessing target table. " + sqlE);
    } finally {
        try {
            if (null != st) {
                st.close();
            }

            if (null != connection) {
                connection.close();
            }
        } catch (SQLException sqlE) {
            LOG.warn("Got SQLException when closing connection: " + sqlE);
        }
    }
}