Java tutorial
/**
 * Copyright 2016 Otto (GmbH & Co KG)
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.schedoscope.export.ftp;

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.net.ftp.FTPClient;
import org.apache.commons.net.ftp.FTPFile;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.schedoscope.export.HiveUnitBaseTest;
import org.schedoscope.export.ftp.outputformat.FileOutputType;
import org.schedoscope.export.ftp.outputformat.FtpUploadOutputFormat;
import org.schedoscope.export.ftp.upload.FileCompressionCodec;
import org.schedoscope.export.kafka.avro.HCatToAvroSchemaConverter;
import org.schedoscope.export.testsupport.EmbeddedFtpSftpServer;
import org.schedoscope.export.writables.TextPairArrayWritable;

import java.io.IOException;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class FtpExportCSVMRTest extends HiveUnitBaseTest {

    private static EmbeddedFtpSftpServer server;

    private static final String DELIMITER = "\t";
    private static final String TEST_TABLE = "test_table";

    // A fresh random prefix per test, so each test counts only its own uploads.
    private String filePrefix;

    @Override
    @Before
    public void setUp() throws Exception {
        super.setUp();
        filePrefix = RandomStringUtils.randomNumeric(20);
    }

    @BeforeClass
    public static void setUpServer() throws Exception {
        server = new EmbeddedFtpSftpServer();
        server.startEmbeddedFtpServer();
        server.startEmbeddedSftpServer();
    }

    @AfterClass
    public static void tearDownServer() throws InterruptedException {
        server.stopEmbeddedFtpServer();
        server.stopEmbeddedSftpServer();
    }

    // Exports uncompressed CSV to the embedded FTP server.
    @Test
    public void testFtpCSVExport() throws Exception {
        setUpHiveServer("src/test/resources/test_map_data.txt",
                "src/test/resources/test_map.hql", "test_map");

        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec");

        Job job = Job.getInstance(conf);

        FtpUploadOutputFormat.setOutput(job, TEST_TABLE, false, DELIMITER,
                FileOutputType.csv, FileCompressionCodec.none, "ftp://localhost:2221/",
                EmbeddedFtpSftpServer.FTP_USER_FOR_TESTING,
                EmbeddedFtpSftpServer.FTP_PASS_FOR_TESTING, null, filePrefix,
                true, true, true);

        job.setMapperClass(FtpExportCSVMapper.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(2);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setOutputFormatClass(FtpUploadOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(TextPairArrayWritable.class);

        assertTrue(job.waitForCompletion(true));
        assertEquals(2, getFileCount());
    }

    // Exports gzip-compressed CSV over SFTP, authenticating with user name and password.
    @Test
    public void testSftpCSVExportUserPassAuth() throws Exception {
        setUpHiveServer("src/test/resources/test_map_data.txt",
                "src/test/resources/test_map.hql", "test_map");

        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec");

        Job job = Job.getInstance(conf);

        FtpUploadOutputFormat.setOutput(job, TEST_TABLE, true, DELIMITER,
                FileOutputType.csv, FileCompressionCodec.gzip, "sftp://localhost:12222/",
                EmbeddedFtpSftpServer.FTP_USER_FOR_TESTING,
                EmbeddedFtpSftpServer.FTP_PASS_FOR_TESTING, null, filePrefix,
                true, true, true);

        job.setMapperClass(FtpExportCSVMapper.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(2);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setOutputFormatClass(FtpUploadOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(TextPairArrayWritable.class);

        assertTrue(job.waitForCompletion(true));
        assertEquals(2, getFileCount());
    }

    // Exports bzip2-compressed CSV over SFTP, authenticating with an unencrypted private key.
    @Test
    public void testSftpCSVExportPubKeyAuthNoEnc() throws Exception {
        setUpHiveServer("src/test/resources/test_map_data.txt",
                "src/test/resources/test_map.hql", "test_map");

        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec");

        Job job = Job.getInstance(conf);

        FtpUploadOutputFormat.setOutput(job, TEST_TABLE, true, DELIMITER,
                FileOutputType.csv, FileCompressionCodec.bzip2, "sftp://localhost:12222/",
                EmbeddedFtpSftpServer.FTP_USER_FOR_TESTING, null,
                "src/test/resources/keys/id_rsa_not_encrypted", filePrefix,
                true, true, true);

        job.setMapperClass(FtpExportCSVMapper.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(2);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setOutputFormatClass(FtpUploadOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(TextPairArrayWritable.class);

        assertTrue(job.waitForCompletion(true));
        assertEquals(2, getFileCount());
    }

    // Exports gzip-compressed CSV over SFTP, authenticating with a passphrase-protected
    // private key ("12345" is the passphrase of the test key).
    @Test
    public void testSftpCSVExportPubKeyAuthEnc() throws Exception {
        setUpHiveServer("src/test/resources/test_map_data.txt",
                "src/test/resources/test_map.hql", "test_map");

        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec");

        Job job = Job.getInstance(conf);

        FtpUploadOutputFormat.setOutput(job, TEST_TABLE, true, DELIMITER,
                FileOutputType.csv, FileCompressionCodec.gzip, "sftp://localhost:12222/",
                EmbeddedFtpSftpServer.FTP_USER_FOR_TESTING, "12345",
                "src/test/resources/keys/id_rsa_encrypted", filePrefix,
                true, true, true);

        job.setMapperClass(FtpExportCSVMapper.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(2);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setOutputFormatClass(FtpUploadOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(TextPairArrayWritable.class);

        assertTrue(job.waitForCompletion(true));
        assertEquals(2, getFileCount());
    }

    // Exports JSON to the embedded FTP server; the Avro value schema is derived
    // from the HCatalog input schema.
    @Test
    public void testFtpJsonExport() throws Exception {
        setUpHiveServer("src/test/resources/test_map_data.txt",
                "src/test/resources/test_map.hql", "test_map");

        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec");

        Job job = Job.getInstance(conf);

        HCatToAvroSchemaConverter schemaConverter = new HCatToAvroSchemaConverter();
        Schema schema = schemaConverter.convertSchema(hcatInputSchema, TEST_TABLE);
        AvroJob.setMapOutputValueSchema(job, schema);

        FtpUploadOutputFormat.setOutput(job, TEST_TABLE, true, DELIMITER,
                FileOutputType.json, FileCompressionCodec.none, "ftp://localhost:2221/",
                EmbeddedFtpSftpServer.FTP_USER_FOR_TESTING,
                EmbeddedFtpSftpServer.FTP_PASS_FOR_TESTING, null, filePrefix,
                true, true, true);

        job.setMapperClass(FtpExportJsonMapper.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(2);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setOutputFormatClass(FtpUploadOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(AvroValue.class);

        assertTrue(job.waitForCompletion(true));
        assertEquals(2, getFileCount());
    }

    // Counts the files on the embedded FTP server that carry this run's prefix.
    // Note that even the SFTP tests verify through this FTP-side check.
    private int getFileCount() throws IOException {
        FTPClient ftp = new FTPClient();
        ftp.connect("localhost", 2221);
        ftp.login(EmbeddedFtpSftpServer.FTP_USER_FOR_TESTING,
                EmbeddedFtpSftpServer.FTP_PASS_FOR_TESTING);

        FTPFile[] files = ftp.listFiles();

        int fileCounter = 0;
        for (FTPFile f : files) {
            if (f.getName().contains(filePrefix)) {
                fileCounter += 1;
            }
        }
        return fileCounter;
    }
}
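
The same wiring works outside JUnit. Below is a minimal sketch of a standalone driver under stated assumptions: the database and table names, FTP endpoint, credentials, and file prefix are placeholders of mine, not values from the listing; in the tests the input table is set up by HiveUnitBaseTest, so here HCatInputFormat.setInput (the standard HCatalog call) stands in for that step. The three trailing boolean flags are passed exactly as the tests pass them, since their semantics are defined inside FtpUploadOutputFormat and are not visible in this listing.

package org.schedoscope.export.ftp;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.schedoscope.export.ftp.outputformat.FileOutputType;
import org.schedoscope.export.ftp.outputformat.FtpUploadOutputFormat;
import org.schedoscope.export.ftp.upload.FileCompressionCodec;
import org.schedoscope.export.writables.TextPairArrayWritable;

// Hypothetical standalone driver mirroring the job wiring of the tests above.
public class FtpExportDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same codec registration as in the tests.
        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec");

        Job job = Job.getInstance(conf, "ftp-export");

        // Placeholder database/table; the tests get this from HiveUnitBaseTest instead.
        HCatInputFormat.setInput(job, "my_database", "my_table");

        // Placeholder endpoint, credentials, and prefix; boolean flags copied
        // verbatim from the tests (their meaning lives in FtpUploadOutputFormat).
        FtpUploadOutputFormat.setOutput(job, "my_table", false, "\t",
                FileOutputType.csv, FileCompressionCodec.gzip, "ftp://ftp.example.com:21/",
                "user", "secret", null, "export_", true, true, true);

        job.setMapperClass(FtpExportCSVMapper.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(2);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setOutputFormatClass(FtpUploadOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(TextPairArrayWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}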
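
Notice that getFileCount() always talks to the FTP port (2221), even in the SFTP test cases, which only works because the embedded FTP and SFTP servers evidently expose the same directory. If one wanted to verify the uploads over SFTP instead, a sketch using JSch might look like the following. JSch is not used anywhere in the listing above; the user, password, and port 12222 mirror the test setup, but the exact credentials of EmbeddedFtpSftpServer are constants not shown here, so treat these values as assumptions.

import com.jcraft.jsch.ChannelSftp;
import com.jcraft.jsch.JSch;
import com.jcraft.jsch.Session;

import java.util.Vector;

// Hypothetical SFTP-side counterpart to getFileCount().
public class SftpFileCounter {

    @SuppressWarnings("unchecked")
    public static int countFilesWithPrefix(String prefix) throws Exception {
        JSch jsch = new JSch();
        // Placeholder credentials; the tests use EmbeddedFtpSftpServer's constants.
        Session session = jsch.getSession("user", "localhost", 12222);
        session.setPassword("pass");
        // Skip host key verification; acceptable only against a local test server.
        session.setConfig("StrictHostKeyChecking", "no");
        session.connect();

        ChannelSftp sftp = (ChannelSftp) session.openChannel("sftp");
        sftp.connect();

        // ls() returns a raw Vector of LsEntry objects; count matching names.
        int count = 0;
        for (ChannelSftp.LsEntry entry : (Vector<ChannelSftp.LsEntry>) sftp.ls(".")) {
            if (entry.getFilename().contains(prefix)) {
                count++;
            }
        }

        sftp.disconnect();
        session.disconnect();
        return count;
    }
}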