Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.local.common.utils; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.util.Date; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.apache.commons.io.IOUtils; import com.aliyun.odps.utils.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.aliyun.odps.Column; import com.aliyun.odps.Odps; import com.aliyun.odps.OdpsException; import com.aliyun.odps.Partition; import com.aliyun.odps.PartitionSpec; import com.aliyun.odps.Resource; import com.aliyun.odps.Table; import com.aliyun.odps.TableResource; import com.aliyun.odps.data.Record; import com.aliyun.odps.data.RecordReader; import com.aliyun.odps.data.TableInfo; import com.aliyun.odps.tunnel.TableTunnel; import com.aliyun.odps.tunnel.TableTunnel.DownloadSession; import com.aliyun.odps.local.common.Constants; import com.aliyun.odps.local.common.DownloadMode; import com.aliyun.odps.local.common.ExceptionCode; import com.aliyun.odps.local.common.TableMeta; import 
com.aliyun.odps.local.common.WareHouse;
import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

/**
 * Static helpers that copy table schemas, table data and resources from a
 * remote ODPS project into the local {@link WareHouse} directory layout.
 *
 * <p>Every download entry point refuses to run when the warehouse download
 * mode is {@link DownloadMode#NEVER}.
 */
public class DownloadUtils {

  private static final Log LOG = LogFactory.getLog(DownloadUtils.class);

  /** Charset used when writing downloaded table data files. */
  public static final Charset encoding = Charset.forName("UTF-8");

  /**
   * Downloads the schema of a remote table and then its data, writing both
   * below the warehouse table directory.
   *
   * <p>For a partitioned table only the partitions matching
   * {@code tableInfo.getPartSpec()} are downloaded; an empty spec matches every
   * partition.
   *
   * @param odps                     client used to reach the remote project
   * @param tableInfo                table to download; a blank project name is
   *                                 replaced by the client's default project
   * @param limitDownloadRecordCount record count passed to the tunnel reader
   * @param inputColumnSeperator     column delimiter of the written data file
   * @throws IllegalArgumentException if {@code odps} or {@code tableInfo} is null
   * @throws RuntimeException         if validation fails, no partition matches,
   *                                  or a partition spec is given for a
   *                                  non-partitioned table
   */
  public static void downloadTableSchemeAndData(Odps odps, TableInfo tableInfo,
      int limitDownloadRecordCount, char inputColumnSeperator) {
    if (odps == null || tableInfo == null) {
      throw new IllegalArgumentException("Missing arguments:odps|tableInfo");
    }
    if (StringUtils.isBlank(tableInfo.getProjectName())) {
      tableInfo.setProjectName(odps.getDefaultProject());
    }
    validateTable(odps, tableInfo);

    Table table = odps.tables().get(tableInfo.getProjectName(), tableInfo.getTableName());

    // create table scheme file
    File tableDir = WareHouse.getInstance().getTableDir(tableInfo.getProjectName(),
        tableInfo.getTableName());
    if (!tableDir.exists()) {
      tableDir.mkdirs();
    }
    TableMeta tableMeta = TableMeta.fromTable(table);
    SchemaUtils.generateSchemaFile(tableMeta, null, tableDir);

    // download data file
    LinkedHashMap<String, String> parts = tableInfo.getPartSpec();
    if (tableMeta.getPartitions() != null && tableMeta.getPartitions().length > 0) {
      int downloadedPartition = 0;
      for (Partition partition : table.getPartitions()) {
        PartitionSpec spec = partition.getPartitionSpec();
        if (matches(spec, parts)) {
          downloadTable(odps, tableMeta, spec, limitDownloadRecordCount, inputColumnSeperator);
          downloadedPartition++;
        }
      }
      if (downloadedPartition == 0) {
        throw new RuntimeException("No partition found for " + tableInfo.toString());
      }
    } else {
      // A partition filter makes no sense for a table without partition columns.
      if (parts != null && parts.size() > 0) {
        throw new RuntimeException("Warehouse not exists table: " + tableInfo
            + " ,while try to download from remote server found it is not a partitioned table");
      }
      downloadTable(odps, tableMeta, null, limitDownloadRecordCount, inputColumnSeperator);
    }
  }

  /**
   * Downloads the rows of one table (or one partition of it) and writes them
   * to a file named {@code data} inside the warehouse table directory, or
   * inside the partition sub-directory when {@code partition} is not null.
   *
   * @param odps                     client used to reach the remote project
   * @param tableMeta                metadata naming the project and table
   * @param partition                partition to download, or null for the
   *                                 table itself
   * @param limitDownloadRecordCount record count passed to the tunnel reader
   * @param inputColumnSeperator     column delimiter of the written data file
   * @return the directory that contains the written {@code data} file
   * @throws RuntimeException if the download fails or a record cannot be written
   */
  public static File downloadTable(Odps odps, TableMeta tableMeta, PartitionSpec partition,
      int limitDownloadRecordCount, char inputColumnSeperator) {
    TableInfo tableInfo = TableInfo.builder().projectName(tableMeta.getProjName())
        .tableName(tableMeta.getTableName()).partSpec(partition).build();

    List<String[]> records = downloadTableData(odps, tableMeta.getProjName(),
        tableMeta.getTableName(), partition, limitDownloadRecordCount, null);

    File tableDir = WareHouse.getInstance().getTableDir(tableMeta.getProjName(),
        tableMeta.getTableName());
    if (!tableDir.exists()) {
      tableDir.mkdirs();
    }

    File dataDir = tableDir;
    if (partition != null) {
      dataDir = new File(tableDir, PartitionUtils.toString(partition));
      if (!dataDir.exists()) {
        dataDir.mkdirs();
      }
    }

    LOG.info("Start to write table: " + tableInfo.toString() + "-->" + dataDir.getAbsolutePath());

    File dataFile = new File(dataDir, "data");
    try {
      dataFile.createNewFile();
    } catch (IOException e) {
      // Best effort only: a real I/O problem will surface again when the
      // writer below touches the file, so just leave a trace here.
      LOG.warn("Failed to create data file: " + dataFile.getAbsolutePath(), e);
    }

    CsvWriter writer = new CsvWriter(dataFile.getAbsolutePath(), inputColumnSeperator, encoding);
    try {
      for (String[] record : records) {
        writer.writeRecord(record);
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      // Close on every path so a failed write does not leak the writer.
      writer.close();
    }

    LOG.info("Finished write table: " + tableInfo.toString() + "-->" + dataDir.getAbsolutePath());
    return dataDir;
  }

  /**
   * Downloads rows of a remote table through a tunnel download session and
   * converts every column value to its string form.
   *
   * @param odps                     client used to reach the remote project
   * @param projectName              project that owns the table
   * @param tableName                table to read
   * @param partition                partition to read; if null the download
   *                                 session is created for the table without a
   *                                 partition
   * @param limitDownloadRecordCount record count passed to the tunnel reader
   *                                 (reading starts at record 0)
   * @param readCols                 names of the columns to return; if null all
   *                                 columns are returned
   * @return one {@code String[]} per downloaded row, projected onto
   *         {@code readCols} when given
   * @throws IllegalArgumentException if {@code odps} is null or a name is blank
   * @throws RuntimeException         if validation or the download fails
   */
  public static List<String[]> downloadTableData(Odps odps, String projectName, String tableName,
      PartitionSpec partition, int limitDownloadRecordCount, String[] readCols) {
    if (odps == null || StringUtils.isBlank(projectName) || StringUtils.isBlank(tableName)) {
      throw new IllegalArgumentException("Missing arguments: odps|projectName|tablename");
    }

    TableInfo tableInfo = TableInfo.builder().projectName(projectName).tableName(tableName)
        .partSpec(partition).build();

    validateTable(odps, tableInfo);

    LOG.info("Start to download table: '" + tableInfo.toString() + "', download mode:"
        + WareHouse.getInstance().getDownloadMode());

    Table table = odps.tables().get(projectName, tableName);
    TableMeta tableMeta = TableMeta.fromTable(table);
    int columnCount = tableMeta.getCols().length;

    List<String[]> list = new LinkedList<String[]>();
    TableTunnel tunnel = new TableTunnel(odps);
    try {
      DownloadSession downloadSession = (partition == null)
          ? tunnel.createDownloadSession(projectName, tableName)
          : tunnel.createDownloadSession(projectName, tableName, partition);
      LOG.info("Tunnel DownloadSession ID is : " + downloadSession.getId());

      RecordReader reader = downloadSession.openRecordReader(0, limitDownloadRecordCount);
      try {
        Record record;
        while ((record = reader.read()) != null) {
          // Render every column of the record as a string cell.
          String[] vals = new String[columnCount];
          for (int i = 0; i < vals.length; i++) {
            vals[i] = getColumnValueString(tableMeta, record, i);
          }
          list.add(vals);
        }
      } finally {
        closeQuietly(reader, tableInfo);
      }
    } catch (Exception e) {
      // Keep the original exception as the cause so the stack trace survives.
      throw new RuntimeException(
          LocalRunUtils.getDownloadErrorMsg(tableInfo.toString()) + e.getMessage(), e);
    }

    List<String[]> result = list;
    if (readCols != null) {
      List<Integer> indexList = LocalRunUtils.genReadColsIndexes(tableMeta, readCols);
      // Only skip the projection when it would not change anything; a same-sized
      // but reordered selection still has to be applied.
      if (!isIdentityProjection(indexList, columnCount)) {
        result = projectColumns(list, indexList);
      }
    }

    LOG.info("Finished download table: '" + tableInfo.toString() + "', download mode:"
        + WareHouse.getInstance().getDownloadMode());
    return result;
  }

  /** Closes the reader, logging instead of propagating a failure to close. */
  private static void closeQuietly(RecordReader reader, TableInfo tableInfo) {
    try {
      reader.close();
    } catch (Exception e) {
      LOG.warn("Failed to close record reader of table: " + tableInfo, e);
    }
  }

  /** Tells whether the index list selects every column in its original position. */
  private static boolean isIdentityProjection(List<Integer> indexList, int columnCount) {
    if (indexList == null) {
      return true;
    }
    if (indexList.size() != columnCount) {
      return false;
    }
    int position = 0;
    for (Integer index : indexList) {
      if (index == null || index.intValue() != position) {
        return false;
      }
      position++;
    }
    return true;
  }

  /** Builds a new row list that keeps only the cells at the given column indexes. */
  private static List<String[]> projectColumns(List<String[]> rows, List<Integer> indexList) {
    int[] indexes = new int[indexList.size()];
    int next = 0;
    for (Integer index : indexList) {
      indexes[next++] = index.intValue();
    }

    List<String[]> projected = new LinkedList<String[]>();
    for (String[] srcData : rows) {
      String[] dstData = new String[indexes.length];
      for (int i = 0; i < indexes.length; ++i) {
        dstData[i] = srcData[indexes[i]];
      }
      // Exactly one output row per input row.
      projected.add(dstData);
    }
    return projected;
  }

  /**
   * Downloads a remote resource into the warehouse.
   *
   * <p>FILE, JAR, PY and ARCHIVE resources are copied byte for byte into the
   * warehouse resource file. For a TABLE resource the source table (or source
   * partition) is downloaded unless the warehouse already has it, and a table
   * resource file referring to it is created. Other resource types are ignored.
   *
   * @param odps                     client used to reach the remote project
   * @param projName                 project that owns the resource
   * @param resourceName             name of the resource
   * @param limitDownloadRecordCount record count used when a table resource
   *                                 triggers a table download
   * @param inputColumnSeperator     column delimiter used when a table resource
   *                                 triggers a table download
   * @throws IllegalArgumentException if {@code odps} is null or a name is blank
   * @throws RuntimeException         if the download mode is NEVER, the remote
   *                                  resource does not exist, or the download fails
   */
  public static void downloadResource(Odps odps, String projName, String resourceName,
      int limitDownloadRecordCount, char inputColumnSeperator) {
    if (odps == null || StringUtils.isBlank(projName) || StringUtils.isBlank(resourceName)) {
      throw new IllegalArgumentException("Missing arguments: odps|projName|resourceName");
    }

    if (WareHouse.getInstance().getDownloadMode() == DownloadMode.NEVER) {
      throw new RuntimeException("Download resource '" + projName + "." + resourceName
          + "' Failed! Current download mode is:" + DownloadMode.NEVER
          + ".Please check parameter'" + Constants.LOCAL_DOWNLOAD_MODE + "'");
    }

    try {
      if (!odps.resources().exists(projName, resourceName)) {
        throw new RuntimeException("Download resource: " + projName + "." + resourceName
            + " Failed!Remote resource not found! Download mode:"
            + WareHouse.getInstance().getDownloadMode());
      }
    } catch (OdpsException e1) {
      throw new RuntimeException(e1);
    }

    Resource resource = odps.resources().get(projName, resourceName);
    File resFile = WareHouse.getInstance().getReourceFile(projName, resourceName);
    LOG.info("Start to download resource: " + resource.getName() + "-->"
        + resFile.getAbsolutePath() + ", type: " + resource.getType() + ",download mode:"
        + WareHouse.getInstance().getDownloadMode());

    switch (resource.getType()) {
      case FILE:
      case JAR:
      case PY:
      case ARCHIVE:
        copyResourceContent(odps, resource, resFile);
        break;
      case TABLE:
        downloadTableResource(odps, resource, limitDownloadRecordCount, inputColumnSeperator);
        break;
      default:
        break;
    }

    LOG.info("Finished download resource: " + resource.getName() + "-->"
        + resFile.getAbsolutePath() + ", type: " + resource.getType() + ",download mode:"
        + WareHouse.getInstance().getDownloadMode());
  }

  /** Streams the content of a file-like resource into {@code resFile}, closing both streams. */
  private static void copyResourceContent(Odps odps, Resource resource, File resFile) {
    try {
      InputStream in = odps.resources().getResourceAsStream(resource.getProject(),
          resource.getName());
      try {
        resFile.getParentFile().mkdirs();
        FileOutputStream out = new FileOutputStream(resFile);
        try {
          IOUtils.copy(in, out);
        } finally {
          out.close();
        }
      } finally {
        // Runs even when closing the output stream failed.
        if (in != null) {
          in.close();
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    } catch (OdpsException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Makes the source table (or partition) of a table resource available in the
   * warehouse and records the table resource file that refers to it.
   */
  private static void downloadTableResource(Odps odps, Resource resource,
      int limitDownloadRecordCount, char inputColumnSeperator) {
    TableResource tr = new TableResource(resource);
    Table stable = tr.getSourceTable();
    String sourceProjName = stable.getProject();
    String sourceTbleName = stable.getName();
    PartitionSpec partSpec = tr.getSourceTablePartition();

    TableInfo tableInfo;
    if (partSpec == null) {
      tableInfo = TableInfo.builder().projectName(sourceProjName).tableName(sourceTbleName)
          .build();
      // Download the whole source table only when the warehouse lacks it.
      if (!WareHouse.getInstance().existsTable(sourceProjName, sourceTbleName)) {
        downloadTableSchemeAndData(odps, tableInfo, limitDownloadRecordCount,
            inputColumnSeperator);
      }
    } else {
      LinkedHashMap<String, String> partMap = PartitionUtils.convert(partSpec);
      tableInfo = TableInfo.builder().projectName(sourceProjName).tableName(sourceTbleName)
          .partSpec(partMap).build();
      // Download the source partition only when the warehouse lacks it.
      if (!WareHouse.getInstance().existsPartition(sourceProjName, sourceTbleName, partSpec)) {
        downloadTableSchemeAndData(odps, tableInfo, limitDownloadRecordCount,
            inputColumnSeperator);
      }
    }

    // Record which table the resource points at.
    WareHouse.getInstance().createTableReourceFile(resource.getProject(), resource.getName(),
        tableInfo);
  }

  /**
   * Validates a remote table and returns its metadata without downloading data.
   *
   * @param odps      client used to reach the remote project
   * @param tableInfo table to describe
   * @return metadata built from the remote table
   * @throws RuntimeException if {@link #validateTable(Odps, TableInfo)} rejects the table
   */
  public static TableMeta downloadTableInfo(Odps odps, TableInfo tableInfo) {
    validateTable(odps, tableInfo);
    Table table = odps.tables().get(tableInfo.getProjectName(), tableInfo.getTableName());
    return TableMeta.fromTable(table);
  }

  /**
   * Renders one column of a record as the string stored in the data file.
   *
   * <p>A null column value is rendered as the two-character marker {@code \N}.
   * In any other value, literal occurrences of {@code \N} are wrapped in double
   * quotes.
   *
   * @throws RuntimeException for a column type other than BIGINT, BOOLEAN,
   *                          DOUBLE, STRING or DATETIME, or when a STRING value
   *                          cannot be converted
   */
  private static String getColumnValueString(TableMeta tableMeta, Record record, int colIndex) {
    String colValue = null;
    Column col = tableMeta.getCols()[colIndex];
    if (record.get(col.getName()) == null) {
      return "\\N";
    }

    switch (col.getType()) {
      case BIGINT: {
        Long v = record.getBigint(col.getName());
        colValue = v == null ? null : v.toString();
        break;
      }
      case BOOLEAN: {
        Boolean v = record.getBoolean(col.getName());
        colValue = v == null ? null : v.toString();
        break;
      }
      case DOUBLE: {
        Double v = record.getDouble(col.getName());
        colValue = v == null ? null : v.toString();
        break;
      }
      case STRING: {
        byte[] v = record.getBytes(col.getName());
        try {
          colValue = v == null ? null : LocalRunUtils.toReadableString(v);
        } catch (Exception e) {
          throw new RuntimeException("convert to readable string failed!" + e);
        }
        break;
      }
      case DATETIME: {
        Date v = record.getDatetime(col.getName());
        colValue = v == null ? null
            : LocalRunUtils.getDateFormat(Constants.DATE_FORMAT_2).format(v);
        break;
      }
      default:
        throw new RuntimeException("Unknown column type: " + col.getType());
    }

    if (colValue == null) {
      return Constants.NULL_TOKEN;
    } else {
      // The regex matches a literal backslash followed by 'N'.
      return colValue.replaceAll("\\\\N", "\"\\\\N\"");
    }
  }

  /**
   * Tells whether a partition satisfies a partition filter: every key of
   * {@code parts} must be present in {@code spec} with an equal value. A null
   * or empty filter matches every partition.
   */
  private static boolean matches(PartitionSpec spec, Map<String, String> parts) {
    if (parts == null || parts.isEmpty()) {
      return true;
    }
    for (Map.Entry<String, String> e : parts.entrySet()) {
      String actual = spec.get(e.getKey());
      // A key the partition does not define can never match.
      if (actual == null || !actual.equals(e.getValue())) {
        return false;
      }
    }
    return true;
  }

  /**
   * Checks that a remote table may be downloaded.
   *
   * @param odps      client used to reach the remote project
   * @param tableInfo table to check
   * @throws IllegalArgumentException if {@code tableInfo} or {@code odps} is null
   * @throws RuntimeException         if the download mode is NEVER, the remote
   *                                  table does not exist, a partition spec is
   *                                  given for a table without partition
   *                                  columns, or the table is a virtual view
   */
  public static void validateTable(Odps odps, TableInfo tableInfo) {
    if (tableInfo == null) {
      throw new IllegalArgumentException("Missing arugument: tableInfo");
    }
    if (odps == null) {
      throw new IllegalArgumentException("Missing argument: odps");
    }

    if (WareHouse.getInstance().getDownloadMode() == DownloadMode.NEVER) {
      throw new RuntimeException("Download table schema '" + tableInfo.toString()
          + "' Failed! Current download mode is:" + DownloadMode.NEVER
          + ".Please check parameter '" + Constants.LOCAL_DOWNLOAD_MODE + "'");
    }

    try {
      if (!odps.tables().exists(tableInfo.getProjectName(), tableInfo.getTableName())) {
        throw new RuntimeException("Download table " + tableInfo
            + " Failed! Remote table not found!Download mode:"
            + WareHouse.getInstance().getDownloadMode());
      }
    } catch (OdpsException e1) {
      throw new RuntimeException(e1);
    }

    Table table = odps.tables().get(tableInfo.getProjectName(), tableInfo.getTableName());
    boolean partitionRequested =
        tableInfo.getPartitionSpec() != null && !tableInfo.getPartitionSpec().isEmpty();
    boolean tableHasNoPartitionColumns = table.getSchema().getPartitionColumns() == null
        || table.getSchema().getPartitionColumns().isEmpty();
    if (partitionRequested && tableHasNoPartitionColumns) {
      throw new RuntimeException("Download table " + tableInfo + "Failed! Remote table "
          + table.getProject() + "." + table.getName()
          + " is not a partitioned table,Download mode:"
          + WareHouse.getInstance().getDownloadMode());
    }

    if (table.isVirtualView()) {
      throw new RuntimeException(ExceptionCode.ODPS_0720101 + " - " + tableInfo);
    }
  }

  /**
   * Creates a CSV reader over a file with the reader's safety switch turned off.
   *
   * @param fileName path of the file to read
   * @param sep      column delimiter
   * @param charset  charset of the file
   * @return the configured reader
   * @throws FileNotFoundException if the file cannot be found
   */
  public static CsvReader newCsvReader(String fileName, char sep, Charset charset)
      throws FileNotFoundException {
    CsvReader reader = new CsvReader(fileName, sep, charset);
    reader.setSafetySwitch(false);
    return reader;
  }
}