Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.local.common; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; import org.apache.commons.io.FileUtils; import org.apache.commons.io.filefilter.HiddenFileFilter; import com.aliyun.odps.utils.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.aliyun.odps.Column; import com.aliyun.odps.Odps; import com.aliyun.odps.OdpsException; import com.aliyun.odps.PartitionSpec; import com.aliyun.odps.conf.Configuration; import com.aliyun.odps.data.TableInfo; import com.aliyun.odps.local.common.utils.DownloadUtils; import com.aliyun.odps.local.common.utils.LocalRunUtils; import com.aliyun.odps.local.common.utils.PartitionUtils; import com.aliyun.odps.local.common.utils.SchemaUtils; import com.csvreader.CsvReader; import com.csvreader.CsvWriter; public class WareHouse { private static final Log LOG = LogFactory.getLog(WareHouse.class); public static final Charset encoding = Charset.forName("UTF-8"); private File warehouseDir; private static volatile WareHouse wareHouse; private ThreadLocal<Odps> odpsThreadLocal = new ThreadLocal<Odps>(); private ThreadLocal<Configuration> confThreadLocal = new ThreadLocal<Configuration>(); private JobDirecotry jobDirecotry; private WareHouse() { warehouseDir = new File(Constants.WAREHOUSE_HOUSE_DIR); if (!warehouseDir.exists()) { warehouseDir.mkdirs(); } jobDirecotry = new JobDirecotry(); } public static synchronized WareHouse getInstance() { if (wareHouse == null) { wareHouse = new WareHouse(); } return wareHouse; } public static void init(Odps odps, Configuration conf) { getInstance().setOdps(odps); getInstance().setConfiguration(conf); } public File getWarehouseDir() { return warehouseDir; } public File getProjectDir(String projName) { return new File(warehouseDir, projName); } /** * get directory: warehouse/project_name/__resources__ * */ public File getResourceDir(String projName) { return new File(getProjectDir(projName), Constants.RESOURCES_DIR); } public File getTableDir(String projName, String tblName) { File tableDir = new File(getProjectDir(projName), Constants.TABLES_DIR + File.separator + tblName); if (!tableDir.exists()) { // ?warehouse File oldVersionDir = new File(getProjectDir(projName), tblName); if (oldVersionDir.exists()) { return oldVersionDir; } } return tableDir; } /** * if partSpec is null or empty will return table directory */ public File getPartitionDir(String projName, String tblName, PartitionSpec partSpec) { if (partSpec == null || partSpec.isEmpty()) { return getTableDir(projName, tblName); } Map<PartitionSpec, File> map = getPartitionToPathMap(projName, tblName); for (PartitionSpec key : map.keySet()) { if (PartitionUtils.isEqual(key, partSpec)) { return map.get(key); } } return null; } /** * Warehouse??partition? * * @param projName * @param tblName * @param dataFile * Warehouse? * @return */ public PartitionSpec resolvePartition(String projName, String tblName, File dataFile) { if (StringUtils.isBlank(projName) || StringUtils.isBlank(tblName) || dataFile == null || !dataFile.exists()) { return null; } Map<PartitionSpec, File> partitionToPathMap = getPartitionToPathMap(projName, tblName); String dataFilePath = dataFile.getAbsolutePath().replaceAll("\\\\", "/"); if (!dataFilePath.endsWith("/")) { dataFilePath = dataFilePath + "/"; } for (PartitionSpec key : partitionToPathMap.keySet()) { String partitionPath = partitionToPathMap.get(key).getAbsolutePath().replaceAll("\\\\", "/"); if (dataFilePath.startsWith(partitionPath)) { return key; } } return null; } public File getReourceFile(String projName, String resourceName) { return new File(warehouseDir, projName + File.separator + Constants.RESOURCES_DIR + File.separator + resourceName); } public File getTableReourceFile(String projName, String resourceName) { File tableResourceDir = getReourceFile(projName, resourceName); return tableResourceDir.listFiles()[0]; } public File getTableSchemeFile(String projectName, String tableName) throws FileNotFoundException { if (!existsTableSchema(projectName, tableName)) { throw new FileNotFoundException( "Table Directory :" + projectName + "." + tableName + " Not exists in warehouse!"); } File tableDir = getTableDir(projectName, tableName); return new File(tableDir, Constants.SCHEMA_FILE); } public String getRelativePath(String projName, String tblName, PartitionSpec partSpec, Object... flag) { String relativePath = projName + File.separator + tblName + File.separator; if (partSpec != null) { relativePath += PartitionUtils.toString(partSpec); } return relativePath; } public List<File> getDataFiles(String projName, String tblName, PartitionSpec pattern, char inputColumnSeperator) throws IOException, OdpsException { if (pattern != null && !pattern.isEmpty() && !existsPartition(projName, tblName, pattern)) { LinkedHashMap<String, String> part = PartitionUtils.convert(pattern); TableInfo tableInfo = TableInfo.builder().projectName(projName).tableName(tblName).partSpec(part) .build(); DownloadUtils.downloadTableSchemeAndData(WareHouse.getInstance().getOdps(), tableInfo, Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator); } else if (!existsTable(projName, tblName)) { TableInfo tableInfo = TableInfo.builder().projectName(projName).tableName(tblName).build(); DownloadUtils.downloadTableSchemeAndData(WareHouse.getInstance().getOdps(), tableInfo, Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator); } File tableDir = getTableDir(projName, tblName); TableMeta tableMeta = SchemaUtils.readSchema(tableDir); boolean isPartitionTable = false; if (tableMeta.getPartitions() != null && tableMeta.getPartitions().length > 0) { isPartitionTable = true; } // not a partition table if (!isPartitionTable) { if (pattern != null && !pattern.isEmpty()) { throw new OdpsException("Table " + projName + "." + tblName + " is not a partition table"); } return LocalRunUtils.listDataFiles(tableDir); } // get all partitions if (pattern == null) { List<File> result = new ArrayList<File>(); LocalRunUtils.listAllDataFiles(tableDir, result); return result; } List<File> result = new ArrayList<File>(); Map<PartitionSpec, File> partitionToPathMap = WareHouse.getInstance().getPartitionToPathMap(projName, tblName); for (PartitionSpec parts : partitionToPathMap.keySet()) { if (PartitionUtils.match(pattern, parts)) { result.addAll(LocalRunUtils.listDataFiles(partitionToPathMap.get(parts))); } } return result; } /** * copy table schema to destination directory */ public boolean copyTableSchema(String projectName, String tableName, File destDir, int limitDownloadRecordCount, char inputColumnSeperator) throws IOException, OdpsException { if (StringUtils.isBlank(projectName) || StringUtils.isBlank(tableName) || destDir == null) { return false; } TableInfo tableInfo = TableInfo.builder().projectName(projectName).tableName(tableName).build(); LOG.info("Start to copy table schema: " + tableInfo + "-->" + destDir.getAbsolutePath()); if (!existsTable(projectName, tableName)) { DownloadUtils.downloadTableSchemeAndData(getOdps(), tableInfo, limitDownloadRecordCount, inputColumnSeperator); } File tableDir = getTableDir(projectName, tableName); File schemaFile = new File(tableDir, Constants.SCHEMA_FILE); if (!schemaFile.exists()) { throw new FileNotFoundException( "Schema file of table " + projectName + "." + tableName + " not exists in warehouse."); } if (!destDir.exists()) { destDir.mkdirs(); } FileUtils.copyFileToDirectory(schemaFile, destDir); LOG.info("Finished copy table schema: " + tableInfo + "-->" + destDir.getAbsolutePath()); return true; } /** * copy table schema and partition data from warehouse to target directory , * if partition not exists will download the partition data and table schema * from remote server * * @param projectName * @param tableName * @param partSpec * if null will copy all partitions * @param readCols * if null will copy all columns * @param destDir * @return * @throws IOException * @throws OdpsException */ public boolean copyTable(String projectName, String tableName, PartitionSpec partSpec, String[] readCols, File destDir, int limitDownloadRecordCount, char inputColumnSeperator) { if (StringUtils.isBlank(projectName) || StringUtils.isBlank(tableName) || destDir == null) { return false; } TableInfo tableInfo = TableInfo.builder().projectName(projectName).tableName(tableName).partSpec(partSpec) .build(); LOG.info("Start to copy table: " + tableInfo + "-->" + destDir.getAbsolutePath()); boolean hasPartition = false; if (partSpec != null && !partSpec.isEmpty()) { hasPartition = true; } // if not exist table, then download from odps server if (hasPartition && !existsPartition(projectName, tableName, partSpec)) { DownloadUtils.downloadTableSchemeAndData(getOdps(), tableInfo, limitDownloadRecordCount, inputColumnSeperator); } else if (!existsTable(projectName, tableName)) { DownloadUtils.downloadTableSchemeAndData(getOdps(), tableInfo, limitDownloadRecordCount, inputColumnSeperator); } File whTableDir = getTableDir(projectName, tableName); // copy schema file File schemaFile = new File(whTableDir, Constants.SCHEMA_FILE); if (!schemaFile.exists()) { throw new RuntimeException( "Schema file of table " + projectName + "." + tableName + " not exists in warehouse."); } if (!destDir.exists()) { destDir.mkdirs(); } // copy table schema file try { FileUtils.copyFileToDirectory(schemaFile, destDir); } catch (IOException e) { throw new RuntimeException("Copy schema file of table " + tableInfo + " failed!" + e.getMessage()); } // copy partition data files TableMeta tableMeta = getTableMeta(projectName, tableName); List<Integer> indexes = LocalRunUtils.genReadColsIndexes(tableMeta, readCols); if (hasPartition) { final Collection<File> dataFiles = FileUtils.listFiles(whTableDir, HiddenFileFilter.VISIBLE, HiddenFileFilter.VISIBLE); for (File dataFile : dataFiles) { if (dataFile.getName().equals(Constants.SCHEMA_FILE)) { continue; } String parentDir = dataFile.getParentFile().getAbsolutePath(); String partPath = parentDir.substring(whTableDir.getAbsolutePath().length(), parentDir.length()); PartitionSpec ps = PartitionUtils.convert(partPath); if (PartitionUtils.isEqual(ps, partSpec)) { File destPartitionDir = new File(destDir, PartitionUtils.toString(ps)); destPartitionDir.mkdirs(); try { copyDataFiles(dataFile.getParentFile(), indexes, destPartitionDir, inputColumnSeperator); } catch (IOException e) { throw new RuntimeException( "Copy data file of table " + tableInfo + " failed!" + e.getMessage()); } } } } else { try { copyDataFiles(whTableDir, indexes, destDir, inputColumnSeperator); } catch (IOException e) { throw new RuntimeException("Copy data file of table " + tableInfo + " failed!" + e.getMessage()); } } LOG.info("Finished copy table: " + tableInfo + "-->" + destDir.getAbsolutePath()); return true; } /** * copy resource from warehouse/__resources__/ to temp/resource/ * * @param projName * @param resourceName * @param resourceRootDir * @param limitDownloadRecordCount * @param inputColumnSeperator * @throws IOException * @throws OdpsException */ public void copyResource(String projName, String resourceName, File resourceRootDir, int limitDownloadRecordCount, char inputColumnSeperator) throws IOException, OdpsException { if (StringUtils.isBlank(projName) || StringUtils.isBlank(resourceName) || resourceRootDir == null) { return; } if (!resourceRootDir.exists()) { resourceRootDir.mkdirs(); } LOG.info("Start to copy resource: " + projName + "." + resourceName + "-->" + resourceRootDir.getAbsolutePath()); if (!existsResource(projName, resourceName)) { DownloadUtils.downloadResource(getOdps(), projName, resourceName, limitDownloadRecordCount, inputColumnSeperator); } File file = getReourceFile(projName, resourceName); // table resource if (file.isDirectory()) { File tableResourceDir = new File(resourceRootDir, resourceName); TableInfo refTableInfo = getReferencedTable(projName, resourceName); LinkedHashMap<String, String> partitions = refTableInfo.getPartSpec(); if (partitions != null && partitions.size() > 0) { PartitionSpec partSpec = new PartitionSpec(); for (String key : partitions.keySet()) { partSpec.set(key, partitions.get(key)); } copyTable(refTableInfo.getProjectName(), refTableInfo.getTableName(), partSpec, null, tableResourceDir, limitDownloadRecordCount, inputColumnSeperator); } else { copyTable(refTableInfo.getProjectName(), refTableInfo.getTableName(), null, null, tableResourceDir, limitDownloadRecordCount, inputColumnSeperator); } } else { // not table resource if (!existsResource(projName, resourceName)) { DownloadUtils.downloadResource(getOdps(), projName, resourceName, limitDownloadRecordCount, inputColumnSeperator); } FileUtils.copyFileToDirectory(file, resourceRootDir); } LOG.info("Finished copy resource: " + projName + "." + resourceName + "-->" + resourceRootDir.getAbsolutePath()); } /** * copy output data files from job directory to warehouse * * @param srcDir * @param indexes * @param destDir * @throws IOException */ public void copyDataFiles(File srcDir, List<Integer> indexes, File destDir, char inputColumnSeperator) throws IOException { if (indexes == null || indexes.isEmpty()) { for (File file : LocalRunUtils.listDataFiles(srcDir)) { FileUtils.copyFileToDirectory(file, destDir); } } else { for (File file : LocalRunUtils.listDataFiles(srcDir)) { CsvReader reader = DownloadUtils.newCsvReader(file.getAbsolutePath(), inputColumnSeperator, encoding); CsvWriter writer = new CsvWriter(new File(destDir, file.getName()).getAbsolutePath(), inputColumnSeperator, encoding); while (reader.readRecord()) { String[] vals = reader.getValues(); String[] newVals = new String[indexes.size()]; for (int i = 0; i < indexes.size(); ++i) { newVals[i] = vals[indexes.get(i)]; } writer.writeRecord(newVals); } writer.close(); reader.close(); } } } public File createPartitionDir(String projName, String tblName, PartitionSpec partSpec) { File tableDir = getTableDir(projName, tblName); if (!tableDir.exists()) { tableDir.mkdirs(); } if (partSpec != null && !partSpec.isEmpty()) { File partitionDir = new File(tableDir, PartitionUtils.toString(partSpec)); if (!partitionDir.exists()) { partitionDir.mkdirs(); } return partitionDir; } else { return tableDir; } } public boolean createTableReourceFile(String projName, String resourceName, TableInfo refTableInfo) { StringBuffer sb = new StringBuffer(); sb.append(refTableInfo.getProjectName()); sb.append("."); sb.append(refTableInfo.getTableName()); String partitions = refTableInfo.getPartPath(); if (partitions != null && !partitions.trim().isEmpty()) { sb.append("("); String partStr = partitions.replaceAll("/", ","); if (partStr.endsWith(",")) { partStr = partStr.substring(0, partStr.length() - 1); } sb.append(partStr); sb.append(")"); } File tableResourceDir = getReourceFile(projName, resourceName); if (!tableResourceDir.exists()) { tableResourceDir.mkdirs(); } PrintWriter pw = null; try { pw = new PrintWriter(new File(tableResourceDir, "__ref__")); pw.println(sb.toString()); return true; } catch (FileNotFoundException e) { } finally { if (pw != null) { pw.close(); } } return false; } public TableMeta getResourceSchema(String projName, String resourceName) throws IOException { File dir = getReourceFile(projName, resourceName); if (SchemaUtils.existsSchemaFile(dir)) { return SchemaUtils.readSchema(dir); } return null; } public TableMeta getTableMeta(String projName, String tblName) { if (StringUtils.isBlank(projName) || StringUtils.isBlank(tblName)) { return null; } File dir = getTableDir(projName, tblName); TableMeta meta = SchemaUtils.readSchema(dir); if (meta.getProjName() != null && !meta.getProjName().equals(projName)) { throw new RuntimeException( "Invalid project name " + meta.getProjName() + " in file 'warehouse" + File.separator + projName + File.separator + tblName + File.separator + Constants.SCHEMA_FILE + "'"); } if (meta.getTableName() != null && !meta.getTableName().equals(tblName)) { throw new RuntimeException( "Invalid table name " + meta.getProjName() + " in file 'warehouse" + File.separator + projName + File.separator + tblName + File.separator + Constants.SCHEMA_FILE + "'"); } return meta; } public Map<PartitionSpec, File> getPartitionToPathMap(String projName, String tblName) { File tableDir = getTableDir(projName, tblName); TableMeta tableMeta = SchemaUtils.readSchema(tableDir); Map<PartitionSpec, File> result = new HashMap<PartitionSpec, File>(); File dir = getTableDir(projName, tblName); final Collection<File> dataFiles = FileUtils.listFiles(dir, HiddenFileFilter.VISIBLE, HiddenFileFilter.VISIBLE); List<File> emptyPatitions = LocalRunUtils.listEmptyDirectory(dir); dataFiles.addAll(emptyPatitions); for (File dataFile : dataFiles) { if (dataFile.getName().equals(Constants.SCHEMA_FILE)) { continue; } String partPath = null; if (dataFile.isFile()) { String parentDir = dataFile.getParentFile().getAbsolutePath(); partPath = parentDir.substring(dir.getAbsolutePath().length(), parentDir.length()); } else { // empty partition directory String parentDir = dataFile.getAbsolutePath(); partPath = parentDir.substring(dir.getAbsolutePath().length(), parentDir.length()); } try { if (partPath.length() > 0) { PartitionSpec ps = PartitionUtils.convert(partPath); if (PartitionUtils.valid(tableMeta.getPartitions(), ps)) { result.put(ps, dataFile.getParentFile()); } } } catch (Exception ex) { // LOG.warn("ignore dir: " + parentDir + ", " + ex.getMessage()); } } return result; } public List<PartitionSpec> getPartitions(String projName, String tblName) { Map<PartitionSpec, File> partitionToPathMap = getPartitionToPathMap(projName, tblName); List<PartitionSpec> result = new ArrayList<PartitionSpec>(); for (PartitionSpec key : partitionToPathMap.keySet()) { result.add(key); } return result; } public List<String> getProjectNames() { File warehouseDir = getWarehouseDir(); if (!warehouseDir.exists()) { return null; } List<String> result = new ArrayList<String>(); File[] projects = warehouseDir.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { return pathname.isDirectory() && !pathname.isHidden(); } }); for (File p : projects) { if (p.isDirectory()) { result.add(p.getName()); } } return result; } public List<TableMeta> getTableMetas(String projName) throws IOException { File projectDir = getProjectDir(projName); if (!projectDir.exists()) { return null; } List<TableMeta> result = new ArrayList<TableMeta>(); File[] tables = projectDir.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { return pathname.isDirectory() && !pathname.isHidden() && !pathname.getName().equals(Constants.RESOURCES_DIR); } }); // old version for (File t : tables) { if (!existsTable(projName, t.getName())) { continue; } TableMeta tableMeta = getTableMeta(projName, t.getName()); if (tableMeta != null) { result.add(tableMeta); } } // new version >=0.14 File tableBaseDir = new File(projectDir, Constants.TABLES_DIR); if (!tableBaseDir.exists()) { return result; } tables = tableBaseDir.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { return pathname.isDirectory() && !pathname.isHidden(); } }); for (File t : tables) { if (!existsTable(projName, t.getName())) { continue; } TableMeta tableMeta = getTableMeta(projName, t.getName()); if (tableMeta != null) { result.add(tableMeta); } } return result; } public List<String> getTableNames(String projName) throws IOException { List<TableMeta> list = getTableMetas(projName); if (list == null || list.size() == 0) { return null; } List<String> result = new ArrayList<String>(); for (TableMeta tableMeta : list) { result.add(tableMeta.getTableName()); } return result; } /* * get referenced table of table resource */ public TableInfo getReferencedTable(String projName, String resourceName) { File file = getTableReourceFile(projName, resourceName); BufferedReader br = null; try { br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); String line = br.readLine(); while (line != null && (line.trim().isEmpty() || line.startsWith("#"))) { line = br.readLine(); } if (line == null || line.trim().isEmpty()) { return null; } String project; int index = line.indexOf("."); if (index == -1) { project = projName; } else { project = line.substring(0, index); line = line.substring(index + 1); } String table; index = line.indexOf("("); if (index == -1) { table = line; line = null; } else { table = line.substring(0, index); line = line.substring(index + 1, line.length() - 1); } String partitions = null; if (line != null) { partitions = ""; String[] parts = line.split(","); for (String item : parts) { if (!partitions.equals("")) { partitions += "/"; } partitions += item; } } TableInfo tableInfo; if (partitions == null) { tableInfo = TableInfo.builder().projectName(project).tableName(table).build(); } else { tableInfo = TableInfo.builder().projectName(project).tableName(table).partSpec(partitions).build(); } return tableInfo; } catch (IOException e) { } finally { if (br != null) { try { br.close(); } catch (IOException e) { } } } return null; } public boolean existsTable(String projName, String tblName) { return existsTableSchema(projName, tblName); } public boolean existsTableSchema(String projectName, String tableName) { File tableDir = getTableDir(projectName, tableName); if (!tableDir.exists()) { return false; } return new File(tableDir, Constants.SCHEMA_FILE).exists(); } public boolean existsPartition(String projectName, String tableName, PartitionSpec partSpec) { if (!existsTable(projectName, tableName)) { return false; } if (partSpec == null || partSpec.isEmpty()) { return true; } List<PartitionSpec> partionList = getPartitions(projectName, tableName); if (partionList == null || partionList.size() == 0) { return false; } for (PartitionSpec item : partionList) { if (PartitionUtils.match(partSpec, item)) { return true; } } return false; } public boolean existsResource(String projName, String resourceName) { return getReourceFile(projName, resourceName).exists(); } /** * will drop table directory,include table schema and data */ public void dropTableIfExists(String projName, String tblName) throws IOException { File tableDir = getTableDir(projName, tblName); if (tableDir != null && tableDir.exists() && tableDir.isDirectory()) { FileUtils.deleteDirectory(tableDir); } } /** * drop partition data if exists, will not drop table schema if partSpec is * null, will drop table data */ public void dropTableDataIfExists(String projName, String tblName, PartitionSpec partSpec) throws IOException { File partitionDir = getPartitionDir(projName, tblName, partSpec); if (partitionDir != null && partitionDir.exists() && partitionDir.isDirectory()) { LocalRunUtils.removeDataFiles(partitionDir); } } /** * if partSpec is null will check if table has no data file else will check * the partition has no data file */ public boolean isTableEmpty(String projName, String tblName, PartitionSpec partSpec) throws IOException { File partitionDir = getPartitionDir(projName, tblName, partSpec); if (partitionDir != null && partitionDir.exists() && partitionDir.isDirectory()) { List<File> dataFiles = new ArrayList<File>(); LocalRunUtils.listAllDataFiles(partitionDir, dataFiles); if (dataFiles == null || dataFiles.isEmpty()) { return true; } // empty data file for (File f : dataFiles) { if (f.length() > 0) { return false; } } } return true; } public void dropResourceIfExists(String projName, String resourceName) throws IOException { File resourceDir = getReourceFile(projName, resourceName); if (resourceDir == null || !resourceDir.exists()) { return; } if (resourceDir.isDirectory()) { FileUtils.deleteDirectory(resourceDir); } if (resourceDir.isFile()) { resourceDir.delete(); } } public boolean valid(String projName, String tblName, PartitionSpec partitionSpec, String[] readCols) throws OdpsException, IOException { if (StringUtils.isBlank(projName)) { throw new OdpsException("Project " + projName + " is null"); } if (StringUtils.isBlank(tblName)) { throw new OdpsException("Table Name is null"); } if (!existsTable(projName, tblName)) { throw new OdpsException("table " + projName + "." + tblName + " not exitsts"); } if (partitionSpec != null && !existsPartition(projName, tblName, partitionSpec)) { throw new OdpsException("table " + projName + "." + tblName + "(" + PartitionUtils.toString(partitionSpec) + ") not exitsts"); } if (readCols != null) { TableMeta tableMeta = getTableMeta(projName, tblName); int columnCount = tableMeta.getCols().length; for (int i = 0; i < readCols.length; i++) { boolean isFind = false; for (int j = 0; j < columnCount; j++) { if (tableMeta.getCols()[j].getName().equals(readCols[i])) { isFind = true; break; } } if (!isFind) { throw new OdpsException( "table " + projName + "." + tblName + " do not have column :" + readCols[i]); } } } return true; } public List<Object[]> readData(String projName, String tblName, PartitionSpec partitionSpec, String[] readCols, char inputColumnSeperator) throws OdpsException, IOException { List<File> dataFiles = getDataFiles(projName, tblName, partitionSpec, inputColumnSeperator); if (dataFiles == null || dataFiles.size() == 0) { return null; } File tableDir = getTableDir(projName, tblName); TableMeta tableMeta = SchemaUtils.readSchema(tableDir); List<Integer> indexes = LocalRunUtils.genReadColsIndexes(tableMeta, readCols); CsvReader reader; List<Object[]> result = new ArrayList<Object[]>(); for (File file : dataFiles) { reader = DownloadUtils.newCsvReader(file.getAbsolutePath(), inputColumnSeperator, encoding); while (reader.readRecord()) { String[] vals = reader.getValues(); Object[] newVals; if (indexes != null && !indexes.isEmpty()) { newVals = new Object[indexes.size()]; for (int i = 0; i < indexes.size(); ++i) { newVals[i] = LocalRunUtils.fromString(tableMeta.getCols()[indexes.get(i)].getType(), vals[indexes.get(i)], "\\N"); } } else { newVals = new Object[vals.length]; for (int i = 0; i < vals.length; i++) { newVals[i] = LocalRunUtils.fromString(tableMeta.getCols()[i].getType(), vals[i], "\\N"); } System.out.println(); } result.add(newVals); } reader.close(); } return result; } /** * when finished read should close inputstream */ public BufferedInputStream readResourceFileAsStream(String project, String resource, char inputColumnSeperator) throws IOException, OdpsException { if (!existsResource(project, resource)) { DownloadUtils.downloadResource(WareHouse.getInstance().getOdps(), getOdps().getDefaultProject(), resource, Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator); } if (!existsResource(project, resource)) { throw new OdpsException("File Resource " + project + "." + resource + " not exists"); } File file = getReourceFile(project, resource); if (!file.isFile()) { throw new OdpsException("Resource " + project + "." + resource + " is not a valid file Resource, because it is a direcotry"); } return new BufferedInputStream(new FileInputStream(file)); } public byte[] readResourceFile(String project, String resource, char inputColumnSeperator) throws IOException, OdpsException { if (!existsResource(project, resource)) { DownloadUtils.downloadResource(WareHouse.getInstance().getOdps(), getOdps().getDefaultProject(), resource, Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator); } File file = getReourceFile(project, resource); if (!file.isFile()) { throw new OdpsException("Resource " + project + "." + resource + " is not a valid file Resource, because it is a direcotry"); } FileInputStream in = new FileInputStream(file); ByteArrayOutputStream out = new ByteArrayOutputStream(1024); byte[] temp = new byte[1024]; int length; while ((length = in.read(temp)) != -1) { out.write(temp, 0, length); } in.close(); return out.toByteArray(); } public Iterator<Object[]> readResourceTable(String project, String resource, final char inputColumnSeperator) throws IOException, OdpsException { if (!existsResource(project, resource)) { DownloadUtils.downloadResource(WareHouse.getInstance().getOdps(), getOdps().getDefaultProject(), resource, Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator); } File tableResourceDir = getReourceFile(project, resource); if (!tableResourceDir.isDirectory()) { throw new OdpsException("Resource " + project + "." + resource + " is not a valid file Resource, because it is not a direcotry"); } // LOG.info("Reading resource table from " + // tableResourceDir.getAbsolutePath()); TableInfo tableInfo = getReferencedTable(project, resource); PartitionSpec partitionSpec = PartitionUtils.convert(tableInfo.getPartSpec()); final List<File> datafiles = getDataFiles(project, tableInfo.getTableName(), partitionSpec, inputColumnSeperator); final Column[] schema = SchemaUtils.readSchema(getTableDir(project, tableInfo.getTableName())).getCols(); return new Iterator<Object[]>() { CsvReader reader; Object[] current; boolean fetched; @Override public boolean hasNext() { if (fetched) { return current != null; } // Fetch new one try { fetch(); } catch (IOException e) { throw new RuntimeException(e); } return current != null; } private void fetch() throws IOException { // first time if (reader == null) { if (datafiles.isEmpty()) { current = null; fetched = true; return; } File f = datafiles.remove(0); reader = DownloadUtils.newCsvReader(f.getAbsolutePath(), inputColumnSeperator, encoding); reader.setSafetySwitch(false); current = read(); fetched = true; return; } current = read(); if (current == null && !datafiles.isEmpty()) { File f = datafiles.remove(0); reader = DownloadUtils.newCsvReader(f.getAbsolutePath(), inputColumnSeperator, encoding); reader.setSafetySwitch(false); current = read(); fetched = true; return; } fetched = true; } @Override public Object[] next() { if (!hasNext()) { throw new NoSuchElementException(); } fetched = false; return current; } @Override public void remove() { throw new UnsupportedOperationException(); } private Object[] read() throws IOException { Object[] result; if (!reader.readRecord()) { return null; } String[] vals = reader.getValues(); if (vals == null || vals.length == 0) { result = null; } else { result = new Object[vals.length]; for (int i = 0; i < vals.length; i++) { result[i] = LocalRunUtils.fromString(schema[i].getType(), vals[i], "\\N"); } } return result; } }; } // ////////////////Local Configuration//////////////////////////// public boolean isRetainTempData() { String tempRetain = getConfiguration().get(Constants.LOCAL_TEMP_RETAIN, "true"); return tempRetain.equalsIgnoreCase("false") ? false : true; } public String getJobDirStr() { return getConfiguration().get(Constants.LOCAL_TEMP_DIR, "temp"); } public JobDirecotry getJobDir() { return jobDirecotry; } public String getJobName() { return getConfiguration().get(Constants.JOB_NAME, ""); } public void setJobName(String jobname) { getConfiguration().set(Constants.JOB_NAME, jobname); } public char getInputColumnSeperator() { String seperator = getConfiguration().get(Constants.LOCAL_INPUT_COLUMN_SEPERATOR, ","); if (seperator.length() > 0) { char c = seperator.charAt(0); if (c == 's' || c == ' ') { return ' '; } else if (c == 't' || c == '\t') { return '\t'; } else if (c == ',' || c == '\t' || c == ';' || c == '|') { return c; } } return ','; } public char getOutputColumnSeperator() { String seperator = getConfiguration().get(Constants.LOCAL_OUTPUT_COLUMN_SEPERATOR, ","); if (seperator.length() > 0) { char c = seperator.charAt(0); if (c == 's' || c == ' ') { return ' '; } else if (c == 't' || c == '\t') { return '\t'; } else if (c == ',' || c == '\t' || c == ';' || c == '|') { return c; } } return ','; } public int getLimitDownloadRecordCount() { int limit = getConfiguration().getInt(Constants.LOCAL_RECORD_LIMIT, Constants.DEFAULT_DOWNLOAD_RECORD); if (limit > Constants.LIMIT_DOWNLOAD_RECORD) { return Constants.LIMIT_DOWNLOAD_RECORD; } return limit > 0 ? limit : Constants.DEFAULT_DOWNLOAD_RECORD; } public DownloadMode getDownloadMode() { String downloadModeStr = getConfiguration().get(Constants.LOCAL_DOWNLOAD_MODE, DownloadMode.AUTO.toString()) .toUpperCase(); DownloadMode downloadMode = DownloadMode.AUTO; try { downloadMode = DownloadMode.valueOf(downloadModeStr); } catch (Exception exception) { } return downloadMode; } public boolean caintainsKey(String key) { Iterator<Entry<String, String>> it = getConfiguration().iterator(); while (it.hasNext()) { Entry<String, String> entry = it.next(); if (entry.getKey().equals(key)) { return true; } } return false; } public void setOdps(Odps odps) { if (odps != null) { odpsThreadLocal.remove(); odpsThreadLocal.set(odps); } } public Odps getOdps() { return odpsThreadLocal.get(); } public void setConfiguration(Configuration conf) { if (conf != null) { confThreadLocal.remove(); confThreadLocal.set(conf); } } public Configuration getConfiguration() { Configuration conf = confThreadLocal.get(); if (conf == null) { conf = new Configuration(); confThreadLocal.set(conf); } return conf; } }