com.aliyun.odps.local.common.WareHouse.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.local.common.WareHouse.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.local.common;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.HiddenFileFilter;
import com.aliyun.odps.utils.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.conf.Configuration;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.local.common.utils.DownloadUtils;
import com.aliyun.odps.local.common.utils.LocalRunUtils;
import com.aliyun.odps.local.common.utils.PartitionUtils;
import com.aliyun.odps.local.common.utils.SchemaUtils;
import com.csvreader.CsvReader;
import com.csvreader.CsvWriter;

public class WareHouse {

    // Class-wide logger.
    private static final Log LOG = LogFactory.getLog(WareHouse.class);
    // UTF-8 charset; passed to the CSV reader/writer created in copyDataFiles.
    public static final Charset encoding = Charset.forName("UTF-8");

    // Root directory of the local warehouse; assigned in the constructor.
    private File warehouseDir;

    // Singleton instance, created on first call to getInstance().
    private static volatile WareHouse wareHouse;

    // Per-thread holder for the Odps client.
    private ThreadLocal<Odps> odpsThreadLocal = new ThreadLocal<Odps>();

    // Per-thread holder for the Configuration.
    private ThreadLocal<Configuration> confThreadLocal = new ThreadLocal<Configuration>();

    // Job directory helper; instantiated in the constructor.
    private JobDirecotry jobDirecotry;

    /**
     * Private constructor (instances are obtained through
     * {@link #getInstance()}). Resolves the warehouse root from
     * {@code Constants.WAREHOUSE_HOUSE_DIR}, creates it when it does not exist
     * yet, and instantiates the job directory helper.
     */
    private WareHouse() {
        File root = new File(Constants.WAREHOUSE_HOUSE_DIR);
        if (!root.exists()) {
            root.mkdirs();
        }
        this.warehouseDir = root;
        this.jobDirecotry = new JobDirecotry();
    }

    /**
     * Returns the shared {@code WareHouse} instance, creating it on the first
     * call. The method is synchronized, so only one instance is ever built.
     *
     * @return the singleton instance
     */
    public static synchronized WareHouse getInstance() {
        WareHouse current = wareHouse;
        if (current != null) {
            return current;
        }
        current = new WareHouse();
        wareHouse = current;
        return current;
    }

    /**
     * Stores the given ODPS client and configuration on the singleton
     * instance.
     *
     * @param odps
     *     client handed to {@code setOdps}
     * @param conf
     *     configuration handed to {@code setConfiguration}
     */
    public static void init(Odps odps, Configuration conf) {
        WareHouse instance = getInstance();
        instance.setOdps(odps);
        instance.setConfiguration(conf);
    }

    /**
     * @return the root directory of the local warehouse
     */
    public File getWarehouseDir() {
        return this.warehouseDir;
    }

    /**
     * Builds the path {@code warehouse/<projName>}. The directory is not
     * created or checked for existence.
     *
     * @param projName
     *     project name, used as the directory name
     * @return the project directory handle
     */
    public File getProjectDir(String projName) {
        File projectDir = new File(this.warehouseDir, projName);
        return projectDir;
    }

    /**
     * Builds the path of the resources directory of a project:
     * {@code warehouse/<projName>/<Constants.RESOURCES_DIR>}.
     *
     * @param projName
     *     project name
     * @return the resources directory handle (not checked for existence)
     */
    public File getResourceDir(String projName) {
        File projectDir = getProjectDir(projName);
        return new File(projectDir, Constants.RESOURCES_DIR);
    }

    /**
     * Locates the directory of a table. The current layout
     * ({@code <project>/<Constants.TABLES_DIR>/<table>}) is preferred; when it
     * does not exist but the older layout ({@code <project>/<table>}) does,
     * the older directory is returned instead.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @return the table directory; it is the current-layout path (possibly
     *     non-existent) when neither layout exists on disk
     */
    public File getTableDir(String projName, String tblName) {
        File projectDir = getProjectDir(projName);
        File currentLayoutDir = new File(projectDir, Constants.TABLES_DIR + File.separator + tblName);
        if (currentLayoutDir.exists()) {
            return currentLayoutDir;
        }
        // Fall back to the older warehouse layout: table directly under the project.
        File legacyLayoutDir = new File(projectDir, tblName);
        return legacyLayoutDir.exists() ? legacyLayoutDir : currentLayoutDir;
    }

    /**
     * Finds the directory that holds the given partition.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @param partSpec
     *     partition to look up; when null or empty the table directory is
     *     returned
     * @return the partition directory, or null when no known partition is
     *     equal to {@code partSpec}
     */
    public File getPartitionDir(String projName, String tblName, PartitionSpec partSpec) {
        boolean wholeTable = partSpec == null || partSpec.isEmpty();
        if (wholeTable) {
            return getTableDir(projName, tblName);
        }
        for (Entry<PartitionSpec, File> candidate : getPartitionToPathMap(projName, tblName).entrySet()) {
            if (PartitionUtils.isEqual(candidate.getKey(), partSpec)) {
                return candidate.getValue();
            }
        }
        return null;
    }

    /**
     * Resolves which partition of a warehouse table a file or directory
     * belongs to, by comparing its absolute path against the directories of
     * the table's known partitions.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @param dataFile
     *     a file or directory inside the warehouse table
     * @return the partition whose directory contains (or is) {@code dataFile};
     *     null when an argument is blank/null, the file does not exist, or no
     *     partition directory contains it
     */
    public PartitionSpec resolvePartition(String projName, String tblName, File dataFile) {
        if (StringUtils.isBlank(projName) || StringUtils.isBlank(tblName) || dataFile == null
                || !dataFile.exists()) {
            return null;
        }
        Map<PartitionSpec, File> partitionToPathMap = getPartitionToPathMap(projName, tblName);

        // Normalise to '/' separators with a trailing '/' on both sides, so the
        // prefix test only matches whole path segments ("p=1/" must not match
        // a file under "p=10/").
        String dataFilePath = dataFile.getAbsolutePath().replace('\\', '/');
        if (!dataFilePath.endsWith("/")) {
            dataFilePath = dataFilePath + "/";
        }
        for (Entry<PartitionSpec, File> entry : partitionToPathMap.entrySet()) {
            String partitionPath = entry.getValue().getAbsolutePath().replace('\\', '/');
            if (!partitionPath.endsWith("/")) {
                partitionPath = partitionPath + "/";
            }
            if (dataFilePath.startsWith(partitionPath)) {
                return entry.getKey();
            }
        }
        return null;
    }

    /**
     * Builds the path of a resource:
     * {@code warehouse/<projName>/<Constants.RESOURCES_DIR>/<resourceName>}.
     *
     * @param projName
     *     project name
     * @param resourceName
     *     resource name
     * @return the resource path handle (not checked for existence)
     */
    public File getReourceFile(String projName, String resourceName) {
        String relativePath = projName + File.separator + Constants.RESOURCES_DIR + File.separator + resourceName;
        return new File(warehouseDir, relativePath);
    }

    /**
     * Returns the first entry listed inside the directory of a table
     * resource.
     * <p>
     * NOTE(review): fails with a runtime exception when the resource path is
     * not a directory or the directory is empty, and the listing order of
     * {@link File#listFiles()} is not guaranteed -- callers appear to rely on
     * the directory holding exactly one file; confirm.
     *
     * @param projName
     *     project name
     * @param resourceName
     *     table resource name
     * @return the first listed child of the resource directory
     */
    public File getTableReourceFile(String projName, String resourceName) {
        File[] children = getReourceFile(projName, resourceName).listFiles();
        return children[0];
    }

    /**
     * Returns the schema file of a table in the warehouse.
     *
     * @param projectName
     *     project name
     * @param tableName
     *     table name
     * @return the {@code Constants.SCHEMA_FILE} inside the table directory
     * @throws FileNotFoundException
     *     when the table has no schema file in the warehouse
     */
    public File getTableSchemeFile(String projectName, String tableName) throws FileNotFoundException {
        if (existsTableSchema(projectName, tableName)) {
            return new File(getTableDir(projectName, tableName), Constants.SCHEMA_FILE);
        }
        throw new FileNotFoundException(
                "Table Directory :" + projectName + "." + tableName + " Not exists in warehouse!");
    }

    /**
     * Builds {@code <projName>/<tblName>/} (using the platform separator),
     * followed by the string form of the partition when one is given.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @param partSpec
     *     optional partition; nothing is appended when null
     * @param flag
     *     not used by this method
     * @return the relative path
     */
    public String getRelativePath(String projName, String tblName, PartitionSpec partSpec, Object... flag) {
        StringBuilder path = new StringBuilder();
        path.append(projName).append(File.separator).append(tblName).append(File.separator);
        if (partSpec != null) {
            path.append(PartitionUtils.toString(partSpec));
        }
        return path.toString();
    }

    /**
     * Lists the data files of a table, or of the partitions matching
     * {@code pattern}. When the requested partition (or, without a pattern,
     * the table) is not in the local warehouse it is downloaded first, limited
     * to {@code Constants.DEFAULT_DOWNLOAD_RECORD} records.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @param pattern
     *     partition pattern; null selects every partition of a partitioned
     *     table
     * @param inputColumnSeperator
     *     column separator handed to the download helper
     * @return the matching data files
     * @throws IOException
     *     declared for I/O failures of the underlying helpers
     * @throws OdpsException
     *     when a non-empty pattern is given for a table without partitions
     */
    public List<File> getDataFiles(String projName, String tblName, PartitionSpec pattern,
            char inputColumnSeperator) throws IOException, OdpsException {

        boolean hasPattern = pattern != null && !pattern.isEmpty();

        // Work out what, if anything, has to be downloaded first.
        TableInfo toDownload = null;
        if (hasPattern && !existsPartition(projName, tblName, pattern)) {
            LinkedHashMap<String, String> part = PartitionUtils.convert(pattern);
            toDownload = TableInfo.builder().projectName(projName).tableName(tblName).partSpec(part)
                    .build();
        } else if (!existsTable(projName, tblName)) {
            toDownload = TableInfo.builder().projectName(projName).tableName(tblName).build();
        }
        if (toDownload != null) {
            DownloadUtils.downloadTableSchemeAndData(WareHouse.getInstance().getOdps(), toDownload,
                    Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator);
        }

        File tableDir = getTableDir(projName, tblName);
        TableMeta tableMeta = SchemaUtils.readSchema(tableDir);
        boolean partitioned = tableMeta.getPartitions() != null && tableMeta.getPartitions().length > 0;

        // Plain table: a partition pattern makes no sense here.
        if (!partitioned) {
            if (hasPattern) {
                throw new OdpsException("Table " + projName + "." + tblName + " is not a partition table");
            }
            return LocalRunUtils.listDataFiles(tableDir);
        }

        List<File> collected = new ArrayList<File>();

        // No pattern: every data file of every partition.
        if (pattern == null) {
            LocalRunUtils.listAllDataFiles(tableDir, collected);
            return collected;
        }

        // Otherwise only the partitions the pattern matches.
        Map<PartitionSpec, File> partitionDirs = WareHouse.getInstance().getPartitionToPathMap(projName,
                tblName);
        for (Entry<PartitionSpec, File> partition : partitionDirs.entrySet()) {
            if (PartitionUtils.match(pattern, partition.getKey())) {
                collected.addAll(LocalRunUtils.listDataFiles(partition.getValue()));
            }
        }
        return collected;
    }

    /**
     * Copies the schema file of a table into {@code destDir}, downloading the
     * table first when it is not present in the local warehouse.
     *
     * @param projectName
     *     project name
     * @param tableName
     *     table name
     * @param destDir
     *     target directory; created when missing
     * @param limitDownloadRecordCount
     *     record limit handed to the download helper
     * @param inputColumnSeperator
     *     column separator handed to the download helper
     * @return false when an argument is blank/null, true after a successful
     *     copy
     * @throws IOException
     *     when the schema file is missing ({@link FileNotFoundException}) or
     *     cannot be copied
     * @throws OdpsException
     *     declared for failures of the underlying helpers
     */
    public boolean copyTableSchema(String projectName, String tableName, File destDir, int limitDownloadRecordCount,
            char inputColumnSeperator) throws IOException, OdpsException {
        boolean missingArgument = StringUtils.isBlank(projectName) || StringUtils.isBlank(tableName)
                || destDir == null;
        if (missingArgument) {
            return false;
        }

        TableInfo tableInfo = TableInfo.builder().projectName(projectName).tableName(tableName).build();
        String transfer = tableInfo + "-->" + destDir.getAbsolutePath();
        LOG.info("Start to copy table schema: " + transfer);

        if (!existsTable(projectName, tableName)) {
            DownloadUtils.downloadTableSchemeAndData(getOdps(), tableInfo, limitDownloadRecordCount,
                    inputColumnSeperator);
        }

        File schemaFile = new File(getTableDir(projectName, tableName), Constants.SCHEMA_FILE);
        if (!schemaFile.exists()) {
            throw new FileNotFoundException(
                    "Schema file of table " + projectName + "." + tableName + " not exists in warehouse.");
        }

        if (!destDir.exists()) {
            destDir.mkdirs();
        }
        FileUtils.copyFileToDirectory(schemaFile, destDir);

        LOG.info("Finished copy table schema: " + transfer);
        return true;
    }

    /**
     * Copies the schema file and data files of a table (or of one partition)
     * from the warehouse into {@code destDir}. When the requested partition
     * or table is not present locally it is downloaded from the ODPS server
     * first.
     *
     * @param projectName
     *     project name
     * @param tableName
     *     table name
     * @param partSpec
     *     partition to copy; when null or empty the data files of the table
     *     directory are copied
     * @param readCols
     *     columns to keep; passed to {@code LocalRunUtils.genReadColsIndexes}
     * @param destDir
     *     target directory; created when missing
     * @param limitDownloadRecordCount
     *     record limit handed to the download helper
     * @param inputColumnSeperator
     *     column separator of the data files
     * @return false when an argument is blank/null, true after a successful
     *     copy
     * @throws RuntimeException
     *     when the schema file is missing, or when copying the schema or a
     *     data file fails (the {@link IOException} is attached as the cause)
     */
    public boolean copyTable(String projectName, String tableName, PartitionSpec partSpec, String[] readCols,
            File destDir, int limitDownloadRecordCount, char inputColumnSeperator) {

        if (StringUtils.isBlank(projectName) || StringUtils.isBlank(tableName) || destDir == null) {
            return false;
        }
        TableInfo tableInfo = TableInfo.builder().projectName(projectName).tableName(tableName).partSpec(partSpec)
                .build();

        LOG.info("Start to copy table: " + tableInfo + "-->" + destDir.getAbsolutePath());

        boolean hasPartition = partSpec != null && !partSpec.isEmpty();

        // Download from the ODPS server when the requested partition or the
        // table itself is missing locally.
        if ((hasPartition && !existsPartition(projectName, tableName, partSpec))
                || !existsTable(projectName, tableName)) {
            DownloadUtils.downloadTableSchemeAndData(getOdps(), tableInfo, limitDownloadRecordCount,
                    inputColumnSeperator);
        }

        File whTableDir = getTableDir(projectName, tableName);

        File schemaFile = new File(whTableDir, Constants.SCHEMA_FILE);
        if (!schemaFile.exists()) {
            throw new RuntimeException(
                    "Schema file of table " + projectName + "." + tableName + " not exists in warehouse.");
        }

        if (!destDir.exists()) {
            destDir.mkdirs();
        }

        // copy table schema file
        try {
            FileUtils.copyFileToDirectory(schemaFile, destDir);
        } catch (IOException e) {
            throw new RuntimeException("Copy schema file of table " + tableInfo + " failed!" + e.getMessage(), e);
        }

        // copy data files, keeping only the requested columns
        TableMeta tableMeta = getTableMeta(projectName, tableName);
        List<Integer> indexes = LocalRunUtils.genReadColsIndexes(tableMeta, readCols);

        if (hasPartition) {
            final Collection<File> dataFiles = FileUtils.listFiles(whTableDir, HiddenFileFilter.VISIBLE,
                    HiddenFileFilter.VISIBLE);
            int tablePathLength = whTableDir.getAbsolutePath().length();
            // copyDataFiles copies a whole directory, so each directory is
            // handled once even when it contains several data files.
            Set<File> visitedDirs = new HashSet<File>();
            for (File dataFile : dataFiles) {
                if (dataFile.getName().equals(Constants.SCHEMA_FILE)) {
                    continue;
                }
                File partitionDir = dataFile.getParentFile();
                if (!visitedDirs.add(partitionDir)) {
                    continue;
                }

                String partPath = partitionDir.getAbsolutePath().substring(tablePathLength);
                PartitionSpec ps = PartitionUtils.convert(partPath);
                if (!PartitionUtils.isEqual(ps, partSpec)) {
                    continue;
                }

                File destPartitionDir = new File(destDir, PartitionUtils.toString(ps));
                destPartitionDir.mkdirs();
                try {
                    copyDataFiles(partitionDir, indexes, destPartitionDir, inputColumnSeperator);
                } catch (IOException e) {
                    throw new RuntimeException(
                            "Copy data file of table " + tableInfo + " failed!" + e.getMessage(), e);
                }
            }
        } else {
            try {
                copyDataFiles(whTableDir, indexes, destDir, inputColumnSeperator);
            } catch (IOException e) {
                throw new RuntimeException("Copy data file of table " + tableInfo + " failed!" + e.getMessage(), e);
            }
        }

        LOG.info("Finished copy table: " + tableInfo + "-->" + destDir.getAbsolutePath());

        return true;
    }

    /**
     * Copies a resource from the warehouse resources directory into
     * {@code resourceRootDir}, downloading it first when it is not present
     * locally. A resource stored as a directory is treated as a table
     * resource: the table (or partition) it references is copied into
     * {@code resourceRootDir/<resourceName>}. Any other resource is copied as
     * a plain file.
     *
     * @param projName
     *     project name; nothing happens when blank
     * @param resourceName
     *     resource name; nothing happens when blank
     * @param resourceRootDir
     *     target directory, created when missing; nothing happens when null
     * @param limitDownloadRecordCount
     *     record limit handed to the download helpers
     * @param inputColumnSeperator
     *     column separator handed to the download helpers
     * @throws IOException
     *     when the file copy fails, or when the table referenced by a table
     *     resource cannot be determined
     * @throws OdpsException
     *     declared for failures of the underlying helpers
     */
    public void copyResource(String projName, String resourceName, File resourceRootDir,
            int limitDownloadRecordCount, char inputColumnSeperator) throws IOException, OdpsException {
        if (StringUtils.isBlank(projName) || StringUtils.isBlank(resourceName) || resourceRootDir == null) {
            return;
        }

        if (!resourceRootDir.exists()) {
            resourceRootDir.mkdirs();
        }

        LOG.info("Start to copy resource: " + projName + "." + resourceName + "-->"
                + resourceRootDir.getAbsolutePath());

        if (!existsResource(projName, resourceName)) {
            DownloadUtils.downloadResource(getOdps(), projName, resourceName, limitDownloadRecordCount,
                    inputColumnSeperator);
        }

        File file = getReourceFile(projName, resourceName);

        if (file.isDirectory()) {
            // table resource
            File tableResourceDir = new File(resourceRootDir, resourceName);

            TableInfo refTableInfo = getReferencedTable(projName, resourceName);
            if (refTableInfo == null) {
                // getReferencedTable returns null for an empty or unreadable
                // reference file; fail with a clear message instead of an NPE.
                throw new IOException("Can not resolve the table referenced by table resource " + projName + "."
                        + resourceName);
            }

            PartitionSpec partSpec = null;
            LinkedHashMap<String, String> partitions = refTableInfo.getPartSpec();
            if (partitions != null && partitions.size() > 0) {
                partSpec = new PartitionSpec();
                for (Entry<String, String> partition : partitions.entrySet()) {
                    partSpec.set(partition.getKey(), partition.getValue());
                }
            }
            copyTable(refTableInfo.getProjectName(), refTableInfo.getTableName(), partSpec, null,
                    tableResourceDir, limitDownloadRecordCount, inputColumnSeperator);

        } else {
            // not table resource; try the download once more when the file is
            // still missing after the attempt above
            if (!existsResource(projName, resourceName)) {
                DownloadUtils.downloadResource(getOdps(), projName, resourceName, limitDownloadRecordCount,
                        inputColumnSeperator);
            }
            FileUtils.copyFileToDirectory(file, resourceRootDir);
        }

        LOG.info("Finished copy resource: " + projName + "." + resourceName + "-->"
                + resourceRootDir.getAbsolutePath());
    }

    /**
     * Copies the data files of {@code srcDir} into {@code destDir}. Without
     * column indexes the files are copied unchanged; otherwise every file is
     * rewritten as CSV keeping only the selected columns, in the order given
     * by {@code indexes}.
     *
     * @param srcDir
     *     directory whose data files (as listed by
     *     {@code LocalRunUtils.listDataFiles}) are copied
     * @param indexes
     *     zero-based column indexes to keep; null or empty copies whole files
     * @param destDir
     *     target directory
     * @param inputColumnSeperator
     *     column separator used for both reading and writing
     * @throws IOException
     *     when reading or writing a file fails
     */
    public void copyDataFiles(File srcDir, List<Integer> indexes, File destDir, char inputColumnSeperator)
            throws IOException {
        boolean copyWholeFiles = indexes == null || indexes.isEmpty();
        for (File file : LocalRunUtils.listDataFiles(srcDir)) {
            if (copyWholeFiles) {
                FileUtils.copyFileToDirectory(file, destDir);
                continue;
            }

            CsvReader reader = DownloadUtils.newCsvReader(file.getAbsolutePath(), inputColumnSeperator,
                    encoding);
            try {
                CsvWriter writer = new CsvWriter(new File(destDir, file.getName()).getAbsolutePath(),
                        inputColumnSeperator, encoding);
                try {
                    while (reader.readRecord()) {
                        String[] vals = reader.getValues();
                        String[] newVals = new String[indexes.size()];
                        for (int i = 0; i < newVals.length; ++i) {
                            newVals[i] = vals[indexes.get(i)];
                        }
                        writer.writeRecord(newVals);
                    }
                } finally {
                    // close even when a record fails, so the file handle is released
                    writer.close();
                }
            } finally {
                reader.close();
            }
        }
    }

    /**
     * Makes sure the table directory and, when a partition is given, the
     * partition directory below it exist.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @param partSpec
     *     partition; when null or empty only the table directory is ensured
     * @return the partition directory, or the table directory when no
     *     partition was given
     */
    public File createPartitionDir(String projName, String tblName, PartitionSpec partSpec) {
        File tableDir = getTableDir(projName, tblName);
        if (!tableDir.exists()) {
            tableDir.mkdirs();
        }
        if (partSpec == null || partSpec.isEmpty()) {
            return tableDir;
        }
        File partitionDir = new File(tableDir, PartitionUtils.toString(partSpec));
        if (!partitionDir.exists()) {
            partitionDir.mkdirs();
        }
        return partitionDir;
    }

    /**
     * Creates the reference file ({@code __ref__}) of a table resource. The
     * file holds a single line of the form {@code project.table} or
     * {@code project.table(p1=v1,p2=v2)}, built from {@code refTableInfo}.
     *
     * @param projName
     *     project owning the resource
     * @param resourceName
     *     resource name; its directory is created when missing
     * @param refTableInfo
     *     table (and optional partition) the resource refers to
     * @return true when the reference file was written, false when it could
     *     not be opened for writing
     */
    public boolean createTableReourceFile(String projName, String resourceName, TableInfo refTableInfo) {
        StringBuilder ref = new StringBuilder();
        ref.append(refTableInfo.getProjectName());
        ref.append(".");
        ref.append(refTableInfo.getTableName());
        String partitions = refTableInfo.getPartPath();
        if (partitions != null && !partitions.trim().isEmpty()) {
            // "p1=v1/p2=v2/" -> "(p1=v1,p2=v2)"
            String partStr = partitions.replace('/', ',');
            if (partStr.endsWith(",")) {
                partStr = partStr.substring(0, partStr.length() - 1);
            }
            ref.append("(").append(partStr).append(")");
        }

        File tableResourceDir = getReourceFile(projName, resourceName);
        if (!tableResourceDir.exists()) {
            tableResourceDir.mkdirs();
        }
        File refFile = new File(tableResourceDir, "__ref__");
        PrintWriter pw = null;
        try {
            pw = new PrintWriter(refFile);
            pw.println(ref.toString());
            return true;
        } catch (FileNotFoundException e) {
            // still reported through the return value, but no longer silent
            LOG.warn("Failed to write table resource reference file " + refFile.getAbsolutePath(), e);
        } finally {
            if (pw != null) {
                pw.close();
            }
        }

        return false;
    }

    /**
     * Reads the schema stored with a resource.
     *
     * @param projName
     *     project name
     * @param resourceName
     *     resource name
     * @return the schema read from the resource path, or null when
     *     {@code SchemaUtils.existsSchemaFile} reports no schema file there
     * @throws IOException
     *     declared for I/O failures of the underlying helpers
     */
    public TableMeta getResourceSchema(String projName, String resourceName) throws IOException {
        File resourcePath = getReourceFile(projName, resourceName);
        if (!SchemaUtils.existsSchemaFile(resourcePath)) {
            return null;
        }
        return SchemaUtils.readSchema(resourcePath);
    }

    /**
     * Reads the schema of a table and checks that the project and table names
     * recorded in it (when present) agree with the requested ones.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @return the table meta, or null when either name is blank
     * @throws RuntimeException
     *     when the schema names a different project or table
     */
    public TableMeta getTableMeta(String projName, String tblName) {
        if (StringUtils.isBlank(projName) || StringUtils.isBlank(tblName)) {
            return null;
        }
        File dir = getTableDir(projName, tblName);
        TableMeta meta = SchemaUtils.readSchema(dir);

        String schemaLocation = " in file 'warehouse" + File.separator + projName + File.separator + tblName
                + File.separator + Constants.SCHEMA_FILE + "'";
        if (meta.getProjName() != null && !meta.getProjName().equals(projName)) {
            throw new RuntimeException("Invalid project name " + meta.getProjName() + schemaLocation);
        }
        if (meta.getTableName() != null && !meta.getTableName().equals(tblName)) {
            // report the offending table name (previously the project name was printed here)
            throw new RuntimeException("Invalid table name " + meta.getTableName() + schemaLocation);
        }
        return meta;
    }

    /**
     * Scans the table directory and maps every partition found there to the
     * directory that holds it. A partition is recognised from the directory
     * of each visible file and from each empty directory reported by
     * {@code LocalRunUtils.listEmptyDirectory}; paths that cannot be
     * converted to a partition, or that are not valid for the table's
     * partition columns, are skipped.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @return map from partition to its directory; empty when none is found
     */
    public Map<PartitionSpec, File> getPartitionToPathMap(String projName, String tblName) {
        File tableDir = getTableDir(projName, tblName);
        TableMeta tableMeta = SchemaUtils.readSchema(tableDir);
        Map<PartitionSpec, File> result = new HashMap<PartitionSpec, File>();

        final Collection<File> candidates = FileUtils.listFiles(tableDir, HiddenFileFilter.VISIBLE,
                HiddenFileFilter.VISIBLE);
        candidates.addAll(LocalRunUtils.listEmptyDirectory(tableDir));

        int tablePathLength = tableDir.getAbsolutePath().length();
        for (File candidate : candidates) {
            if (candidate.getName().equals(Constants.SCHEMA_FILE)) {
                continue;
            }

            // A data file lives inside its partition directory; an empty
            // directory IS the partition directory. Map the partition to that
            // directory in both cases (an empty partition used to be mapped
            // to its parent).
            File partitionDir = candidate.isFile() ? candidate.getParentFile() : candidate;
            String partPath = partitionDir.getAbsolutePath().substring(tablePathLength);
            if (partPath.length() == 0) {
                continue;
            }

            try {
                PartitionSpec ps = PartitionUtils.convert(partPath);
                if (PartitionUtils.valid(tableMeta.getPartitions(), ps)) {
                    result.put(ps, partitionDir);
                }
            } catch (Exception ignored) {
                // best effort: a directory that is not a partition path is skipped
            }
        }
        return result;
    }

    /**
     * Lists the partitions found in the warehouse directory of a table.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @return the keys of {@link #getPartitionToPathMap(String, String)} as a
     *     new list
     */
    public List<PartitionSpec> getPartitions(String projName, String tblName) {
        Map<PartitionSpec, File> partitionDirs = getPartitionToPathMap(projName, tblName);
        return new ArrayList<PartitionSpec>(partitionDirs.keySet());
    }

    /**
     * Lists the projects of the warehouse, i.e. the names of the visible
     * sub-directories of the warehouse root.
     *
     * @return the project names; null when the warehouse root does not exist;
     *     an empty list when the root cannot be listed
     */
    public List<String> getProjectNames() {
        File rootDir = getWarehouseDir();
        if (!rootDir.exists()) {
            return null;
        }
        List<String> result = new ArrayList<String>();
        File[] projects = rootDir.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.isDirectory() && !pathname.isHidden();
            }
        });
        // listFiles returns null when the path is not a directory or an I/O error occurs
        if (projects == null) {
            return result;
        }
        for (File project : projects) {
            result.add(project.getName());
        }
        return result;
    }

    /**
     * Collects the meta data of every table of a project. Both directory
     * layouts are scanned: tables stored directly under the project directory
     * (old layout) and tables stored under {@code Constants.TABLES_DIR}
     * (layout used since 0.14).
     *
     * @param projName
     *     project name
     * @return the table metas; null when the project directory does not exist
     * @throws IOException
     *     declared for I/O failures of the underlying helpers
     */
    public List<TableMeta> getTableMetas(String projName) throws IOException {
        File projectDir = getProjectDir(projName);
        if (!projectDir.exists()) {
            return null;
        }

        List<TableMeta> result = new ArrayList<TableMeta>();

        // old version: table directories directly under the project
        File[] legacyTables = projectDir.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.isDirectory() && !pathname.isHidden()
                        && !pathname.getName().equals(Constants.RESOURCES_DIR);
            }
        });
        appendExistingTableMetas(projName, legacyTables, result);

        // new version >=0.14: table directories under the tables directory
        File tableBaseDir = new File(projectDir, Constants.TABLES_DIR);
        if (!tableBaseDir.exists()) {
            return result;
        }
        File[] tables = tableBaseDir.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.isDirectory() && !pathname.isHidden();
            }
        });
        appendExistingTableMetas(projName, tables, result);

        return result;
    }

    /**
     * Appends the meta of every directory in {@code tableDirs} whose name is
     * an existing table of the project; a null array (directory could not be
     * listed) is treated as empty.
     */
    private void appendExistingTableMetas(String projName, File[] tableDirs, List<TableMeta> result) {
        if (tableDirs == null) {
            return;
        }
        for (File tableDir : tableDirs) {
            String tableName = tableDir.getName();
            if (!existsTable(projName, tableName)) {
                continue;
            }
            TableMeta tableMeta = getTableMeta(projName, tableName);
            if (tableMeta != null) {
                result.add(tableMeta);
            }
        }
    }

    /**
     * Lists the names of the tables of a project.
     *
     * @param projName
     *     project name
     * @return the table names, or null when the project has no tables (or
     *     does not exist)
     * @throws IOException
     *     propagated from {@link #getTableMetas(String)}
     */
    public List<String> getTableNames(String projName) throws IOException {
        List<TableMeta> metas = getTableMetas(projName);
        boolean nothingFound = metas == null || metas.size() == 0;
        if (nothingFound) {
            return null;
        }
        List<String> names = new ArrayList<String>();
        for (int i = 0; i < metas.size(); i++) {
            names.add(metas.get(i).getTableName());
        }
        return names;
    }

    /**
     * Reads the reference file of a table resource and returns the table it
     * points to. The first line that is neither blank nor a {@code #} comment
     * is parsed as {@code [project.]table[(p1=v1,p2=v2)]}; when no project is
     * given, {@code projName} is used.
     *
     * @param projName
     *     project owning the resource; also the default project of the
     *     referenced table
     * @param resourceName
     *     table resource name
     * @return the referenced table, or null when the file has no usable line
     *     or cannot be read
     */
    public TableInfo getReferencedTable(String projName, String resourceName) {
        File file = getTableReourceFile(projName, resourceName);
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));

            // skip blank lines and comments (leading whitespace tolerated)
            String line = br.readLine();
            while (line != null && (line.trim().isEmpty() || line.trim().startsWith("#"))) {
                line = br.readLine();
            }
            if (line == null) {
                return null;
            }
            line = line.trim();

            // split off "(...)" first, so that a '.' inside a partition value
            // is never mistaken for the project/table separator
            String tablePart = line;
            String partitionPart = null;
            int parenIndex = line.indexOf('(');
            if (parenIndex != -1) {
                tablePart = line.substring(0, parenIndex);
                int end = line.endsWith(")") ? line.length() - 1 : line.length();
                partitionPart = line.substring(parenIndex + 1, end);
            }

            String project = projName;
            String table = tablePart;
            int dotIndex = tablePart.indexOf('.');
            if (dotIndex != -1) {
                project = tablePart.substring(0, dotIndex);
                table = tablePart.substring(dotIndex + 1);
            }

            if (partitionPart == null) {
                return TableInfo.builder().projectName(project).tableName(table).build();
            }

            // "p1=v1,p2=v2" -> "p1=v1/p2=v2"
            StringBuilder partitions = new StringBuilder();
            for (String item : partitionPart.split(",")) {
                if (partitions.length() > 0) {
                    partitions.append("/");
                }
                partitions.append(item);
            }
            return TableInfo.builder().projectName(project).tableName(table).partSpec(partitions.toString())
                    .build();

        } catch (IOException e) {
            // still reported as null, but no longer silent
            LOG.warn("Failed to read table resource reference file " + file, e);
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // nothing useful can be done when closing a reader fails
                }
            }
        }
        return null;

    }

    /**
     * Tells whether a table is present in the warehouse; a table counts as
     * present when its schema file exists.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @return the result of {@link #existsTableSchema(String, String)}
     */
    public boolean existsTable(String projName, String tblName) {
        boolean schemaPresent = existsTableSchema(projName, tblName);
        return schemaPresent;
    }

    /**
     * Tells whether the table directory exists and contains
     * {@code Constants.SCHEMA_FILE}.
     *
     * @param projectName
     *     project name
     * @param tableName
     *     table name
     * @return true when the schema file exists
     */
    public boolean existsTableSchema(String projectName, String tableName) {
        File tableDir = getTableDir(projectName, tableName);
        return tableDir.exists() && new File(tableDir, Constants.SCHEMA_FILE).exists();
    }

    /**
     * Tells whether the warehouse holds a partition matching
     * {@code partSpec}.
     *
     * @param projectName
     *     project name
     * @param tableName
     *     table name
     * @param partSpec
     *     partition (pattern) to look for; null or empty only checks that the
     *     table exists
     * @return false when the table does not exist or no partition matches
     */
    public boolean existsPartition(String projectName, String tableName, PartitionSpec partSpec) {
        if (!existsTable(projectName, tableName)) {
            return false;
        }
        if (partSpec == null || partSpec.isEmpty()) {
            return true;
        }

        List<PartitionSpec> knownPartitions = getPartitions(projectName, tableName);
        if (knownPartitions != null) {
            for (PartitionSpec known : knownPartitions) {
                if (PartitionUtils.match(partSpec, known)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Tells whether the resource path exists in the warehouse (as a file or
     * as a directory).
     *
     * @param projName
     *     project name
     * @param resourceName
     *     resource name
     * @return true when the path exists
     */
    public boolean existsResource(String projName, String resourceName) {
        File resourcePath = getReourceFile(projName, resourceName);
        return resourcePath.exists();
    }

    /**
     * Deletes the whole table directory -- schema and data -- when it exists.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @throws IOException
     *     when the directory cannot be deleted
     */
    public void dropTableIfExists(String projName, String tblName) throws IOException {
        File tableDir = getTableDir(projName, tblName);
        if (tableDir == null || !tableDir.exists() || !tableDir.isDirectory()) {
            return;
        }
        FileUtils.deleteDirectory(tableDir);
    }

    /**
     * Removes the data files of a partition when its directory exists. With a
     * null or empty {@code partSpec} the table directory itself is targeted.
     * Removal is delegated to {@code LocalRunUtils.removeDataFiles}.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @param partSpec
     *     partition whose data is dropped; null or empty for the table
     *     directory
     * @throws IOException
     *     declared for I/O failures while removing files
     */
    public void dropTableDataIfExists(String projName, String tblName, PartitionSpec partSpec) throws IOException {
        File targetDir = getPartitionDir(projName, tblName, partSpec);
        if (targetDir == null || !targetDir.exists() || !targetDir.isDirectory()) {
            return;
        }
        LocalRunUtils.removeDataFiles(targetDir);
    }

    /**
     * Tells whether a table (null or empty {@code partSpec}) or a partition
     * holds no data: its directory is missing, contains no data files, or
     * contains only zero-length data files.
     *
     * @param projName
     *     project name
     * @param tblName
     *     table name
     * @param partSpec
     *     partition to check; null or empty checks the table directory
     * @return false as soon as one non-empty data file is found
     * @throws IOException
     *     declared for I/O failures while listing files
     */
    public boolean isTableEmpty(String projName, String tblName, PartitionSpec partSpec) throws IOException {
        File targetDir = getPartitionDir(projName, tblName, partSpec);
        if (targetDir == null || !targetDir.exists() || !targetDir.isDirectory()) {
            return true;
        }

        List<File> dataFiles = new ArrayList<File>();
        LocalRunUtils.listAllDataFiles(targetDir, dataFiles);
        // zero-length data files do not count as data
        for (File dataFile : dataFiles) {
            if (dataFile.length() > 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Deletes a resource from the warehouse when it exists, whether it is
     * stored as a directory or as a plain file.
     *
     * @param projName
     *     project name
     * @param resourceName
     *     resource name
     * @throws IOException
     *     when a resource directory cannot be deleted
     */
    public void dropResourceIfExists(String projName, String resourceName) throws IOException {
        File resourcePath = getReourceFile(projName, resourceName);
        if (resourcePath != null && resourcePath.exists()) {
            if (resourcePath.isDirectory()) {
                FileUtils.deleteDirectory(resourcePath);
            }
            if (resourcePath.isFile()) {
                resourcePath.delete();
            }
        }
    }

    /**
     * Validates that a table, optionally one of its partitions, and optionally a
     * set of columns exist in the local warehouse.
     *
     * @param projName      project name, must not be blank
     * @param tblName       table name, must not be blank
     * @param partitionSpec partition that must exist, or {@code null} to skip the
     *                      partition check
     * @param readCols      column names that must all be present in the table
     *                      meta, or {@code null} to skip the column check
     * @return always {@code true}; every failed check is reported by exception
     * @throws OdpsException if a name is blank, or the table, the partition or one
     *                       of the columns does not exist
     * @throws IOException   declared for the lookups this method delegates to
     */
    public boolean valid(String projName, String tblName, PartitionSpec partitionSpec, String[] readCols)
            throws OdpsException, IOException {
        if (StringUtils.isBlank(projName)) {
            throw new OdpsException("Project " + projName + " is null");
        }
        if (StringUtils.isBlank(tblName)) {
            throw new OdpsException("Table Name is null");
        }

        String qualifiedName = projName + "." + tblName;
        if (!existsTable(projName, tblName)) {
            throw new OdpsException("table " + qualifiedName + " not exitsts");
        }

        boolean partitionMissing = partitionSpec != null && !existsPartition(projName, tblName, partitionSpec);
        if (partitionMissing) {
            throw new OdpsException(
                    "table " + qualifiedName + "(" + PartitionUtils.toString(partitionSpec) + ") not exitsts");
        }

        if (readCols == null) {
            return true;
        }

        // Every requested column has to match a column name of the table meta.
        Column[] tableColumns = getTableMeta(projName, tblName).getCols();
        for (String wanted : readCols) {
            boolean found = false;
            for (Column column : tableColumns) {
                if (column.getName().equals(wanted)) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                throw new OdpsException("table " + qualifiedName + " do not have column :" + wanted);
            }
        }
        return true;
    }

    /**
     * Reads every record of a table (or of one partition) from its local data
     * files and converts the textual fields into objects according to the column
     * types of the table schema.
     *
     * <p>
     * When {@code LocalRunUtils.genReadColsIndexes} yields a non-empty index list,
     * each row contains only those columns, in index-list order; otherwise each
     * row contains every field of the record. The literal {@code "\\N"} is handed
     * to {@code LocalRunUtils.fromString} as its third argument (presumably the
     * marker for a null value; confirm against that helper).
     *
     * <p>
     * Each reader is closed even when parsing or conversion fails.
     *
     * @param projName              project the table belongs to
     * @param tblName               name of the table
     * @param partitionSpec         partition to read, passed through to
     *                              {@code getDataFiles}
     * @param readCols              columns to read, passed through to
     *                              {@code genReadColsIndexes}
     * @param inputColumnSeperator  field separator of the data files
     * @return the converted rows, or {@code null} when there is no data file at
     *         all (kept for backward compatibility with existing callers)
     * @throws OdpsException declared for the lookups this method delegates to
     * @throws IOException   if a data file or the schema cannot be read
     */
    public List<Object[]> readData(String projName, String tblName, PartitionSpec partitionSpec, String[] readCols,
            char inputColumnSeperator) throws OdpsException, IOException {
        List<File> dataFiles = getDataFiles(projName, tblName, partitionSpec, inputColumnSeperator);
        if (dataFiles == null || dataFiles.isEmpty()) {
            return null;
        }

        TableMeta tableMeta = SchemaUtils.readSchema(getTableDir(projName, tblName));
        Column[] columns = tableMeta.getCols();
        List<Integer> indexes = LocalRunUtils.genReadColsIndexes(tableMeta, readCols);
        boolean projected = indexes != null && !indexes.isEmpty();

        List<Object[]> result = new ArrayList<Object[]>();
        for (File file : dataFiles) {
            CsvReader reader = DownloadUtils.newCsvReader(file.getAbsolutePath(), inputColumnSeperator, encoding);
            try {
                while (reader.readRecord()) {
                    String[] vals = reader.getValues();
                    Object[] row;
                    if (projected) {
                        row = new Object[indexes.size()];
                        for (int i = 0; i < row.length; i++) {
                            int col = indexes.get(i);
                            row[i] = LocalRunUtils.fromString(columns[col].getType(), vals[col], "\\N");
                        }
                    } else {
                        row = new Object[vals.length];
                        for (int i = 0; i < vals.length; i++) {
                            row[i] = LocalRunUtils.fromString(columns[i].getType(), vals[i], "\\N");
                        }
                    }
                    result.add(row);
                }
            } finally {
                // Release the file handle even if a record fails to parse or convert.
                reader.close();
            }
        }
        return result;
    }

    /**
     * Opens a file resource as a buffered stream, downloading the resource first
     * when it is not present locally. The caller is responsible for closing the
     * returned stream once reading is finished.
     *
     * <p>
     * NOTE(review): the download is issued against the default project of the
     * current {@code Odps} object rather than against {@code project}; confirm
     * this is intended.
     *
     * @param project               project the resource belongs to
     * @param resource              name of the resource
     * @param inputColumnSeperator  separator handed to the download helper
     * @return an open stream over the resource file
     * @throws IOException   if the file cannot be opened
     * @throws OdpsException if the resource is still missing after the download
     *                       attempt, or is not a regular file
     */
    public BufferedInputStream readResourceFileAsStream(String project, String resource, char inputColumnSeperator)
            throws IOException, OdpsException {

        String qualifiedName = project + "." + resource;

        if (!existsResource(project, resource)) {
            Odps odps = WareHouse.getInstance().getOdps();
            String defaultProject = getOdps().getDefaultProject();
            DownloadUtils.downloadResource(odps, defaultProject, resource, Constants.DEFAULT_DOWNLOAD_RECORD,
                    inputColumnSeperator);
        }

        if (!existsResource(project, resource)) {
            throw new OdpsException("File Resource " + qualifiedName + " not exists");
        }

        File target = getReourceFile(project, resource);
        if (target.isFile()) {
            return new BufferedInputStream(new FileInputStream(target));
        }
        throw new OdpsException(
                "Resource " + qualifiedName + " is not a valid file Resource, because it is a direcotry");
    }

    /**
     * Reads the complete content of a file resource into memory, downloading the
     * resource first when it is not present locally.
     *
     * <p>
     * The input stream is closed in a {@code finally} block so the file handle is
     * released even when reading fails. A resource that is still missing after the
     * download attempt is reported with the same "not exists" message that
     * {@code readResourceFileAsStream} uses, instead of being misreported as a
     * directory.
     *
     * <p>
     * NOTE(review): the download is issued against the default project of the
     * current {@code Odps} object rather than against {@code project}; confirm
     * this is intended.
     *
     * @param project               project the resource belongs to
     * @param resource              name of the resource
     * @param inputColumnSeperator  separator handed to the download helper
     * @return the bytes of the resource file
     * @throws IOException   if the file cannot be read
     * @throws OdpsException if the resource is missing or is not a regular file
     */
    public byte[] readResourceFile(String project, String resource, char inputColumnSeperator)
            throws IOException, OdpsException {
        if (!existsResource(project, resource)) {
            DownloadUtils.downloadResource(WareHouse.getInstance().getOdps(), getOdps().getDefaultProject(),
                    resource, Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator);
        }

        if (!existsResource(project, resource)) {
            throw new OdpsException("File Resource " + project + "." + resource + " not exists");
        }

        File file = getReourceFile(project, resource);
        if (!file.isFile()) {
            throw new OdpsException("Resource " + project + "." + resource
                    + " is not a valid file Resource, because it is a direcotry");
        }

        FileInputStream in = new FileInputStream(file);
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream(1024);
            byte[] buffer = new byte[1024];
            int length;
            while ((length = in.read(buffer)) != -1) {
                out.write(buffer, 0, length);
            }
            return out.toByteArray();
        } finally {
            in.close();
        }
    }

    /**
     * Returns a lazy iterator over the rows of the table that a table resource
     * refers to, downloading the resource first when it is not present locally.
     *
     * <p>
     * The data files of the referenced table (and partition, if any) are read one
     * after another. A data file that yields no record is skipped, so rows stored
     * in later files are still returned. Each reader is closed as soon as its file
     * is exhausted. The reader of the file currently being consumed stays open if
     * the caller abandons the iteration early, because the {@link Iterator}
     * contract offers no close hook.
     *
     * <p>
     * Fields are converted with {@code LocalRunUtils.fromString} using the column
     * type at the same position in the table schema; {@code "\\N"} is passed as
     * its third argument (presumably the null marker; confirm against that
     * helper). An {@link IOException} raised while fetching is rethrown from
     * {@code hasNext()}/{@code next()} wrapped in a {@link RuntimeException}.
     *
     * @param project               project the resource belongs to
     * @param resource              name of the table resource
     * @param inputColumnSeperator  field separator of the data files
     * @return an iterator over the converted rows; {@code remove()} is unsupported
     * @throws IOException   if the schema cannot be read
     * @throws OdpsException if the resource is not backed by a directory
     */
    public Iterator<Object[]> readResourceTable(String project, String resource, final char inputColumnSeperator)
            throws IOException, OdpsException {
        if (!existsResource(project, resource)) {
            DownloadUtils.downloadResource(WareHouse.getInstance().getOdps(), getOdps().getDefaultProject(),
                    resource, Constants.DEFAULT_DOWNLOAD_RECORD, inputColumnSeperator);
        }

        File tableResourceDir = getReourceFile(project, resource);
        if (!tableResourceDir.isDirectory()) {
            throw new OdpsException("Resource " + project + "." + resource
                    + " is not a valid file Resource, because it is not a direcotry");
        }

        TableInfo tableInfo = getReferencedTable(project, resource);
        PartitionSpec partitionSpec = PartitionUtils.convert(tableInfo.getPartSpec());

        List<File> datafiles = getDataFiles(project, tableInfo.getTableName(), partitionSpec,
                inputColumnSeperator);

        // Work on a private copy: the iterator consumes this list, and the list
        // handed back by getDataFiles may be null or owned by someone else.
        final List<File> pendingFiles = new ArrayList<File>();
        if (datafiles != null) {
            pendingFiles.addAll(datafiles);
        }

        final Column[] schema = SchemaUtils.readSchema(getTableDir(project, tableInfo.getTableName())).getCols();

        return new Iterator<Object[]>() {
            /** Reader of the file currently being consumed, {@code null} when none is open. */
            CsvReader reader;
            /** Row that the next call to {@code next()} returns, {@code null} when exhausted. */
            Object[] current;
            /** Whether {@code current} already holds the look-ahead row. */
            boolean fetched;

            @Override
            public boolean hasNext() {
                if (!fetched) {
                    try {
                        fetch();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
                return current != null;
            }

            /**
             * Loads the next row into {@code current}, moving on to the next
             * pending file whenever the open one has nothing more to offer.
             */
            private void fetch() throws IOException {
                current = null;
                while (current == null) {
                    if (reader == null) {
                        if (pendingFiles.isEmpty()) {
                            break;
                        }
                        File f = pendingFiles.remove(0);
                        reader = DownloadUtils.newCsvReader(f.getAbsolutePath(), inputColumnSeperator, encoding);
                        reader.setSafetySwitch(false);
                    }
                    current = read();
                    if (current == null) {
                        // This file is done: release it before trying the next one.
                        reader.close();
                        reader = null;
                    }
                }
                // Only mark the look-ahead as valid once fetching finished normally.
                fetched = true;
            }

            @Override
            public Object[] next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                fetched = false;
                return current;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }

            /**
             * Reads and converts one record from the open reader.
             *
             * @return the converted row, or {@code null} when the reader has no
             *         further record or the record has no values
             */
            private Object[] read() throws IOException {
                if (!reader.readRecord()) {
                    return null;
                }
                String[] vals = reader.getValues();
                if (vals == null || vals.length == 0) {
                    return null;
                }
                Object[] result = new Object[vals.length];
                for (int i = 0; i < vals.length; i++) {
                    result[i] = LocalRunUtils.fromString(schema[i].getType(), vals[i], "\\N");
                }
                return result;
            }

        };
    }

    // ////////////////Local Configuration////////////////////////////

    /**
     * Reads the {@code Constants.LOCAL_TEMP_RETAIN} setting, which defaults to
     * {@code "true"}.
     *
     * @return {@code false} only when the setting equals "false" ignoring case,
     *         {@code true} for any other value
     */
    public boolean isRetainTempData() {
        String retainSetting = getConfiguration().get(Constants.LOCAL_TEMP_RETAIN, "true");
        return !retainSetting.equalsIgnoreCase("false");
    }

    /**
     * @return the value configured under {@code Constants.LOCAL_TEMP_DIR}, or
     *         {@code "temp"} when the key is not set
     */
    public String getJobDirStr() {
        Configuration conf = getConfiguration();
        return conf.get(Constants.LOCAL_TEMP_DIR, "temp");
    }

    /**
     * @return the {@code jobDirecotry} field of this warehouse (the field is
     *         declared and initialised outside this part of the file)
     */
    public JobDirecotry getJobDir() {
        return jobDirecotry;
    }

    /**
     * @return the value configured under {@code Constants.JOB_NAME}, or an empty
     *         string when the key is not set
     */
    public String getJobName() {
        Configuration conf = getConfiguration();
        return conf.get(Constants.JOB_NAME, "");
    }

    /**
     * Stores the job name in the current configuration under
     * {@code Constants.JOB_NAME}.
     *
     * @param jobname job name to record
     */
    public void setJobName(String jobname) {
        Configuration conf = getConfiguration();
        conf.set(Constants.JOB_NAME, jobname);
    }

    /**
     * Resolves the input column separator from the first character of the
     * {@code Constants.LOCAL_INPUT_COLUMN_SEPERATOR} setting (default {@code ","}).
     *
     * <ul>
     * <li>{@code 's'} or a space selects a space</li>
     * <li>{@code 't'} or a tab selects a tab</li>
     * <li>{@code ','}, {@code ';'} and {@code '|'} are used as they are</li>
     * <li>an empty setting or any other character selects a comma</li>
     * </ul>
     *
     * @return the separator character to use when reading input
     */
    public char getInputColumnSeperator() {
        String configured = getConfiguration().get(Constants.LOCAL_INPUT_COLUMN_SEPERATOR, ",");
        if (configured.length() == 0) {
            return ',';
        }
        char first = configured.charAt(0);
        switch (first) {
        case 's':
        case ' ':
            return ' ';
        case 't':
        case '\t':
            return '\t';
        case ';':
        case '|':
            return first;
        default:
            // Covers ',' itself as well as every unsupported character.
            return ',';
        }
    }

    /**
     * Resolves the output column separator from the first character of the
     * {@code Constants.LOCAL_OUTPUT_COLUMN_SEPERATOR} setting (default
     * {@code ","}): {@code 's'} or a space selects a space, {@code 't'} or a tab
     * selects a tab, {@code ','}, {@code ';'} and {@code '|'} are used as they
     * are, and an empty setting or any other character selects a comma.
     *
     * @return the separator character to use when writing output
     */
    public char getOutputColumnSeperator() {
        final char fallback = ',';
        String configured = getConfiguration().get(Constants.LOCAL_OUTPUT_COLUMN_SEPERATOR, ",");
        if (configured.length() == 0) {
            return fallback;
        }

        char first = configured.charAt(0);
        if (first == 's' || first == ' ') {
            return ' ';
        }
        if (first == 't' || first == '\t') {
            return '\t';
        }
        boolean usableAsIs = first == ',' || first == ';' || first == '|';
        return usableAsIs ? first : fallback;
    }

    /**
     * Reads the {@code Constants.LOCAL_RECORD_LIMIT} setting and bounds it.
     *
     * @return {@code Constants.LIMIT_DOWNLOAD_RECORD} when the configured value
     *         exceeds it, the configured value when it is positive, and
     *         {@code Constants.DEFAULT_DOWNLOAD_RECORD} otherwise (also the
     *         default when the key is not set)
     */
    public int getLimitDownloadRecordCount() {
        int configured = getConfiguration().getInt(Constants.LOCAL_RECORD_LIMIT,
                Constants.DEFAULT_DOWNLOAD_RECORD);
        if (configured > Constants.LIMIT_DOWNLOAD_RECORD) {
            return Constants.LIMIT_DOWNLOAD_RECORD;
        }
        if (configured > 0) {
            return configured;
        }
        return Constants.DEFAULT_DOWNLOAD_RECORD;
    }

    /**
     * Resolves the download mode from the {@code Constants.LOCAL_DOWNLOAD_MODE}
     * setting, matched against the {@code DownloadMode} constant names without
     * regard to case.
     *
     * <p>
     * The value is upper-cased with {@link java.util.Locale#ROOT} so the match
     * does not depend on the JVM's default locale.
     *
     * @return the configured mode, or {@code DownloadMode.AUTO} when the key is
     *         not set or its value names no known mode
     */
    public DownloadMode getDownloadMode() {
        String configured = getConfiguration().get(Constants.LOCAL_DOWNLOAD_MODE, DownloadMode.AUTO.toString());
        try {
            return DownloadMode.valueOf(configured.toUpperCase(java.util.Locale.ROOT));
        } catch (IllegalArgumentException ignored) {
            // An unrecognised value is deliberately not an error: fall back to AUTO.
            return DownloadMode.AUTO;
        }
    }

    /**
     * Checks whether the current configuration has an entry for the given key by
     * walking over all of its entries.
     *
     * @param key key to look for
     * @return {@code true} if an entry with an equal key is found
     */
    public boolean caintainsKey(String key) {
        for (Iterator<Entry<String, String>> entries = getConfiguration().iterator(); entries.hasNext();) {
            String candidate = entries.next().getKey();
            if (candidate.equals(key)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces the value held by {@code odpsThreadLocal} with the given
     * {@code Odps} object; a {@code null} argument leaves the stored value
     * untouched.
     *
     * @param odps object to store, ignored when {@code null}
     */
    public void setOdps(Odps odps) {
        if (odps == null) {
            return;
        }
        odpsThreadLocal.remove();
        odpsThreadLocal.set(odps);
    }

    /**
     * @return the {@code Odps} object currently held by {@code odpsThreadLocal},
     *         as stored by {@code setOdps}
     */
    public Odps getOdps() {
        return odpsThreadLocal.get();
    }

    /**
     * Replaces the value held by {@code confThreadLocal} with the given
     * configuration; a {@code null} argument leaves the stored value untouched.
     *
     * @param conf configuration to store, ignored when {@code null}
     */
    public void setConfiguration(Configuration conf) {
        if (conf == null) {
            return;
        }
        confThreadLocal.remove();
        confThreadLocal.set(conf);
    }

    /**
     * Returns the configuration held by {@code confThreadLocal}, creating and
     * storing a fresh empty {@code Configuration} first when none is present.
     *
     * @return the current configuration, never {@code null}
     */
    public Configuration getConfiguration() {
        Configuration existing = confThreadLocal.get();
        if (existing != null) {
            return existing;
        }
        Configuration created = new Configuration();
        confThreadLocal.set(created);
        return created;
    }
}