Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package com.aliyun.odps.rodps; import com.aliyun.odps.*; import com.aliyun.odps.account.AliyunAccount; import com.aliyun.odps.rodps.DataTunnel.*; import com.aliyun.odps.task.SQLTask; import com.aliyun.odps.tunnel.TableTunnel.DownloadSession; import com.aliyun.odps.tunnel.TableTunnel.UploadSession; import com.aliyun.odps.utils.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.log4j.PropertyConfigurator; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import java.io.*; import java.util.*; public class ROdps { static Log LOG = LogFactory.getLog(ROdps.class); private String ODPS_PROJECT_NAME; private final Odps odps; private String DT_ENDPOINT; private final static int RETRY_MAX = 3; private final static String PROG_VERSION = "rodps-1.3"; private String LOGVIEW_HOST; private String bizId = null; public ROdps(String projectName, String accessID, String accessKey, String endPoint, String dtEndpoint, String logviewHost, String log4j_properties) throws ROdpsException, OdpsException { if (log4j_properties == null || log4j_properties.isEmpty()) LOG = LogFactory.getLog(ROdps.class); else { PropertyConfigurator.configure(log4j_properties); LOG = LogFactory.getLog(ROdps.class); } LOG.info("start to init Odps"); if (projectName.equals("NA") || accessID.equals("NA") || accessKey.equals("NA") || endPoint.equals("NA")) { throw new ROdpsException("NA found with project/accessID/accessKey/endPoint"); } ODPS_PROJECT_NAME = projectName; DT_ENDPOINT = dtEndpoint; LOGVIEW_HOST = logviewHost; odps = new Odps(new AliyunAccount(accessID, accessKey)); odps.setEndpoint(endPoint); odps.setDefaultProject(projectName); odps.setUserAgent(PROG_VERSION); if (DT_ENDPOINT != null && DT_ENDPOINT.length() != 0) { System.out.println("tunnel router is closed,use your DT_ENDPOINT."); // System.exit(0); } if (LOGVIEW_HOST == null || LOGVIEW_HOST.length() == 0) { LOGVIEW_HOST = "http://logview.odps.aliyun-inc.com:8080";// internal // LOGVIEW_HOST ="http://logview.odps.aliyun.com"; //external } } public void setBizId(String s) { this.bizId = s; } // use tunnel sdk to download table public void writeTableFromDT(String projectName, String tableName, String partition, String dataFilePathName, String columnDelimiter, String rowDelimiter, long recordCount, int threadNumber) throws ROdpsException { int retryTimes = 0; while (true) { try { LOG.info("before create RDTUploader"); if (projectName == null) { projectName = this.ODPS_PROJECT_NAME; } if (partition != null) { partition = formatPartition(partition, "", ","); } Context<UploadSession> context = new Context<UploadSession>(odps, DT_ENDPOINT, projectName, tableName, partition, -1, columnDelimiter, rowDelimiter, threadNumber); context.setRecordCount(recordCount); RDTUploader uploader = new RDTUploader(context); uploader.upload(dataFilePathName); return; } catch (IOException e) { if (++retryTimes <= RETRY_MAX) { LOG.error( "write table encounter exception:" + e.getMessage() + ", retry times = " + retryTimes); try { Thread.sleep(5000); } catch (InterruptedException e1) { LOG.error("Sleep interrupted!", e1); } continue; } throw new ROdpsException(e); } catch (Exception e) { throw new ROdpsException(e); } } } /* * *use tunnel sdk to load table from odps * */ public List<List<String>> loadTableFromDT(String projectName, String tableName, String partition, String tempFile, String colDelimiter, String rowDelimiter, int limit, int threadNumber) throws ROdpsException { int retryTimes = 0; while (true) { try { if (projectName == null) { projectName = ODPS_PROJECT_NAME; } if (partition != null) { partition = formatPartition(partition, "", ","); } Context<DownloadSession> context = new Context<DownloadSession>(odps, DT_ENDPOINT, projectName, tableName, partition, limit, colDelimiter, rowDelimiter, threadNumber); RDTDownloader downloader = new RDTDownloader(context); return downloader.downloadTable(tempFile); } catch (IOException e) { if (++retryTimes <= RETRY_MAX) { LOG.error("load table encounter exception:" + e.getMessage() + ", retry times = " + retryTimes); try { Thread.sleep(5000); } catch (InterruptedException e1) { LOG.error("Sleep interrupted!", e1); } continue; } throw new ROdpsException(e); } catch (Exception e) { throw new ROdpsException(e); } } } /** * @throws OdpsException * @throws ROdpsException use project * @Title: useProject * @Description: TODO * @param projectName * @return * @return boolean * @throws */ public boolean useProject(String projectName) throws ROdpsException, OdpsException { if (projectName == null) { throw new ROdpsException("ProjectName is null"); } projectName = projectName.trim().toLowerCase(); this.runSqlTask("use " + projectName); this.ODPS_PROJECT_NAME = projectName; return true; } /** * @throws ROdpsException json?name:typemap * @Title: createSchema * @Description: TODO * @param schemaJson * @return * @throws JSONException * @return Map<String,String> * @throws */ private Map<String, Schema> createSchema(String schemaJson, String type) throws ROdpsException { Map<String, Schema> ret = new LinkedHashMap<String, Schema>(); try { JSONObject jsonMap = new JSONObject(schemaJson); if (jsonMap.get(type) != null) { JSONArray jsonArray = jsonMap.getJSONArray(type); for (int i = 0; i < jsonArray.length(); i++) { JSONObject col = (JSONObject) (jsonArray.get(i)); Schema schema = new Schema(col.getString("name"), col.getString("type"), (col.has("comment") ? col.getString("comment") : null)); schema.setPartitionKey(type.equals("partitionKeys")); ret.put(col.getString("name"), schema); } } return ret; } catch (Exception e) { LOG.error(e); throw new ROdpsException(e); } } /** * @throws OdpsException ?projectNameProject * * @Title: getProjectObject * @Description: TODO * @param projectName * @return * @return Project * @throws */ private Project getProjectObject(String projectName) throws OdpsException { if (projectName == null || projectName.isEmpty() || projectName.equals(this.ODPS_PROJECT_NAME)) { Project p = odps.projects().get(ODPS_PROJECT_NAME); p.reload(); return p; } else { // return new Project(client, projectName); Project TempProject = odps.projects().get(projectName); TempProject.reload(); return TempProject; } } /** * ?projectName?nullprojectName * * @Title: getProjectName * @Description: TODO * @param projectName * @return * @return String * @throws */ public String getProjectName(String projectName) { if (projectName == null || projectName.isEmpty() || projectName.equals(this.ODPS_PROJECT_NAME)) { return this.ODPS_PROJECT_NAME; } else { return projectName; } } /** * @throws ROdpsException * @Title: getTableSize * @Description: TODO * @param tableName * @return * @return int * @throws */ public long getTableSize(String projectName, String tableName, String partition) throws ROdpsException { Table tbl = odps.tables().get(this.getProjectName(projectName), tableName); return tbl.getSize(); } /** * @throws OdpsException * @throws ROdpsException * @throws CloneNotSupportedException * @Title: DescribeTable * @Description: TODO * @param projectName * @param tableName * @return * @return String * @throws */ public List<DataFrameItem> describeTable(String projectName, String tableName, String partition) throws ROdpsException, OdpsException { Table tbl = odps.tables().get(this.getProjectName(projectName), tableName); List<DataFrameItem> ps = new ArrayList<DataFrameItem>(); if (partition != null) { partition = this.formatPartition(partition, "'", ","); } try { tbl.reload(); ps.add(createSingleValueFrame("owner", "String", tbl.getOwner())); ps.add(this.createSingleValueFrame("project", "String", tbl.getProject())); ps.add(this.createSingleValueFrame("comment", "String", tbl.getComment())); ps.add(this.createSingleValueFrame("create_time", "DateTime", formatDateTime(tbl.getCreatedTime()))); ps.add(this.createSingleValueFrame("last_modified_time", "DateTime", formatDateTime(tbl.getLastMetaModifiedTime()))); ps.add(this.createSingleValueFrame("is_internal_table", "boolean", tbl.isVirtualView())); if (tbl.isVirtualView()) { long size = tbl.getPhysicalSize(); if (partition == null || partition.isEmpty()) { ps.add(this.createSingleValueFrame("size", "Long", size)); } else { ps.add(this.createSingleValueFrame("partition_size", "Long", size)); ps.add(this.createSingleValueFrame("partition_name", "String", partition)); } } Map<String, Schema> columns = this.createSchema(tbl.getJsonSchema(), "columns"); DataFrameItem item = new DataFrameItem("columns", "string"); for (Map.Entry<String, Schema> entry : columns.entrySet()) { item.getData().add(entry.getValue().toString()); } ps.add(item); Map<String, Schema> ptKeys = this.createSchema(tbl.getJsonSchema(), "partitionKeys"); if (ptKeys != null && ptKeys.size() > 0) { DataFrameItem ptItem = new DataFrameItem("partition_keys", "String"); for (Map.Entry<String, Schema> entry : ptKeys.entrySet()) { ptItem.getData().add(entry.getValue().toString()); } ps.add(ptItem); } return ps; } catch (Exception e) { LOG.error(e); throw new ROdpsException(e); } } private String formatDateTime(Date date) { java.text.SimpleDateFormat format = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); return format.format(date); } private DataFrameItem createSingleValueFrame(String name, String type, Object obj) { DataFrameItem item = new DataFrameItem(name, type); if (obj instanceof List) { item.setData((List) obj); } else { item.getData().add(obj); } return item; } /** * @throws ROdpsException * @throws CloneNotSupportedException * @Title: DropTable * @Description: TODO * @param projectName * @param tableName * @return * @return boolean * @throws */ public boolean dropTable(String projectName, String tableName) throws ROdpsException { try { this.runSqlTask("drop table " + getTableName(this.getProjectName(projectName), tableName) + ";"); return true; } catch (Exception e) { LOG.error(e); throw new ROdpsException(e); } } /** * @throws ROdpsException * @Title: isTableExist * @Description: TODO * @param tableName * @return * @return boolean * @throws */ public boolean isTableExist(String projectName, String tableName, String partition) throws ROdpsException { if (partition != null) { partition = formatPartition(partition, "'", ","); } // Table table = new Table(this.getProjectObject(projectName), tableName); // Table table = odps.tables().get(projectName,tableName); Table table = odps.tables().get(this.getProjectName(projectName), tableName); try { if (partition == null || partition.isEmpty()) { table.reload(); return true; } else { // List<String> pts = table.listPartitions(); // return pts!=null && pts.size()>0 && pts.contains(partition); return table.hasPartition(new PartitionSpec(partition)); } } catch (OdpsException e) { if (e.getMessage().indexOf("Table not found") > 0) { return false; } LOG.error(e); throw new ROdpsException(e); } } /** * Json??TableSchema * * @Title: getTableSchemaJson * @Description: TODO * @param projectName * @param tableName * */ public String getTableSchemaJson(String projectName, String tableName) { String tableSchemaJson; // Table table = new Table(this.getProjectObject(projectName), tableName); Table table = odps.tables().get(this.getProjectName(projectName), tableName); try { table.reload(); tableSchemaJson = table.getJsonSchema(); } catch (OdpsException e) { tableSchemaJson = e.getMessage(); } return tableSchemaJson; } /** * ???Table SchemaIndex * * @Title: getIndexFromColName * @Description: TODO * @param colName * @param tableSchemaJson * */ public int getIndexFromColName(String colName, String tableSchemaJson) { if (0 >= tableSchemaJson.length()) { return -1; } try { JSONObject schema = new JSONObject(tableSchemaJson); JSONArray columns = schema.getJSONArray("columns"); for (int i = 0; i < columns.length(); ++i) { JSONObject column = (JSONObject) columns.get(i); String columnName = (String) column.get("name"); if (colName.equals(columnName)) { return i + 1; } } return -1; } catch (JSONException e) { return -1; } } /** * @throws ROdpsException * @throws CloneNotSupportedException * @Title: runSqlTask * @Description: TODO * @param sql * @return * @return List<DataFrameItem> * @throws */ public List<String> runSqlTask(String sql) throws ROdpsException { // If the client forget to end with a semi-colon, append it. if (!sql.contains(";")) { sql += ";"; } LOG.debug("sql: " + sql); try { SQLTask sqlTask = new SQLTask(); sqlTask.setName("rodps_sql_task"); sqlTask.setQuery(sql); if (!StringUtils.isNullOrEmpty(this.bizId)) { sqlTask.setProperty("biz_id", this.bizId); LOG.debug("biz_id: " + this.bizId); } Instance instance = odps.instances().create(sqlTask); LogView logView = new LogView(odps); if (LOGVIEW_HOST != null) { logView.setLogViewHost(LOGVIEW_HOST); } String logViewUrl = logView.generateLogView(instance, 7 * 24); System.err.println(logViewUrl); LOG.info(logViewUrl); instance.waitForSuccess(); Map<String, String> results = instance.getTaskResults(); String result = results.get("rodps_sql_task"); if (result == null || result.isEmpty()) { return new ArrayList<String>(); } return Arrays.asList(results.get("rodps_sql_task").split("\n")); } catch (Exception e) { LOG.error("runSqlTask error,sql=" + sql, e); throw new ROdpsException(e); } } private String getTableName(String projectName, String tableName) throws ROdpsException { if (tableName == null || tableName.isEmpty()) { throw new ROdpsException("tableName is empty"); } return (projectName == null || projectName.isEmpty() ? "" : (projectName + ".")) + tableName; } public class Schema { public Schema(String name, String type, String comment) { this.name = name; this.type = type; this.comment = comment; } private String name; private String type; private String comment; private boolean isPartitionKey; public String getName() { return name; } public void setName(String name) { this.name = name; } public String getType() { return type; } public void setType(String type) { this.type = type; } public String getComment() { return comment; } public void setComment(String comment) { this.comment = comment; } public boolean isPartitionKey() { return isPartitionKey; } public void setPartitionKey(boolean isPartitionKey) { this.isPartitionKey = isPartitionKey; } public String toString() { return name + "|" + type + "|" + (comment != null ? comment : ""); } } /** * @throws ROdpsException * @throws CloneNotSupportedException * @Title: getTables * @Description: TODO * @return * @return List<String> * @throws */ public List<DataFrameItem> getTables(String projectName, String pattern) throws ROdpsException { DataFrameItem<String> owner = new DataFrameItem<String>("owner", "string"); DataFrameItem<String> tableName = new DataFrameItem<String>("table_name", "string"); List<DataFrameItem> data = new ArrayList<DataFrameItem>(); data.add(owner); data.add(tableName); TableFilter filter = new TableFilter(); filter.setName(pattern); for (Iterator<Table> it = odps.tables().iterator(projectName, filter); it.hasNext();) { Table tb = it.next(); owner.getData().add(tb.getOwner()); tableName.getData().add(tb.getName()); } return data; } public static String formatPartition(String part, String valueDim, String fieldDim) throws ROdpsException { LinkedHashMap<String, String> kv = parsePartition(part); return partitionMap2String(kv, valueDim, fieldDim); } /** * @throws ROdpsException ?partition * @Title: parsePartition * @Description: TODO * @param part * @return * @return LinkedHashMap<String,String> * @throws */ private static LinkedHashMap<String, String> parsePartition(String part) throws ROdpsException { LinkedHashMap<String, String> ret = new LinkedHashMap<String, String>(); String[] pts = part.split(",|/"); for (String p : pts) { String[] kv = p.split("="); if (kv.length != 2) { throw new ROdpsException("Partition expression error:" + part); } if (kv[1].startsWith("'") && kv[1].endsWith("'") || kv[1].startsWith("\"") && kv[1].endsWith("\"")) { kv[1] = kv[1].substring(1, kv[1].length() - 1); } ret.put(kv[0], kv[1]); } return ret; } /** * * @Title: partitionMap2String * @Description: TODO * @param sepc * @param valueDim * @param fieldDim * @return * @return String * @throws */ private static String partitionMap2String(Map<String, String> sepc, String valueDim, String fieldDim) { StringBuffer ret = new StringBuffer(); for (Map.Entry<String, String> entry : sepc.entrySet()) { if (ret.length() > 0) { ret.append(fieldDim); } ret.append(entry.getKey() + "=" + valueDim + entry.getValue() + valueDim); } return ret.toString(); } /* * *set log path* */ public boolean setLogPath(String log_path) throws IOException { String fileName = ROdps.class.getClassLoader().getResource("log4j.properties").getPath(); String mode = "loghome"; File file = new File(fileName); BufferedReader reader = null; try { reader = new BufferedReader(new FileReader(file)); CharArrayWriter tempStream = new CharArrayWriter(); String tempString = null; int line = 1; while ((tempString = reader.readLine()) != null) { if (tempString.contains(mode) && (!tempString.contains("${" + mode + "}"))) { tempString = tempString.substring(0, tempString.indexOf('=') + 1) + log_path; } tempStream.write(tempString); tempStream.append(System.getProperty("line.separator")); } reader.close(); FileWriter out = new FileWriter(fileName); tempStream.writeTo(out); out.close(); } catch (IOException e) { e.printStackTrace(); return false; } finally { if (reader != null) { try { reader.close(); } catch (IOException e1) { } } } return true; } }