Java tutorial
/** * Copyright 2015 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ package models.daos; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.fasterxml.jackson.databind.node.ObjectNode; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.springframework.dao.EmptyResultDataAccessException; import play.libs.Json; import utils.JdbcUtil; import wherehows.common.schemas.DatasetDependencyRecord; import wherehows.common.schemas.DatasetRecord; import wherehows.common.utils.PartitionPatternMatcher; import wherehows.common.writers.DatabaseWriter; /** * Created by zsun on 3/18/15. * Modified by zechen on 10/12/15. */ public class DatasetDao { public static final String GET_DATASET_BY_ID = "SELECT * FROM dict_dataset WHERE id = :id"; public static final String GET_DATASET_BY_URN = "SELECT * FROM dict_dataset WHERE urn = :urn"; public static final String DEFAULT_CLUSTER_NAME = "ltx1-holdem"; public static final String CLUSTER_NAME_KEY = "cluster_name"; public static final String DATASET_URI_KEY = "dataset_uri"; public static final String HIVE_PREFIX_WITH_3_SLASH = "hive:///"; public static final String DALIDS_PREFIX_WITH_3_SLASH = "dalids:///"; public static final String HIVE_PREFIX_WITH_2_SLASH = "hive://"; public static final String DALIDS_PREFIX_WITH_2_SLASH = "dalids://"; public static final String GET_DATASET_ID_IN_MAP_TABLE_WITH_TYPE_AND_CLUSTER = "SELECT " + "c.object_dataset_id as dataset_id, d.urn, d.dataset_type, " + "i.deployment_tier, i.data_center, i.server_cluster " + "FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " + "LEFT JOIN dict_dataset_instance i ON c.object_dataset_id = i.dataset_id " + "WHERE c.object_dataset_id is not null and lower(c.object_name) = ? " + "and lower(c.object_type) = ? and lower(i.server_cluster) = ?"; public static final String GET_DATASET_ID_IN_MAP_TABLE_WITH_CLUSTER = "SELECT c.object_dataset_id as dataset_id, " + "d.urn, d.dataset_type, i.deployment_tier, i.data_center, i.server_cluster " + "FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " + "LEFT JOIN dict_dataset_instance i ON c.object_dataset_id = i.dataset_id " + "WHERE c.object_dataset_id is not null and lower(c.object_name) = ? and lower(i.server_cluster) = ?"; private final static String GET_DATASET_DEPENDS_VIEW = "SELECT object_type, object_sub_type, " + "object_name, map_phrase, is_identical_map, mapped_object_dataset_id, " + "mapped_object_type, mapped_object_sub_type, mapped_object_name " + "FROM cfg_object_name_map WHERE object_dataset_id = ?"; public final static String GET_DATASET_URN_PROPERTIES_LIKE_EXPR = "select urn from dict_dataset where properties like :properties"; public static final String GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_ID = "SELECT " + "c.object_dataset_id as dataset_id, d.urn, d.dataset_type, c.object_sub_type " + "FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " + "WHERE c.mapped_object_dataset_id = ?"; public static final String GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_NAME = "SELECT " + "c.object_dataset_id as dataset_id, d.urn, d.dataset_type, c.object_sub_type " + "FROM cfg_object_name_map c JOIN dict_dataset d ON c.object_dataset_id = d.id " + "WHERE c.mapped_object_type = ? and " + "(c.mapped_object_name = ? or c.mapped_object_name like ?)"; public static Map<String, Object> getDatasetById(int datasetId) throws SQLException { Map<String, Object> params = new HashMap<>(); params.put("id", datasetId); return JdbcUtil.wherehowsNamedJdbcTemplate.queryForMap(GET_DATASET_BY_ID, params); } public static Map<String, Object> getDatasetByUrn(String urn) throws SQLException { Map<String, Object> params = new HashMap<>(); params.put("urn", urn); return JdbcUtil.wherehowsNamedJdbcTemplate.queryForMap(GET_DATASET_BY_URN, params); } public static void insertDataset(JsonNode dataset) throws Exception { ObjectMapper om = new ObjectMapper(); om.setPropertyNamingStrategy(PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); DatasetRecord record = om.convertValue(dataset, DatasetRecord.class); if (record.getRefDatasetUrn() != null) { Map<String, Object> refDataset = getDatasetByUrn(record.getRefDatasetUrn()); // Find ref dataset id if (refDataset != null) { record.setRefDatasetId(((Long) refDataset.get("id")).intValue()); } } // Find layout id if (record.getSamplePartitionFullPath() != null) { PartitionPatternMatcher ppm = new PartitionPatternMatcher(PartitionLayoutDao.getPartitionLayouts()); record.setPartitionLayoutPatternId(ppm.analyze(record.getSamplePartitionFullPath())); } DatabaseWriter dw = new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, "dict_dataset"); dw.append(record); dw.close(); } public static void setDatasetRecord(JsonNode dataset) throws Exception { ObjectMapper om = new ObjectMapper(); om.setPropertyNamingStrategy(PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); DatasetRecord record = om.convertValue(dataset, DatasetRecord.class); if (record != null) { Map<String, Object> params = new HashMap<>(); params.put("urn", record.getUrn()); try { Map<String, Object> result = JdbcUtil.wherehowsNamedJdbcTemplate.queryForMap(GET_DATASET_BY_URN, params); updateDataset(dataset); } catch (EmptyResultDataAccessException e) { insertDataset(dataset); } } } public static void updateDataset(JsonNode dataset) throws Exception { ObjectMapper om = new ObjectMapper(); om.setPropertyNamingStrategy(PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); DatasetRecord record = om.convertValue(dataset, DatasetRecord.class); if (record.getRefDatasetUrn() != null) { Map<String, Object> refDataset = getDatasetByUrn(record.getRefDatasetUrn()); // Find ref dataset id if (refDataset != null) { record.setRefDatasetId(((Long) refDataset.get("id")).intValue()); } } // Find layout id if (record.getSamplePartitionFullPath() != null) { PartitionPatternMatcher ppm = new PartitionPatternMatcher(PartitionLayoutDao.getPartitionLayouts()); record.setPartitionLayoutPatternId(ppm.analyze(record.getSamplePartitionFullPath())); } DatabaseWriter dw = new DatabaseWriter(JdbcUtil.wherehowsJdbcTemplate, "dict_dataset"); dw.update(record.toUpdateDatabaseValue(), record.getUrn()); dw.close(); } public static int getDatasetDependencies(Long datasetId, String topologySortId, int level, List<DatasetDependencyRecord> depends) { if (depends == null) { depends = new ArrayList<DatasetDependencyRecord>(); } List<Map<String, Object>> rows = null; rows = JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_DEPENDS_VIEW, datasetId); int index = 1; if (rows != null) { for (Map row : rows) { DatasetDependencyRecord datasetDependencyRecord = new DatasetDependencyRecord(); datasetDependencyRecord.dataset_id = (Long) row.get("mapped_object_dataset_id"); String objectName = (String) row.get("mapped_object_name"); if (StringUtils.isNotBlank(objectName)) { String[] info = objectName.split("/"); if (info != null && info.length == 3) { datasetDependencyRecord.database_name = info[1]; datasetDependencyRecord.table_name = info[2]; } } datasetDependencyRecord.level_from_root = level; datasetDependencyRecord.type = (String) row.get("mapped_object_sub_type"); datasetDependencyRecord.ref_obj_location = objectName; datasetDependencyRecord.ref_obj_type = (String) row.get("mapped_object_type"); datasetDependencyRecord.topology_sort_id = topologySortId + Integer.toString((index++) * 100); datasetDependencyRecord.next_level_dependency_count = getDatasetDependencies( datasetDependencyRecord.dataset_id, datasetDependencyRecord.topology_sort_id, level + 1, depends); depends.add(datasetDependencyRecord); } return rows.size(); } else { return 0; } } public static ObjectNode getDatasetDependency(JsonNode input) throws Exception { ObjectNode resultJson = Json.newObject(); String cluster = DEFAULT_CLUSTER_NAME; String datasetUri = null; String dbName = null; String tableName = null; boolean isHive = false; boolean isDalids = false; if (input != null && input.isContainerNode()) { if (input.has(CLUSTER_NAME_KEY)) { cluster = input.get(CLUSTER_NAME_KEY).asText(); } if (input.has(DATASET_URI_KEY)) { datasetUri = input.get(DATASET_URI_KEY).asText(); } } if (StringUtils.isBlank(datasetUri)) { resultJson.put("return_code", 404); resultJson.put("message", "Wrong input format! Missing dataset uri"); return resultJson; } Integer index = -1; if ((index = datasetUri.indexOf(HIVE_PREFIX_WITH_3_SLASH)) != -1) { isHive = true; String tmp = datasetUri.substring(index + HIVE_PREFIX_WITH_3_SLASH.length()); String[] info = tmp.split("\\.|/"); if (info != null && info.length == 2) { dbName = info[0]; tableName = info[1]; } } else if ((index = datasetUri.indexOf(DALIDS_PREFIX_WITH_3_SLASH)) != -1) { isDalids = true; String tmp = datasetUri.substring(index + DALIDS_PREFIX_WITH_3_SLASH.length()); String[] info = tmp.split("\\.|/"); if (info != null && info.length == 2) { dbName = info[0]; tableName = info[1]; } } else if ((index = datasetUri.indexOf(HIVE_PREFIX_WITH_2_SLASH)) != -1) { isHive = true; String tmp = datasetUri.substring(index + HIVE_PREFIX_WITH_2_SLASH.length()); String[] info = tmp.split("\\.|/"); if (info != null && info.length == 3) { cluster = info[0]; dbName = info[1]; tableName = info[2]; } } else if ((index = datasetUri.indexOf(DALIDS_PREFIX_WITH_2_SLASH)) != -1) { isDalids = true; String tmp = datasetUri.substring(index + DALIDS_PREFIX_WITH_2_SLASH.length()); String[] info = tmp.split("\\.|/"); if (info != null && info.length == 3) { cluster = info[0]; dbName = info[1]; tableName = info[2]; } } else if (datasetUri.indexOf('.') != -1) { index = datasetUri.indexOf(':'); String tmp = datasetUri; if (index != -1) { cluster = datasetUri.substring(0, index); tmp = datasetUri.substring(index + 1); } String[] info = tmp.split("\\.|/"); if (info != null && info.length == 2) { dbName = info[0]; tableName = info[1]; } } if (StringUtils.isBlank(cluster) || StringUtils.isBlank(dbName) || StringUtils.isBlank(tableName)) { resultJson.put("return_code", 404); resultJson.put("message", "Wrong input format! Missing dataset uri"); return resultJson; } String sqlQuery = null; List<Map<String, Object>> rows = null; if (isHive) { rows = JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_ID_IN_MAP_TABLE_WITH_TYPE_AND_CLUSTER, "/" + dbName + "/" + tableName, "hive", cluster); } else if (isDalids) { rows = JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_ID_IN_MAP_TABLE_WITH_TYPE_AND_CLUSTER, "/" + dbName + "/" + tableName, "dalids", cluster); } else { rows = JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_ID_IN_MAP_TABLE_WITH_CLUSTER, "/" + dbName + "/" + tableName, cluster); } Long datasetId = 0L; String urn = null; String datasetType = null; String deploymentTier = null; String dataCenter = null; String serverCluster = null; if (rows != null && rows.size() > 0) { for (Map row : rows) { datasetId = (Long) row.get("dataset_id"); urn = (String) row.get("urn"); datasetType = (String) row.get("dataset_type"); if (datasetType.equalsIgnoreCase("hive")) { isHive = true; } else if (datasetType.equalsIgnoreCase("dalids")) { isDalids = true; } deploymentTier = (String) row.get("deployment_tier"); dataCenter = (String) row.get("data_center"); serverCluster = (String) row.get("server_cluster"); break; } } else { resultJson.put("return_code", 200); resultJson.put("message", "Dependency information is not available."); return resultJson; } List<DatasetDependencyRecord> depends = new ArrayList<DatasetDependencyRecord>(); getDatasetDependencies(datasetId, "", 1, depends); int leafLevelDependencyCount = 0; if (depends.size() > 0) { for (DatasetDependencyRecord d : depends) { if (d.next_level_dependency_count == 0) { leafLevelDependencyCount++; } } } StringBuilder inputUri = new StringBuilder(""); if (isHive) { inputUri.append("hive://"); } else if (isDalids) { inputUri.append("dalids://"); } inputUri.append(cluster + "/" + dbName + "/" + tableName); resultJson.put("return_code", 200); resultJson.put("deployment_tier", deploymentTier); resultJson.put("data_center", dataCenter); resultJson.put("cluster", StringUtils.isNotBlank(serverCluster) ? serverCluster : cluster); resultJson.put("dataset_type", datasetType); resultJson.put("database_name", dbName); resultJson.put("table_name", tableName); resultJson.put("urn", urn); resultJson.put("dataset_id", datasetId); resultJson.put("input_uri", inputUri.toString()); resultJson.set("dependencies", Json.toJson(depends)); resultJson.put("leaf_level_dependency_count", leafLevelDependencyCount); return resultJson; } // public static ObjectNode getDatasetUrnForPropertiesLike(String properties) { ObjectNode result = Json.newObject(); List<String> datasetUrns = new ArrayList<String>(); if (StringUtils.isNotBlank(properties)) { Map<String, Object> params = new HashMap<>(); params.put("properties", properties); List<Map<String, Object>> rows = null; rows = JdbcUtil.wherehowsNamedJdbcTemplate.queryForList(GET_DATASET_URN_PROPERTIES_LIKE_EXPR, params); for (Map row : rows) { String datasetUrn = (String) row.get("urn"); datasetUrns.add(datasetUrn); } result.put("count", datasetUrns.size()); result.set("urns", Json.toJson(datasetUrns)); } return result; } // public static List<Map<String, Object>> getDatasetDependents(Long datasetId) { if (datasetId > 0) { return JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_ID, datasetId); } return null; } // public static List<Map<String, Object>> getDatasetDependents(String dataPlatform, String path) { if (path.length() > 0 && dataPlatform.length() > 0) { String child_path = path + "/%"; return JdbcUtil.wherehowsJdbcTemplate.queryForList(GET_DATASET_DEPENDENTS_IN_OBJ_MAP_TABLE_BY_NAME, dataPlatform, path, child_path); } return null; } }