Java tutorial: Apache Hive SQLOperation (org.apache.hive.service.cli.operation)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.service.cli.operation;

import java.io.IOException;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.security.PrivilegedExceptionAction;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.OperationLog;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.OperationState;
import org.apache.hive.service.cli.RowSet;
import org.apache.hive.service.cli.RowSetFactory;
import org.apache.hive.service.cli.TableSchema;
import org.apache.hive.service.cli.session.HiveSession;
import org.apache.hive.service.server.ThreadWithGarbageCleanup;

/**
 * SQLOperation.
 *
 */
public class SQLOperation extends ExecuteStatementOperation {

  private Driver driver = null;
  private CommandProcessorResponse response;
  private TableSchema resultSchema = null;
  private Schema mResultSchema = null;
  private SerDe serde = null;
  private boolean fetchStarted = false;

  public SQLOperation(HiveSession parentSession, String statement, Map<String, String> confOverlay,
      boolean runInBackground) {
    // TODO: call setRemoteUser in ExecuteStatementOperation or higher.
    super(parentSession, statement, confOverlay, runInBackground);
  }

  /***
   * Compile the query and extract metadata
   * @param sqlOperationConf
   * @throws HiveSQLException
   */
  public void prepare(HiveConf sqlOperationConf) throws HiveSQLException {
    setState(OperationState.RUNNING);
    try {
      driver = new Driver(sqlOperationConf, getParentSession().getUserName());

      // set the operation handle information in Driver, so that thrift API users
      // can use the operation handle they receive, to lookup query information in
      // Yarn ATS
      String guid64 = Base64
          .encodeBase64URLSafeString(getHandle().getHandleIdentifier().toTHandleIdentifier().getGuid())
          .trim();
      driver.setOperationId(guid64);

      // In Hive server mode, we are not able to retry in the FetchTask
      // case, when calling fetch queries since execute() has returned.
      // For now, we disable the test attempts.
      driver.setTryCount(Integer.MAX_VALUE);

      String subStatement = new VariableSubstitution().substitute(sqlOperationConf, statement);
      response = driver.compileAndRespond(subStatement);
      if (0 != response.getResponseCode()) {
        throw toSQLException("Error while compiling statement", response);
      }

      mResultSchema = driver.getSchema();

      // hasResultSet should be true only if the query has a FetchTask
      // "explain" is an exception for now
      if (driver.getPlan().getFetchTask() != null) {
        //Schema has to be set
        if (mResultSchema == null || !mResultSchema.isSetFieldSchemas()) {
          throw new HiveSQLException("Error compiling query: Schema and FieldSchema " +
              "should be set when query plan has a FetchTask");
        }
        resultSchema = new TableSchema(mResultSchema);
        setHasResultSet(true);
      } else {
        setHasResultSet(false);
      }
      // Set hasResultSet true if the plan has ExplainTask
      // TODO explain should use a FetchTask for reading
      for (Task<? extends Serializable> task : driver.getPlan().getRootTasks()) {
        if (task.getClass() == ExplainTask.class) {
          resultSchema = new TableSchema(mResultSchema);
          setHasResultSet(true);
          break;
        }
      }
    } catch (HiveSQLException e) {
      setState(OperationState.ERROR);
      throw e;
    } catch (Exception e) {
      setState(OperationState.ERROR);
      throw new HiveSQLException("Error running query: " + e.toString(), e);
    }
  }

  private void runQuery(HiveConf sqlOperationConf) throws HiveSQLException {
    try {
      // In Hive server mode, we are not able to retry in the FetchTask
      // case, when calling fetch queries since execute() has returned.
      // For now, we disable the test attempts.
      driver.setTryCount(Integer.MAX_VALUE);
      response = driver.run();
      if (0 != response.getResponseCode()) {
        throw toSQLException("Error while processing statement", response);
      }
    } catch (HiveSQLException e) {
      // If the operation was cancelled by another thread,
      // Driver#run will return a non-zero response code.
      // We will simply return if the operation state is CANCELED,
      // otherwise throw an exception
      if (getStatus().getState() == OperationState.CANCELED) {
        return;
      } else {
        setState(OperationState.ERROR);
        throw e;
      }
    } catch (Exception e) {
      setState(OperationState.ERROR);
      throw new HiveSQLException("Error running query: " + e.toString(), e);
    }
    setState(OperationState.FINISHED);
  }

  @Override
  public void runInternal() throws HiveSQLException {
    setState(OperationState.PENDING);
    final HiveConf opConfig = getConfigForOperation();
    prepare(opConfig);
    if (!shouldRunAsync()) {
      runQuery(opConfig);
    } else {
      // We'll pass ThreadLocals in the background thread from the foreground (handler) thread
      final SessionState parentSessionState = SessionState.get();
      // ThreadLocal Hive object needs to be set in background thread.
      // The metastore client in Hive is associated with the right user.
      final Hive parentHive = getSessionHive();
      // Current UGI will get used by metastore when metastore is in embedded mode
      // So this needs to get passed to the new background thread
      final UserGroupInformation currentUGI = getCurrentUGI(opConfig);

      // Runnable impl to call runInternal asynchronously,
      // from a different thread
      Runnable backgroundOperation = new Runnable() {
        @Override
        public void run() {
          PrivilegedExceptionAction<Object> doAsAction = new PrivilegedExceptionAction<Object>() {
            @Override
            public Object run() throws HiveSQLException {
              Hive.set(parentHive);
              SessionState.setCurrentSessionState(parentSessionState);
              // Set current OperationLog in this async thread for keeping on saving query log.
              registerCurrentOperationLog();
              try {
                runQuery(opConfig);
              } catch (HiveSQLException e) {
                setOperationException(e);
                LOG.error("Error running hive query: ", e);
              } finally {
                unregisterOperationLog();
              }
              return null;
            }
          };

          try {
            currentUGI.doAs(doAsAction);
          } catch (Exception e) {
            setOperationException(new HiveSQLException(e));
            LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e);
          } finally {
            /**
             * We'll cache the ThreadLocal RawStore object for this background thread for an orderly cleanup
             * when this thread is garbage collected later.
             * @see org.apache.hive.service.server.ThreadWithGarbageCleanup#finalize()
             */
            if (ThreadWithGarbageCleanup.currentThread() instanceof ThreadWithGarbageCleanup) {
              ThreadWithGarbageCleanup currentThread =
                  (ThreadWithGarbageCleanup) ThreadWithGarbageCleanup.currentThread();
              currentThread.cacheThreadLocalRawStore();
            }
          }
        }
      };

      try {
        // This submit blocks if no background threads are available to run this operation
        Future<?> backgroundHandle =
            getParentSession().getSessionManager().submitBackgroundOperation(backgroundOperation);
        setBackgroundHandle(backgroundHandle);
      } catch (RejectedExecutionException rejected) {
        setState(OperationState.ERROR);
        throw new HiveSQLException("The background threadpool cannot accept" +
            " new task for execution, please retry the operation", rejected);
      }
    }
  }

  /**
   * Returns the current UGI on the stack
   * @param opConfig
   * @return UserGroupInformation
   * @throws HiveSQLException
   */
  private UserGroupInformation getCurrentUGI(HiveConf opConfig) throws HiveSQLException {
    try {
      return Utils.getUGI();
    } catch (Exception e) {
      throw new HiveSQLException("Unable to get current user", e);
    }
  }

  /**
   * Returns the ThreadLocal Hive for the current thread
   * @return Hive
   * @throws HiveSQLException
   */
  private Hive getSessionHive() throws HiveSQLException {
    try {
      return Hive.get();
    } catch (HiveException e) {
      throw new HiveSQLException("Failed to get ThreadLocal Hive object", e);
    }
  }

  private void registerCurrentOperationLog() {
    if (isOperationLogEnabled) {
      if (operationLog == null) {
        LOG.warn("Failed to get current OperationLog object of Operation: " +
            getHandle().getHandleIdentifier());
        isOperationLogEnabled = false;
        return;
      }
      OperationLog.setCurrentOperationLog(operationLog);
    }
  }

  private void cleanup(OperationState state) throws HiveSQLException {
    setState(state);
    if (shouldRunAsync()) {
      Future<?> backgroundHandle = getBackgroundHandle();
      if (backgroundHandle != null) {
        backgroundHandle.cancel(true);
      }
    }
    if (driver != null) {
      driver.close();
      driver.destroy();
    }
    driver = null;
    SessionState ss = SessionState.get();
    if (ss.getTmpOutputFile() != null) {
      ss.getTmpOutputFile().delete();
    }
  }

  @Override
  public void cancel() throws HiveSQLException {
    cleanup(OperationState.CANCELED);
  }
  @Override
  public void close() throws HiveSQLException {
    cleanup(OperationState.CLOSED);
    cleanupOperationLog();
  }

  @Override
  public TableSchema getResultSetSchema() throws HiveSQLException {
    assertState(OperationState.FINISHED);
    if (resultSchema == null) {
      resultSchema = new TableSchema(driver.getSchema());
    }
    return resultSchema;
  }

  private final transient List<Object> convey = new ArrayList<Object>();

  @Override
  public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException {
    validateDefaultFetchOrientation(orientation);
    assertState(OperationState.FINISHED);

    RowSet rowSet = RowSetFactory.create(resultSchema, getProtocolVersion());

    try {
      /* if client is requesting fetch-from-start and it's not the first time reading from this operation
       * then reset the fetch position to beginning
       */
      if (orientation.equals(FetchOrientation.FETCH_FIRST) && fetchStarted) {
        driver.resetFetch();
      }
      fetchStarted = true;
      driver.setMaxRows((int) maxRows);
      if (driver.getResults(convey)) {
        return decode(convey, rowSet);
      }
      return rowSet;
    } catch (IOException e) {
      throw new HiveSQLException(e);
    } catch (CommandNeedRetryException e) {
      throw new HiveSQLException(e);
    } catch (Exception e) {
      throw new HiveSQLException(e);
    } finally {
      convey.clear();
    }
  }

  private RowSet decode(List<Object> rows, RowSet rowSet) throws Exception {
    if (driver.isFetchingTable()) {
      return prepareFromRow(rows, rowSet);
    }
    return decodeFromString(rows, rowSet);
  }

  // already encoded to thrift-able object in ThriftFormatter
  private RowSet prepareFromRow(List<Object> rows, RowSet rowSet) throws Exception {
    for (Object row : rows) {
      rowSet.addRow((Object[]) row);
    }
    return rowSet;
  }

  private RowSet decodeFromString(List<Object> rows, RowSet rowSet)
      throws SQLException, SerDeException {
    getSerDe();
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object[] deserializedFields = new Object[fieldRefs.size()];
    Object rowObj;
    ObjectInspector fieldOI;

    int protocol = getProtocolVersion().getValue();
    for (Object rowString : rows) {
      try {
        rowObj = serde.deserialize(new BytesWritable(((String) rowString).getBytes("UTF-8")));
      } catch (UnsupportedEncodingException e) {
        throw new SerDeException(e);
      }
      for (int i = 0; i < fieldRefs.size(); i++) {
        StructField fieldRef = fieldRefs.get(i);
        fieldOI = fieldRef.getFieldObjectInspector();
        Object fieldData = soi.getStructFieldData(rowObj, fieldRef);
        deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol);
      }
      rowSet.addRow(deserializedFields);
    }
    return rowSet;
  }

  private SerDe getSerDe() throws SQLException {
    if (serde != null) {
      return serde;
    }
    try {
      List<FieldSchema> fieldSchemas = mResultSchema.getFieldSchemas();
      StringBuilder namesSb = new StringBuilder();
      StringBuilder typesSb = new StringBuilder();

      if (fieldSchemas != null && !fieldSchemas.isEmpty()) {
        for (int pos = 0; pos < fieldSchemas.size(); pos++) {
          if (pos != 0) {
            namesSb.append(",");
            typesSb.append(",");
          }
          namesSb.append(fieldSchemas.get(pos).getName());
          typesSb.append(fieldSchemas.get(pos).getType());
        }
      }
      String names = namesSb.toString();
      String types = typesSb.toString();

      serde = new LazySimpleSerDe();
      Properties props = new Properties();
      if (names.length() > 0) {
        LOG.debug("Column names: " + names);
        props.setProperty(serdeConstants.LIST_COLUMNS, names);
      }
      if (types.length() > 0) {
        LOG.debug("Column types: " + types);
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types);
      }
      SerDeUtils.initializeSerDe(serde, new HiveConf(), props, null);
    } catch (Exception ex) {
      ex.printStackTrace();
      throw new SQLException("Could not create ResultSet: " + ex.getMessage(), ex);
    }
    return serde;
  }

  /**
   * If there are query specific settings to overlay, then create a copy of config
   * There are two cases we need to clone the session config that's being passed to hive driver
   * 1. Async query -
   *    If the client changes a config setting, that shouldn't reflect in the execution already underway
   * 2. confOverlay -
   *    The query specific settings should only be applied to the query config and not session
   * @return new configuration
   * @throws HiveSQLException
   */
  private HiveConf getConfigForOperation() throws HiveSQLException {
    HiveConf sqlOperationConf = getParentSession().getHiveConf();
    if (!getConfOverlay().isEmpty() || shouldRunAsync()) {
      // clone the parent session config for this query
      sqlOperationConf = new HiveConf(sqlOperationConf);

      // apply overlay query specific settings, if any
      for (Map.Entry<String, String> confEntry : getConfOverlay().entrySet()) {
        try {
          sqlOperationConf.verifyAndSet(confEntry.getKey(), confEntry.getValue());
        } catch (IllegalArgumentException e) {
          throw new HiveSQLException("Error applying statement specific settings", e);
        }
      }
    }
    return sqlOperationConf;
  }
}
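
For orientation, the snippet below is a minimal, hypothetical sketch of how a caller might drive the lifecycle shown above (construct, runInternal, fetch, close). It assumes an already-open HiveSession obtained elsewhere and a synchronous run (runInBackground = false); the class name SQLOperationExample, the method runExample, the table name src, and the batch size of 100 are illustrative assumptions, not part of the file above. In HiveServer2 itself, operations are created and tracked by the session and operation manager rather than constructed directly like this.

// Hypothetical usage sketch (not part of SQLOperation.java).
import java.util.Collections;

import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.RowSet;
import org.apache.hive.service.cli.operation.SQLOperation;
import org.apache.hive.service.cli.session.HiveSession;

public class SQLOperationExample {

  public static void runExample(HiveSession session) throws HiveSQLException {
    // Compile and execute the statement in the calling thread (no background submit).
    SQLOperation op = new SQLOperation(session, "SELECT * FROM src",
        Collections.<String, String>emptyMap(), false);
    try {
      op.runInternal();            // prepare() compiles the query, runQuery() executes it
      if (op.hasResultSet()) {     // inherited from the Operation base class
        // Pull rows in batches of 100 until the driver has nothing left to return.
        RowSet batch;
        do {
          batch = op.getNextRowSet(FetchOrientation.FETCH_NEXT, 100);
        } while (batch.numRows() > 0);
      }
    } finally {
      op.close();                  // cleanup(CLOSED): cancels background work, destroys the Driver
    }
  }
}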