Java tutorial: HadoopAdaptor.java (com.vmware.vhadoop.vhm.hadoop)
/***************************************************************************
 * Copyright (c) 2013 VMware, Inc. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ***************************************************************************/

package com.vmware.vhadoop.vhm.hadoop;

import static com.vmware.vhadoop.vhm.hadoop.HadoopErrorCodes.ERROR_CATCHALL;
import static com.vmware.vhadoop.vhm.hadoop.HadoopErrorCodes.ERROR_COMMAND_NOT_FOUND;
import static com.vmware.vhadoop.vhm.hadoop.HadoopErrorCodes.ERROR_EXCESS_TTS;
import static com.vmware.vhadoop.vhm.hadoop.HadoopErrorCodes.ERROR_FEWER_TTS;
import static com.vmware.vhadoop.vhm.hadoop.HadoopErrorCodes.SUCCESS;
import static com.vmware.vhadoop.vhm.hadoop.HadoopErrorCodes.UNKNOWN_ERROR;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.io.IOUtils;

import com.vmware.vhadoop.api.vhm.HadoopActions;
import com.vmware.vhadoop.util.CompoundStatus;
import com.vmware.vhadoop.util.CompoundStatus.TaskStatus;
import com.vmware.vhadoop.util.ExternalizedParameters;
import com.vmware.vhadoop.util.ThreadLocalCompoundStatus;
import com.vmware.vhadoop.vhm.hadoop.HadoopConnection.HadoopConnectionProperties;
import com.vmware.vhadoop.vhm.hadoop.HadoopErrorCodes.ParamTypes;
import com.vmware.vhadoop.vhm.hadoop.SshUtilities.Credentials;

/**
 * Class which represents the real implementation of HadoopActions
 * The class should be able to deal with multiple clusters and should have a HadoopConnection instance for each one.
 * The specifics of SSH and SCP are all handled in the HadoopConnection
 *
 */
public class HadoopAdaptor implements HadoopActions {

   private static final Logger _log = Logger.getLogger(HadoopAdaptor.class.getName());

   private final Map<String, HadoopConnection> _connections;
   private final HadoopErrorCodes _errorCodes;
   private final Credentials _credentials;
   private final JTConfigInfo _jtConfig;
   private final HadoopConnectionProperties _connectionProperties;         /* TODO: Provide setter? If not, make local */
   private final Map<String, Map<ParamTypes, String>> _errorParamValues;   /* TODO: Will need one per connection/cluster */
   private final ThreadLocalCompoundStatus _threadLocalStatus;
   private final SshUtilities _sshUtils;

   private final int JOB_TRACKER_DEFAULT_SSH_PORT =
         ExternalizedParameters.get().getInt("JOB_TRACKER_DEFAULT_SSH_PORT");
   private final String JOB_TRACKER_SCP_READ_PERMS =
         ExternalizedParameters.get().getString("JOB_TRACKER_SCP_READ_PERMS");
   private final String JOB_TRACKER_SCP_EXECUTE_PERMS =
         ExternalizedParameters.get().getString("JOB_TRACKER_SCP_EXECUTE_PERMS");
   private final int JOB_TRACKER_SSH_CONNECTION_CACHE_SIZE =
         ExternalizedParameters.get().getInt("JOB_TRACKER_SSH_CONNECTION_CACHE_SIZE");
   private final String JOB_TRACKER_DECOM_LIST_FILE_NAME =
         ExternalizedParameters.get().getString("JOB_TRACKER_DECOM_LIST_FILE_NAME");
   private final String JOB_TRACKER_DECOM_SCRIPT_FILE_NAME =
         ExternalizedParameters.get().getString("JOB_TRACKER_DECOM_SCRIPT_FILE_NAME");
   private final String JOB_TRACKER_RECOM_LIST_FILE_NAME =
         ExternalizedParameters.get().getString("JOB_TRACKER_RECOM_LIST_FILE_NAME");
   private final String JOB_TRACKER_RECOM_SCRIPT_FILE_NAME =
         ExternalizedParameters.get().getString("JOB_TRACKER_RECOM_SCRIPT_FILE_NAME");
   private final String JOB_TRACKER_CHECK_SCRIPT_FILE_NAME =
         ExternalizedParameters.get().getString("JOB_TRACKER_CHECK_SCRIPT_FILE_NAME");
   private final long JOB_TRACKER_CHECK_SCRIPT_MIN_RETRY_MILLIS =
         ExternalizedParameters.get().getLong("JOB_TRACKER_CHECK_SCRIPT_MIN_RETRY_MILLIS");
   private final String DEFAULT_SCRIPT_SRC_PATH =
         ExternalizedParameters.get().getString("DEFAULT_SCRIPT_SRC_PATH");
   private final String JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH =
         ExternalizedParameters.get().getString("JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH");
   private final int ACTIVE_TASK_TRACKERS_CHECK_RETRY_ITERATIONS =
         ExternalizedParameters.get().getInt("ACTIVE_TASK_TRACKERS_CHECK_RETRY_ITERATIONS");

   static final String STATUS_INTERPRET_ERROR_CODE = "interpretErrorCode";
   public static final String ACTIVE_TTS_STATUS_KEY = "getActiveStatus";

   public HadoopAdaptor(Credentials credentials, JTConfigInfo jtConfig, ThreadLocalCompoundStatus tlcs) {
      _connectionProperties = getDefaultConnectionProperties();
      _credentials = credentials;
      _jtConfig = jtConfig;
      _errorCodes = new HadoopErrorCodes();
      _errorParamValues = new HashMap<String, Map<ParamTypes, String>>();
      _connections = new HashMap<String, HadoopConnection>();
      _threadLocalStatus = tlcs;
      _sshUtils = new SshConnectionCache(JOB_TRACKER_SSH_CONNECTION_CACHE_SIZE);
   }

   private CompoundStatus getCompoundStatus() {
      if (_threadLocalStatus == null) {
         return new CompoundStatus("DUMMY_STATUS");
      }
      return _threadLocalStatus.get();
   }

   private void setErrorParamValue(HadoopClusterInfo cluster, ParamTypes paramType, String paramValue) {
      Map<ParamTypes, String> paramValues = _errorParamValues.get(cluster.getClusterId());
      if (paramValues == null) {
         paramValues = new HashMap<ParamTypes, String>();
         _errorParamValues.put(cluster.getClusterId(), paramValues);
      }
      paramValues.put(paramType, paramValue);
   }

   private Map<ParamTypes, String> getErrorParamValues(HadoopClusterInfo cluster) {
      return _errorParamValues.get(cluster.getClusterId());
   }

   private HadoopConnectionProperties getDefaultConnectionProperties() {
      return new HadoopConnectionProperties() {
         @Override
         public int getSshPort() {
            return JOB_TRACKER_DEFAULT_SSH_PORT;
         }

         @Override
         public String getScpReadPerms() {
            return JOB_TRACKER_SCP_READ_PERMS;
         }

         @Override
         public String getScpExecutePerms() {
            return JOB_TRACKER_SCP_EXECUTE_PERMS;
         }
      };
   }

   private HadoopConnection getConnectionForCluster(HadoopClusterInfo cluster) {
      if ((cluster == null) || (cluster.getJobTrackerDnsName() == null)) {
         return null;
      }
      HadoopConnection result = _connections.get(cluster.getClusterId());
      if (result == null || result.isStale(cluster)) {
         /* TODO: SshUtils could be a single shared thread-safe object or non thread-safe object per connection */
         result = getHadoopConnection(cluster, _connectionProperties);
         result.setHadoopCredentials(_credentials);
         result.setHadoopExcludeTTPath(_jtConfig.getExcludeTTPath());
         result.setHadoopHomePath(_jtConfig.getHadoopHomePath());
         _connections.put(cluster.getClusterId(), result);
      }
      setErrorParamValue(cluster, ParamTypes.HADOOP_HOME, result.getHadoopHome());
      setErrorParamValue(cluster, ParamTypes.JOBTRACKER, result.getJobTrackerAddr());
      setErrorParamValue(cluster, ParamTypes.EXCLUDE_FILE, result.getExcludeFilePath());
      return result;
   }

   private boolean isValidTTList(Set<String> ttDnsNames) {
      if ((ttDnsNames == null) || (ttDnsNames.isEmpty())) {
         _log.log(Level.SEVERE, "VHM: validating task tracker list failed while de/recommissioning - the list is empty");
         return false;
      }
      for (String tt : ttDnsNames) {
         if (tt == null) {
            _log.log(Level.SEVERE, "VHM: validating task tracker list failed while de/recommissioning - null task tracker name");
            return false;
         }
         if (tt.length() == 0) {
            _log.log(Level.SEVERE, "VHM: validating task tracker list failed while de/recommissioning - blank task tracker name");
            return false;
         }
      }
      return true;
   }

   private String createVMList(Set<String> tts) {
      StringBuilder sb = new StringBuilder();
      for (String tt : tts) {
         sb.append(tt).append('\n');
      }
      return sb.toString();
   }

   private void setErrorParamsForCommand(HadoopClusterInfo cluster, String command, String drScript, String drList) {
      setErrorParamValue(cluster, ParamTypes.COMMAND, command);
      setErrorParamValue(cluster, ParamTypes.DRSCRIPT, drScript);
      setErrorParamValue(cluster, ParamTypes.DRLIST, drList);
   }

   private byte[] loadLocalScript(String fileName) {
      ClassLoader cl = HadoopAdaptor.class.getClassLoader();
      InputStream is = ((cl != null) && (fileName != null)) ? cl.getResourceAsStream(fileName) : null;
      if (is == null) {
         _log.log(Level.SEVERE, "VHM: class loader resource " + fileName + " is unavailable");
         return null;
      }
      byte[] result = null;
      try {
         result = IOUtils.toByteArray(is);
      } catch (IOException e) {
         _log.log(Level.SEVERE, "VHM: exception converting class loader resource " + fileName + " to byte array - " + e.getMessage());
         _log.log(Level.INFO, "VHM: exception converting class loader resource " + fileName + " to byte array", e);
      }
      try {
         is.close();
      } catch (IOException e) {
         _log.fine("VHM: exception closing stream for class loader resource " + fileName);
      }
      return result;
   }

/*
   private byte[] loadLocalScript(String fullLocalPath) {
      File file = new File(fullLocalPath);
      if (!file.exists()) {
         _log.log(Level.SEVERE, "File "+fullLocalPath+" does not exist!");
         return null;
      }
      try {
         FileInputStream fis = new FileInputStream(file);
         BufferedInputStream bis = new BufferedInputStream(fis);
         byte[] result = new byte[(int)file.length()];
         bis.read(result);
         bis.close();
         fis.close();
         return result;
      } catch (IOException e) {
         _log.log(Level.SEVERE, "Unexpected error reading file "+fullLocalPath, e);
      }
      return null;
   }
*/

   private int executeScriptWithCopyRetryOnFailure(HadoopConnection connection, String scriptFileName,
         String[] scriptArgs, ByteArrayOutputStream out) {
      int rc = -1;
      for (int i = 0; i < 2; i++) {
         /* ensure that we're operating with a clean output buffer */
         out.reset();
         rc = connection.executeScript(scriptFileName, JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH, scriptArgs, out);
         if (i == 0 && (rc == ERROR_COMMAND_NOT_FOUND || rc == ERROR_CATCHALL)) {
            _log.log(Level.INFO, scriptFileName + " not found...");
            // Changed this to accommodate using jar file...
            // String fullLocalPath = HadoopAdaptor.class.getClassLoader().getResource(scriptFileName).getPath();
            // byte[] scriptData = loadLocalScript(DEFAULT_SCRIPT_SRC_PATH + scriptFileName);
            // byte[] scriptData = loadLocalScript(fullLocalPath);
            byte[] scriptData = loadLocalScript(scriptFileName);
            if ((scriptData != null) && (connection.copyDataToJobTracker(scriptData,
                  JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH, scriptFileName, true) == 0)) {
               continue;
            }
         }
         break;
      }
      return rc;
   }

   private CompoundStatus decomRecomTTs(String opDesc, Set<String> ttDnsNames, HadoopClusterInfo cluster,
         String scriptFileName, String listFileName) {
      CompoundStatus status = new CompoundStatus("decomRecomTTs");
      if (!isValidTTList(ttDnsNames)) {
         String errorMsg = opDesc + " failed due to bad task tracker list";
         _log.log(Level.SEVERE, "<%C" + cluster.getClusterId() + "%C>: " + errorMsg);
         status.registerTaskFailed(false, errorMsg);
         return status;
      }
      String scriptRemoteFilePath = JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH + scriptFileName;
      String listRemoteFilePath = JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH + listFileName;
      HadoopConnection connection = getConnectionForCluster(cluster);
      if (connection != null) {
         setErrorParamsForCommand(cluster, opDesc.toLowerCase(), scriptRemoteFilePath, listRemoteFilePath);
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         String operationList = createVMList(ttDnsNames);
         int rc = connection.copyDataToJobTracker(operationList.getBytes(), JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH, listFileName, false);
         if (rc == 0) {
            rc = executeScriptWithCopyRetryOnFailure(connection, scriptFileName,
                  new String[] { listRemoteFilePath, connection.getExcludeFilePath(), connection.getHadoopHome() }, out);
         }
         status.addStatus(_errorCodes.interpretErrorCode(_log, rc, getErrorParamValues(cluster)));
      } else {
         status.registerTaskFailed(false, "could not create connection to job tracker for cluster");
      }
      return status;
   }

   @Override
   public void decommissionTTs(Set<String> ttDnsNames, HadoopClusterInfo cluster) {
      getCompoundStatus().addStatus(decomRecomTTs("Decommission", ttDnsNames, cluster,
            JOB_TRACKER_DECOM_SCRIPT_FILE_NAME, JOB_TRACKER_DECOM_LIST_FILE_NAME));
   }

   @Override
   public void recommissionTTs(Set<String> ttDnsNames, HadoopClusterInfo cluster) {
      getCompoundStatus().addStatus(decomRecomTTs("Recommission", ttDnsNames, cluster,
            JOB_TRACKER_RECOM_SCRIPT_FILE_NAME, JOB_TRACKER_RECOM_LIST_FILE_NAME));
   }

   @Override
   public Set<String> getActiveTTs(HadoopClusterInfo cluster, int totalTargetEnabled) {
      return getActiveTTs(cluster, totalTargetEnabled, getCompoundStatus());
   }

   protected Set<String> getActiveTTs(HadoopClusterInfo cluster, int totalTargetEnabled, CompoundStatus status) {
      HadoopConnection connection = getConnectionForCluster(cluster);
      if (connection == null) {
         return null;
      }
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      int rc = executeScriptWithCopyRetryOnFailure(connection, JOB_TRACKER_CHECK_SCRIPT_FILE_NAME,
            new String[] { "" + totalTargetEnabled, connection.getExcludeFilePath(), connection.getHadoopHome() }, out);
      _log.info("Error code from executing script " + rc);

      String[] unformattedList = out.toString().split("\n");
      Set<String> formattedList = new HashSet<String>();   //Note: set also avoids potential duplicate TTnames (e.g., when a TT is restarted without decommissioning)
      /* JG: Changing for-loop limit from unformattedList.length-1 to unformattedList.length since we now
         explicitly check for TTnames starting with "TT:" (No more @@@... issue) */
      for (int i = 0; i < unformattedList.length; i++) {
         //Expecting TTs to be annotated as "TT: ttName"
         if (unformattedList[i].startsWith("TT:")) {
            _log.fine("Adding TT: " + unformattedList[i].split("\\s+")[1]);
            formattedList.add(unformattedList[i].split("\\s+")[1]);
         }
         //formattedList.add(unformattedList[i].trim());
      }

      _log.info("Active TTs so far: " + Arrays.toString(formattedList.toArray()));
      _log.info("#Active TTs: " + formattedList.size() + "\t #Target TTs: " + totalTargetEnabled);

      status.addStatus(_errorCodes.interpretErrorCode(_log, rc, getErrorParamValues(cluster)));
      return formattedList;
   }

   @Override
   /* Returns the set of active dnsNames based on input Set */
   public Set<String> checkTargetTTsSuccess(String opType, Set<String> ttDnsNames, int totalTargetEnabled, HadoopClusterInfo cluster) {
      String scriptRemoteFilePath = JOB_TRACKER_DEFAULT_SCRIPT_DEST_PATH + JOB_TRACKER_CHECK_SCRIPT_FILE_NAME;
      String listRemoteFilePath = null;
      String opDesc = "checkTargetTTsSuccess";

      if (ttDnsNames == null) {
         _log.warning("No valid TT names provided");
         return null;
      }
      /* We don't expect null or empty values, but weed out anyway */
      ttDnsNames.remove(null);
      ttDnsNames.remove("");
      if (ttDnsNames.size() == 0) {
         _log.warning("No valid TT names provided");
         return null;
      }

      _log.log(Level.INFO, "Affected TTs: " + ttDnsNames);
      setErrorParamsForCommand(cluster, opDesc, scriptRemoteFilePath, listRemoteFilePath);

      int iterations = 0;
      CompoundStatus getActiveStatus = null;
      int rc = UNKNOWN_ERROR;
      Set<String> allActiveTTs = null;
      long lastCheckAttemptTime = Long.MAX_VALUE;
      do {
         if (iterations > 0) {
            /* 1141429: Ensure that if the script fails, there is a minimum wait before the next retry attempt */
            long millisSinceLastCheck = (System.currentTimeMillis() - lastCheckAttemptTime);
            long underWaitMillis = JOB_TRACKER_CHECK_SCRIPT_MIN_RETRY_MILLIS - millisSinceLastCheck;
            if (underWaitMillis > 0) {
               try {
_log.fine("Sleeping for underWaitMillis = " + underWaitMillis); Thread.sleep(underWaitMillis); } catch (InterruptedException e) { } } _log.log(Level.INFO, "Target TTs not yet achieved...checking again - " + iterations); _log.log(Level.INFO, "Affected TTs: " + ttDnsNames); } getActiveStatus = new CompoundStatus(ACTIVE_TTS_STATUS_KEY); lastCheckAttemptTime = System.currentTimeMillis(); allActiveTTs = getActiveTTs(cluster, totalTargetEnabled, getActiveStatus); //Declare success as long as the we manage to de/recommission only the TTs we set out to handle (rather than checking correctness for all TTs) if ((allActiveTTs != null) && ((opType.equals("Recommission") && allActiveTTs.containsAll(ttDnsNames)) || (opType.equals("Decommission") && ttDnsNames.retainAll(allActiveTTs) && ttDnsNames.isEmpty()))) { _log.log(Level.INFO, "All selected TTs correctly %sed", opType.toLowerCase()); rc = SUCCESS; break; } /* If there was an error reported by getActiveTTs... */ TaskStatus taskStatus = getActiveStatus.getFirstFailure(STATUS_INTERPRET_ERROR_CODE); if (taskStatus != null) { rc = taskStatus.getErrorCode(); } else { /* * JG: Sometimes we don't know the hostnames (e.g., localhost); in these cases as long as the check script returns success based * on target #TTs we are good. * TODO: Change check script to return success if #newly added + #current_enabled is met rather than target #TTs is met. This is * to address scenarios where there is a mismatch (#Active TTs != #poweredOn VMs) to begin with... * CHANGED: We have changed the time at which this function is invoked -- it gets invoked only when dns/hostnames are available. * So we no longer have this issue of not knowing hostnames and still meeting target #TTs. Our only successful exit is when the * TTs that have been explicitly asked to be checked, have been correctly de/recommissioned. * * rc = SUCCESS; //Note: removing this * * We also notice that in this case, where #Active TTs matches target, but all the requested TTs haven't been de/recommissioned yet, * the check script returns immediately (because it only looks for a match of these values, which is true here). So we recompute * target TTs based on latest information to essentially put back the delay... */ Set<String> deltaTTs = new HashSet<String>(ttDnsNames); if (opType.equals("Recommission")) { deltaTTs.removeAll(allActiveTTs); //get TTs that haven't been recommissioned yet... totalTargetEnabled = allActiveTTs.size() + deltaTTs.size(); } else { //optype = Decommission deltaTTs.retainAll(allActiveTTs); //get TTs that haven't been decommissioned yet... totalTargetEnabled = allActiveTTs.size() - deltaTTs.size(); } _log.log(Level.INFO, "Even though #ActiveTTs = #TargetTTs, not all requested TTs have been " + opType.toLowerCase() + "ed yet - Trying again with updated target: " + totalTargetEnabled); } /* Break out if there is an error other than the ones we expect to be resolved in a subsequent invocation of the check script */ if (rc != ERROR_FEWER_TTS && rc != ERROR_EXCESS_TTS && rc != UNKNOWN_ERROR) { break; } } while (iterations++ < ACTIVE_TASK_TRACKERS_CHECK_RETRY_ITERATIONS); getCompoundStatus().addStatus(_errorCodes.interpretErrorCode(_log, rc, getErrorParamValues(cluster))); if (rc != SUCCESS) { getActiveStatus.registerTaskFailed(false, "Check Test Failed"); getCompoundStatus().addStatus(getActiveStatus); } return allActiveTTs; } /** * Interception point for fault injection, etc. 
    * @return a HadoopConnection for the given cluster
    */
   protected HadoopConnection getHadoopConnection(HadoopClusterInfo cluster, HadoopConnectionProperties properties) {
      return new HadoopConnection(cluster, properties, _sshUtils);
   }

   @Override
   public boolean validateTtHostNames(Set<String> dnsNames) {
      return true;
   }
}
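
For orientation, below is a minimal, hypothetical caller of the class above. It is written as if it lived in the same package as HadoopAdaptor so that JTConfigInfo and HadoopClusterInfo can be referenced the same way the listing does. The HadoopAdaptor constructor and the decommissionTTs/checkTargetTTsSuccess calls are taken from the listing; the Credentials, JTConfigInfo and HadoopClusterInfo arguments, the example TaskTracker DNS names, and the target count of 8 are placeholders, not values from the project.

package com.vmware.vhadoop.vhm.hadoop;

import java.util.HashSet;
import java.util.Set;

import com.vmware.vhadoop.vhm.hadoop.SshUtilities.Credentials;

/**
 * Hypothetical usage sketch for HadoopAdaptor. The caller supplies the credential,
 * JobTracker config and cluster objects; their construction is not part of
 * HadoopAdaptor.java, so it is deliberately left out here.
 */
public class HadoopAdaptorUsageSketch {

   public static Set<String> decommissionTwoTaskTrackers(Credentials sshCredentials,
         JTConfigInfo jtConfig, HadoopClusterInfo cluster) {
      // Passing null for the ThreadLocalCompoundStatus is tolerated: getCompoundStatus()
      // in HadoopAdaptor falls back to a throwaway CompoundStatus("DUMMY_STATUS").
      HadoopAdaptor hadoop = new HadoopAdaptor(sshCredentials, jtConfig, null);

      // Hypothetical TaskTracker DNS names, purely for illustration.
      Set<String> ttsToDisable = new HashSet<String>();
      ttsToDisable.add("tt-vm-01.example.com");
      ttsToDisable.add("tt-vm-02.example.com");

      // Copies the decommission list and script to the JobTracker and runs the script.
      hadoop.decommissionTTs(ttsToDisable, cluster);

      // Re-runs the check script until only the expected TaskTrackers remain active.
      // The target count (8 here) is an assumed number of TTs that should stay enabled.
      return hadoop.checkTargetTTsSuccess("Decommission", ttsToDisable, 8, cluster);
   }
}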