Java tutorial
/* * Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package gobblin.salesforce; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.ListIterator; import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.utils.URIBuilder; import org.apache.http.message.BasicNameValuePair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.gson.Gson; import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.sforce.async.BatchInfo; import com.sforce.async.BatchStateEnum; import com.sforce.async.BulkConnection; import com.sforce.async.ConcurrencyMode; import com.sforce.async.ContentType; import com.sforce.async.JobInfo; import com.sforce.async.OperationEnum; import com.sforce.async.QueryResultList; import com.sforce.soap.partner.PartnerConnection; import com.sforce.ws.ConnectorConfig; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.WorkUnitState; import gobblin.password.PasswordManager; import gobblin.source.extractor.DataRecordException; import gobblin.source.extractor.exception.HighWatermarkException; import gobblin.source.extractor.exception.RecordCountException; import gobblin.source.extractor.exception.RestApiClientException; import gobblin.source.extractor.exception.RestApiConnectionException; import gobblin.source.extractor.exception.SchemaException; import gobblin.source.extractor.extract.Command; import gobblin.source.extractor.extract.CommandOutput; import gobblin.source.extractor.extract.jdbc.SqlQueryUtils; import gobblin.source.extractor.extract.restapi.RestApiCommand; import gobblin.source.extractor.extract.restapi.RestApiCommand.RestApiCommandType; import gobblin.source.extractor.extract.restapi.RestApiExtractor; import gobblin.source.extractor.resultset.RecordSet; import gobblin.source.extractor.resultset.RecordSetList; import gobblin.source.extractor.schema.Schema; import gobblin.source.extractor.utils.InputStreamCSVReader; import gobblin.source.extractor.utils.Utils; import gobblin.source.extractor.watermark.Predicate; import gobblin.source.extractor.watermark.WatermarkType; import gobblin.source.workunit.WorkUnit; /** * An implementation of salesforce extractor for to extract data from SFDC */ public class SalesforceExtractor extends RestApiExtractor { private static final String DEFAULT_SERVICES_DATA_PATH = "/services/data"; private static final String SOQL_RESOURCE = "/queryAll"; private static final String DEFAULT_AUTH_TOKEN_PATH = "/services/oauth2/token"; private static final String SALESFORCE_TIMESTAMP_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'.000Z'"; private static final String SALESFORCE_DATE_FORMAT = "yyyy-MM-dd"; private static final String SALESFORCE_HOUR_FORMAT = "HH"; private static final String SALESFORCE_SOAP_AUTH_SERVICE = "/services/Soap/u"; private static final String SALESFORCE_BULK_AUTH_SERVICE = "/services/async"; private static final Gson gson = new Gson(); private boolean pullStatus = true; private String nextUrl; private String servicesDataEnvPath; private BulkConnection bulkConnection = null; private boolean bulkApiInitialRun = true; private JobInfo bulkJob = new JobInfo(); private BatchInfo bulkBatchInfo = null; private BufferedReader bulkBufferedReader = null; private List<String> bulkResultIdList = new ArrayList<String>(); private int bulkResultIdCount = 0; private boolean bulkJobFinished = true; private List<String> bulkRecordHeader; private int bulkResultColumCount; private boolean newBulkResultSet = true; private int bulkRecordCount = 0; private Logger log = LoggerFactory.getLogger(SalesforceExtractor.class); public SalesforceExtractor(WorkUnitState state) { super(state); } public String getServicesDataEnvPath() { return servicesDataEnvPath; } public void setServicesDataEnvPath(String servicesDataEnvPath) { this.servicesDataEnvPath = servicesDataEnvPath; } /** * true is further pull required else false */ public void setPullStatus(boolean pullStatus) { this.pullStatus = pullStatus; } /** * url for the next pull from salesforce */ public void setNextUrl(String nextUrl) { this.nextUrl = nextUrl; } private boolean isBulkJobFinished() { return this.bulkJobFinished; } private void setBulkJobFinished(boolean bulkJobFinished) { this.bulkJobFinished = bulkJobFinished; } public boolean isNewBulkResultSet() { return newBulkResultSet; } public void setNewBulkResultSet(boolean newBulkResultSet) { this.newBulkResultSet = newBulkResultSet; } @Override public HttpEntity getAuthentication() throws RestApiConnectionException { this.log.debug("Authenticating salesforce"); String clientId = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_CLIENT_ID); String clientSecret = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_CLIENT_SECRET); String userName = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_USERNAME); String password = PasswordManager.getInstance(this.workUnit) .readPassword(this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_PASSWORD)); String securityToken = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_SECURITY_TOKEN); String host = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME); List<NameValuePair> formParams = new ArrayList<NameValuePair>(); formParams.add(new BasicNameValuePair("grant_type", "password")); formParams.add(new BasicNameValuePair("client_id", clientId)); formParams.add(new BasicNameValuePair("client_secret", clientSecret)); formParams.add(new BasicNameValuePair("username", userName)); formParams.add(new BasicNameValuePair("password", password + securityToken)); try { HttpPost post = new HttpPost(host + DEFAULT_AUTH_TOKEN_PATH); post.setEntity(new UrlEncodedFormEntity(formParams)); HttpResponse httpResponse = getHttpClient().execute(post); HttpEntity httpEntity = httpResponse.getEntity(); return httpEntity; } catch (Exception e) { throw new RestApiConnectionException("Failed to authenticate salesforce using user:" + userName + " and host:" + host + "; error-" + e.getMessage(), e); } } @Override public List<Command> getSchemaMetadata(String schema, String entity) throws SchemaException { this.log.debug("Build url to retrieve schema"); return constructGetCommand(this.getFullUri("/sobjects/" + entity.trim() + "/describe")); } @Override public JsonArray getSchema(CommandOutput<?, ?> response) throws SchemaException { this.log.info("Get schema from salesforce"); String output; Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator(); if (itr.hasNext()) { output = itr.next(); } else { throw new SchemaException("Failed to get schema from salesforce; REST response has no output"); } JsonArray fieldJsonArray = new JsonArray(); JsonElement element = gson.fromJson(output, JsonObject.class); JsonObject jsonObject = element.getAsJsonObject(); try { JsonArray array = jsonObject.getAsJsonArray("fields"); for (JsonElement columnElement : array) { JsonObject field = columnElement.getAsJsonObject(); Schema schema = new Schema(); schema.setColumnName(field.get("name").getAsString()); String dataType = field.get("type").getAsString(); String elementDataType = "string"; List<String> mapSymbols = null; JsonObject newDataType = this.convertDataType(field.get("name").getAsString(), dataType, elementDataType, mapSymbols); this.log.debug("ColumnName:" + field.get("name").getAsString() + "; old datatype:" + dataType + "; new datatype:" + newDataType); schema.setDataType(newDataType); schema.setLength(field.get("length").getAsLong()); schema.setPrecision(field.get("precision").getAsInt()); schema.setScale(field.get("scale").getAsInt()); schema.setNullable(field.get("nillable").getAsBoolean()); schema.setFormat(null); schema.setComment((field.get("label").isJsonNull() ? null : field.get("label").getAsString())); schema.setDefaultValue( (field.get("defaultValue").isJsonNull() ? null : field.get("defaultValue").getAsString())); schema.setUnique(field.get("unique").getAsBoolean()); String jsonStr = gson.toJson(schema); JsonObject obj = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject(); fieldJsonArray.add(obj); } } catch (Exception e) { throw new SchemaException("Failed to get schema from salesforce; error - " + e.getMessage(), e); } return fieldJsonArray; } @Override public List<Command> getHighWatermarkMetadata(String schema, String entity, String watermarkColumn, List<Predicate> predicateList) throws HighWatermarkException { this.log.debug("Build url to retrieve high watermark"); String query = "SELECT " + watermarkColumn + " FROM " + entity; String defaultPredicate = " " + watermarkColumn + " != null"; String defaultSortOrder = " ORDER BY " + watermarkColumn + " desc LIMIT 1"; String existingPredicate = ""; if (this.updatedQuery != null) { String queryLowerCase = this.updatedQuery.toLowerCase(); int startIndex = queryLowerCase.indexOf(" where "); if (startIndex > 0) { existingPredicate = this.updatedQuery.substring(startIndex); } } query = query + existingPredicate; String limitString = this.getLimitFromInputQuery(query); query = query.replace(limitString, ""); Iterator<Predicate> i = predicateList.listIterator(); while (i.hasNext()) { Predicate predicate = i.next(); query = SqlQueryUtils.addPredicate(query, predicate.getCondition()); } query = SqlQueryUtils.addPredicate(query, defaultPredicate); query = query + defaultSortOrder; this.log.info("QUERY: " + query); try { return constructGetCommand(this.getFullUri(this.getSoqlUrl(query))); } catch (Exception e) { throw new HighWatermarkException( "Failed to get salesforce url for high watermark; error - " + e.getMessage(), e); } } @Override public long getHighWatermark(CommandOutput<?, ?> response, String watermarkColumn, String format) throws HighWatermarkException { this.log.info("Get high watermark from salesforce"); String output; Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator(); if (itr.hasNext()) { output = itr.next(); } else { throw new HighWatermarkException( "Failed to get high watermark from salesforce; REST response has no output"); } JsonElement element = gson.fromJson(output, JsonObject.class); long high_ts; try { JsonObject jsonObject = element.getAsJsonObject(); JsonArray jsonArray = jsonObject.getAsJsonArray("records"); if (jsonArray.size() == 0) { return -1; } String value = jsonObject.getAsJsonArray("records").get(0).getAsJsonObject().get(watermarkColumn) .getAsString(); if (format != null) { SimpleDateFormat inFormat = new SimpleDateFormat(format); Date date = null; try { date = inFormat.parse(value); } catch (ParseException e) { log.error("ParseException: " + e.getMessage(), e); } SimpleDateFormat outFormat = new SimpleDateFormat("yyyyMMddHHmmss"); high_ts = Long.parseLong(outFormat.format(date)); } else { high_ts = Long.parseLong(value); } } catch (Exception e) { throw new HighWatermarkException( "Failed to get high watermark from salesforce; error - " + e.getMessage(), e); } return high_ts; } @Override public List<Command> getCountMetadata(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws RecordCountException { this.log.debug("Build url to retrieve source record count"); String existingPredicate = ""; if (this.updatedQuery != null) { String queryLowerCase = this.updatedQuery.toLowerCase(); int startIndex = queryLowerCase.indexOf(" where "); if (startIndex > 0) { existingPredicate = this.updatedQuery.substring(startIndex); } } String query = "SELECT COUNT() FROM " + entity + existingPredicate; String limitString = this.getLimitFromInputQuery(query); query = query.replace(limitString, ""); try { if (isNullPredicate(predicateList)) { this.log.info("QUERY: " + query); return constructGetCommand(this.getFullUri(this.getSoqlUrl(query))); } else { Iterator<Predicate> i = predicateList.listIterator(); while (i.hasNext()) { Predicate predicate = i.next(); query = SqlQueryUtils.addPredicate(query, predicate.getCondition()); } query = query + this.getLimitFromInputQuery(this.updatedQuery); this.log.info("QUERY: " + query); return constructGetCommand(this.getFullUri(this.getSoqlUrl(query))); } } catch (Exception e) { throw new RecordCountException( "Failed to get salesforce url for record count; error - " + e.getMessage(), e); } } @Override public long getCount(CommandOutput<?, ?> response) throws RecordCountException { this.log.info("Get source record count from salesforce"); String output; Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator(); if (itr.hasNext()) { output = itr.next(); } else { throw new RecordCountException("Failed to get count from salesforce; REST response has no output"); } JsonElement element = gson.fromJson(output, JsonObject.class); long count; try { JsonObject jsonObject = element.getAsJsonObject(); count = jsonObject.get("totalSize").getAsLong(); } catch (Exception e) { throw new RecordCountException("Failed to get record count from salesforce; error - " + e.getMessage(), e); } return count; } @Override public List<Command> getDataMetadata(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException { this.log.debug("Build url to retrieve data records"); String query = this.updatedQuery; String url = null; try { if (this.getNextUrl() != null && this.pullStatus == true) { url = this.getNextUrl(); } else { if (isNullPredicate(predicateList)) { this.log.info("QUERY:" + query); return constructGetCommand(this.getFullUri(this.getSoqlUrl(query))); } String limitString = this.getLimitFromInputQuery(query); query = query.replace(limitString, ""); Iterator<Predicate> i = predicateList.listIterator(); while (i.hasNext()) { Predicate predicate = i.next(); query = SqlQueryUtils.addPredicate(query, predicate.getCondition()); } if (Boolean.valueOf( this.workUnit.getProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_SPECIFIC_API_ACTIVE))) { query = SqlQueryUtils.addPredicate(query, "IsDeleted = true"); } query = query + limitString; this.log.info("QUERY: " + query); url = this.getFullUri(this.getSoqlUrl(query)); } return constructGetCommand(url); } catch (Exception e) { throw new DataRecordException( "Failed to get salesforce url for data records; error - " + e.getMessage(), e); } } private String getLimitFromInputQuery(String query) { String inputQuery = query.toLowerCase(); int limitIndex = inputQuery.indexOf(" limit"); if (limitIndex > 0) { return query.substring(limitIndex); } return ""; } @Override public Iterator<JsonElement> getData(CommandOutput<?, ?> response) throws DataRecordException { this.log.debug("Get data records from response"); String output; Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator(); if (itr.hasNext()) { output = itr.next(); } else { throw new DataRecordException("Failed to get data from salesforce; REST response has no output"); } List<JsonElement> rs = new ArrayList<JsonElement>(); JsonElement element = gson.fromJson(output, JsonObject.class); JsonArray partRecords; try { JsonObject jsonObject = element.getAsJsonObject(); partRecords = jsonObject.getAsJsonArray("records"); if (jsonObject.get("done").getAsBoolean()) { this.setPullStatus(false); } else { this.setNextUrl(this.getFullUri( jsonObject.get("nextRecordsUrl").getAsString().replaceAll(this.servicesDataEnvPath, ""))); } JsonArray array = Utils.removeElementFromJsonArray(partRecords, "attributes"); Iterator<JsonElement> li = array.iterator(); while (li.hasNext()) { JsonElement recordElement = li.next(); rs.add(recordElement); } return rs.iterator(); } catch (Exception e) { throw new DataRecordException("Failed to get records from salesforce; error - " + e.getMessage(), e); } } @Override public boolean getPullStatus() { return this.pullStatus; } @Override public String getNextUrl() { return nextUrl; } public String getSoqlUrl(String soqlQuery) throws RestApiClientException { String path = SOQL_RESOURCE + "/"; NameValuePair pair = new BasicNameValuePair("q", soqlQuery); List<NameValuePair> qparams = new ArrayList<NameValuePair>(); qparams.add(pair); return this.buildUrl(path, qparams); } public String buildUrl(String path, List<NameValuePair> qparams) throws RestApiClientException { URIBuilder builder = new URIBuilder(); builder.setPath(path); ListIterator<NameValuePair> i = qparams.listIterator(); while (i.hasNext()) { NameValuePair keyValue = i.next(); builder.setParameter(keyValue.getName(), keyValue.getValue()); } URI uri; try { uri = builder.build(); } catch (Exception e) { throw new RestApiClientException("Failed to build url; error - " + e.getMessage(), e); } return new HttpGet(uri).getURI().toString(); } private String getServiceBaseUrl() { String dataEnvPath = DEFAULT_SERVICES_DATA_PATH + "/v" + this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_VERSION); this.setServicesDataEnvPath(dataEnvPath); return this.instanceUrl + dataEnvPath; } public String getFullUri(String resourcePath) { return StringUtils.removeEnd(getServiceBaseUrl(), "/") + StringUtils.removeEnd(resourcePath, "/"); } public boolean isNullPredicate(List<Predicate> predicateList) { if (predicateList == null || predicateList.size() == 0) { return true; } return false; } @Override public String getWatermarkSourceFormat(WatermarkType watermarkType) { switch (watermarkType) { case TIMESTAMP: return "yyyy-MM-dd'T'HH:mm:ss"; case DATE: return "yyyy-MM-dd"; default: return null; } } @Override public String getHourPredicateCondition(String column, long value, String valueFormat, String operator) { this.log.info("Getting hour predicate from salesforce"); String Formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, SALESFORCE_HOUR_FORMAT); return column + " " + operator + " " + Formattedvalue; } @Override public String getDatePredicateCondition(String column, long value, String valueFormat, String operator) { this.log.info("Getting date predicate from salesforce"); String Formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, SALESFORCE_DATE_FORMAT); return column + " " + operator + " " + Formattedvalue; } @Override public String getTimestampPredicateCondition(String column, long value, String valueFormat, String operator) { this.log.info("Getting timestamp predicate from salesforce"); String Formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, SALESFORCE_TIMESTAMP_FORMAT); return column + " " + operator + " " + Formattedvalue; } @Override public Map<String, String> getDataTypeMap() { Map<String, String> dataTypeMap = ImmutableMap.<String, String>builder().put("url", "string") .put("textarea", "string").put("reference", "string").put("phone", "string") .put("masterrecord", "string").put("location", "string").put("id", "string") .put("encryptedstring", "string").put("email", "string").put("DataCategoryGroupReference", "string") .put("calculated", "string").put("anyType", "string").put("address", "string").put("blob", "string") .put("date", "date").put("datetime", "timestamp").put("time", "time").put("object", "string") .put("string", "string").put("int", "int").put("long", "long").put("double", "double") .put("percent", "double").put("currency", "double").put("decimal", "double") .put("boolean", "boolean").put("picklist", "string").put("multipicklist", "string") .put("combobox", "string").put("list", "string").put("set", "string").put("map", "string") .put("enum", "string").build(); return dataTypeMap; } @Override public Iterator<JsonElement> getRecordSetFromSourceApi(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws IOException { this.log.debug("Getting salesforce data using bulk api"); RecordSet<JsonElement> rs = null; try { //Get query result ids in the first run //result id is used to construct url while fetching data if (this.bulkApiInitialRun == true) { // set finish status to false before starting the bulk job this.setBulkJobFinished(false); this.bulkResultIdList = this.getQueryResultIds(schema, entity, predicateList); this.log.info("Number of bulk api resultSet Ids:" + this.bulkResultIdList.size()); } // Get data from input stream // If bulk load load is not finished, get data from the stream if (!this.isBulkJobFinished()) { rs = getBulkData(); } // Set bulkApiInitialRun to false after the completion of first run this.bulkApiInitialRun = false; // If bulk job is finished, get soft deleted records using Rest API boolean isSoftDeletesPullDisabled = Boolean.valueOf(this.workUnit.getProp( SalesforceConfigurationKeys.SOURCE_QUERYBASED_SALESFORCE_IS_SOFT_DELETES_PULL_DISABLED)); if (rs == null || rs.isEmpty()) { // Get soft delete records only if IsDeleted column exists and soft deletes pull is not disabled if (this.columnList.contains("IsDeleted") && !isSoftDeletesPullDisabled) { return this.getSoftDeletedRecords(schema, entity, workUnit, predicateList); } else { this.log.info("Ignoring soft delete records"); } } return rs.iterator(); } catch (Exception e) { throw new IOException("Failed to get records using bulk api; error - " + e.getMessage(), e); } } /** * Get soft deleted records using Rest Api * @return iterator with deleted records */ private Iterator<JsonElement> getSoftDeletedRecords(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException { return this.getRecordSet(schema, entity, workUnit, predicateList); } /** * Login to salesforce * @return login status */ public boolean bulkApiLogin() throws Exception { this.log.info("Authenticating salesforce bulk api"); boolean success = false; String hostName = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME); String apiVersion = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_VERSION); if (Strings.isNullOrEmpty(apiVersion)) { apiVersion = "29.0"; } String soapAuthEndPoint = hostName + SALESFORCE_SOAP_AUTH_SERVICE + "/" + apiVersion; try { ConnectorConfig partnerConfig = new ConnectorConfig(); if (super.workUnitState.contains(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL) && !super.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL).isEmpty()) { partnerConfig.setProxy(super.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL), super.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_CONN_USE_PROXY_PORT)); } partnerConfig.setUsername(this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_USERNAME)); partnerConfig.setPassword(PasswordManager.getInstance(this.workUnit) .readPassword(this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_PASSWORD))); partnerConfig.setAuthEndpoint(soapAuthEndPoint); PartnerConnection connection = new PartnerConnection(partnerConfig); String soapEndpoint = partnerConfig.getServiceEndpoint(); String restEndpoint = soapEndpoint.substring(0, soapEndpoint.indexOf("Soap/")) + "async/" + apiVersion; ConnectorConfig config = new ConnectorConfig(); config.setSessionId(partnerConfig.getSessionId()); config.setRestEndpoint(restEndpoint); config.setCompression(true); config.setTraceFile("traceLogs.txt"); config.setTraceMessage(false); config.setPrettyPrintXml(true); if (super.workUnitState.contains(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL) && !super.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL).isEmpty()) { config.setProxy(super.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL), super.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_CONN_USE_PROXY_PORT)); } this.bulkConnection = new BulkConnection(config); success = true; } catch (Exception e) { throw new Exception("Failed to connect to salesforce bulk api; error - " + e.getMessage(), e); } return success; } /** * Get Record set using salesforce specific API(Bulk API) * @param schema/databasename * @param entity/tablename * @param list of all predicate conditions * @return iterator with batch of records */ private List<String> getQueryResultIds(String schema, String entity, List<Predicate> predicateList) throws Exception { if (!bulkApiLogin()) { throw new IllegalArgumentException("Invalid Login"); } try { // Set bulk job attributes this.bulkJob.setObject(entity); this.bulkJob.setOperation(OperationEnum.query); this.bulkJob.setConcurrencyMode(ConcurrencyMode.Parallel); // Result type as CSV this.bulkJob.setContentType(ContentType.CSV); this.bulkJob = bulkConnection.createJob(this.bulkJob); this.bulkJob = bulkConnection.getJobStatus(this.bulkJob.getId()); // Construct query with the predicates String query = this.updatedQuery; if (!isNullPredicate(predicateList)) { String limitString = this.getLimitFromInputQuery(query); query = query.replace(limitString, ""); Iterator<Predicate> i = predicateList.listIterator(); while (i.hasNext()) { Predicate predicate = i.next(); query = SqlQueryUtils.addPredicate(query, predicate.getCondition()); } query = query + limitString; } this.log.info("QUERY:" + query); ByteArrayInputStream bout = new ByteArrayInputStream( query.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING)); this.bulkBatchInfo = bulkConnection.createBatchFromStream(this.bulkJob, bout); int retryInterval = 30 + (int) Math.ceil((float) this.getExpectedRecordCount() / 10000) * 2; this.log.info("Salesforce bulk api retry interval in seconds:" + retryInterval); // Get batch info with complete resultset (info id - refers to the resultset id corresponding to entire resultset) this.bulkBatchInfo = bulkConnection.getBatchInfo(this.bulkJob.getId(), this.bulkBatchInfo.getId()); if (this.bulkBatchInfo.getState() == BatchStateEnum.Failed) { throw new RuntimeException("Failed to get bulk batch info for jobId " + this.bulkBatchInfo.getJobId() + " error - " + this.bulkBatchInfo.getStateMessage()); } while ((this.bulkBatchInfo.getState() != BatchStateEnum.Completed) && (this.bulkBatchInfo.getState() != BatchStateEnum.Failed)) { Thread.sleep(retryInterval * 1000); this.bulkBatchInfo = bulkConnection.getBatchInfo(this.bulkJob.getId(), this.bulkBatchInfo.getId()); this.log.debug("Bulk Api Batch Info:" + this.bulkBatchInfo); this.log.info("Waiting for bulk resultSetIds"); } // Get resultset ids from the batch info QueryResultList list = bulkConnection.getQueryResultList(this.bulkJob.getId(), this.bulkBatchInfo.getId()); return Arrays.asList(list.getResult()); } catch (Exception e) { throw new RuntimeException( "Failed to get query result ids from salesforce using bulk api; error - " + e.getMessage(), e); } } /** * Get data from the bulk api input stream * @return record set with each record as a JsonObject */ private RecordSet<JsonElement> getBulkData() throws DataRecordException { this.log.debug("Processing bulk api batch..."); RecordSetList<JsonElement> rs = new RecordSetList<JsonElement>(); try { // if Buffer is empty then get stream for the new resultset id if (this.bulkBufferedReader == null || !this.bulkBufferedReader.ready()) { // if there is unprocessed resultset id then get result stream for that id if (this.bulkResultIdCount < this.bulkResultIdList.size()) { this.log.info("Stream resultset for resultId:" + bulkResultIdList.get(bulkResultIdCount)); this.setNewBulkResultSet(true); this.bulkBufferedReader = new BufferedReader(new InputStreamReader( this.bulkConnection.getQueryResultStream(bulkJob.getId(), bulkBatchInfo.getId(), bulkResultIdList.get(bulkResultIdCount)), ConfigurationKeys.DEFAULT_CHARSET_ENCODING)); this.bulkResultIdCount++; } else { // if result stream processed for all resultset ids then finish the bulk job this.log.info("Bulk job is finished"); this.setBulkJobFinished(true); return rs; } } // if Buffer stream has data then process the same // Get batch size from .pull file int batchSize = Utils.getAsInt(this.workUnit.getProp(ConfigurationKeys.SOURCE_QUERYBASED_FETCH_SIZE)); if (batchSize == 0) { batchSize = ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE; } // Stream the resultset through CSV reader to identify columns in each record InputStreamCSVReader reader = new InputStreamCSVReader(this.bulkBufferedReader); // Get header if it is first run of a new resultset if (this.isNewBulkResultSet()) { this.bulkRecordHeader = reader.nextRecord(); this.bulkResultColumCount = this.bulkRecordHeader.size(); this.setNewBulkResultSet(false); } List<String> csvRecord; int recordCount = 0; // Get record from CSV reader stream while ((csvRecord = reader.nextRecord()) != null) { // Convert CSV record to JsonObject JsonObject jsonObject = Utils.csvToJsonObject(this.bulkRecordHeader, csvRecord, this.bulkResultColumCount); rs.add(jsonObject); recordCount++; this.bulkRecordCount++; // Insert records in record set until it reaches the batch size if (recordCount >= batchSize) { this.log.info("Total number of records processed so far: " + this.bulkRecordCount); break; } } } catch (Exception e) { throw new DataRecordException("Failed to get records from salesforce; error - " + e.getMessage(), e); } return rs; } @Override public void closeConnection() throws Exception { if (this.bulkConnection != null && !this.bulkConnection.getJobStatus(this.bulkJob.getId()).getState().toString().equals("Closed")) { this.log.info("Closing salesforce bulk job connection"); this.bulkConnection.closeJob(bulkJob.getId()); } } public static List<Command> constructGetCommand(String restQuery) { return Arrays.asList(new RestApiCommand().build(Arrays.asList(restQuery), RestApiCommandType.GET)); } }