Java tutorial
/** * Copyright (C) 2004-2016, GoodData(R) Corporation. All rights reserved. * This source code is licensed under the BSD-style license found in the * LICENSE.txt file in the root directory of this source tree. */ package com.gooddata.dataset; import com.gooddata.AbstractPollHandler; import com.gooddata.AbstractService; import com.gooddata.FutureResult; import com.gooddata.PollResult; import com.gooddata.GoodDataException; import com.gooddata.GoodDataRestException; import com.gooddata.gdc.*; import com.gooddata.gdc.AboutLinks.Link; import com.gooddata.project.Project; import org.apache.commons.lang.RandomStringUtils; import org.springframework.http.client.ClientHttpResponse; import org.springframework.web.client.RestClientException; import org.springframework.web.client.RestTemplate; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; import static com.gooddata.util.Validate.notEmpty; import static com.gooddata.util.Validate.notNull; import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Arrays.asList; import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static org.springframework.util.StringUtils.isEmpty; /** * Service to work with datasets, manifests and dataset uploads. */ public class DatasetService extends AbstractService { private static final String MANIFEST_FILE_NAME = "upload_info.json"; private static final String ETL_PULL_DEFAULT_ERROR_MESSAGE = "ETL Pull failed with status %s"; private final DataStoreService dataStoreService; public DatasetService(RestTemplate restTemplate, DataStoreService dataStoreService) { super(restTemplate); this.dataStoreService = notNull(dataStoreService, "dataStoreService"); } /** * Obtains manifest from given project by given datasetId * * @param project project to which manifest belongs * @param datasetId id of dataset * @return manifest for dataset * @throws com.gooddata.dataset.DatasetNotFoundException when manifest can't be found (doesn't exist) * @throws com.gooddata.dataset.DatasetException in case the API call failure */ public DatasetManifest getDatasetManifest(Project project, String datasetId) { notNull(project, "project"); notEmpty(datasetId, "datasetId"); try { return restTemplate.getForObject(DatasetManifest.URI, DatasetManifest.class, project.getId(), datasetId); } catch (GoodDataRestException e) { if (e.getStatusCode() == 404) { throw new DatasetNotFoundException(datasetId, e); } else { throw new DatasetException("Unable to get manifest", datasetId, e); } } catch (RestClientException e) { throw new DatasetException("Unable to get manifest", datasetId, e); } } /** * Loads dataset into platform. Uploads given dataset and manifest to staging area and triggers ETL pull. * The call is asynchronous returning {@link com.gooddata.FutureResult} to let caller wait for results. * Uploaded files are deleted from staging area when finished. * * @param project project to which dataset belongs * @param manifest dataset manifest * @param dataset dataset to upload * @return {@link com.gooddata.FutureResult} of the task, which can throw {@link com.gooddata.dataset.DatasetException} * in case the ETL pull task fails * @throws com.gooddata.dataset.DatasetException if there is a problem to serialize manifest or upload dataset */ public FutureResult<Void> loadDataset(final Project project, final DatasetManifest manifest, final InputStream dataset) { notNull(project, "project"); notNull(dataset, "dataset"); notNull(manifest, "manifest"); final Path dirPath = Paths.get("/", project.getId() + "_" + RandomStringUtils.randomAlphabetic(3), "/"); try { dataStoreService.upload(dirPath.resolve(manifest.getFile()).toString(), dataset); final String manifestJson = mapper.writeValueAsString(manifest); final ByteArrayInputStream inputStream = new ByteArrayInputStream(manifestJson.getBytes(UTF_8)); dataStoreService.upload(dirPath.resolve(MANIFEST_FILE_NAME).toString(), inputStream); return pullLoad(project, dirPath, manifest.getDataSet()); } catch (IOException e) { throw new DatasetException("Unable to serialize manifest", manifest.getDataSet(), e); } catch (DataStoreException | GoodDataRestException | RestClientException e) { throw new DatasetException("Unable to load", manifest.getDataSet(), e); } } /** * Gets DatasetManifest (using {@link #getDatasetManifest(com.gooddata.project.Project, String)} * first and then calls {@link #loadDataset(com.gooddata.project.Project, DatasetManifest, java.io.InputStream)} * * @param project project to which dataset belongs * @param datasetId datasetId to obtain a manifest * @param dataset dataset to upload * @return {@link com.gooddata.FutureResult} of the task */ public FutureResult<Void> loadDataset(Project project, String datasetId, InputStream dataset) { notNull(project, "project"); notEmpty(datasetId, "datasetId"); notNull(dataset, "dataset"); return loadDataset(project, getDatasetManifest(project, datasetId), dataset); } public FutureResult<Void> loadDatasets(final Project project, DatasetManifest... datasets) { return loadDatasets(project, asList(datasets)); } /** * Loads datasets into platform. Uploads given datasets and their manifests to staging area and triggers ETL pull. * The call is asynchronous returning {@link com.gooddata.FutureResult} to let caller wait for results. * Uploaded files are deleted from staging area when finished. * * @param project project to which dataset belongs * @param datasets map dataset manifests * @return {@link com.gooddata.FutureResult} of the task, which can throw {@link com.gooddata.dataset.DatasetException} * in case the ETL pull task fails * @throws com.gooddata.dataset.DatasetException if there is a problem to serialize manifest or upload dataset * @see <a href="https://developer.gooddata.com/article/multiload-of-csv-data">batch upload reference</a> */ public FutureResult<Void> loadDatasets(final Project project, final Collection<DatasetManifest> datasets) { notNull(project, "project"); validateUploadManifests(datasets); final List<String> datasetsNames = new ArrayList<>(datasets.size()); try { final Path dirPath = Paths.get("/", project.getId() + "_" + RandomStringUtils.randomAlphabetic(3), "/"); for (DatasetManifest datasetManifest : datasets) { datasetsNames.add(datasetManifest.getDataSet()); dataStoreService.upload(dirPath.resolve(datasetManifest.getFile()).toString(), datasetManifest.getSource()); } final String manifestJson = mapper.writeValueAsString(new DatasetManifests(datasets)); final ByteArrayInputStream inputStream = new ByteArrayInputStream(manifestJson.getBytes(UTF_8)); dataStoreService.upload(dirPath.resolve(MANIFEST_FILE_NAME).toString(), inputStream); return pullLoad(project, dirPath, datasetsNames); } catch (IOException e) { throw new DatasetException("Unable to serialize manifest", datasetsNames, e); } catch (DataStoreException | GoodDataRestException | RestClientException e) { throw new DatasetException("Unable to load", datasetsNames, e); } } private void validateUploadManifests(final Collection<DatasetManifest> datasets) { notEmpty(datasets, "datasets"); for (DatasetManifest datasetManifest : datasets) { if (datasetManifest.getSource() == null) { throw new IllegalArgumentException( format("Source for dataset '%s' is null", datasetManifest.getDataSet())); } if (datasetManifest.getFile() == null) { throw new IllegalArgumentException( format("File for dataset '%s' is null", datasetManifest.getDataSet())); } if (isEmpty(datasetManifest.getDataSet())) { throw new IllegalArgumentException("Dataset name is empty."); } } } private FutureResult<Void> pullLoad(Project project, final Path dirPath, final String dataset) { return pullLoad(project, dirPath, singletonList(dataset)); } private FutureResult<Void> pullLoad(Project project, final Path dirPath, final Collection<String> datasets) { final PullTask pullTask = restTemplate.postForObject(Pull.URI, new Pull(dirPath.toString()), PullTask.class, project.getId()); return new PollResult<>(this, new AbstractPollHandler<TaskStatus, Void>(pullTask.getPollUri(), TaskStatus.class, Void.class) { @Override public void handlePollResult(TaskStatus pollResult) { if (!pollResult.isSuccess()) { final String message = isEmpty(pollResult.getMessages()) ? String.format(ETL_PULL_DEFAULT_ERROR_MESSAGE, pollResult.getStatus()) : pollResult.getMessages().toString(); throw new DatasetException(message, datasets); } setResult(null); } @Override public void handlePollException(final GoodDataRestException e) { throw new DatasetException("Unable to load", datasets, e); } @Override protected void onFinish() { try { dataStoreService.delete(dirPath.toString()); } catch (DataStoreException ignored) { // todo log? } } }); } /** * Lists datasets (links) in project. Returns empty list in case there are no datasets. * * @param project project to list datasets in * @return collection of dataset links or empty list * @deprecated use {@link #listDatasetLinks(Project)} instead */ @Deprecated public Collection<Dataset> listDatasets(Project project) { final HashSet<Dataset> datasets = new HashSet<>(); for (final Link link : listDatasetLinks(project)) { datasets.add(new Dataset(link.getIdentifier(), link.getUri(), link.getTitle())); } return datasets; } /** * Lists datasets (links) in project. Returns empty list in case there are no datasets. * * @param project project to list datasets in * @return collection of dataset links or empty list */ public Collection<Link> listDatasetLinks(final Project project) { notNull(project, "project"); try { final DatasetLinks result = restTemplate.getForObject(DatasetLinks.URI, DatasetLinks.class, project.getId()); if (result == null) { throw new GoodDataException("Empty response from API call"); } else if (result.getLinks() == null) { return emptyList(); } return result.getLinks(); } catch (GoodDataException | RestClientException e) { throw new GoodDataException("Unable to list datasets for project " + project.getId(), e); } } /** * Optimize SLI hash. This feature is useful only if data warehouse was reduced somehow. Remove unused values from * the existing SLI hash. * * @param project project to optimize SLI hash in * @return {@link com.gooddata.FutureResult} of the task */ public FutureResult<Void> optimizeSliHash(final Project project) { notNull(project, "project"); final UriResponse uriResponse = restTemplate.postForObject(EtlMode.URL, new EtlMode(EtlModeType.SLI, LookupMode.RECREATE), UriResponse.class, project.getId()); return new PollResult<>(this, new AbstractPollHandler<TaskStatus, Void>(uriResponse.getUri(), TaskStatus.class, Void.class) { @Override public void handlePollResult(final TaskStatus pollResult) { if (!pollResult.isSuccess()) { throw new GoodDataException( "Unable to optimize SLI hash for project " + project.getId()); } setResult(null); } @Override public boolean isFinished(final ClientHttpResponse response) throws IOException { if (!super.isFinished(response)) { return false; } final TaskStatus maqlDdlTaskStatus = extractData(response, TaskStatus.class); if (maqlDdlTaskStatus.isSuccess()) { return true; } throw new GoodDataException( "Unable to optimize SLI hash: " + maqlDdlTaskStatus.getMessages()); } @Override public void handlePollException(final GoodDataRestException e) { throw new GoodDataException("Unable to optimize SLI hash: " + getPollingUri(), e); } }); } /** * Update project data with the given update script (MAQL). This method can be used for data manipulation only, * for model changes use {@link com.gooddata.model.ModelService#updateProjectModel}. * * @param project project to be updated * @param maqlDml update script to be executed in the project * @return poll result * @see com.gooddata.model.ModelService#updateProjectModel */ public FutureResult<Void> updateProjectData(final Project project, final String maqlDml) { notNull(project, "project"); final UriResponse uriResponse = restTemplate.postForObject(MaqlDml.URI, new MaqlDml(maqlDml), UriResponse.class, project.getId()); final String errorMessage = format("Unable to update data for project '%s'", project.getId()); return new PollResult<>(this, new AbstractPollHandler<TaskState, Void>(uriResponse.getUri(), TaskState.class, Void.class) { @Override public void handlePollResult(final TaskState pollResult) { if (!pollResult.isSuccess()) { throw new GoodDataException(errorMessage); } setResult(null); } @Override public boolean isFinished(final ClientHttpResponse response) throws IOException { final TaskState taskState = extractData(response, TaskState.class); if (taskState.isSuccess()) { return true; } else if (!taskState.isFinished()) { return false; } throw new GoodDataException(errorMessage + ": " + taskState.getMessage()); } @Override public void handlePollException(final GoodDataRestException e) { throw new GoodDataException(errorMessage + ": " + getPollingUri(), e); } }); } /** * Lists all uploads for the dataset with the given identifier in the given project. Returns empty list if there * are no uploads for the given dataset. * * @param project GoodData project * @param datasetId dataset identifier * @return collection of {@link Upload} objects or empty list */ public Collection<Upload> listUploadsForDataset(Project project, String datasetId) { final UploadsInfo.DataSet dataSet = getDataSetInfo(project, datasetId); if (isEmpty(dataSet.getUploadsUri())) { return emptyList(); } try { final Uploads result = restTemplate.getForObject(dataSet.getUploadsUri(), Uploads.class); if (result == null) { throw new GoodDataException("Empty response from '" + dataSet.getUploadsUri() + "'."); } else if (result.items() == null) { return emptyList(); } return result.items(); } catch (RestClientException e) { throw new GoodDataException("Unable to get '" + dataSet.getUploadsUri() + "'.", e); } } /** * Returns last upload for the dataset with given identifier in the given project. Returns null if the last upload * doesn't exist. * * @param project GoodData project * @param datasetId dataset identifier * @return last dataset upload or {@code null} if the upload doesn't exist */ public Upload getLastUploadForDataset(Project project, String datasetId) { final UploadsInfo.DataSet dataSet = getDataSetInfo(project, datasetId); if (isEmpty(dataSet.getLastUploadUri())) { return null; } try { return restTemplate.getForObject(dataSet.getLastUploadUri(), Upload.class); } catch (RestClientException e) { throw new GoodDataException("Unable to get '" + dataSet.getLastUploadUri() + "'."); } } /** * Returns global upload statistics for the given project. * * @param project GoodData project * @return {@link UploadStatistics} object with project's upload statistics */ public UploadStatistics getUploadStatistics(Project project) { notNull(project, "project"); try { return restTemplate.getForObject(UploadStatistics.URI, UploadStatistics.class, project.getId()); } catch (RestClientException e) { throw new GoodDataException("Unable to get dataset uploads statistics.", e); } } /** * Returns {@link UploadsInfo.DataSet} object containing upload information for the given dataset in the given project. * * Package-private for testing purposes. */ UploadsInfo.DataSet getDataSetInfo(Project project, String datasetId) { notNull(project, "project"); notEmpty(datasetId, "datasetId"); final URI uploadsInfoUri = UploadsInfo.URI_TEMPLATE.expand(project.getId()); try { final UploadsInfo uploadsInfo = restTemplate.getForObject(uploadsInfoUri, UploadsInfo.class); if (uploadsInfo == null) { throw new GoodDataException("Empty response from '" + uploadsInfoUri.toString() + "'."); } return uploadsInfo.getDataSet(datasetId); } catch (RestClientException e) { throw new GoodDataException("Unable to get '" + uploadsInfoUri.toString() + "'.", e); } } }