Java tutorial
// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // https://github.com/Talend/data-prep/blob/master/LICENSE // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataprep.dataset.service; import static com.jayway.restassured.RestAssured.*; import static com.jayway.restassured.http.ContentType.JSON; import static com.jayway.restassured.path.json.JsonPath.from; import static java.time.Instant.now; import static java.util.Arrays.asList; import static java.util.Collections.emptyList; import static org.hamcrest.Matchers.*; import static org.hamcrest.core.IsEqual.equalTo; import static org.junit.Assert.*; import static org.springframework.http.HttpStatus.OK; import static org.talend.dataprep.test.SameJSONFile.sameJSONAsFile; import static org.talend.dataprep.util.SortAndOrderHelper.Sort.LAST_MODIFICATION_DATE; import static uk.co.datumedge.hamcrest.json.SameJSONAs.sameJSONAs; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.*; import org.apache.commons.io.IOUtils; import org.assertj.core.api.Assertions; import org.hamcrest.CoreMatchers; import org.junit.Assert; import org.junit.Test; import org.springframework.http.HttpStatus; import org.springframework.test.util.ReflectionTestUtils; import org.springframework.util.ReflectionUtils; import org.talend.dataprep.api.dataset.ColumnMetadata; import org.talend.dataprep.api.dataset.DataSet; import org.talend.dataprep.api.dataset.DataSetGovernance.Certification; import org.talend.dataprep.api.dataset.DataSetMetadata; import org.talend.dataprep.api.dataset.RowMetadata; import 
org.talend.dataprep.api.dataset.statistics.SemanticDomain; import org.talend.dataprep.api.dataset.statistics.Statistics; import org.talend.dataprep.api.type.Type; import org.talend.dataprep.api.user.UserData; import org.talend.dataprep.dataset.DataSetBaseTest; import org.talend.dataprep.dataset.DataSetMetadataBuilder; import org.talend.dataprep.lock.DistributedLock; import org.talend.dataprep.schema.csv.CSVFormatFamily; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.jayway.restassured.response.Response; public class DataSetServiceTest extends DataSetBaseTest { @Test public void CORSHeaders() throws Exception { given().header("Origin", "fake.host.to.trigger.cors").when().get("/datasets").then() .header("Access-Control-Allow-Origin", "fake.host.to.trigger.cors"); } @Test public void compatibleDatasetsList() throws Exception { when().get("/datasets/{id}/compatibledatasets", "1").then().statusCode(HttpStatus.OK.value()) .body(equalTo("[]")); String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), "ds-19"); String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), "ds-18"); String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-17"); // when final String compatibleDatasetList = when().get("/datasets/{id}/compatibledatasets", dataSetId).asString(); // then Assert.assertTrue(compatibleDatasetList.contains(dataSetId2)); assertFalse(compatibleDatasetList.contains(dataSetId3)); } @Test public void compatibleDatasetsListNameSort() throws Exception { String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-16"); String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-15"); String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-14"); DataSetMetadata metadata1 = dataSetMetadataRepository.get(dataSetId); 
metadata1.setName("CCCC"); dataSetMetadataRepository.save(metadata1); DataSetMetadata metadata2 = dataSetMetadataRepository.get(dataSetId2); metadata2.setName("BBBB"); dataSetMetadataRepository.save(metadata2); DataSetMetadata metadata3 = dataSetMetadataRepository.get(dataSetId3); metadata3.setName("AAAA"); dataSetMetadataRepository.save(metadata3); // when final String actual = when().get("/datasets/{id}/compatibledatasets?sort=name", dataSetId).asString(); // Ensure order by name (most recent first) final Iterator<JsonNode> elements = mapper.readTree(actual).elements(); String[] expectedNames = new String[] { "BBBB", "AAAA" }; int i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } } @Test public void compatibleDatasetsListDateSort() throws Exception { String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-13"); String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-12"); String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-11"); DataSetMetadata metadata1 = dataSetMetadataRepository.get(dataSetId); metadata1.setName("CCCC"); dataSetMetadataRepository.save(metadata1); DataSetMetadata metadata2 = dataSetMetadataRepository.get(dataSetId2); metadata2.setName("BBBB"); dataSetMetadataRepository.save(metadata2); DataSetMetadata metadata3 = dataSetMetadataRepository.get(dataSetId3); metadata3.setName("AAAA"); dataSetMetadataRepository.save(metadata3); // when final String actual = expect().statusCode(200).log().ifValidationFails() .get("/datasets/{id}/compatibledatasets?sort=creationDate", dataSetId).asString(); // Ensure order by name (most recent first) final Iterator<JsonNode> elements = mapper.readTree(actual).elements(); String[] expectedNames = new String[] { "AAAA", "BBBB" }; int i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } } @Test 
public void compatibleDatasetsListDateOrder() throws Exception {
        // given: three data sets built from the same file, created one after the other, then renamed
        String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-10");
        String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-9");
        String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-8");

        DataSetMetadata metadata1 = dataSetMetadataRepository.get(dataSetId);
        metadata1.setName("CCCC");
        dataSetMetadataRepository.save(metadata1);
        DataSetMetadata metadata2 = dataSetMetadataRepository.get(dataSetId2);
        metadata2.setName("BBBB");
        dataSetMetadataRepository.save(metadata2);
        DataSetMetadata metadata3 = dataSetMetadataRepository.get(dataSetId3);
        metadata3.setName("AAAA");
        dataSetMetadataRepository.save(metadata3);

        // when
        final String actual = when().get("/datasets/{id}/compatibledatasets?sort=creationDate&order=asc", dataSetId)
                .asString();

        // then: ordered by creation date, oldest first ("BBBB" was created before "AAAA")
        final Iterator<JsonNode> elements = mapper.readTree(actual).elements();
        String[] expectedNames = new String[] { "BBBB", "AAAA" };
        int i = 0;
        while (elements.hasNext()) {
            assertThat(elements.next().get("name").asText(), is(expectedNames[i++]));
        }
        // a short or empty listing must not let the loop above pass vacuously
        assertEquals(expectedNames.length, i);
    }

    /**
     * Checks that the data sets compatible with a given data set can be listed sorted by name, in both
     * ascending and descending order.
     */
    @Test
    public void compatibleDatasetsListNameOrder() throws Exception {
        // given
        String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-7");
        String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-6");
        String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream(TAGADA_CSV), "ds-5");

        DataSetMetadata metadata1 = dataSetMetadataRepository.get(dataSetId);
        metadata1.setName("CCCC");
        dataSetMetadataRepository.save(metadata1);
        DataSetMetadata metadata2 = dataSetMetadataRepository.get(dataSetId2);
        metadata2.setName("BBBB");
        dataSetMetadataRepository.save(metadata2);
        DataSetMetadata metadata3 = dataSetMetadataRepository.get(dataSetId3);
        metadata3.setName("AAAA");
        dataSetMetadataRepository.save(metadata3);

        // when
        final String actualASC = when().get("/datasets/{id}/compatibledatasets?sort=name&order=asc", dataSetId)
                .asString();
        final String actualDESC = when().get("/datasets/{id}/compatibledatasets?sort=name&order=desc", dataSetId)
                .asString();

        // then: ascending name order
        final Iterator<JsonNode> ascElements = mapper.readTree(actualASC).elements();
        String[] expectedNames = new String[] { "AAAA", "BBBB" };
        int i = 0;
        while (ascElements.hasNext()) {
            assertThat(ascElements.next().get("name").asText(), is(expectedNames[i++]));
        }
        assertEquals(expectedNames.length, i);

        // then: descending name order. The DESC response gets its own iterator; re-using the
        // already consumed ASC iterator would skip every assertion below.
        final Iterator<JsonNode> descElements = mapper.readTree(actualDESC).elements();
        expectedNames = new String[] { "BBBB", "AAAA" };
        i = 0;
        while (descElements.hasNext()) {
            assertThat(descElements.next().get("name").asText(), is(expectedNames[i++]));
        }
        assertEquals(expectedNames.length, i);
    }

    /**
     * An unknown <code>sort</code> value on the compatible data sets listing must be answered with 400.
     */
    @Test
    public void compatibleDatasetsListIllegalSort() throws Exception {
        when().get("/datasets/{id}/compatibledatasets?sort=aaaa", "0000").then()
                .statusCode(HttpStatus.BAD_REQUEST.value());
    }

    /**
     * An unknown <code>order</code> value on the compatible data sets listing must be answered with 400.
     */
    @Test
    public void compatibleDatasetsListIllegalOrder() throws Exception {
        when().get("/datasets/{id}/compatibledatasets?order=aaaa", "0000").then()
                .statusCode(HttpStatus.BAD_REQUEST.value());
    }

    @Test public void list() throws Exception { when().get("/datasets").then().statusCode(OK.value()).body(equalTo("[]")); // Adds 1 data set to store String id1 = UUID.randomUUID().toString(); final DataSetMetadata metadata = metadataBuilder.metadata().id(id1).name("name1").author("anonymous") .created(0).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); metadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";"); dataSetMetadataRepository.save(metadata); String expected = "[{\"id\":\"" + id1 + "\",\"name\":\"name1\",\"records\":0,\"author\":\"anonymous\",\"nbLinesHeader\":0,\"nbLinesFooter\":0,\"created\":0}]"; InputStream content = when().get("/datasets").asInputStream(); String contentAsString = IOUtils.toString(content); assertThat(contentAsString, sameJSONAs(expected).allowingExtraUnexpectedFields().allowingAnyArrayOrdering()); // Adds a new data set 
to store String id2 = UUID.randomUUID().toString(); DataSetMetadata metadata2 = metadataBuilder.metadata().id(id2).name("name2").author("anonymous").created(0) .formatFamilyId(new CSVFormatFamily().getBeanId()).build(); metadata2.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";"); dataSetMetadataRepository.save(metadata2); when().get("/datasets").then().statusCode(OK.value()); String response = when().get("/datasets").asString(); List<String> ids = from(response).get("id"); assertThat(ids, hasItems(id1, id2)); // check favorites List<Boolean> favoritesResp = from(response).get("favorite"); //$NON-NLS-1$ assertEquals(2, favoritesResp.size()); assertFalse(favoritesResp.get(0)); assertFalse(favoritesResp.get(1)); // add favorite UserData userData = new UserData(security.getUserId(), versionService.version().getVersionId()); HashSet<String> favorites = new HashSet<>(); favorites.add(id1); favorites.add(id2); userData.setFavoritesDatasets(favorites); userDataRepository.save(userData); favoritesResp = from(when().get("/datasets").asString()).get("favorite"); //$NON-NLS-1$ assertEquals(2, favoritesResp.size()); assertTrue(favoritesResp.get(0)); assertTrue(favoritesResp.get(1)); } @Test public void listNameSort() throws Exception { when().get("/datasets?sort=name").then().statusCode(OK.value()).body(equalTo("[]")); // Adds 2 data set metadata to store String id1 = UUID.randomUUID().toString(); final DataSetMetadata metadata1 = metadataBuilder.metadata().id(id1).name("AAAA").author("anonymous") .created(0).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata1); String id2 = UUID.randomUUID().toString(); final DataSetMetadata metadata2 = metadataBuilder.metadata().id(id2).name("BBBB").author("anonymous") .created(0).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata2); // Ensure order by name (most recent first) String actual = 
when().get("/datasets?sort=name").asString(); final Iterator<JsonNode> elements = mapper.readTree(actual).elements(); String[] expectedNames = new String[] { "BBBB", "AAAA" }; int i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } } @Test public void listDateSort() throws Exception { when().get("/datasets?sort=creationDate").then().statusCode(OK.value()).body(equalTo("[]")); // Adds 2 data set metadata to store String id1 = UUID.randomUUID().toString(); final DataSetMetadata metadata1 = metadataBuilder.metadata().id(id1).name("AAAA").author("anonymous") .created(20).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata1); String id2 = UUID.randomUUID().toString(); final DataSetMetadata metadata2 = metadataBuilder.metadata().id(id2).name("BBBB").author("anonymous") .created(0).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata2); // Ensure order by date (most recent first) String actual = when().get("/datasets?sort=creationDate").asString(); final Iterator<JsonNode> elements = mapper.readTree(actual).elements(); String[] expectedNames = new String[] { "AAAA", "BBBB" }; int i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } } @Test public void listDateOrder() throws Exception { when().get("/datasets?sort=creationDate&order=asc").then().statusCode(OK.value()).body(equalTo("[]")); // Adds 2 data set metadata to store String id1 = UUID.randomUUID().toString(); final DataSetMetadata metadata1 = metadataBuilder.metadata().id(id1).name("AAAA").author("anonymous") .created(20).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata1); String id2 = UUID.randomUUID().toString(); final DataSetMetadata metadata2 = metadataBuilder.metadata().id(id2).name("BBBB").author("anonymous") .created(0).formatFamilyId(new 
CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata2); // Ensure order by date (most recent first) String actual = when().get("/datasets?sort=creationDate&order=desc").asString(); Iterator<JsonNode> elements = mapper.readTree(actual).elements(); String[] expectedNames = new String[] { "AAAA", "BBBB" }; int i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } // Ensure order by date (oldest first when no order value) actual = when().get("/datasets?sort=creationDate").asString(); elements = mapper.readTree(actual).elements(); expectedNames = new String[] { "AAAA", "BBBB" }; i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } // Ensure order by date (oldest first) actual = when().get("/datasets?sort=creationDate&order=asc").asString(); elements = mapper.readTree(actual).elements(); expectedNames = new String[] { "BBBB", "AAAA" }; i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } } @Test public void listNameOrder() throws Exception { when().get("/datasets?sort=name&order=asc").then().statusCode(OK.value()).body(equalTo("[]")); // Adds 2 data set metadata to store String id1 = UUID.randomUUID().toString(); final DataSetMetadata metadata1 = metadataBuilder.metadata().id(id1).name("AAAA").author("anonymous") .created(20).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata1); String id2 = UUID.randomUUID().toString(); final DataSetMetadata metadata2 = metadataBuilder.metadata().id(id2).name("CCCC").author("anonymous") .created(0).formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata2); String id3 = UUID.randomUUID().toString(); final DataSetMetadata metadata3 = metadataBuilder.metadata().id(id3).name("bbbb").author("anonymous") .created(0).formatFamilyId(new 
CSVFormatFamily().getBeanId()).build(); dataSetMetadataRepository.save(metadata3); // Ensure order by name (last character from alphabet first) String actual = when().get("/datasets?sort=name&order=desc").asString(); Iterator<JsonNode> elements = mapper.readTree(actual).elements(); String[] expectedNames = new String[] { "CCCC", "bbbb", "AAAA" }; int i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } // Ensure order by name (last character from alphabet first when no order value) actual = when().get("/datasets?sort=name").asString(); elements = mapper.readTree(actual).elements(); expectedNames = new String[] { "CCCC", "bbbb", "AAAA" }; i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } // Ensure order by name (first character from alphabet first) actual = when().get("/datasets?sort=name&order=asc").asString(); elements = mapper.readTree(actual).elements(); expectedNames = new String[] { "AAAA", "bbbb", "CCCC" }; i = 0; while (elements.hasNext()) { assertThat(elements.next().get("name").asText(), is(expectedNames[i++])); } } @Test public void listIllegalSort() throws Exception { when().get("/datasets?sort=aaaa").then().statusCode(HttpStatus.BAD_REQUEST.value()); } @Test public void listIllegalOrder() throws Exception { when().get("/datasets?order=aaaa").then().statusCode(HttpStatus.BAD_REQUEST.value()); } @Test public void create() throws Exception { int before = dataSetMetadataRepository.size(); String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); int after = dataSetMetadataRepository.size(); assertThat(after - before, is(1)); // the next call may fail due to timing issues : TODO // make this synchronized somehow assertQueueMessages(dataSetId); } @Test public void cannotCreateWhenNameIsAlreadyUsed() throws Exception { // 
given String name = "youhou"; createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), name); // when final Response response = given() // .body(IOUtils.toString(this.getClass().getResourceAsStream(T_SHIRT_100_CSV))) // .queryParam("Content-Type", "text/csv") // .queryParam("name", name) // .when() // .expect().statusCode(409).log().ifError() // .post("/datasets"); // then assertEquals(409, response.getStatusCode()); } @Test public void shouldSearchDatasets() throws Exception { // given final boolean strict = true; final boolean nonStrict = false; final String ticId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), "tic"); final String ticTacId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), "tic tac"); final String ticTacTocId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), "tic tac toc"); // when / then checkSearchResult("toto", nonStrict, emptyList()); checkSearchResult("tic", nonStrict, asList(ticId, ticTacId, ticTacTocId)); checkSearchResult("tac", nonStrict, asList(ticTacId, ticTacTocId)); checkSearchResult("toc", nonStrict, asList(ticTacTocId)); checkSearchResult("tac", strict, emptyList()); checkSearchResult("tic TAC toc", strict, asList(ticTacTocId)); } private void checkSearchResult(final String search, final boolean isStrict, final List<String> expectedIds) throws IOException { final Response response = given() // .queryParam("name", search) // .queryParam("strict", isStrict) // .when()// .expect().statusCode(200).log().ifError() // .get("/datasets/search"); // then assertEquals(200, response.getStatusCode()); final List<DataSetMetadata> metadataList = mapper.readValue(response.asString(), new TypeReference<List<DataSetMetadata>>() { }); assertEquals(expectedIds.size(), metadataList.size()); assertEquals(expectedIds.size(), metadataList.stream().filter(m -> expectedIds.contains(m.getId())).count()); } @Test public void createEmptyLines() throws Exception { int before = 
dataSetMetadataRepository.size(); String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(EMPTY_LINES2_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); int after = dataSetMetadataRepository.size(); assertThat(after - before, is(1)); assertQueueMessages(dataSetId); final String content = when().get("/datasets/{id}/content", dataSetId).asString(); assertThat(content, sameJSONAsFile(this.getClass().getResourceAsStream(EMPTY_LINES2_JSON))); } @Test public void get() throws Exception { String expectedId = insertEmptyDataSet(); List<String> ids = from(when().get("/datasets").asString()).get(""); assertThat(ids.size(), is(1)); int statusCode = when().get("/datasets/{id}/content", expectedId).getStatusCode(); assertEquals("statusCode is:" + statusCode, statusCode, OK.value()); } @Test public void getNotExistingDataset() throws Exception { int statusCode = when().get("/datasets/1234/content").getStatusCode(); assertTrue("statusCode is:" + statusCode, statusCode == HttpStatus.BAD_REQUEST.value()); } @Test public void testFavorite() { // given final String datasetId = UUID.randomUUID().toString(); final DataSetMetadata dataSetMetadata = metadataBuilder.metadata().id(datasetId) .formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";"); dataSetMetadataRepository.save(dataSetMetadata); contentStore.storeAsRaw(dataSetMetadata, new ByteArrayInputStream(new byte[0])); final UserData userData = new UserData(security.getUserId(), versionService.version().getVersionId()); userDataRepository.save(userData); final Set<String> favorites = new HashSet<>(); favorites.add(datasetId); boolean isFavorite = from(when().get("/datasets/{id}/content", datasetId).asString()) .get("metadata.favorite"); assertFalse(isFavorite); // when userData.setFavoritesDatasets(favorites); userDataRepository.save(userData); // then isFavorite = 
from(when().get("/datasets/{id}/content", datasetId).asString()).get("metadata.favorite"); assertTrue(isFavorite); } @Test public void shouldCopy() throws Exception { // given String originalId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), "original"); // when final Response response = given() // .queryParam("copyName", "copy") // .when()// .expect().statusCode(200).log().ifError() // .post("/datasets/{id}/copy", originalId); // then assertEquals(200, response.getStatusCode()); final String copyId = response.asString(); final DataSetMetadata copy = dataSetMetadataRepository.get(copyId); assertNotNull(copy); assertEquals(9, copy.getRowMetadata().size()); } @Test public void copyNothingShouldReturnNothing() throws Exception { // when final Response response = given() // .queryParam("copyName", "copy") // .when()// .expect().statusCode(200).log().ifError() // .post("/datasets/{id}/copy", "unknown_dataset"); // then assertEquals(200, response.getStatusCode()); assertTrue(response.asString().length() == 0); } @Test public void cannotCopyIfTheNameIsAlreadyUsed() throws Exception { // given String name = "taken"; String originalId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), name); // when final Response response = given() // .queryParam("copyName", name) // .when()// .expect().statusCode(409).log().ifError() // .post("/datasets/{id}/copy", originalId); // then assertEquals(409, response.getStatusCode()); } @Test public void sampleWithNegativeSize() throws Exception { // given String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), "ds-4"); // when String sample = requestDataSetSample(dataSetId, true, "-1"); // then assertEquals(100, getNumberOfRecords(sample)); } @Test public void sampleWithSizeIsZero() throws Exception { // given String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(T_SHIRT_100_CSV), UUID.randomUUID().toString()); // when String sample = 
requestDataSetSample(dataSetId, true, "0"); // then assertEquals(100, getNumberOfRecords(sample)); } @Test public void delete() throws Exception { String expectedId = UUID.randomUUID().toString(); DataSetMetadata dataSetMetadata = metadataBuilder.metadata().id(expectedId) .formatFamilyId(new CSVFormatFamily().getBeanId()).build(); dataSetMetadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";"); dataSetMetadataRepository.save(dataSetMetadata); List<String> ids = from(when().get("/datasets").asString()).get(""); assertThat(ids.size(), is(1)); int before = dataSetMetadataRepository.size(); when().delete("/datasets/{id}", expectedId).then().statusCode(OK.value()); int after = dataSetMetadataRepository.size(); logger.debug("delete before {} after {}", before, after); assertThat(before - after, is(1)); } @Test public void updateRawContent() throws Exception { String dataSetId = "123456"; given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))).when() .put("/datasets/{id}/raw", dataSetId).then().statusCode(OK.value()); List<String> ids = from(when().get("/datasets").asString()).get("id"); assertThat(ids, hasItem(dataSetId)); assertQueueMessages(dataSetId); given().body(IOUtils.toString(this.getClass().getResourceAsStream("../avengers.csv"))).when() .put("/datasets/{id}/raw", dataSetId).then().statusCode(OK.value()); ids = from(when().get("/datasets").asString()).get("id"); assertThat(ids, hasItem(dataSetId)); assertQueueMessages(dataSetId); } @Test public void updateRawContent_should_preserve_non_content_related_metadata_except_last_modification_date() throws Exception { // given final String dataSetId = "123456"; given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))).when() .put("/datasets/{id}/raw", dataSetId).then().statusCode(OK.value()); String datasets = when().get("/datasets").asString(); List<DataSetMetadata> datasetsMetadata = mapper.readValue(datasets, new 
TypeReference<ArrayList<DataSetMetadata>>() { }); final DataSetMetadata original = datasetsMetadata.get(0); // when given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA2_CSV))).when() .put("/datasets/{id}/raw", dataSetId).then().statusCode(OK.value()); // then datasets = when().get("/datasets").asString(); datasetsMetadata = mapper.readValue(datasets, new TypeReference<ArrayList<DataSetMetadata>>() { }); final DataSetMetadata copy = datasetsMetadata.get(0); assertThat(copy.getId(), equalTo(original.getId())); assertThat(copy.getAppVersion(), equalTo(original.getAppVersion())); assertThat(copy.getAuthor(), equalTo(original.getAuthor())); assertThat(copy.getCreationDate(), equalTo(original.getCreationDate())); assertThat(copy.getLocation(), equalTo(original.getLocation())); } @Test public void updateRawContentWithDifferentSchema() throws Exception { String dataSetId = "123456"; given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))).when() .put("/datasets/{id}/raw", dataSetId).then().statusCode(OK.value()); final DataSetMetadata dataSetMetadataBeforeUpdate = dataSetMetadataRepository.get(dataSetId); assertEquals(6, dataSetMetadataBeforeUpdate.getRowMetadata().getColumns().size()); given().body(IOUtils.toString(this.getClass().getResourceAsStream("../avengers.csv"))).when() .put("/datasets/{id}/raw", dataSetId).then().statusCode(OK.value()); final DataSetMetadata dataSetMetadataAfterUpdate = dataSetMetadataRepository.get(dataSetId); assertEquals(5, dataSetMetadataAfterUpdate.getRowMetadata().getColumns().size()); } @Test public void test_TDP_2052() throws Exception { // given final String dataSetId = "123456"; given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))).when() .put("/datasets/{id}/raw?name=original", dataSetId).then().statusCode(OK.value()); String datasets = when().get("/datasets").asString(); List<DataSetMetadata> datasetsMetadata = mapper.readValue(datasets, new 
TypeReference<ArrayList<DataSetMetadata>>() { }); final DataSetMetadata original = datasetsMetadata.get(0); // when given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA2_CSV))).when() .put("/datasets/{id}/raw?name=", dataSetId).then().statusCode(OK.value()); // then datasets = when().get("/datasets").asString(); datasetsMetadata = mapper.readValue(datasets, new TypeReference<ArrayList<DataSetMetadata>>() { }); final DataSetMetadata copy = datasetsMetadata.get(0); assertThat(copy.getId(), equalTo(original.getId())); assertThat(copy.getName(), equalTo(original.getName())); } @Test public void updateMetadataContentWithWrongDatasetId() throws Exception { assertThat(dataSetMetadataRepository.get("3d72677c-e2c9-4a34-8c58-959a56ec8643"), nullValue()); given().contentType(JSON) // .body(IOUtils.toString(this.getClass().getResourceAsStream(METADATA_JSON))) // .when() // .put("/datasets/{id}", "3d72677c-e2c9-4a34-8c58-959a56ec8643") // .then() // .statusCode(HttpStatus.BAD_REQUEST.value()); } @Test public void previewNonDraft() throws Exception { // Create a data set String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); final DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId); assertThat(dataSetMetadata, notNullValue()); dataSetMetadata.setDraft(false); // Ensure it is no draft dataSetMetadataRepository.save(dataSetMetadata); // Should receive a 301 that redirects to the GET data set content operation given().redirects().follow(false).contentType(JSON).get("/datasets/{id}/preview", dataSetId) // .then() // .statusCode(HttpStatus.MOVED_PERMANENTLY.value()); // Should receive a 200 if code follows redirection given().redirects().follow(true).contentType(JSON).get("/datasets/{id}/preview", dataSetId) // .then() // .statusCode(OK.value()); } @Test public void previewMissingMetadata() throws Exception { // Data set 
1234 does not exist, should get empty content response. given().get("/datasets/{id}/preview", "1234") // .then() // .statusCode(HttpStatus.NO_CONTENT.value()); } @Test public void preview_multi_sheet_with_a_sheet_name() throws Exception { String dataSetId = createXlsDataSet( this.getClass().getResourceAsStream("../Talend_Desk-Tableau_de_Bord-011214.xls")); String json = given().contentType(JSON).get("/datasets/{id}/preview?sheetName=Leads", dataSetId).asString(); DataSet dataSet = mapper.readerFor(DataSet.class).readValue(json); Assertions.assertThat(dataSet.getMetadata().getRowMetadata().getColumns()).isNotNull().isNotEmpty() .hasSize(21); json = given().contentType(JSON).get("/datasets/{id}/preview?sheetName=Tableau de bord", dataSetId) .asString(); dataSet = mapper.readerFor(DataSet.class).readValue(json); Assertions.assertThat(dataSet.getMetadata().getRowMetadata().getColumns()).isNotNull().isNotEmpty() .hasSize(10); } @Test public void should_get_content_from_semi_colon_csv() throws Exception { // given final String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) // .queryParam("Content-Type", "text/csv") // .when() // .post("/datasets") // .asString(); assertQueueMessages(dataSetId); // when final InputStream content = when().get("/datasets/{id}/content?metadata=false", dataSetId).asInputStream(); // then final String contentAsString = IOUtils.toString(content); final InputStream expected = this.getClass().getResourceAsStream("../content/test1.json"); assertThat(contentAsString, sameJSONAsFile(expected)); } @Test public void should_get_content_from_coma_csv() throws Exception { // given final String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA2_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); // when final InputStream content = when().get("/datasets/{id}/content?metadata=false", dataSetId).asInputStream(); // 
then final String contentAsString = IOUtils.toString(content); final InputStream expected = this.getClass().getResourceAsStream("../content/test1.json"); assertThat(contentAsString, sameJSONAsFile(expected)); } @Test public void should_get_content_from_updated_dataset() throws Exception { // given final String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); // given: update content given().body(IOUtils.toString(this.getClass().getResourceAsStream("../tagada3.csv"))) .queryParam("Content-Type", "text/csv").when().put("/datasets/" + dataSetId + "/raw"); assertQueueMessages(dataSetId); // when final InputStream content = when().get("/datasets/{id}/content?metadata=false", dataSetId).asInputStream(); final String contentAsString = IOUtils.toString(content); // then final InputStream expected = this.getClass().getResourceAsStream("../content/test2.json"); assertThat(contentAsString, sameJSONAsFile(expected)); // Update name String expectedName = "testOfADataSetName"; given().body(IOUtils.toString(this.getClass().getResourceAsStream("../tagada3.csv"))) .queryParam("Content-Type", "text/csv").when() .put("/datasets/" + dataSetId + "/raw?name=" + expectedName); final DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId); assertThat(dataSetMetadata, notNullValue()); assertThat(dataSetMetadata.getName(), is(expectedName)); } @Test public void should_update_dataset_name() throws Exception { // given final String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); // when final String expectedName = "testOfADataSetName"; given().body(IOUtils.toString(this.getClass().getResourceAsStream("../tagada3.csv"))) .queryParam("Content-Type", "text/csv").when() 
.put("/datasets/" + dataSetId + "/raw?name=" + expectedName); // then final DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId); assertThat(dataSetMetadata, notNullValue()); assertThat(dataSetMetadata.getName(), is(expectedName)); } /** * see https://jira.talendforge.org/browse/TDP-1066 */ @Test public void shouldUpdateSeparatorWithHeader() throws Exception { // given String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream("../avengers.psv"), "tpd-1066"); InputStream metadataInput = when().get("/datasets/{id}/metadata", dataSetId).asInputStream(); DataSet dataSet = mapper.readerFor(DataSet.class).readValue(metadataInput); DataSetMetadata metadata = dataSet.getMetadata(); // when final Map<String, String> parameters = metadata.getContent().getParameters(); parameters.put(CSVFormatFamily.SEPARATOR_PARAMETER, "|"); parameters.remove(CSVFormatFamily.HEADER_COLUMNS_PARAMETER); final int statusCode = given() // .contentType(JSON) // .body(mapper.writer().writeValueAsString(metadata)) // .expect().statusCode(200).log().ifError() // .when().put("/datasets/{id}", dataSetId).getStatusCode(); assertThat(statusCode, is(200)); assertQueueMessages(dataSetId); // then InputStream expected = this.getClass().getResourceAsStream("../avengers_expected.json"); String datasetContent = given().when().get("/datasets/{id}/content?metadata=true", dataSetId).asString(); assertThat(datasetContent, sameJSONAsFile(expected)); } /** * see https://jira.talendforge.org/browse/TDP-1066 */ @Test public void shouldUpdateSeparatorWithoutHeader() throws Exception { // given String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream("../tdp-1066_no_header.ssv"), "tdp-1066-2"); InputStream metadataInput = when().get("/datasets/{id}/metadata", dataSetId).asInputStream(); DataSet dataSet = mapper.readerFor(DataSet.class).readValue(metadataInput); DataSetMetadata metadata = dataSet.getMetadata(); // then 
assertThat(metadata.getRowMetadata().getColumns().size(), is(2)); // ';' is guessed as separator ==> 2 columns // when final Map<String, String> parameters = metadata.getContent().getParameters(); parameters.put(CSVFormatFamily.SEPARATOR_PARAMETER, " "); parameters.remove(CSVFormatFamily.HEADER_COLUMNS_PARAMETER); final int statusCode = given() // .contentType(JSON) // .body(mapper.writer().writeValueAsString(metadata)) // .expect().statusCode(200).log().ifError() // .when().put("/datasets/{id}", dataSetId).getStatusCode(); assertThat(statusCode, is(200)); assertQueueMessages(dataSetId); // then InputStream datasetContent = given().when().get("/datasets/{id}/content?metadata=true", dataSetId) .asInputStream(); final DataSet actual = mapper.readerFor(DataSet.class).readValue(datasetContent); final DataSetMetadata actualMetadata = actual.getMetadata(); assertThat(actualMetadata.getRowMetadata().getColumns().size(), is(10)); // with ' ' as separator ==> 10 columns } /** * Test the import of a csv file with a really low separator coefficient variation. * * @see org.talend.dataprep.schema.csv.CSVSchemaParser */ @Test public void testLowSeparatorOccurrencesInCSV() throws Exception { String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream("../avengers.csv"))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); InputStream expected = this.getClass().getResourceAsStream("../avengers_expected.json"); String datasetContent = given().when().get("/datasets/{id}/content?metadata=true", dataSetId).asString(); assertThat(datasetContent, sameJSONAsFile(expected)); } /** * Test the import of an excel file that is also detected as csv file. 
See * https://jira.talendforge.org/browse/TDP-258 * * @see org.talend.dataprep.schema.csv.CSVSchemaParser */ @Test public void testXlsFileThatIsAlsoParsedAsCSV() throws Exception { String dataSetId = given() .body(IOUtils.toByteArray(this.getClass().getResourceAsStream("../TDP-375_xsl_read_as_csv.xls"))) .when().post("/datasets").asString(); assertQueueMessages(dataSetId); String json = given().when().get("/datasets/{id}/metadata", dataSetId).asString(); final JsonNode rootNode = mapper.reader().readTree(json); final JsonNode metadata = rootNode.get("metadata"); // only interested in the parser --> excel parser must be used ! assertEquals(metadata.get("type").asText(), "application/vnd.ms-excel"); assertEquals(metadata.get("formatGuess").asText(), "formatGuess#xls"); assertEquals(metadata.get("records").asText(), "500"); } @Test public void testQuotes() throws Exception { String dataSetId = given() .body(IOUtils.toString(this.getClass().getResourceAsStream("../bands_quotes.csv"))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); InputStream content = when().get("/datasets/{id}/content?metadata=false", dataSetId).asInputStream(); String contentAsString = IOUtils.toString(content); InputStream expected = this.getClass().getResourceAsStream("../test_quotes.json"); assertThat(contentAsString, sameJSONAsFile(expected)); } @Test public void testQuotesAndCarriageReturn() throws Exception { String dataSetId = given() .body(IOUtils .toString(this.getClass().getResourceAsStream("../bands_quotes_and_carriage_return.csv"))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); InputStream content = when().get("/datasets/{id}/content?metadata=false", dataSetId).asInputStream(); String contentAsString = IOUtils.toString(content); InputStream expected = this.getClass().getResourceAsStream("../test_quotes_and_carriage_return.json"); assertThat(contentAsString, 
sameJSONAsFile(expected)); } /** * see https://jira.talendforge.org/browse/TDP-71 */ @Test public void empty_lines_and_missing_values() throws Exception { String dataSetId = given() .body(IOUtils.toString(this.getClass().getResourceAsStream(US_STATES_TO_CLEAN_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); InputStream content = when().get("/datasets/{id}/content?metadata=false", dataSetId).asInputStream(); String contentAsString = IOUtils.toString(content); InputStream expected = this.getClass().getResourceAsStream("../us_states_to_clean.csv_expected.json"); assertThat(contentAsString, sameJSONAsFile(expected)); } @Test public void nbLines() throws Exception { String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); InputStream content = when().get("/datasets/{id}/content?metadata=true", dataSetId).asInputStream(); String contentAsString = IOUtils.toString(content); assertThat(contentAsString, sameJSONAs("{\"metadata\":{\"records\":2,\"nbLinesHeader\":1,\"nbLinesFooter\":0}}") .allowingExtraUnexpectedFields().allowingAnyArrayOrdering()); } @Test public void nbLines2() throws Exception { String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(T_SHIRT_100_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); InputStream content = when().get("/datasets/{id}/content?metadata=true", dataSetId).asInputStream(); String contentAsString = IOUtils.toString(content); InputStream expected = this.getClass().getResourceAsStream(T_SHIRT_100_CSV_EXPECTED_JSON); assertThat(contentAsString, sameJSONAsFile(expected)); } @Test public void nbLinesUpdate() throws Exception { String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) 
.queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); assertQueueMessages(dataSetId); InputStream content = when().get("/datasets/{id}/content?metadata=true", dataSetId).asInputStream(); String contentAsString = IOUtils.toString(content); assertThat(contentAsString, sameJSONAs("{\"metadata\":{\"records\":2,\"nbLinesHeader\":1,\"nbLinesFooter\":0}}") .allowingExtraUnexpectedFields().allowingAnyArrayOrdering()); given().body(IOUtils.toString(this.getClass().getResourceAsStream(T_SHIRT_100_CSV))) .queryParam("Content-Type", "text/csv").when().put("/datasets/{id}/raw", dataSetId).asString(); assertQueueMessages(dataSetId); content = when().get("/datasets/{id}/content?metadata=true", dataSetId).asInputStream(); contentAsString = IOUtils.toString(content); InputStream expected = this.getClass().getResourceAsStream(T_SHIRT_100_CSV_EXPECTED_JSON); assertThat(contentAsString, sameJSONAsFile(expected)); } @Test public void getMetadata() throws Exception { DataSetMetadataBuilder builder = metadataBuilder.metadata().id("1234"); builder.row(ColumnMetadata.Builder// .column()// .id(1234)// .name("id")// .empty(0)// .invalid(0)// .valid(0)// .type(Type.STRING))// .created(0)// .name("name")// .author("author")// .footerSize(0) // .headerSize(1) // .qualityAnalyzed(true) // .schemaAnalyzed(true) // .formatFamilyId(new CSVFormatFamily().getBeanId()) // .mediaType("text/csv"); DataSetMetadata metadata = builder.build(); metadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";"); dataSetMetadataRepository.save(metadata); String contentAsString = when().get("/datasets/{id}/metadata", "1234").asString(); InputStream expected = this.getClass().getResourceAsStream("../metadata1.json"); assertThat(contentAsString, sameJSONAsFile(expected)); Boolean isFavorites = from(contentAsString).get("metadata.favorite"); assertFalse(isFavorites); // add favorite UserData userData = new UserData(security.getUserId(), versionService.version().getVersionId()); 
HashSet<String> favorites = new HashSet<>(); favorites.add("1234"); userData.setFavoritesDatasets(favorites); userDataRepository.save(userData); contentAsString = when().get("/datasets/{id}/metadata", "1234").asString(); isFavorites = from(contentAsString).get("metadata.favorite"); assertTrue(isFavorites); } @Test public void getEmptyMetadata() throws Exception { DataSetMetadata metadata = dataSetMetadataRepository.get("9876"); assertNull(metadata); int statusCode = when().get("/datasets/{id}/metadata", "9876").statusCode(); assertThat(statusCode, is(HttpStatus.BAD_REQUEST.value())); } /** * Check that the error listing service returns a list parsable of error codes. The content is not checked * * @throws Exception if an error occurs. */ @Test public void shouldListErrors() throws Exception { String errors = when().get("/datasets/errors").asString(); JsonNode rootNode = mapper.readTree(errors); assertTrue(rootNode.isArray()); assertTrue(rootNode.size() > 0); for (final JsonNode errorCode : rootNode) { assertTrue(errorCode.has("code")); assertTrue(errorCode.has("http-status-code")); } } @Test public void testAskCertification() throws Exception { int before = dataSetMetadataRepository.size(); String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); int after = dataSetMetadataRepository.size(); assertThat(after - before, is(1)); assertQueueMessages(dataSetId); DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId); assertThat(dataSetMetadata, notNullValue()); assertEquals(Certification.NONE, dataSetMetadata.getGovernance().getCertificationStep()); when().put("/datasets/{id}/processcertification", dataSetId).then().statusCode(OK.value()); dataSetMetadata = dataSetMetadataRepository.get(dataSetId); assertThat(dataSetMetadata, notNullValue()); assertEquals(Certification.PENDING, dataSetMetadata.getGovernance().getCertificationStep()); 
assertThat(dataSetMetadata.getRowMetadata().getColumns(), not(empty())); } @Test public void testCertify() throws Exception { int before = dataSetMetadataRepository.size(); String dataSetId = given().body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) .queryParam("Content-Type", "text/csv").when().post("/datasets").asString(); int after = dataSetMetadataRepository.size(); assertThat(after - before, is(1)); assertQueueMessages(dataSetId); DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId); assertThat(dataSetMetadata, notNullValue()); long originalNbLines = dataSetMetadata.getContent().getNbRecords(); // to check later if no modified assertEquals(Certification.NONE, dataSetMetadata.getGovernance().getCertificationStep()); // NONE -> PENDING when().put("/datasets/{id}/processcertification", dataSetId).then().statusCode(OK.value()); // PENDING -> CERTIFIED when().put("/datasets/{id}/processcertification", dataSetId).then().statusCode(OK.value()); dataSetMetadata = dataSetMetadataRepository.get(dataSetId); assertThat(dataSetMetadata, notNullValue()); assertEquals(Certification.CERTIFIED, dataSetMetadata.getGovernance().getCertificationStep()); assertEquals(originalNbLines, dataSetMetadata.getContent().getNbRecords()); } @Test public void testGetFavoritesDatasetList() { when().get("/datasets/favorites").then().statusCode(OK.value()).body(equalTo("[]")); String dsId1 = UUID.randomUUID().toString(); String dsId2 = UUID.randomUUID().toString(); UserData userData = new UserData(security.getUserId(), versionService.version().getVersionId()); HashSet<String> favorites = new HashSet<>(); favorites.add(dsId1); favorites.add(dsId2); userData.setFavoritesDatasets(favorites); userDataRepository.save(userData); List<String> favoritesResp = from(when().get("/datasets/favorites").asString()).get(); assertEquals(2, favoritesResp.size()); assertThat(favoritesResp, hasItems(dsId1, dsId2)); } /** See https://jira.talendforge.org/browse/TDP-3296 
**/
    @Test
    public void testGetFavoritesDatasetList_noFavoriteForUserListNoDataset() {
        String dsId1 = UUID.randomUUID().toString();
        String dsId2 = UUID.randomUUID().toString();
        dataSetMetadataRepository.save(metadataBuilder.metadata().id(dsId1).build());
        dataSetMetadataRepository.save(metadataBuilder.metadata().id(dsId2).build());

        // ArrayList.class is the same Class object as new ArrayList<String>().getClass(), without the throw-away
        // instance.
        @SuppressWarnings("unchecked")
        List<String> favoritesResp = when().get("/datasets?favorite=true").as(ArrayList.class);
        assertTrue(favoritesResp.isEmpty());
    }

    /**
     * Checks the favorite toggle: PUT /datasets/{id}/favorite (with or without unset=false) adds the data set to
     * the favorites, unset=true removes it, and an id absent from the repository answers with HTTP 400.
     */
    @Test
    public void testSetUnsetFavoriteDataSet() throws IOException {
        String dsId1 = UUID.randomUUID().toString();
        String dsId2 = UUID.randomUUID().toString();

        when().get("/datasets/favorites").then().statusCode(OK.value()).body(equalTo("[]"));

        dataSetMetadataRepository.save(metadataBuilder.metadata().id(dsId1).build());
        dataSetMetadataRepository.save(metadataBuilder.metadata().id(dsId2).build());

        // check set
        when().put("/datasets/{id}/favorite", dsId1).then().statusCode(OK.value());
        when().put("/datasets/{id}/favorite?unset=false", dsId2).then().statusCode(OK.value());

        List<String> favoritesResp = from(when().get("/datasets/favorites").asString()).get(); //$NON-NLS-1$
        assertEquals(2, favoritesResp.size());
        assertThat(favoritesResp, hasItems(dsId1, dsId2));

        // check unset
        when().put("/datasets/{id}/favorite?unset=true", dsId2).then().statusCode(OK.value());
        favoritesResp = from(when().get("/datasets/favorites").asString()).get();
        assertEquals(1, favoritesResp.size());
        assertThat(favoritesResp, hasItem(dsId1));

        // check wrong datasetId
        String wrongDsId = UUID.randomUUID().toString();
        assertThat(dataSetMetadataRepository.get(wrongDsId), nullValue());
        given().contentType(JSON) //
                .body(IOUtils.toString(this.getClass().getResourceAsStream(METADATA_JSON))) //
                .when() //
                .put("/datasets/{id}/favorite", wrongDsId) //
                .then() //
                .statusCode(HttpStatus.BAD_REQUEST.value());
    }

    /**
     * Adds a "JSO" semantic domain to column 0002 directly in the repository, then POSTs {"domain": "JSO"} to the
     * column endpoint and checks that the column's domain, label and frequency are those of the added domain.
     */
    @Test
    public void updateDatasetColumn_should_update_domain() throws Exception {
        // given
        final String dataSetId = given() //
                .body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) //
                .queryParam("Content-Type", "text/csv") //
                .when() //
                .post("/datasets") //
                .asString();

        final ColumnMetadata column;

        // update the metadata in the repository (lock mechanism is needed otherwise semantic domain will be erased by
        // analysis)
        final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
        DataSetMetadata dataSetMetadata;
        RowMetadata row;
        lock.lock();
        try {
            dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
            assertNotNull(dataSetMetadata);
            row = dataSetMetadata.getRowMetadata();
            assertNotNull(row);
            column = row.getById("0002");
            final SemanticDomain jsoDomain = new SemanticDomain("JSO", "JSO label", 1.0F);
            column.getSemanticDomains().add(jsoDomain);
            dataSetMetadataRepository.save(dataSetMetadata);
        } finally {
            lock.unlock();
        }

        assertThat(column.getDomain(), is("FIRST_NAME"));
        assertThat(column.getDomainLabel(), is("First Name"));
        assertThat(column.getDomainFrequency(), is(100.0F));

        // when
        final Response res = given() //
                .body("{\"domain\": \"JSO\"}") //
                .when() //
                .contentType(JSON) //
                .post("/datasets/{dataSetId}/column/{columnId}", dataSetId, "0002");

        // then
        res.then().statusCode(200);
        dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(dataSetMetadata);
        row = dataSetMetadata.getRowMetadata();
        assertNotNull(row);
        final ColumnMetadata actual = row.getById("0002");
        assertThat(actual.getDomain(), is("JSO"));
        assertThat(actual.getDomainLabel(), is("JSO label"));
        assertThat(actual.getDomainFrequency(), is(1.0F));
    }

    /**
     * POSTs {"type": "integer"} to the column endpoint for column 0002 and checks that the column type changes
     * from "string" to "integer" while its domain, label and frequency stay unchanged.
     */
    @Test
    public void updateDatasetColumn_should_update_type() throws Exception {
        // given
        final String dataSetId = given() //
                .body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) //
                .queryParam("Content-Type", "text/csv") //
                .when() //
                .post("/datasets") //
                .asString();

        DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(dataSetMetadata);
        RowMetadata row = dataSetMetadata.getRowMetadata();
        assertNotNull(row);
        final ColumnMetadata column = row.getById("0002");
        assertThat(column.getDomain(), is("FIRST_NAME"));
        assertThat(column.getDomainLabel(), is("First Name"));
        assertThat(column.getDomainFrequency(), is(100.0F));
        assertThat(column.getType(), is("string"));

        // when
        final Response res = given() //
                .body("{\"type\": \"integer\"}") //
                .when() //
                .contentType(JSON) //
                .post("/datasets/{dataSetId}/column/{columnId}", dataSetId, "0002");

        // then
        res.then().statusCode(200);
        dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(dataSetMetadata);
        row = dataSetMetadata.getRowMetadata();
        assertNotNull(row);
        final ColumnMetadata actual = row.getById("0002");
        assertThat(actual.getDomain(), is("FIRST_NAME"));
        assertThat(actual.getDomainLabel(), is("First Name"));
        assertThat(actual.getDomainFrequency(), is(100.0F));
        assertThat(actual.getType(), is("integer"));
    }

    /**
     * POSTs {"domain": ""} to the column endpoint for column 0002 and checks that the column's domain and label
     * become empty and its domain frequency becomes 0.
     */
    @Test
    public void updateDatasetColumn_should_clear_domain() throws Exception {
        // given
        final String dataSetId = given() //
                .body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV))) //
                .queryParam("Content-Type", "text/csv") //
                .when() //
                .post("/datasets") //
                .asString();

        DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(dataSetMetadata);
        RowMetadata row = dataSetMetadata.getRowMetadata();
        assertNotNull(row);
        final ColumnMetadata column = row.getById("0002");
        assertThat(column.getDomain(), is("FIRST_NAME"));
        assertThat(column.getDomainLabel(), is("First Name"));
        assertThat(column.getDomainFrequency(), is(100.0F));

        // when
        final Response res = given() //
                .body("{\"domain\": \"\"}") //
                .when() //
                .contentType(JSON) //
                .post("/datasets/{dataSetId}/column/{columnId}", dataSetId, "0002");

        // then
        res.then().statusCode(200);
        dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(dataSetMetadata);
        row = dataSetMetadata.getRowMetadata();
        assertNotNull(row);
        final ColumnMetadata actual = row.getById("0002");
        assertThat(actual.getDomain(), is(""));
        assertThat(actual.getDomainLabel(), is(""));
        assertThat(actual.getDomainFrequency(), is(0.0F));
    }

    /**
     * Posts ../date_time_pattern.csv and checks that column 0001 is typed "date" with an empty domain and that its
     * statistics equal those read from ../date_time_pattern_expected.json.
     */
    @Test
    public void datePattern() throws Exception {
        int before = dataSetMetadataRepository.size();
        String dataSetId = given()
                .body(IOUtils.toString(this.getClass().getResourceAsStream("../date_time_pattern.csv")))
                .queryParam("Content-Type", "text/csv").when().post("/datasets").asString();
        int after = dataSetMetadataRepository.size();
        assertThat(after - before, is(1));
        assertQueueMessages(dataSetId);

        final DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(dataSetMetadata);
        final ColumnMetadata column = dataSetMetadata.getRowMetadata().getById("0001");

        assertThat(column.getType(), is("date"));
        assertThat(column.getDomain(), is(""));
        final Statistics statistics = mapper.readerFor(Statistics.class)
                .readValue(this.getClass().getResourceAsStream("../date_time_pattern_expected.json"));
        assertThat(column.getStatistics(), CoreMatchers.equalTo(statistics));
    }

    /**
     * Checks that the fixture really contains a NUL character, then creates a data set from it and asserts that
     * the requested sample does not contain the escaped NUL sequence.
     */
    @Test
    public void should_remove_any_NUL_character() throws Exception {
        // given
        final String originalContent = IOUtils
                .toString(this.getClass().getResourceAsStream(DATASET_WITH_NUL_CHAR_CSV));
        assertThat(originalContent.chars().anyMatch((c) -> c == '\u0000'), is(true));
        final String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream(DATASET_WITH_NUL_CHAR_CSV),
                "test");

        // when
        final String content = requestDataSetSample(dataSetId, false, "10");

        // then
        assertThat(content, not(containsString("\\u0000")));
    }

    /**
     * Posts ../invalid_us_states.csv and checks that the first column has the US_STATE_CODE domain and reports 2
     * invalid values.
     */
    @Test
    public void invalid_us_states() throws Exception {
        String dataSetId = given()
                .body(IOUtils.toString(this.getClass().getResourceAsStream("../invalid_us_states.csv")))
                .queryParam("Content-Type", "text/csv").when().post("/datasets").asString();
        assertQueueMessages(dataSetId);

        InputStream content = when().get("/datasets/{id}/content?metadata=true", dataSetId).asInputStream();
        String contentAsString = IOUtils.toString(content);

        final DataSet dataset = mapper.readerFor(DataSet.class).readValue(contentAsString);
        assertThat(dataset, is(notNullValue()));
        assertThat(dataset.getMetadata().getRowMetadata().getColumns().isEmpty(), is(false));
        final ColumnMetadata column = dataset.getMetadata().getRowMetadata().getColumns().get(0);
        assertThat(column.getDomain(), is("US_STATE_CODE")); // us state code
        assertThat(column.getQuality().getInvalid(), is(2)); // 2 invalid values
    }

    /**
     * Creates four data sets, marks some as certified and/or favorite, and checks the names returned by
     * GET /datasets for several combinations of the "favorite", "certified", "limit", "name" and "sort" query
     * parameters.
     */
    @Test
    public void should_list_filtered_datasets_properly() throws Exception {
        // create data sets
        final String dataSetId1 = createCSVDataSet(this.getClass().getResourceAsStream("../tagada3.csv"),
                "dataset1");
        final String dataSetId2 = createCSVDataSet(this.getClass().getResourceAsStream("../avengers.csv"),
                "dataset2");
        final String dataSetId3 = createCSVDataSet(this.getClass().getResourceAsStream("../tagada.csv"),
                "dataset3");
        createCSVDataSet(this.getClass().getResourceAsStream("../tagada2.csv"), "dataset4");

        // Make dataset1 more recent
        final DataSetMetadata dataSetMetadata1 = dataSetMetadataRepository.get(dataSetId1);
        dataSetMetadata1.getGovernance().setCertificationStep(Certification.CERTIFIED);
        dataSetMetadata1.setLastModificationDate((now().getEpochSecond() + 1) * 1_000);
        dataSetMetadataRepository.save(dataSetMetadata1);

        final DataSetMetadata dataSetMetadata2 = dataSetMetadataRepository.get(dataSetId2);
        dataSetMetadataRepository.save(dataSetMetadata2);

        final DataSetMetadata dataSetMetadata3 = dataSetMetadataRepository.get(dataSetId3);
        dataSetMetadata3.getGovernance().setCertificationStep(Certification.CERTIFIED);
        dataSetMetadataRepository.save(dataSetMetadata3);

        UserData userData = new UserData();
        userData.setUserId(security.getUserId());
        userData.addFavoriteDataset(dataSetMetadata1.getId());
        userData.addFavoriteDataset(dataSetMetadata2.getId());
        userDataRepository.save(userData);

        // @formatter:off
        // certified, favorite and recent
        given().queryParam("favorite", "true").queryParam("certified", "true").queryParam("limit", "true").when()
                .get("/datasets").then().statusCode(200).body("name", hasItem("dataset1")).body("name", hasSize(1));
        // certified, favorite and recent, with a name filter that matches none of them
        given().queryParam("favorite", "true").queryParam("certified", "true").queryParam("limit", "true")
                .queryParam("name", "2").when().get("/datasets").then().statusCode(200).body("name", hasSize(0));
        // only names
        given().queryParam("name", "ATAset2").when().get("/datasets").then().statusCode(200)
                .body("name", hasItem("dataset2")).body("name", hasSize(1));
        // only favorites
        given().queryParam("favorite", "true").when().get("/datasets").then().statusCode(200)
                .body("name", hasItems("dataset1", "dataset2")).body("name", hasSize(2));
        // only certified
        given().queryParam("certified", "true").when().get("/datasets").then().statusCode(200)
                .body("name", hasItems("dataset1", "dataset3")).body("name", hasSize(2));
        // only recent
        given().queryParam("limit", "true").queryParam("sort", LAST_MODIFICATION_DATE.camelName()).when()
                .get("/datasets").then().statusCode(200).body("name", hasItems("dataset1", "dataset3", "dataset4"))
                .body("name", hasSize(3));
        // all
        when().get("/datasets").then().statusCode(200)
                .body("name", hasItems("dataset1", "dataset2", "dataset3", "dataset4")).body("name", hasSize(4));
        // @formatter:on
    }

    /**
     * Checks that PUT /datasets/{id}/processcertification answers 200 and moves the data set to the PENDING
     * certification step.
     */
    @Test
    public void should_have_grants_to_certify_dataset() throws Exception {
        // create data sets
        final String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream("../tagada3.csv"),
                "dataset1");

        // @formatter:off
        given().pathParam("id", dataSetId).when().put("/datasets/{id}/processcertification").then().statusCode(200);
        // @formatter:on

        final DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertEquals(Certification.PENDING, dataSetMetadata.getGovernance().getCertificationStep());
    }

    /**
     * Checks that GET /datasets/encodings returns a non-empty body.
     */
    @Test
    public void listEncodings() throws Exception {
        InputStream content = when().get("/datasets/encodings").asInputStream();
        final String contentAsString = IOUtils.toString(content);

        assertThat(contentAsString, not(isEmptyString()));
    }

    /**
     * Checks that GET /datasets/imports does not return an empty JSON array.
     */
    @Test
    public void listImports() throws Exception {
        // Given
        InputStream content = when().get("/datasets/imports").asInputStream();
        final String contentAsString = IOUtils.toString(content);

        // Then
        assertThat(contentAsString, not(is("[]"))); // There should be some imports available
    }

    /**
     * Checks that the column types endpoint answers 200 with 7 entries for column 0000 of ../communes_france.csv,
     * each carrying "id", "label" and "frequency".
     */
    @Test
    public void shouldGetDataSetColumnTypes() throws Exception {
        // given
        final String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream("../communes_france.csv"),
                "cities");

        // when
        final Response response = when().get("/datasets/{dataSetId}/columns/{columnId}/types", dataSetId, "0000");

        // then
        /*
         * expected response array of
         * {
         *   "id": "CITY",
         *   "label": "City",
         *   "frequency": 99.24
         * }
         */
        assertEquals(200, response.getStatusCode());
        final JsonNode rootNode = mapper.readTree(response.asInputStream());
        assertEquals(7, rootNode.size());
        for (JsonNode type : rootNode) {
            assertTrue(type.has("id"));
            assertTrue(type.has("label"));
            assertTrue(type.has("frequency"));
        }
    }

    /**
     * Temporarily sets the service's "maximumInputStreamSize" field to 2 through reflection, posts a 3-character
     * body and expects HTTP 413 with the TDP_DSS_LOCAL_DATA_SET_INPUT_STREAM_TOO_LARGE error code. The original
     * field value is restored in the finally block.
     */
    @Test
    public void test_locally_imported_dataset_does_not_exceed_limit() throws Exception {
        DataSetService dataSetService = context.getBean(DataSetService.class);
        long originalLimit = (Long) ReflectionTestUtils.getField(dataSetService, "maximumInputStreamSize");
        try {
            ReflectionTestUtils.setField(dataSetService, "maximumInputStreamSize", 2);
            given() //
                    .body("abc") //
                    .queryParam("Content-Type", "text/csv") //
                    .queryParam("name", "tooLargeInputDataset") //
                    .when() //
                    .expect().statusCode(413).log().ifError() //
                    .post("/datasets") //
                    .then().body("code", equalTo("TDP_DSS_LOCAL_DATA_SET_INPUT_STREAM_TOO_LARGE"));
        } finally {
            ReflectionTestUtils.setField(dataSetService, "maximumInputStreamSize", originalLimit);
        }
    }

    /**
     * Saves a metadata with a random id, the CSV format family and a ";" separator, and stores an empty raw
     * content for it.
     *
     * @return the id of the created data set.
     */
    private String insertEmptyDataSet() {
        String datasetId = UUID.randomUUID().toString();
        DataSetMetadata dataSetMetadata = metadataBuilder.metadata().id(datasetId)
                .formatFamilyId(new CSVFormatFamily().getBeanId()).build();
        dataSetMetadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";");
        dataSetMetadataRepository.save(dataSetMetadata);
        contentStore.storeAsRaw(dataSetMetadata, new ByteArrayInputStream(new byte[0]));
        return datasetId;
    }

    /**
     * Posts the given content as raw bytes to /datasets and checks the queue messages for the created data set.
     *
     * @param content the content to upload.
     * @return the response body of the POST, used as the data set id.
     * @throws Exception if the content cannot be read or the checks fail.
     */
    private String createXlsDataSet(InputStream content) throws Exception {
        String dataSetId = given().body(IOUtils.toByteArray(content)).when().post("/datasets").asString();
        assertQueueMessages(dataSetId);
        return dataSetId;
    }
}