Java tutorial
/*
 * Hibernate Search, full-text search for your domain model
 *
 * License: GNU Lesser General Public License (LGPL), version 2.1 or later
 * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
 */
package org.hibernate.search.elasticsearch.test;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.hibernate.search.elasticsearch.impl.JsonBuilder;
import org.hibernate.search.elasticsearch.util.impl.GsonHttpEntity;
import org.hibernate.search.testsupport.TestForIssue;
import org.junit.Assert;
import org.junit.Test;

import com.google.gson.Gson;
import com.google.gson.JsonObject;

import org.apache.http.nio.ContentEncoder;
import org.apache.http.nio.IOControl;

import static java.util.Collections.singletonList;
import static org.apache.commons.codec.binary.Hex.encodeHexString;
import static org.apache.commons.codec.digest.DigestUtils.getSha256Digest;
import static org.apache.commons.codec.digest.DigestUtils.sha256Hex;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.fail;

/**
 * Tests for GsonHttpEntity to be able to write the whole JSON string
 * out correctly, and produce a matching sha256 digest.
 * <p>
 * Every scenario compares the bytes produced by {@link GsonHttpEntity} against
 * a "traditional" encoding that renders each JSON object into a single
 * {@link StringBuilder} (one object per line) and converts it to UTF-8.
 *
 * @author Sanne Grinovero (C) 2017 Red Hat Inc.
 */
@TestForIssue(jiraKey = "HSEARCH-2818")
public class GsonStreamedEncodingTest {

	/** Capacity of the in-memory sink used by {@link HeapContentEncoder}. */
	public static final int MAX_TESTING_BUFFER_BYTES = 4000;

	private static final String JSON_TEST_PAYLOAD_VERSION = "{\"version\":{\"number\":\"5.5.0\"}}\n";

	private static final String JSON_TEST_PAYLOAD_EMPTY = "{}\n";

	/** Number of objects in the "large bulk" scenario; drives both the objects and the expected text. */
	private static final int BULK_BATCH_SIZE = 100;

	private static final String JSON_TEST_PAYLOAD_LARGE_BULK = produceLargeBulkJSONContent();

	private static final Gson gson = new Gson();

	/** A single empty object must be rendered as {@code "{}\n"}. */
	@Test
	public void testEmptyJSON() {
		final List<JsonObject> list = singletonList(buildEmptyJSON());
		verifyProducedContent(list);
		verifySha256Signature(list);
		verifyOutput(JSON_TEST_PAYLOAD_EMPTY, list);
	}

	/** A single object with one nested property must match the reference payload. */
	@Test
	public void testSinglePropertyJSON() {
		final List<JsonObject> list = singletonList(buildVersionJSON());
		verifyProducedContent(list);
		verifySha256Signature(list);
		verifyOutput(JSON_TEST_PAYLOAD_VERSION, list);
	}

	/** Three objects must be rendered one per line, in order. */
	@Test
	public void testTripleBulkJSON() {
		final List<JsonObject> list = buildTripleBulkJSON();
		verifyProducedContent(list);
		verifySha256Signature(list);
		verifyOutput(JSON_TEST_PAYLOAD_EMPTY + JSON_TEST_PAYLOAD_VERSION + JSON_TEST_PAYLOAD_EMPTY, list);
	}

	/** A bulk of {@link #BULK_BATCH_SIZE} objects must match the pre-computed reference text. */
	@Test
	public void testHugeBulkJSON() {
		final List<JsonObject> list = produceLargeBulkJSON();
		verifyProducedContent(list);
		verifySha256Signature(list);
		verifyOutput(JSON_TEST_PAYLOAD_LARGE_BULK, list);
	}

	/** Content holding a surrogate pair placed around character index 1024 must still encode correctly. */
	@Test
	@TestForIssue(jiraKey = "HSEARCH-2886")
	public void testSplitUnicodeSurrogatePair() {
		final List<JsonObject> list = produceUnicodeSplitSurrogatePairJSON();
		verifyProducedContent(list);
		verifySha256Signature(list);
	}

	/** Producing the content, closing the entity, then producing again must yield identical bytes. */
	@Test
	public void testContentIsRepeatable() {
		final List<JsonObject> list = buildTripleBulkJSON();
		try (GsonHttpEntity entity = new GsonHttpEntity(gson, list)) {
			final byte[] productionOne = produceContentWithCustomEncoder(entity);
			// Explicit close before the second run: the entity is expected to be reusable afterwards
			entity.close();
			final byte[] productionTwo = produceContentWithCustomEncoder(entity);
			assertArrayEquals(productionOne, productionTwo);
		}
		catch (IOException e) {
			throw new RuntimeException("We're mocking IO operations, this should not happen?", e);
		}
	}

	/**
	 * For the large payload, a fresh entity must report a content length of -1; after the
	 * content has been written out once (here through a digest stream), the reported length
	 * must be the actual number of bytes produced.
	 */
	@Test
	public void testDigestToTriggerLengthComputation() {
		final List<JsonObject> list = produceLargeBulkJSON();
		try (GsonHttpEntity entity = new GsonHttpEntity(gson, list)) {
			assertEquals(-1L, entity.getContentLength());
		}
		catch (IOException e) {
			throw new RuntimeException("We're mocking IO operations, this should not happen?", e);
		}
		//Need to discard the entity and get a new one, as the getContentLength()
		//invocation will have frozen the value: we can't report inconsistent values
		//to the Apache HTTP client or it gets confused.
		try (GsonHttpEntity entity = new GsonHttpEntity(gson, list)) {
			final MessageDigest digest = getSha256Digest();
			entity.writeTo(discardingDigestStream(digest));
			assertNotEquals(-1L, entity.getContentLength());
			final byte[] content = produceContentWithCustomEncoder(entity);
			assertEquals(content.length, entity.getContentLength());
		}
		catch (IOException e) {
			throw new RuntimeException("We're mocking IO operations, this should not happen?", e);
		}
	}

	/** For a short payload, the content length must be reported up-front, without writing anything first. */
	@Test
	public void testShortMessageIsNotChunked() {
		final List<JsonObject> list = buildTripleBulkJSON();
		final byte[] traditionalEncoding = traditionalEncoding(list);
		try (GsonHttpEntity entity = new GsonHttpEntity(gson, list)) {
			assertEquals(traditionalEncoding.length, entity.getContentLength());
		}
		catch (IOException e) {
			throw new RuntimeException("We're mocking IO operations, this should not happen?", e);
		}
	}

	/** Asserts that the reference (StringBuilder based) rendering of {@code list} equals {@code expected}. */
	private void verifyOutput(final String expected, final List<JsonObject> list) {
		assertEquals(expected, encodeToString(list));
	}

	/** Asserts that the digest computed while streaming the entity matches the digest of the reference bytes. */
	private void verifySha256Signature(final List<JsonObject> jsonObjects) {
		final String optimisedEncoding = optimisedSha256(jsonObjects);
		final String traditionalEncoding = traditionalSha256(jsonObjects);
		assertEquals("SHA-256 signatures not matching", traditionalEncoding, optimisedEncoding);
	}

	/** Hex SHA-256 of the bytes written by {@link GsonHttpEntity#writeTo(OutputStream)}. */
	private String optimisedSha256(final List<JsonObject> bodyParts) {
		notEmpty(bodyParts);
		try (GsonHttpEntity entity = new GsonHttpEntity(gson, bodyParts)) {
			final MessageDigest digest = getSha256Digest();
			entity.writeTo(discardingDigestStream(digest));
			return encodeHexString(digest.digest());
		}
		catch (IOException e) {
			throw new RuntimeException("We're mocking IO operations, this should not happen?", e);
		}
	}

	/** Hex SHA-256 of the reference encoding. */
	private String traditionalSha256(final List<JsonObject> jsonObjects) {
		return sha256Hex(traditionalEncoding(jsonObjects));
	}

	/**
	 * Asserts that the entity produces exactly the reference bytes; on mismatch both
	 * renderings are printed to standard output before the assertion fails.
	 */
	private void verifyProducedContent(final List<JsonObject> jsonObjects) {
		final byte[] expected = traditionalEncoding(jsonObjects);
		final byte[] optimised = optimisedEncoding(jsonObjects);
		if (!Arrays.equals(expected, optimised)) {
			// Decode for human-readable diagnostics; the byte-level assertion below is what fails the test
			CharBuffer decodedExpected = StandardCharsets.UTF_8.decode(ByteBuffer.wrap(expected));
			CharBuffer decodedOptimised = StandardCharsets.UTF_8.decode(ByteBuffer.wrap(optimised));
			System.out.println("Rendered :\n" + decodedOptimised);
			System.out.println("Should be:\n" + decodedExpected);
		}
		assertArrayEquals(expected, optimised);
	}

	/**
	 * Produces the content twice through the throttled encoder (closing the entity in between),
	 * asserts both runs are identical and returns the second one.
	 */
	private byte[] optimisedEncoding(List<JsonObject> bodyParts) {
		notEmpty(bodyParts);
		try (GsonHttpEntity entity = new GsonHttpEntity(gson, bodyParts)) {
			final byte[] firstRun = produceContentWithCustomEncoder(entity);
			entity.close();
			final byte[] secondRun = produceContentWithCustomEncoder(entity);
			Assert.assertArrayEquals(
					"Being repeatable, we expect it to be able to reproduce all content even after being closed",
					firstRun, secondRun);
			return secondRun;
		}
		catch (IOException e) {
			throw new RuntimeException("We're mocking IO operations, this should not happen?", e);
		}
	}

	/**
	 * Drives {@code entity.produceContent(...)} against a {@link HeapContentEncoder} until the
	 * encoder is marked complete, and returns everything the encoder accepted.
	 * <p>
	 * The accepted window starts at 0 bytes and then cycles through 0, 1 and 2 bytes per write.
	 */
	private byte[] produceContentWithCustomEncoder(GsonHttpEntity entity) throws IOException {
		final IOControl fakeIO = new FakeIOControl();
		final HeapContentEncoder sink = new HeapContentEncoder();
		int loopCounter = 0;
		while (!sink.isCompleted()) {
			entity.produceContent(sink, fakeIO);
			/*
			 * For testing, be really aggressive on the need to
			 * manage small write windows the right way.
			 *
			 * Also, using small write windows helped to reproduce HSEARCH-2854
			 * almost all the time (we only need the flow control to push
			 * back at least once).
			 */
			sink.setNextAcceptedBytesSize(loopCounter++ % 3);
		}
		return sink.flipAndRead();
	}

	private void notEmpty(final List<JsonObject> bodyParts) {
		assertFalse("Pointless to test this, we don't use this strategy for empty blocks", bodyParts.isEmpty());
	}

	/**
	 * This is the simplest encoding strategy; we don't use this as
	 * it would require to allocate significantly larger intermediate
	 * buffers. See also HSEARCH-2818.
	 */
	byte[] traditionalEncoding(final List<JsonObject> bodyParts) {
		return encodeToString(bodyParts).getBytes(StandardCharsets.UTF_8);
	}

	/** Renders each object with Gson followed by a newline, concatenated in list order. */
	private String encodeToString(final List<JsonObject> bodyParts) {
		notEmpty(bodyParts);
		final StringBuilder builder = new StringBuilder();
		for (JsonObject bodyPart : bodyParts) {
			gson.toJson(bodyPart, builder);
			builder.append('\n');
		}
		return builder.toString();
	}

	/**
	 * Wraps {@code digest} around a stream that throws every byte away, so that writing
	 * to the returned stream only has the effect of updating the digest.
	 */
	private static DigestOutputStream discardingDigestStream(final MessageDigest digest) {
		final OutputStream discardingStream = new OutputStream() {
			@Override
			public void write(int b) throws IOException {
				// Intentionally empty: the bytes themselves are not needed
			}
		};
		return new DigestOutputStream(discardingStream, digest);
	}

	/** Builds the three-object payload (empty, version, empty) shared by several tests. */
	private static List<JsonObject> buildTripleBulkJSON() {
		final List<JsonObject> list = new ArrayList<>(3);
		list.add(buildEmptyJSON());
		list.add(buildVersionJSON());
		list.add(buildEmptyJSON());
		return list;
	}

	/** Builds {@link #BULK_BATCH_SIZE} copies of the version object. */
	private static List<JsonObject> produceLargeBulkJSON() {
		final List<JsonObject> list = new ArrayList<>(BULK_BATCH_SIZE);
		// Must use the same count as produceLargeBulkJSONContent(), which builds the expected text
		for (int i = 0; i < BULK_BATCH_SIZE; i++) {
			list.add(buildVersionJSON());
		}
		return list;
	}

	/**
	 * Builds a single object {@code {"p":"aaa...<pair>"}} whose JSON source text has the two
	 * halves of a surrogate pair at character indexes 1023 and 1024.
	 */
	private static List<JsonObject> produceUnicodeSplitSurrogatePairJSON() {
		final String splitObjectPrefix = "{\"p\":\"";
		final String surrogatePair = "\uD802\uDD04"; // U+10904: http://unicode.org/cldr/utility/character.jsp?a=10904
		final String splitObjectSuffix = "\"}";
		// NOTE(review): 1024 presumably mirrors a buffer size inside GsonHttpEntity - confirm against that class
		final int splitIndex = 1024;
		final int lengthToFill = splitIndex - splitObjectPrefix.length() - 1 /* left part of the surrogate pair */;

		final List<JsonObject> list = new ArrayList<>();
		final StringBuilder sb = new StringBuilder();
		sb.append(splitObjectPrefix);
		// Add characters to align the surrogate pair precisely on the buffer split
		for (int i = 0; i < lengthToFill; i++) {
			sb.append('a'); // Any one-byte character that has no meaning in JSON would do the trick
		}
		sb.append(surrogatePair);
		sb.append(splitObjectSuffix);
		final JsonObject splitObject = gson.fromJson(sb.toString(), JsonObject.class);
		list.add(splitObject);
		return list;
	}

	private static JsonObject buildEmptyJSON() {
		return JsonBuilder.object().build();
	}

	private static JsonObject buildVersionJSON() {
		return JsonBuilder.object()
				.add("version", JsonBuilder.object().addProperty("number", "5.5.0"))
				.build();
	}

	/** Builds the expected text for the large bulk: the version payload repeated {@link #BULK_BATCH_SIZE} times. */
	private static String produceLargeBulkJSONContent() {
		final StringBuilder content = new StringBuilder(BULK_BATCH_SIZE * JSON_TEST_PAYLOAD_VERSION.length());
		for (int i = 0; i < BULK_BATCH_SIZE; i++) {
			content.append(JSON_TEST_PAYLOAD_VERSION);
		}
		return content.toString();
	}

	/**
	 * A {@link ContentEncoder} that stores accepted bytes in a heap buffer and only accepts
	 * a configurable number of bytes on each write, to simulate flow-control push back.
	 */
	private static final class HeapContentEncoder implements ContentEncoder {

		private boolean contentComplete = false;
		// Starts at zero, so the very first write accepts nothing
		private int nextWriteAcceptLimit = 0;
		private final ByteBuffer buf = ByteBuffer.allocate(MAX_TESTING_BUFFER_BYTES);
		private boolean lastWriteWasZeroLength = false;
		private boolean closed = false;

		/**
		 * Consumes at most the currently configured number of bytes from {@code byteBuffer}.
		 *
		 * @return the number of bytes actually consumed, possibly 0
		 */
		@Override
		public int write(final ByteBuffer byteBuffer) throws IOException {
			assertFalse(closed);
			// Remember whether the caller offered an empty buffer: complete() rejects that as a final write
			lastWriteWasZeroLength = !byteBuffer.hasRemaining();
			final int toRead = Math.min(byteBuffer.remaining(), nextWriteAcceptLimit);
			final byte[] currentRead = new byte[toRead];
			byteBuffer.get(currentRead);
			buf.put(currentRead);
			return toRead;
		}

		@Override
		public void complete() throws IOException {
			assertFalse("A final zero-length write was detected - this should never happen. See HSEARCH-2854.",
					lastWriteWasZeroLength);
			assertFalse(closed);
			assertFalse("Can't mark it 'complete' multiple times", contentComplete);
			contentComplete = true;
		}

		@Override
		public boolean isCompleted() {
			return contentComplete;
		}

		/** Returns all bytes accepted so far; may be invoked only once, and forbids further writes. */
		public byte[] flipAndRead() {
			assertFalse("can read the buffer only once", closed);
			closed = true;
			buf.flip();
			final byte[] currentRead = new byte[buf.remaining()];
			buf.get(currentRead);
			return currentRead;
		}

		/** Sets the maximum number of bytes the next {@link #write(ByteBuffer)} calls will accept. */
		public void setNextAcceptedBytesSize(int size) {
			this.nextWriteAcceptLimit = size;
		}
	}

	/** An {@link IOControl} that fails the test if any of its methods is invoked. */
	private static final class FakeIOControl implements IOControl {

		@Override
		public void requestInput() {
			fail("Should not invoke this");
		}

		@Override
		public void suspendInput() {
			fail("Should not invoke this");
		}

		@Override
		public void requestOutput() {
			fail("Should not invoke this");
		}

		@Override
		public void suspendOutput() {
			fail("Should not invoke this");
		}

		@Override
		public void shutdown() throws IOException {
			fail("Should not invoke this");
		}
	}
}