org.apache.solr.cloud.TestStressInPlaceUpdates.java Source code

Introduction

Here is the source code for org.apache.solr.cloud.TestStressInPlaceUpdates.java
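
TestStressInPlaceUpdates is a SolrCloud stress test for in-place (docValues-only) updates. Concurrent writer threads randomly commit, delete (by id or by query), fully reindex, or atomically increment documents, while tracking the expected state of every document in an in-memory model; concurrent reader threads query Solr via realtime get or regular search and assert that each response is consistent with that model.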

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.cloud;

import java.lang.invoke.MethodHandles;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.math3.primes.Primes;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.zookeeper.KeeperException;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Slow
public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    @BeforeClass
    public static void beforeSuperClass() throws Exception {
        System.setProperty("solr.tests.intClassName", random().nextBoolean() ? "TrieIntField" : "IntPointField");
        System.setProperty("solr.tests.longClassName", random().nextBoolean() ? "TrieLongField" : "LongPointField");
        System.setProperty("solr.tests.floatClassName",
                random().nextBoolean() ? "TrieFloatField" : "FloatPointField");
        System.setProperty("solr.tests.doubleClassName",
                random().nextBoolean() ? "TrieDoubleField" : "DoublePointField");

        schemaString = "schema-inplace-updates.xml";
        configString = "solrconfig-tlog.xml";

        // sanity check that autocommits are disabled
        initCore(configString, schemaString);
        assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoCommmitMaxTime);
        assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoSoftCommmitMaxTime);
        assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoCommmitMaxDocs);
        assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoSoftCommmitMaxDocs);
    }

    @After
    public void after() {
        System.clearProperty("solr.tests.intClassName");
        System.clearProperty("solr.tests.longClassName");
        System.clearProperty("solr.tests.floatClassName");
        System.clearProperty("solr.tests.doubleClassName");
    }

    public TestStressInPlaceUpdates() {
        super();
        sliceCount = 1;
        fixShardCount(3);
    }

    // "model" holds the latest known state of every doc and is updated by writer threads as
    // soon as an update succeeds; "committedModel" is a snapshot of the model installed after
    // each commit, and is what non-realtime queries are validated against
    protected final ConcurrentHashMap<Integer, DocInfo> model = new ConcurrentHashMap<>();
    protected Map<Integer, DocInfo> committedModel = new HashMap<>();
    protected long snapshotCount;
    protected long committedModelClock;
    protected int clientIndexUsedForCommit;
    protected volatile int lastId;
    protected final String field = "val_l";

    private void initModel(int ndocs) {
        for (int i = 0; i < ndocs; i++) {
            // seed versions w/-1 so "from scratch" adds/updates will fail optimistic concurrency checks
            // if some other thread beats us to adding the id
            model.put(i, new DocInfo(-1L, 0, 0));
        }
        committedModel.putAll(model);
    }

    SolrClient leaderClient = null;

    @Test
    @ShardsFixed(num = 3)
    public void stressTest() throws Exception {
        waitForRecoveriesToFinish(true);

        this.leaderClient = getClientForLeader();
        assertNotNull("Couldn't obtain client for the leader of the shard", this.leaderClient);

        final int commitPercent = 5 + random().nextInt(20);
        final int softCommitPercent = 30 + random().nextInt(75); // what percent of the commits are soft
        final int deletePercent = 4 + random().nextInt(25);
        final int deleteByQueryPercent = random().nextInt(8);
        final int ndocs = atLeast(5);
        int nWriteThreads = 5 + random().nextInt(25);
        int fullUpdatePercent = 5 + random().nextInt(50);

        // query variables
        final int percentRealtimeQuery = 75;
        // number of cumulative read/write operations by all threads
        final AtomicLong operations = new AtomicLong(25000);
        int nReadThreads = 5 + random().nextInt(25);

        /** // testing
         final int commitPercent = 5;
         final int softCommitPercent = 100; // what percent of the commits are soft
         final int deletePercent = 0;
         final int deleteByQueryPercent = 50;
         final int ndocs = 10;
         int nWriteThreads = 10;
            
         final int maxConcurrentCommits = nWriteThreads;   // number of committers at a time... it should be <= maxWarmingSearchers
            
         // query variables
         final int percentRealtimeQuery = 101;
         final AtomicLong operations = new AtomicLong(50000);  // number of query operations to perform in total
         int nReadThreads = 10;
            
         int fullUpdatePercent = 20;
         **/

        log.info("{}",
                Arrays.asList("commitPercent", commitPercent, "softCommitPercent", softCommitPercent,
                        "deletePercent", deletePercent, "deleteByQueryPercent", deleteByQueryPercent, "ndocs",
                        ndocs, "nWriteThreads", nWriteThreads, "percentRealtimeQuery", percentRealtimeQuery,
                        "operations", operations, "nReadThreads", nReadThreads));

        initModel(ndocs);

        List<Thread> threads = new ArrayList<>();

        for (int i = 0; i < nWriteThreads; i++) {
            Thread thread = new Thread("WRITER" + i) {
                Random rand = new Random(random().nextInt());

                @Override
                public void run() {
                    try {
                        while (operations.decrementAndGet() > 0) {
                            int oper = rand.nextInt(100);

                            if (oper < commitPercent) {
                                Map<Integer, DocInfo> newCommittedModel;
                                long version;

                                synchronized (TestStressInPlaceUpdates.this) {
                                    // take a snapshot of the model
                                    // this is safe to do w/o synchronizing on the model because it's a ConcurrentHashMap
                                    newCommittedModel = new HashMap<>(model);
                                    version = snapshotCount++;
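                                    // stamp this snapshot; it is installed below only if it is at least as
                                    // new as whatever committedModel currently holds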

                                    int chosenClientIndex = rand.nextInt(clients.size());

                                    if (rand.nextInt(100) < softCommitPercent) {
                                        log.info("softCommit start");
                                        clients.get(chosenClientIndex).commit(true, true, true);
                                        log.info("softCommit end");
                                    } else {
                                        log.info("hardCommit start");
                                        clients.get(chosenClientIndex).commit();
                                        log.info("hardCommit end");
                                    }

                                    // install this model snapshot only if it's newer than the current one
                                    if (version >= committedModelClock) {
                                        if (VERBOSE) {
                                            log.info("installing new committedModel version={}",
                                                    committedModelClock);
                                        }
                                        clientIndexUsedForCommit = chosenClientIndex;
                                        committedModel = newCommittedModel;
                                        committedModelClock = version;
                                    }
                                }
                                continue;
                            }

                            int id;

                            if (rand.nextBoolean()) {
                                id = rand.nextInt(ndocs);
                            } else {
                                id = lastId; // reuse the last ID half of the time to force more race conditions
                            }

                            // set the lastId before we actually change it sometimes to try and
                            // uncover more race conditions between writing and reading
                            boolean before = rand.nextBoolean();
                            if (before) {
                                lastId = id;
                            }

                            DocInfo info = model.get(id);

                            // yield after getting the next version to increase the odds of updates happening out of order
                            if (rand.nextBoolean())
                                Thread.yield();

                            if (oper < commitPercent + deletePercent + deleteByQueryPercent) {
                                final boolean dbq = (oper >= commitPercent + deletePercent);
                                final String delType = dbq ? "DBQ" : "DBI";
                                log.info("{} id {}: {}", delType, id, info);

                                Long returnedVersion = null;

                                try {
                                    returnedVersion = deleteDocAndGetVersion(Integer.toString(id),
                                            params("_version_", Long.toString(info.version)), dbq);
                                    log.info(delType + ": Deleting id=" + id + ", version=" + info.version
                                            + ".  Returned version=" + returnedVersion);
                                } catch (RuntimeException e) {
                                    if (e.getMessage() != null
                                            && (e.getMessage().contains("version conflict")
                                                    || e.getMessage().contains("Conflict"))) {
                                        // It's okay for a leader to reject a concurrent request
                                        log.warn("Conflict during {}, rejected id={}, {}", delType, id, e);
                                        returnedVersion = null;
                                    } else {
                                        throw e;
                                    }
                                }

                                // only update model if update had no conflict & the version is newer
                                synchronized (model) {
                                    DocInfo currInfo = model.get(id);
                                    if (null != returnedVersion && (Math.abs(returnedVersion.longValue()) > Math
                                            .abs(currInfo.version))) {
                                        model.put(id, new DocInfo(returnedVersion.longValue(), 0, 0));
                                    }
                                }

                            } else {
                                int val1 = info.intFieldValue;
                                long val2 = info.longFieldValue;
                                int nextVal1 = val1;
                                long nextVal2 = val2;

                                int addOper = rand.nextInt(100);
                                Long returnedVersion;
                                if (addOper < fullUpdatePercent || info.version <= 0) { // if document was never indexed or was deleted
                                    // FULL UPDATE
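                                    // pick the next prime as the int value and keep the long value an exact
                                    // multiple of it, so readers can verify (longVal % intVal == 0)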
                                    nextVal1 = Primes.nextPrime(val1 + 1);
                                    nextVal2 = nextVal1 * 1000000000L;
                                    try {
                                        returnedVersion = addDocAndGetVersion("id", id, "title_s", "title" + id,
                                                "val1_i_dvo", nextVal1, "val2_l_dvo", nextVal2, "_version_",
                                                info.version);
                                        log.info("FULL: Writing id=" + id + ", val=[" + nextVal1 + "," + nextVal2
                                                + "], version=" + info.version + ", Prev was=[" + val1 + "," + val2
                                                + "].  Returned version=" + returnedVersion);

                                    } catch (RuntimeException e) {
                                        if (e.getMessage() != null
                                                && (e.getMessage().contains("version conflict")
                                                        || e.getMessage().contains("Conflict"))) {
                                            // It's okay for a leader to reject a concurrent request
                                            log.warn("Conflict during full update, rejected id={}, {}", id, e);
                                            returnedVersion = null;
                                        } else {
                                            throw e;
                                        }
                                    }
                                } else {
                                    // PARTIAL
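                                    // in-place update: the server atomically increments val2_l_dvo by val1;
                                    // since val2 is always a multiple of the prime val1, the modulo
                                    // invariant still holds afterwards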
                                    nextVal2 = val2 + val1;
                                    try {
                                        returnedVersion = addDocAndGetVersion("id", id, "val2_l_dvo",
                                                map("inc", String.valueOf(val1)), "_version_", info.version);
                                        log.info("PARTIAL: Writing id=" + id + ", val=[" + nextVal1 + "," + nextVal2
                                                + "], version=" + info.version + ", Prev was=[" + val1 + "," + val2
                                                + "].  Returned version=" + returnedVersion);
                                    } catch (RuntimeException e) {
                                        if (e.getMessage() != null
                                                && (e.getMessage().contains("version conflict")
                                                        || e.getMessage().contains("Conflict"))) {
                                            // It's okay for a leader to reject a concurrent request
                                            log.warn("Conflict during partial update, rejected id={}, {}", id, e);
                                        } else if (e.getMessage() != null
                                                && e.getMessage().contains("Document not found for update.")
                                                && e.getMessage().contains("id=" + id)) {
                                            log.warn(
                                                    "Attempted a partial update for a recently deleted document, rejected id={}, {}",
                                                    id, e);
                                        } else {
                                            throw e;
                                        }
                                        returnedVersion = null;
                                    }
                                }

                                // only update model if update had no conflict & the version is newer
                                synchronized (model) {
                                    DocInfo currInfo = model.get(id);
                                    if (null != returnedVersion && (Math.abs(returnedVersion.longValue()) > Math
                                            .abs(currInfo.version))) {
                                        model.put(id, new DocInfo(returnedVersion.longValue(), nextVal1, nextVal2));
                                    }

                                }
                            }

                            if (!before) {
                                lastId = id;
                            }
                        }
                    } catch (Throwable e) {
                        operations.set(-1L);
                        log.error("", e);
                        throw new RuntimeException(e);
                    }
                }
            };

            threads.add(thread);

        }

        // Read threads
        for (int i = 0; i < nReadThreads; i++) {
            Thread thread = new Thread("READER" + i) {
                Random rand = new Random(random().nextInt());

                @SuppressWarnings("unchecked")
                @Override
                public void run() {
                    try {
                        while (operations.decrementAndGet() >= 0) {
                            // bias toward a recently changed doc
                            int id = rand.nextInt(100) < 25 ? lastId : rand.nextInt(ndocs);

                            // when indexing, we update the index, then the model
                            // so when querying, we should first check the model, and then the index

                            boolean realTime = rand.nextInt(100) < percentRealtimeQuery;
                            DocInfo expected;

                            if (realTime) {
                                expected = model.get(id);
                            } else {
                                synchronized (TestStressInPlaceUpdates.this) {
                                    expected = committedModel.get(id);
                                }
                            }

                            if (VERBOSE) {
                                log.info("querying id {}", id);
                            }
                            ModifiableSolrParams params = new ModifiableSolrParams();
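                            // realtime gets go through /get and can see uncommitted updates from the
                            // update log; a normal query only sees what the last opened searcher saw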
                            if (realTime) {
                                params.set("wt", "json");
                                params.set("qt", "/get");
                                params.set("ids", Integer.toString(id));
                            } else {
                                params.set("wt", "json");
                                params.set("q", "id:" + Integer.toString(id));
                                params.set("omitHeader", "true");
                            }

                            int clientId = rand.nextInt(clients.size());
                            if (!realTime)
                                clientId = clientIndexUsedForCommit;
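                            // committed reads must use the client whose commit installed the current
                            // committedModel, so that client's searcher is known to reflect the snapshot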

                            QueryResponse response = clients.get(clientId).query(params);
                            if (response.getResults().size() == 0) {
                                // there's no info we can get back with a delete, so not much we can check without further synchronization
                            } else if (response.getResults().size() == 1) {
                                final SolrDocument actual = response.getResults().get(0);
                                final String msg = "Realtime=" + realTime + ", expected=" + expected + ", actual="
                                        + actual;
                                assertNotNull(msg, actual);

                                final Long foundVersion = (Long) actual.getFieldValue("_version_");
                                assertNotNull(msg, foundVersion);
                                assertTrue(msg + "... solr doc has non-positive version???",
                                        0 < foundVersion.longValue());
                                final Integer intVal = (Integer) actual.getFieldValue("val1_i_dvo");
                                assertNotNull(msg, intVal);

                                final Long longVal = (Long) actual.getFieldValue("val2_l_dvo");
                                assertNotNull(msg, longVal);

                                assertTrue(msg + " ...solr returned older version then model. "
                                        + "should not be possible given the order of operations in writer threads",
                                        Math.abs(expected.version) <= foundVersion.longValue());

                                if (foundVersion.longValue() == expected.version) {
                                    assertEquals(msg, expected.intFieldValue, intVal.intValue());
                                    assertEquals(msg, expected.longFieldValue, longVal.longValue());
                                }

                                // Some things we can assert about any Doc returned from solr,
                                // even if it's newer than our (expected) model information...

                                assertTrue(msg + " ...how did a doc in solr get a non positive intVal?",
                                        0 < intVal);
                                assertTrue(msg + " ...how did a doc in solr get a non positive longVal?",
                                        0 < longVal);
                                assertEquals(msg
                                        + " ...intVal and longVal in solr doc are internally (modulo) inconsistent w/eachother",
                                        0, (longVal % intVal));

                                // NOTE: when foundVersion is greater than the version read from the model,
                                // it's not possible to make any assertions about the field values in solr relative to the
                                // field values in the model -- ie: we can *NOT* assert expected.longFieldValue <= doc.longVal
                                //
                                // it's tempting to think that this would be possible if we changed our model to preserve the
                                // "old" values when doing a delete, but that's still no guarantee because of how optimistic
                                // concurrency works with negative versions:  When adding a doc, we can assert that it must not
                                // exist with version<0, but we can't assert that the *reason* it doesn't exist was because of
                                // a delete with the specific version of "-42".
                                // So a writer thread might (1) prep to add a doc for the first time with "intValue=1,_version_=-1",
                                // and that add may succeed and (2) return some version X which is put in the model.  But
                                // in between #1 and #2 other threads may have added & deleted the doc repeatedly, updating
                                // the model with intValue=7,_version_=-42, and a reader thread might meanwhile read from the
                                // model before #2 and expect intValue=7, but get intValue=1 from solr (with a greater version)

                            } else {
                                fail(String.format(Locale.ENGLISH, "There was more than one result: %s",
                                        response));
                            }
                        }
                    } catch (Throwable e) {
                        operations.set(-1L);
                        log.error("", e);
                        throw new RuntimeException(e);
                    }
                }
            };

            threads.add(thread);
        }
        // Start all threads
        for (Thread thread : threads) {
            thread.start();
        }

        for (Thread thread : threads) {
            thread.join();
        }

        { // final pass over uncommitted model with RTG
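            // realtime gets are answered from the update log, so every replica should already
            // agree with the uncommitted model even though nothing forces a commit here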

            for (SolrClient client : clients) {
                for (Map.Entry<Integer, DocInfo> entry : model.entrySet()) {
                    final Integer id = entry.getKey();
                    final DocInfo expected = entry.getValue();
                    final SolrDocument actual = client.getById(id.toString());

                    String msg = "RTG: " + id + "=" + expected;
                    if (null == actual) {
                        // a deleted or non-existent document
                        // sanity check of the model agrees...
                        assertTrue(msg + " is deleted/non-existent in Solr, but model has non-neg version",
                                expected.version < 0);
                        assertEquals(msg + " is deleted/non-existent in Solr", expected.intFieldValue, 0);
                        assertEquals(msg + " is deleted/non-existent in Solr", expected.longFieldValue, 0);
                    } else {
                        msg = msg + " <==VS==> " + actual;
                        assertEquals(msg, expected.intFieldValue, actual.getFieldValue("val1_i_dvo"));
                        assertEquals(msg, expected.longFieldValue, actual.getFieldValue("val2_l_dvo"));
                        assertEquals(msg, expected.version, actual.getFieldValue("_version_"));
                        assertTrue(msg + " doc exists in solr, but version is negative???", 0 < expected.version);
                    }
                }
            }
        }

        { // do a final search and compare every result with the model

            // because commits don't provide any sort of concrete versioning (or optimistic concurrency constraints),
            // there's no way to guarantee that our committedModel matches what was in Solr at the time of the last commit.
            // It's possible other threads made additional writes to solr before the commit was processed, but after
            // the committedModel variable was assigned its new value.
            //
            // what we can do, however, is commit all completed updates, and *then* compare solr search results
            // against the (new) committed model....

            waitForThingsToLevelOut(30); // NOTE: this does an automatic commit for us & ensures replicas are up to date
            committedModel = new HashMap<>(model);

            // first, prune the model of any docs that have negative versions
            // ie: were never actually added, or were ultimately deleted.
            for (int i = 0; i < ndocs; i++) {
                DocInfo info = committedModel.get(i);
                if (info.version < 0) {
                    // first, a quick sanity check of the model itself...
                    assertEquals("Inconsistent int value in model for deleted doc" + i + "=" + info, 0,
                            info.intFieldValue);
                    assertEquals("Inconsistent long value in model for deleted doc" + i + "=" + info, 0L,
                            info.longFieldValue);

                    committedModel.remove(i);
                }
            }

            for (SolrClient client : clients) {
                QueryResponse rsp = client.query(params("q", "*:*", "sort", "id asc", "rows", ndocs + ""));
                for (SolrDocument actual : rsp.getResults()) {
                    final Integer id = Integer.parseInt(actual.getFieldValue("id").toString());
                    final DocInfo expected = committedModel.get(id);

                    assertNotNull("Doc found but missing/deleted from model: " + actual, expected);

                    final String msg = "Search: " + id + "=" + expected + " <==VS==> " + actual;
                    assertEquals(msg, expected.intFieldValue, actual.getFieldValue("val1_i_dvo"));
                    assertEquals(msg, expected.longFieldValue, actual.getFieldValue("val2_l_dvo"));
                    assertEquals(msg, expected.version, actual.getFieldValue("_version_"));
                    assertTrue(msg + " doc exists in solr, but version is negative???", 0 < expected.version);

                    // also sanity check the model (which we already know matches the doc)
                    assertEquals("Inconsistent (modulo) values in model for id " + id + "=" + expected, 0,
                            (expected.longFieldValue % expected.intFieldValue));
                }
                assertEquals(committedModel.size(), rsp.getResults().getNumFound());
            }
        }
    }

    /**
     * Used for storing the info for a document in an in-memory model.
     */
    private static class DocInfo {
        long version;
        int intFieldValue;
        long longFieldValue;

        public DocInfo(long version, int val1, long val2) {
            assert version != 0; // must either be real positive version, or negative deleted version/indicator
            this.version = version;
            this.intFieldValue = val1;
            this.longFieldValue = val2;
        }

        @Override
        public String toString() {
            return "[version=" + version + ", intValue=" + intFieldValue + ",longValue=" + longFieldValue + "]";
        }
    }

    @SuppressWarnings("rawtypes")
    protected long addDocAndGetVersion(Object... fields) throws Exception {
        SolrInputDocument doc = new SolrInputDocument();
        addFields(doc, fields);

        ModifiableSolrParams params = new ModifiableSolrParams();
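        // ask Solr to include the _version_ assigned to each document in the update response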
        params.add("versions", "true");

        UpdateRequest ureq = new UpdateRequest();
        ureq.setParams(params);
        ureq.add(doc);
        UpdateResponse resp;

        // send updates to leader, to avoid SOLR-8733
        resp = ureq.process(leaderClient);

        long returnedVersion = Long.parseLong(((NamedList) resp.getResponse().get("adds")).getVal(0).toString());
        assertTrue("Due to SOLR-8733, sometimes returned version is 0. Let us assert that we have successfully"
                + " worked around that problem here.", returnedVersion > 0);
        return returnedVersion;
    }

    @SuppressWarnings("rawtypes")
    protected long deleteDocAndGetVersion(String id, ModifiableSolrParams params, boolean deleteByQuery)
            throws Exception {
        params.add("versions", "true");

        UpdateRequest ureq = new UpdateRequest();
        ureq.setParams(params);
        if (deleteByQuery) {
            ureq.deleteByQuery("id:" + id);
        } else {
            ureq.deleteById(id);
        }
        UpdateResponse resp;
        // send updates to leader, to avoid SOLR-8733
        resp = ureq.process(leaderClient);

        String key = deleteByQuery ? "deleteByQuery" : "deletes";
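        // Solr reports the version of a successful delete as a negative number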
        long returnedVersion = Long.parseLong(((NamedList) resp.getResponse().get(key)).getVal(0).toString());
        assertTrue("Due to SOLR-8733, sometimes returned version is 0. Let us assert that we have successfully"
                + " worked around that problem here.", returnedVersion < 0);
        return returnedVersion;
    }

    /**
     * Method gets the SolrClient for the leader replica. This is needed for a workaround for SOLR-8733.
     */
    public SolrClient getClientForLeader() throws KeeperException, InterruptedException {
        ZkStateReader zkStateReader = cloudClient.getZkStateReader();
        zkStateReader.forceUpdateCollection(DEFAULT_COLLECTION);
        ClusterState clusterState = zkStateReader.getClusterState();
        Slice shard1 = clusterState.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1);
        Replica leader = shard1.getLeader();

        // match each client's base URL against the leader's node to find the client
        // that talks directly to the leader
        String leaderBaseUrl = zkStateReader.getBaseUrlForNodeName(leader.getNodeName());
        for (int i = 0; i < clients.size(); i++) {
            if (((HttpSolrClient) clients.get(i)).getBaseURL().startsWith(leaderBaseUrl)) {
                return clients.get(i);
            }
        }

        return null;
    }
}