// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.schema; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathFactory; import java.io.File; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDateTime; import java.time.Month; import java.time.ZoneOffset; import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.TestUtil; import org.apache.solr.core.AbstractBadConfigTestBase; import org.apache.solr.util.DOMUtil; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.w3c.dom.Document; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; /** * Tests the useDocValuesAsStored functionality. 
*/ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase { private int id = 1; private static File tmpSolrHome; private static File tmpConfDir; private static final String collection = "collection1"; private static final String confDir = collection + "/conf"; private static final long START_RANDOM_EPOCH_MILLIS; private static final long END_RANDOM_EPOCH_MILLIS; private static final String[] SEVERITY; // http://www.w3.org/TR/2006/REC-xml-20060816/#charsets private static final String NON_XML_CHARS = "\u0000-\u0008\u000B-\u000C\u000E-\u001F\uFFFE\uFFFF"; // Avoid single quotes (problematic in XPath literals) and carriage returns (XML roundtripping fails) private static final Pattern BAD_CHAR_PATTERN = Pattern.compile("[\'\r" + NON_XML_CHARS + "]"); private static final Pattern STORED_FIELD_NAME_PATTERN = Pattern.compile("_dv$"); static { START_RANDOM_EPOCH_MILLIS = LocalDateTime.of(-11000, Month.JANUARY, 1, 0, 0)// BC .toInstant(ZoneOffset.UTC).toEpochMilli(); END_RANDOM_EPOCH_MILLIS = LocalDateTime.of(11000, Month.DECEMBER, 31, 23, 59, 59, 999_000_000) // AD, 5 digit year .toInstant(ZoneOffset.UTC).toEpochMilli(); try { DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); InputStream stream = TestUseDocValuesAsStored.class .getResourceAsStream("/solr/collection1/conf/enumsConfig.xml"); Document doc = builder .parse(new InputSource(IOUtils.getDecodingReader(stream, StandardCharsets.UTF_8))); XPath xpath = XPathFactory.newInstance().newXPath(); NodeList nodes = (NodeList) xpath.evaluate("/enumsConfig/enum[@name='severity']/value", doc, XPathConstants.NODESET); SEVERITY = new String[nodes.getLength()]; for (int i = 0; i < nodes.getLength(); ++i) { SEVERITY[i] = DOMUtil.getText(nodes.item(i)); } } catch (Exception e) { throw new RuntimeException(e); } } @Before private void initManagedSchemaCore() throws Exception { tmpSolrHome = createTempDir().toFile(); tmpConfDir = new File(tmpSolrHome, confDir); File testHomeConfDir 
= new File(TEST_HOME(), confDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-managed-schema.xml"), tmpConfDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig.snippet.randomindexconfig.xml"), tmpConfDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "enumsConfig.xml"), tmpConfDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-non-stored-docvalues.xml"), tmpConfDir); // initCore will trigger an upgrade to managed schema, since the solrconfig has // <schemaFactory class="ManagedIndexSchemaFactory" ... /> System.setProperty("enable.update.log", "false"); System.setProperty("managed.schema.mutable", "true"); initCore("solrconfig-managed-schema.xml", "schema-non-stored-docvalues.xml", tmpSolrHome.getPath()); } @After private void afterClass() throws Exception { deleteCore(); System.clearProperty("managed.schema.mutable"); System.clearProperty("enable.update.log"); } public String getCoreName() { return "basic"; } @Test public void testOnEmptyIndex() throws Exception { clearIndex(); assertU(commit()); assertJQ(req("q", "*:*"), "/response/numFound==0"); assertJQ(req("q", "*:*", "fl", "*"), "/response/numFound==0"); assertJQ(req("q", "*:*", "fl", "test_nonstored_dv_str"), "/response/numFound==0"); assertJQ(req("q", "*:*", "fl", "*,test_nonstored_dv_str"), "/response/numFound==0"); assertU(adoc("id", "xyz", "test_nonstored_dv_str", "xyz")); assertJQ(req("q", "*:*"), "/response/numFound==0"); assertJQ(req("q", "*:*", "fl", "*"), "/response/numFound==0"); assertJQ(req("q", "*:*", "fl", "test_nonstored_dv_str"), "/response/numFound==0"); assertJQ(req("q", "*:*", "fl", "*,test_nonstored_dv_str"), "/response/numFound==0"); assertU(commit()); assertJQ(req("q", "*:*"), "/response/numFound==1", "/response/docs==[" + "{'id':'xyz','test_nonstored_dv_str':'xyz'}" + "]"); assertJQ(req("q", "*:*", "fl", "*"), "/response/numFound==1", "/response/docs==[" + "{'id':'xyz','test_nonstored_dv_str':'xyz'}" + "]"); 
assertJQ(req("q", "*:*", "fl", "test_nonstored_dv_str"), "/response/numFound==1", "/response/docs==[" + "{'test_nonstored_dv_str':'xyz'}" + "]"); assertJQ(req("q", "*:*", "fl", "*,test_nonstored_dv_str"), "/response/numFound==1", "/response/docs==[" + "{'id':'xyz','test_nonstored_dv_str':'xyz'}" + "]"); assertU(adoc("id", "xyz")); assertU(commit()); assertJQ(req("q", "*:*"), "/response/numFound==1", "/response/docs==[" + "{'id':'xyz'}" + "]"); } @Test public void testRandomSingleAndMultiValued() throws Exception { for (int c = 0; c < 10 * RANDOM_MULTIPLIER; ++c) { clearIndex(); int[] arity = new int[9]; for (int a = 0; a < arity.length; ++a) { // Single-valued 50% of the time; other 50%: 2-10 values equally likely arity[a] = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 10); } doTest("check string value is correct", dvStringFieldName(arity[0], true, false), "str", nextValues(arity[0], "str")); doTest("check int value is correct", "test_i" + plural(arity[1]) + "_dvo", "int", nextValues(arity[1], "int")); doTest("check double value is correct", "test_d" + plural(arity[2]) + "_dvo", "double", nextValues(arity[2], "double")); doTest("check long value is correct", "test_l" + plural(arity[3]) + "_dvo", "long", nextValues(arity[3], "long")); doTest("check float value is correct", "test_f" + plural(arity[4]) + "_dvo", "float", nextValues(arity[4], "float")); doTest("check date value is correct", "test_dt" + plural(arity[5]) + "_dvo", "date", nextValues(arity[5], "date")); doTest("check stored and docValues value is correct", dvStringFieldName(arity[6], true, true), "str", nextValues(arity[6], "str")); doTest("check non-stored and non-indexed is accessible", dvStringFieldName(arity[7], false, false), "str", nextValues(arity[7], "str")); doTest("enumField", "enum" + plural(arity[8]) + "_dvo", "str", nextValues(arity[8], "enum")); } } private String plural(int arity) { return arity > 1 ? 
"s" : ""; } private static boolean isStoredField(String fieldName) { return STORED_FIELD_NAME_PATTERN.matcher(fieldName).find(); } private String dvStringFieldName(int arity, boolean indexed, boolean stored) { String base = "test_s" + (arity > 1 ? "s" : ""); String suffix = ""; if (indexed && stored) suffix = "_dv"; else if (indexed && !stored) suffix = "_dvo"; else if (!indexed && !stored) suffix = "_dvo2"; else assertTrue("unsupported dv string field combination: stored and not indexed", false); return base + suffix; } private String[] nextValues(int arity, String valueType) throws Exception { String[] values = new String[arity]; for (int i = 0; i < arity; ++i) { switch (valueType) { case "int": values[i] = String.valueOf(random().nextInt()); break; case "double": values[i] = String.valueOf(Double.longBitsToDouble(random().nextLong())); break; case "long": values[i] = String.valueOf(random().nextLong()); break; case "float": values[i] = String.valueOf(Float.intBitsToFloat(random().nextInt())); break; case "enum": values[i] = SEVERITY[TestUtil.nextInt(random(), 0, SEVERITY.length - 1)]; break; case "str": { String str = TestUtil.randomRealisticUnicodeString(random()); values[i] = BAD_CHAR_PATTERN.matcher(str).replaceAll("\uFFFD"); break; } case "date": { long epochMillis = TestUtil.nextLong(random(), START_RANDOM_EPOCH_MILLIS, END_RANDOM_EPOCH_MILLIS); values[i] = Instant.ofEpochMilli(epochMillis).toString(); break; } default: throw new Exception("unknown type '" + valueType + "'"); } } return values; } @Test public void testMultipleSearchResults() throws Exception { // Three documents with different numbers of values for a field assertU(adoc("id", "myid1", "test_is_dvo", "101", "test_is_dvo", "102", "test_is_dvo", "103")); assertU(adoc("id", "myid2", "test_is_dvo", "201", "test_is_dvo", "202")); assertU(adoc("id", "myid3", "test_is_dvo", "301", "test_is_dvo", "302", "test_is_dvo", "303", "test_is_dvo", "304")); // Multivalued and singly valued fields in the same 
document assertU(adoc("id", "myid4", "test_s_dvo", "hello", "test_is_dvo", "401", "test_is_dvo", "402")); // Test a field which has useDocValuesAsStored=false assertU(adoc("id", "myid5", "nonstored_dv_str", "dont see me")); assertU(adoc("id", "myid6", "nonstored_dv_str", "dont see me", "test_s_dvo", "hello")); assertU(commit()); assertJQ(req("q", "id:myid*", "fl", "*"), "/response/docs==[" + "{'id':'myid1','test_is_dvo':[101,102,103]}," + "{'id':'myid2','test_is_dvo':[201,202]}," + "{'id':'myid3','test_is_dvo':[301,302,303,304]}," + "{'id':'myid4','test_s_dvo':'hello','test_is_dvo':[401,402]}," + "{'id':'myid5'}," + "{'id':'myid6','test_s_dvo':'hello'}" + "]"); } public void testManagedSchema() throws Exception { IndexSchema oldSchema = h.getCore().getLatestSchema(); StrField type = new StrField(); type.setTypeName("str"); SchemaField falseDVASField = new SchemaField("false_dvas", type, SchemaField.INDEXED | SchemaField.DOC_VALUES, null); SchemaField trueDVASField = new SchemaField("true_dvas", type, SchemaField.INDEXED | SchemaField.DOC_VALUES | SchemaField.USE_DOCVALUES_AS_STORED, null); IndexSchema newSchema = oldSchema.addField(falseDVASField).addField(trueDVASField); h.getCore().setLatestSchema(newSchema); clearIndex(); assertU(adoc("id", "myid1", "false_dvas", "101", "true_dvas", "102")); assertU(commit()); assertJQ(req("q", "id:myid*", "fl", "*"), "/response/docs==[" + "{'id':'myid1', 'true_dvas':'102'}]"); } private void doTest(String desc, String field, String type, String... 
value) { String id = "" + this.id++; String[] xpaths = new String[value.length + 1]; if (value.length > 1) { Set<String> valueSet = new HashSet<>(); valueSet.addAll(Arrays.asList(value)); String[] fieldAndValues = new String[value.length * 2 + 2]; fieldAndValues[0] = "id"; fieldAndValues[1] = id; for (int i = 0; i < value.length; ++i) { fieldAndValues[i * 2 + 2] = field; fieldAndValues[i * 2 + 3] = value[i]; xpaths[i] = "//arr[@name='" + field + "']/" + type + "[.='" + value[i] + "']"; } // Docvalues are sets, but stored values are ordered multisets, so cardinality depends on the value source xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = " + (isStoredField(field) ? value.length : valueSet.size()) + "]"; assertU(adoc(fieldAndValues)); } else { assertU(adoc("id", id, field, value[0])); xpaths[0] = "//" + type + "[@name='" + field + "'][.='" + value[0] + "']"; xpaths[1] = "*[count(//" + type + "[@name='" + field + "']) = 1]"; } assertU(commit()); String fl = field; assertQ(desc + ": " + fl, req("q", "id:" + id, "fl", fl), xpaths); fl = field + ",*"; assertQ(desc + ": " + fl, req("q", "id:" + id, "fl", fl), xpaths); fl = "*" + field.substring(field.length() - 3); assertQ(desc + ": " + fl, req("q", "id:" + id, "fl", fl), xpaths); fl = "*"; assertQ(desc + ": " + fl, req("q", "id:" + id, "fl", fl), xpaths); fl = field + ",fakeFieldName"; assertQ(desc + ": " + fl, req("q", "id:" + id, "fl", fl), xpaths); fl = "*"; assertQ(desc + ": " + fl, req("q", "*:*", "fl", fl), xpaths); } // See SOLR-8740 for a discussion. This test is here to make sure we consciously change behavior of multiValued // fields given that we can now return docValues fields. The behavior we've guaranteed in the past is that if // multiValued fields are stored, they're returned in the document in the order they were added. 
// There are four new fieldTypes added: // <field name="test_mvt_dvt_st_str" type="string" indexed="true" multiValued="true" docValues="true" stored="true"/> // <field name="test_mvt_dvt_sf_str" type="string" indexed="true" multiValued="true" docValues="true" stored="false"/> // <field name="test_mvt_dvf_st_str" type="string" indexed="true" multiValued="true" docValues="false" stored="true"/> // <field name="test_mvt_dvu_st_str" type="string" indexed="true" multiValued="true" stored="true"/> // // If any of these tests break as a result of returning DocValues rather than stored values, make sure we reach some // consensus that any breaks on back-compat are A Good Thing and that that behavior is carefully documented! @Test public void testMultivaluedOrdering() throws Exception { clearIndex(); // multiValued=true, docValues=true, stored=true. Should return in original order assertU(adoc("id", "1", "test_mvt_dvt_st_str", "cccc", "test_mvt_dvt_st_str", "aaaa", "test_mvt_dvt_st_str", "bbbb")); // multiValued=true, docValues=true, stored=false. Should return in sorted order assertU(adoc("id", "2", "test_mvt_dvt_sf_str", "cccc", "test_mvt_dvt_sf_str", "aaaa", "test_mvt_dvt_sf_str", "bbbb")); // multiValued=true, docValues=false, stored=true. Should return in original order assertU(adoc("id", "3", "test_mvt_dvf_st_str", "cccc", "test_mvt_dvf_st_str", "aaaa", "test_mvt_dvf_st_str", "bbbb")); // multiValued=true, docValues=not specified, stored=true. Should return in original order assertU(adoc("id", "4", "test_mvt_dvu_st_str", "cccc", "test_mvt_dvu_st_str", "aaaa", "test_mvt_dvu_st_str", "bbbb")); assertU(commit()); assertJQ(req("q", "id:1", "fl", "test_mvt_dvt_st_str"), "/response/docs/[0]/test_mvt_dvt_st_str/[0]==cccc", "/response/docs/[0]/test_mvt_dvt_st_str/[1]==aaaa", "/response/docs/[0]/test_mvt_dvt_st_str/[2]==bbbb"); // Currently, this test fails since stored=false. 
When SOLR-8740 is committed, it should not throw an exception // and should succeed, returning the field in sorted order. try { assertJQ(req("q", "id:2", "fl", "test_mvt_dvt_sf_str"), "/response/docs/[0]/test_mvt_dvt_sf_str/[0]==aaaa", "/response/docs/[0]/test_mvt_dvt_sf_str/[1]==bbbb", "/response/docs/[0]/test_mvt_dvt_sf_str/[2]==cccc"); } catch (Exception e) { // do nothing until SOLR-8740 is committed. At that point this should not throw an exception. // NOTE: I think the test is correct after 8740 so just remove the try/catch } assertJQ(req("q", "id:3", "fl", "test_mvt_dvf_st_str"), "/response/docs/[0]/test_mvt_dvf_st_str/[0]==cccc", "/response/docs/[0]/test_mvt_dvf_st_str/[1]==aaaa", "/response/docs/[0]/test_mvt_dvf_st_str/[2]==bbbb"); assertJQ(req("q", "id:4", "fl", "test_mvt_dvu_st_str"), "/response/docs/[0]/test_mvt_dvu_st_str/[0]==cccc", "/response/docs/[0]/test_mvt_dvu_st_str/[1]==aaaa", "/response/docs/[0]/test_mvt_dvu_st_str/[2]==bbbb"); } }