Java tutorial
/**
 * Copyright (C) 2015
 * "Kompetenzzentrum fuer wissensbasierte Anwendungen Forschungs- und EntwicklungsgmbH"
 * (Know-Center), Graz, Austria, office@know-center.at.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @author Raoul Rubien
 */

package eu.eexcess.sourceselection.redde.indexer.topterm;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import net.sf.extjwnl.JWNLException;

import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopScoreDocCollector;

import eu.eexcess.tree.BaseTreeNode;
import eu.eexcess.tree.ValueTreeNode;

/**
 * Samples documents from a Lucene index using queries built from the terms
 * that were aligned to domains by {@link #alignTerms(int, int)}.
 * <p>
 * NOTE: {@link #sample(Set)} is unfinished. It runs the searches but does not
 * store any result yet and always ends with an
 * {@link UnsupportedOperationException}.
 */
public class DBDomainSampler extends TopTermToWNDomain {

    /**
     * Describes one sample request: which domains to draw terms from and
     * under which name the result is meant to be stored.
     */
    public static class SampleArguments {
        /**
         * result index name
         */
        public String indexName;
        /**
         * terms to use for sampling
         */
        public Set<ValueTreeNode<String>> sampleDomains;
    }

    /**
     * Bundles the three WordNet related paths that are handed to the
     * superclass constructor.
     */
    public static class WordNetArguments {
        public String wordnetPath;
        public String wordnetDomainsPath;
        public String wordnetDomainCsvTreePath;
    }

    /** Result of the last {@link #alignTerms(int, int)} call; {@code null} until then. */
    private ValueTreeNode<String> domainToTermsTree;

    /**
     * Creates an instance of this class.
     *
     * @param baseIndexPath
     *            path to base lucene index
     * @param wnArgs
     *            WordNet paths that are forwarded to the superclass
     * @throws IOException
     *             propagated from the superclass constructor
     * @throws JWNLException
     *             propagated from the superclass constructor
     */
    DBDomainSampler(String baseIndexPath, WordNetArguments wnArgs) throws IOException, JWNLException {
        super(baseIndexPath, wnArgs.wordnetPath, wnArgs.wordnetDomainsPath, wnArgs.wordnetDomainCsvTreePath);
    }

    /**
     * Delegates to {@code assignToDomains} of the superclass and remembers the
     * returned tree so that {@link #sample(Set)} may be called afterwards.
     *
     * @param fromTopTermIndex
     *            passed through to {@code assignToDomains}
     * @param toTopTermIndex
     *            passed through to {@code assignToDomains}
     * @return the tree returned by {@code assignToDomains}
     * @throws Exception
     *             whatever {@code assignToDomains} throws
     */
    public ValueTreeNode<String> alignTerms(int fromTopTermIndex, int toTopTermIndex) throws Exception {
        domainToTermsTree = super.assignToDomains(fromTopTermIndex, toTopTermIndex);
        return domainToTermsTree;
    }

    /**
     * Runs one search per sample request, using the distinct terms of the
     * requested domains as query. Requests without any term are skipped.
     * <p>
     * Storing the matching documents is not implemented yet, therefore this
     * method always terminates with an {@link UnsupportedOperationException}
     * once all searches were executed.
     *
     * @param sampleArgs
     *            the sample requests to process
     * @throws IllegalStateException
     *             if {@link #alignTerms(int, int)} has not been called before
     * @throws ParseException
     *             if the query built from the terms cannot be parsed
     * @throws IOException
     *             if searching the index fails
     * @throws UnsupportedOperationException
     *             always, after the searches, because persisting is missing
     */
    public void sample(Set<SampleArguments> sampleArgs)
            throws IllegalStateException, ParseException, IOException {

        if (domainToTermsTree == null) {
            throw new IllegalStateException("no terms aligned");
        }

        // searcher and analyzer do not depend on the request, create them once
        IndexSearcher searcher = new IndexSearcher(inIndexReader);

        // Analyzer is Closeable; release it even though the method ends exceptionally
        try (EnglishAnalyzer analyzer = new EnglishAnalyzer()) {
            for (SampleArguments subSample : sampleArgs) {

                // merge requested domain-terms
                Set<String> terms = distinctUnifyValues(subSample.sampleDomains);
                if (terms.isEmpty()) {
                    // nothing to query for
                    continue;
                }

                // sample with domain-term dependent query
                String queryString = buildQueryString(terms);
                Query query = new QueryParser(DBDomainSampler.fieldOfInterest, analyzer).parse(queryString);
                TopScoreDocCollector collector = TopScoreDocCollector.create(1000, false);
                searcher.search(query, collector);

                // ScoreDoc[] docs = collector.topDocs().scoreDocs;
                // TODO: create and store docs to new index called subSample.name
            }
        }

        throw new UnsupportedOperationException("not implemented yet");
    }

    /**
     * Collects the values of every node of every given tree into one set.
     *
     * @param trees
     *            trees to traverse; {@code null} yields an empty result
     * @return a new, mutable set holding each value once
     */
    Set<String> distinctUnifyValues(Set<ValueTreeNode<String>> trees) {

        final Set<String> unified = new HashSet<String>();
        if (trees == null) {
            return unified;
        }

        BaseTreeNode.NodeInspector<String> operator = (n) -> {
            for (String value : ((ValueTreeNode<String>) n).getValues()) {
                unified.add(value);
            }
        };

        for (ValueTreeNode<String> tree : trees) {
            BaseTreeNode.depthFirstTraverser(tree, operator);
        }
        return unified;
    }

    /**
     * Builds the query string: every term is escaped and the terms are
     * separated by a blank, so the parser sees them as individual clauses
     * instead of one concatenated word.
     */
    private static String buildQueryString(Set<String> terms) {
        StringBuilder query = new StringBuilder();
        for (String term : terms) {
            if (term == null) {
                continue;
            }
            if (query.length() > 0) {
                query.append(' ');
            }
            query.append(QueryParser.escape(term));
        }
        return query.toString();
    }
}