org.metaservice.core.maven.MavenIndexCrawler.java Source code

Java tutorial

Introduction

Here is the source code for org.metaservice.core.maven.MavenIndexCrawler.java

Source

/*
 * Copyright 2015 Nikola Ilo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.metaservice.core.maven;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.maven.index.*;
import org.apache.maven.index.context.IndexCreator;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.expr.SourcedSearchExpression;
import org.apache.maven.index.expr.UserInputSearchExpression;
import org.apache.maven.index.search.grouping.GAGrouping;
import org.apache.maven.index.updater.*;
import org.apache.maven.wagon.Wagon;
import org.apache.maven.wagon.events.TransferEvent;
import org.apache.maven.wagon.events.TransferListener;
import org.apache.maven.wagon.observers.AbstractTransferListener;
import org.apache.maven.wagon.providers.http.HttpWagon;
import org.codehaus.plexus.DefaultPlexusContainer;
import org.codehaus.plexus.PlexusContainer;
import org.codehaus.plexus.PlexusContainerException;
import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
import org.eclipse.aether.util.version.GenericVersionScheme;
import org.eclipse.aether.version.InvalidVersionSpecificationException;
import org.eclipse.aether.version.Version;

import java.io.File;
import java.io.IOException;
import java.util.*;

/**
 * 1) Findung der Datei
 *
 * Option 1: ibiblio crawler nach pom.xml
 * + einheitlicher crawler
 * - viele directories durchsuchen
 *
 * Option 2: ibiblio crawler durch maven index
 * - nicht einheitlicher crawler
 * + direkter pom zugriff
 *
 * 2) Cache
 *
 * Option 1: GitCache
 *
 * Option 2: filesystem
 *
 * Option 3: maven repo
 *
 * 3) Probleme
 * overwrite in maven?
 */

/**
 * Created with IntelliJ IDEA.
 * User: ilo
 * Date: 11.12.13
 * Time: 13:10
 * To change this template use File | Settings | File Templates.
 */
public class MavenIndexCrawler {

    private final PlexusContainer plexusContainer;

    private final Indexer indexer;

    private final IndexUpdater indexUpdater;

    private final HttpWagon httpWagon;

    private IndexingContext centralContext;

    public static void main(String[] args) throws PlexusContainerException, ComponentLookupException, IOException,
            InvalidVersionSpecificationException {
        new MavenIndexCrawler().perform();
    }

    public MavenIndexCrawler() throws PlexusContainerException, ComponentLookupException {
        // here we create Plexus container, the Maven default IoC container
        // Plexus falls outside of MI scope, just accept the fact that
        // MI is a Plexus component ;)
        // If needed more info, ask on Maven Users list or Plexus Users list
        // google is your friend!
        this.plexusContainer = new DefaultPlexusContainer();

        // lookup the indexer components from plexus
        this.indexer = plexusContainer.lookup(Indexer.class);
        this.indexUpdater = plexusContainer.lookup(IndexUpdater.class);
        // lookup wagon used to remotely fetch index
        this.httpWagon = (HttpWagon) plexusContainer.lookup(Wagon.class, "http");
        this.httpWagon.setHttpHeaders(new Properties());
        this.httpWagon.getHttpHeaders().setProperty("User-Agent", "metaservice");

    }

    public void perform() throws IOException, ComponentLookupException, InvalidVersionSpecificationException {
        // Files where local cache is (if any) and Lucene Index should be located
        File centralLocalCache = new File("target/central-cache");
        File centralIndexDir = new File("target/central-index");

        // Creators we want to use (search for fields it defines)
        List<IndexCreator> indexers = new ArrayList<>();
        indexers.add(plexusContainer.lookup(IndexCreator.class, "min"));
        indexers.add(plexusContainer.lookup(IndexCreator.class, "jarContent"));
        indexers.add(plexusContainer.lookup(IndexCreator.class, "maven-plugin"));

        // Create context for central repository index
        centralContext = indexer.createIndexingContext("central-context", "central", centralLocalCache,
                centralIndexDir, "http://repo1.maven.org/maven2", null, true, true, indexers);

        // Update the index (incremental update will happen if this is not 1st run and files are not deleted)
        // This whole block below should not be executed on every app start, but rather controlled by some configuration
        // since this block will always emit at least one HTTP GET. Central indexes are updated once a week, but
        // other index sources might have different index publishing frequency.
        // Preferred frequency is once a week.
        if (true) {
            System.out.println("Updating Index...");
            System.out.println("This might take a while on first run, so please be patient!");
            // Create ResourceFetcher implementation to be used with IndexUpdateRequest
            // Here, we use Wagon based one as shorthand, but all we need is a ResourceFetcher implementation
            TransferListener listener = new AbstractTransferListener() {
                public void transferStarted(TransferEvent transferEvent) {
                    System.out.print(" Downloading " + transferEvent.getResource().getName());
                }

                public void transferProgress(TransferEvent transferEvent, byte[] buffer, int length) {
                }

                public void transferCompleted(TransferEvent transferEvent) {
                    System.out.println(" - Done");
                }
            };
            ResourceFetcher resourceFetcher = new WagonHelper.WagonFetcher(httpWagon, listener, null, null);

            Date centralContextCurrentTimestamp = centralContext.getTimestamp();
            IndexUpdateRequest updateRequest = new IndexUpdateRequest(centralContext, resourceFetcher);
            IndexUpdateResult updateResult = indexUpdater.fetchAndUpdateIndex(updateRequest);
            if (updateResult.isFullUpdate()) {
                System.out.println("Full update happened!");
            } else if (updateResult.getTimestamp().equals(centralContextCurrentTimestamp)) {
                System.out.println("No update needed, index is up to date!");
            } else {
                System.out.println("Incremental update happened, change covered " + centralContextCurrentTimestamp
                        + " - " + updateResult.getTimestamp() + " period.");
            }

            System.out.println();
        }

        System.out.println();
        System.out.println("Using index");
        System.out.println("===========");
        System.out.println();

        // ====
        // Case:
        // dump all the GAVs
        // NOTE: will not actually execute do this below, is too long to do (Central is HUGE), but is here as code
        // example

        int j = 0;
        if (true) {

            final IndexSearcher searcher = centralContext.acquireIndexSearcher();
            try {
                final IndexReader ir = searcher.getIndexReader();
                for (int i = 0; i < ir.maxDoc(); i++) {
                    if (!ir.isDeleted(i)) {
                        j++;
                        final Document doc = ir.document(i);
                        final ArtifactInfo ai = IndexUtils.constructArtifactInfo(doc, centralContext);
                        if (ai != null && "pom".equals(ai.fextension))
                            System.out.println(ai.groupId + ":" + ai.artifactId + ":" + ai.version + ":"
                                    + ai.classifier + " (sha1=" + ai.sha1 + ")");

                    }
                }
            } finally {
                centralContext.releaseIndexSearcher(searcher);
            }
        }
        System.err.println(j);
        if (j > 0)
            return;

        // ====
        // Case:
        // Search for all GAVs with known G and A and having version greater than V

        final GenericVersionScheme versionScheme = new GenericVersionScheme();
        final String versionString = "1.5.0";
        final Version version = versionScheme.parseVersion(versionString);

        // construct the query for known GA
        final Query groupIdQ = indexer.constructQuery(MAVEN.GROUP_ID,
                new SourcedSearchExpression("org.sonatype.nexus"));
        final Query artifactIdQ = indexer.constructQuery(MAVEN.ARTIFACT_ID,
                new SourcedSearchExpression("nexus-api"));
        final BooleanQuery query = new BooleanQuery();
        query.add(groupIdQ, BooleanClause.Occur.MUST);
        query.add(artifactIdQ, BooleanClause.Occur.MUST);

        // we want "jar" artifacts only
        query.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("jar")),
                BooleanClause.Occur.MUST);
        // we want main artifacts only (no classifier)
        // Note: this below is unfinished API, needs fixing
        query.add(indexer.constructQuery(MAVEN.CLASSIFIER, new SourcedSearchExpression(Field.NOT_PRESENT)),
                BooleanClause.Occur.MUST_NOT);

        // construct the filter to express "V greater than"
        final ArtifactInfoFilter versionFilter = new ArtifactInfoFilter() {
            public boolean accepts(final IndexingContext ctx, final ArtifactInfo ai) {
                try {
                    final Version aiV = versionScheme.parseVersion(ai.version);
                    // Use ">=" if you are INCLUSIVE
                    return aiV.compareTo(version) > 0;
                } catch (InvalidVersionSpecificationException e) {
                    // do something here? be safe and include?
                    return true;
                }
            }
        };

        System.out.println(
                "Searching for all GAVs with G=org.sonatype.nexus and nexus-api and having V greater than 1.5.0");
        final IteratorSearchRequest request = new IteratorSearchRequest(query,
                Collections.singletonList(centralContext), versionFilter);
        final IteratorSearchResponse response = indexer.searchIterator(request);
        for (ArtifactInfo ai : response) {
            System.out.println(ai.toString());
        }

        // Case:
        // Use index
        // Searching for some artifact
        Query gidQ = indexer.constructQuery(MAVEN.GROUP_ID,
                new SourcedSearchExpression("org.apache.maven.indexer"));
        Query aidQ = indexer.constructQuery(MAVEN.ARTIFACT_ID, new SourcedSearchExpression("indexer-artifact"));

        BooleanQuery bq = new BooleanQuery();
        bq.add(gidQ, BooleanClause.Occur.MUST);
        bq.add(aidQ, BooleanClause.Occur.MUST);

        searchAndDump(indexer, "all artifacts under GA org.apache.maven.indexer:indexer-artifact", bq);

        // Searching for some main artifact
        bq = new BooleanQuery();
        bq.add(gidQ, BooleanClause.Occur.MUST);
        bq.add(aidQ, BooleanClause.Occur.MUST);
        // bq.add( nexusIndexer.constructQuery( MAVEN.CLASSIFIER, new SourcedSearchExpression( "*" ) ), Occur.MUST_NOT
        // );

        searchAndDump(indexer, "main artifacts under GA org.apache.maven.indexer:indexer-artifact", bq);

        // doing sha1 search
        searchAndDump(indexer, "SHA1 7ab67e6b20e5332a7fb4fdf2f019aec4275846c2", indexer.constructQuery(MAVEN.SHA1,
                new SourcedSearchExpression("7ab67e6b20e5332a7fb4fdf2f019aec4275846c2")));

        searchAndDump(indexer, "SHA1 7ab67e6b20 (partial hash)",
                indexer.constructQuery(MAVEN.SHA1, new UserInputSearchExpression("7ab67e6b20")));

        // doing classname search (incomplete classname)
        searchAndDump(indexer,
                "classname DefaultNexusIndexer (note: Central does not publish classes in the index)",
                indexer.constructQuery(MAVEN.CLASSNAMES, new UserInputSearchExpression("DefaultNexusIndexer")));

        // doing search for all "canonical" maven plugins latest versions
        bq = new BooleanQuery();
        bq.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("maven-plugin")),
                BooleanClause.Occur.MUST);
        bq.add(indexer.constructQuery(MAVEN.GROUP_ID, new SourcedSearchExpression("org.apache.maven.plugins")),
                BooleanClause.Occur.MUST);
        searchGroupedAndDump(indexer, "all \"canonical\" maven plugins", bq, new GAGrouping());

        // close cleanly
        indexer.closeIndexingContext(centralContext, false);
    }

    public void searchAndDump(Indexer nexusIndexer, String descr, Query q) throws IOException {
        System.out.println("Searching for " + descr);

        FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(q, centralContext));

        for (ArtifactInfo ai : response.getResults()) {
            System.out.println(ai.toString());
        }

        System.out.println("------");
        System.out.println("Total: " + response.getTotalHitsCount());
        System.out.println();
    }

    public void searchGroupedAndDump(Indexer nexusIndexer, String descr, Query q, Grouping g) throws IOException {
        System.out.println("Searching for " + descr);

        GroupedSearchResponse response = nexusIndexer.searchGrouped(new GroupedSearchRequest(q, g, centralContext));

        for (Map.Entry<String, ArtifactInfoGroup> entry : response.getResults().entrySet()) {
            ArtifactInfo ai = entry.getValue().getArtifactInfos().iterator().next();
            System.out.println("* Plugin " + ai.artifactId);
            System.out.println(" Latest version: " + ai.version);
            System.out.println(StringUtils.isBlank(ai.description) ? "No description in plugin's POM."
                    : StringUtils.abbreviate(ai.description, 60));
            System.out.println();
        }

        System.out.println("------");
        System.out.println("Total record hits: " + response.getTotalHitsCount());
        System.out.println();
    }

}