com.subgraph.vega.internal.analysis.ContentAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for com.subgraph.vega.internal.analysis.ContentAnalyzer.java

Source

/*******************************************************************************
 * Copyright (c) 2011 Subgraph.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 *     Subgraph - initial API and implementation
 ******************************************************************************/
package com.subgraph.vega.internal.analysis;

import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import org.apache.http.HttpRequest;

import com.subgraph.vega.api.analysis.IContentAnalyzer;
import com.subgraph.vega.api.analysis.IContentAnalyzerResult;
import com.subgraph.vega.api.analysis.MimeType;
import com.subgraph.vega.api.http.requests.IHttpResponse;
import com.subgraph.vega.api.model.IWorkspace;
import com.subgraph.vega.api.model.alerts.IScanInstance;
import com.subgraph.vega.api.model.web.IWebModel;
import com.subgraph.vega.api.model.web.IWebPath;
import com.subgraph.vega.api.scanner.modules.IResponseProcessingModule;
import com.subgraph.vega.internal.analysis.urls.UrlExtractor;

/**
 * Analyzes HTTP responses for a scan instance.
 *
 * <p>For each response this class can record the exchange in the current
 * workspace's request log, mark the requested path as visited in the web
 * model, determine the declared and sniffed MIME types, extract URLs from the
 * response, and pass the response to the configured response processing
 * modules.
 */
public class ContentAnalyzer implements IContentAnalyzer {

    private static final Logger logger = Logger.getLogger("analysis");

    private final IScanInstance scanInstance;
    private final ContentAnalyzerFactory factory;
    private final UrlExtractor urlExtractor = new UrlExtractor();
    private final MimeDetector mimeDetector = new MimeDetector();

    /** Held while the response processing modules are being invoked. */
    private final Object responseProcessingLock = new Object();

    /** Modules to run on each eligible response; {@code null} until configured. */
    private List<IResponseProcessingModule> responseProcessingModules;
    /** When true, extracted http/https links are looked up in the web model. */
    private boolean addLinksToModel;
    /** Request-log flag used by {@link #processResponse(IHttpResponse)}. */
    private boolean defaultAddToRequestLog;

    /**
     * Creates an analyzer bound to a scan instance. Link insertion into the
     * model and request logging are both enabled by default.
     *
     * @param factory      source of the current workspace
     * @param scanInstance scan instance passed to every response processing module
     */
    ContentAnalyzer(ContentAnalyzerFactory factory, IScanInstance scanInstance) {
        this.factory = factory;
        this.scanInstance = scanInstance;
        this.addLinksToModel = true;
        this.defaultAddToRequestLog = true;
    }

    /**
     * Processes a response using the default request-log setting and with
     * URL extraction enabled.
     *
     * @param response the response to analyze
     * @return the analysis result (empty if the response is {@code null} or no
     *         workspace is active)
     */
    @Override
    public IContentAnalyzerResult processResponse(IHttpResponse response) {
        return processResponse(response, defaultAddToRequestLog, true);
    }

    /**
     * Sets whether {@link #processResponse(IHttpResponse)} adds responses to
     * the request log.
     *
     * @param flag the new default
     */
    @Override
    public void setDefaultAddToRequestLog(boolean flag) {
        defaultAddToRequestLog = flag;
    }

    /**
     * Processes a response.
     *
     * @param response        the response to analyze; a {@code null} response is
     *                        logged and yields an empty result
     * @param addToRequestLog whether to record the response in the workspace
     *                        request log
     * @param scrapePage      whether to extract URLs from the response
     * @return the analysis result carrying the detected MIME types and any
     *         extracted URIs
     */
    @Override
    public IContentAnalyzerResult processResponse(IHttpResponse response, boolean addToRequestLog,
            boolean scrapePage) {
        final ContentAnalyzerResult result = new ContentAnalyzerResult();
        if (response == null) {
            logger.warning("ContentAnalyzer.processResponse() called with null response");
            return result;
        }

        final IWorkspace workspace = factory.getCurrentWorkspace();
        if (workspace == null) {
            logger.warning("ContentAnalyzer.processResponse() called while no workspace is active");
            return result;
        }

        if (addToRequestLog) {
            workspace.getRequestLog().addRequestResponse(response);
        }

        final IWebModel webModel = workspace.getWebModel();
        final IWebPath path = webModel.getWebPathByUri(response.getRequestUri());
        path.setVisited(true);

        result.setDeclaredMimeType(mimeDetector.getDeclaredMimeType(response));
        result.setSniffedMimeType(mimeDetector.getSniffedMimeType(response));

        if (scrapePage) {
            runExtractUrls(result, response, webModel);
        }
        runResponseProcessingModules(response.getOriginalRequest(), response, result.getDeclaredMimeType(),
                result.getSniffedMimeType(), workspace);
        return result;
    }

    /**
     * Replaces the set of response processing modules with a private copy of
     * the given list.
     *
     * @param modules the modules to run on subsequent responses; {@code null}
     *                clears the current modules so that none are run
     */
    @Override
    public void setResponseProcessingModules(List<IResponseProcessingModule> modules) {
        // Copy so later changes to the caller's list do not affect this analyzer.
        responseProcessingModules = (modules == null)
                ? null
                : new ArrayList<IResponseProcessingModule>(modules);
    }

    /**
     * Extracts URLs from a mostly-ASCII response, adds each one to the result
     * and, when link insertion is enabled, looks up http/https URLs in the
     * web model.
     */
    private void runExtractUrls(ContentAnalyzerResult result, IHttpResponse response, IWebModel webModel) {
        if (!response.isMostlyAscii()) {
            return;
        }
        for (URI u : urlExtractor.findUrls(response)) {
            if (addLinksToModel && isHttpOrHttps(u)) {
                // Return value intentionally unused. NOTE(review): presumably the
                // lookup registers the path in the model -- confirm against IWebModel.
                webModel.getWebPathByUri(u);
            }
            result.addUri(u);
        }
    }

    /**
     * Returns true if the URI has an http or https scheme (case-insensitive).
     * A URI without a scheme (for example a relative reference, for which
     * {@link URI#getScheme()} returns {@code null}) is not considered http(s).
     */
    private static boolean isHttpOrHttps(URI u) {
        final String scheme = u.getScheme();
        // Constant-first comparison: equalsIgnoreCase(null) is simply false.
        return "http".equalsIgnoreCase(scheme) || "https".equalsIgnoreCase(scheme);
    }

    /**
     * Runs every enabled response processing module on the response, provided
     * modules are configured, the response is mostly ASCII, and either the
     * declared or the sniffed MIME type is one processed by default.
     */
    private void runResponseProcessingModules(HttpRequest request, IHttpResponse response, MimeType declaredMime,
            MimeType sniffedMime, IWorkspace workspace) {
        // Read the field once so the null check and the loop see the same list.
        final List<IResponseProcessingModule> modules = responseProcessingModules;
        if (modules == null || !response.isMostlyAscii()) {
            return;
        }

        if (!isDefaultResponseProcessingMimetype(declaredMime)
                && !isDefaultResponseProcessingMimetype(sniffedMime)) {
            return;
        }

        // Modules are invoked one response at a time per analyzer instance.
        synchronized (responseProcessingLock) {
            for (IResponseProcessingModule m : modules) {
                if (m.isEnabled()) {
                    m.processResponse(scanInstance, request, response, workspace);
                }
            }
        }
    }

    /**
     * Returns true if the MIME type's canonical name contains "text",
     * "javascript" or "xml". A {@code null} type or a {@code null} canonical
     * name is treated as not matching.
     */
    private boolean isDefaultResponseProcessingMimetype(MimeType mime) {
        if (mime == null) {
            return false;
        }
        final String name = mime.getCanonicalName();
        if (name == null) {
            return false;
        }
        return name.contains("text") || name.contains("javascript") || name.contains("xml");
    }

    /**
     * Sets whether extracted http/https links are looked up in the web model.
     *
     * @param flag true to enable, false to only report links in the result
     */
    @Override
    public void setAddLinksToModel(boolean flag) {
        addLinksToModel = flag;
    }
}