Example usage for java.io InputStream reset

Introduction

On this page you can find example usages of java.io.InputStream.reset().

Prototype

public synchronized void reset() throws IOException

Source Link

Document

Repositions this stream to the position at the time the mark method was last called on this input stream.

Usage

From source file:com.smartsheet.api.internal.http.HttpEntitySnapshot.java

/**
 * Creates a snapshot of the original entity. This requires that the entity's stream either supports
 * mark/reset, or that it is entirely consumed and replaced with an exact copy.
 * <p>
 * Only content whose type starts with {@code JSON_MIME_TYPE} is captured, and at most
 * {@code MAX_SNAPSHOT_SIZE} bytes of it; for any other content type a placeholder message is stored instead.
 *
 * @param original the entity to snapshot; when its stream does not support mark/reset the stream is
 *                 consumed, closed and replaced on {@code original} with an in-memory copy
 * @throws IOException if reading, resetting or closing the original content stream fails
 */
public HttpEntitySnapshot(HttpEntity original) throws IOException {
    final String contentType = original.getContentType();
    final InputStream contentStream = original.getContent();
    final long contentLength = original.getContentLength();

    super.setContentLength(contentLength);
    super.setContentType(contentType);

    if (contentType != null && contentType.startsWith(JSON_MIME_TYPE)) {
        // we need to read and then reset (if possible) the original entity's content stream (or replace it with an exact copy)
        // if contentLength > Integer.MAX_VALUE we have MUCH bigger problems than long->int rollover
        if (contentStream.markSupported()) {
            // here we can read up to a limited amount of content and rewind afterwards
            final byte[] snapshotBuffer = new byte[MAX_SNAPSHOT_SIZE];
            contentStream.mark(MAX_SNAPSHOT_SIZE + 1);

            // A single read() may return fewer bytes than are available and returns -1 on an empty stream,
            // so keep reading until the buffer is full or the stream ends. The running total is never negative.
            int totalRead = 0;
            while (totalRead < MAX_SNAPSHOT_SIZE) {
                final int chunk = contentStream.read(snapshotBuffer, totalRead, MAX_SNAPSHOT_SIZE - totalRead);
                if (chunk <= 0) {
                    break;
                }
                totalRead += chunk;
            }
            contentStream.reset();

            // trim content array to actual size
            contentArray = (totalRead < MAX_SNAPSHOT_SIZE)
                    ? Arrays.copyOf(snapshotBuffer, totalRead)
                    : snapshotBuffer;
        } else {
            // here we must read everything and then repackage the byte[] into an input stream to replace the original
            byte[] fullContentArray;
            try {
                fullContentArray = StreamUtil.readBytesFromStream(contentStream, StreamUtil.ONE_MB);
            } finally {
                contentStream.close();
            }
            // having consumed the content into memory we must now replace the original stream (so it can be read by subsequent code)
            original.setContent(new ByteArrayInputStream(fullContentArray));
            // and we need a copy for potential logging purposes
            contentArray = Arrays.copyOf(fullContentArray,
                    Math.min(MAX_SNAPSHOT_SIZE, fullContentArray.length));
            // we see a lot of Content-Length:-1 from certain responses - no point in logging those
            if (contentLength != -1 && fullContentArray.length != contentLength) {
                LoggerFactory.getLogger(HttpEntitySnapshot.class).info(
                        "actual content-length {} doesn't match" + " declared content-length {}",
                        fullContentArray.length, contentLength);
            }
        }
    } else {
        // NOTE(review): getBytes() uses the platform default charset - confirm that is acceptable for this log text
        contentArray = String.format("**contentType '%s' not logged**", contentType).getBytes();
    }
}

From source file:org.apache.nifi.processors.standard.util.crypto.OpenSSLPKCS5CipherProvider.java

/**
 * Returns the salt provided as part of the cipher stream, or throws an exception if one cannot be detected.
 *
 * @param in the cipher InputStream/*w w  w .jav a  2s  . c  om*/
 * @return the salt
 */
@Override
public byte[] readSalt(InputStream in) throws IOException {
    if (in == null) {
        throw new IllegalArgumentException("Cannot read salt from null InputStream");
    }

    // The header and salt format is "Salted__salt x8b" in ASCII
    byte[] salt = new byte[DEFAULT_SALT_LENGTH];

    // Try to read the header and salt from the input
    byte[] header = new byte[OPENSSL_EVP_HEADER_SIZE];

    // Mark the stream in case there is no salt
    in.mark(OPENSSL_EVP_HEADER_SIZE + 1);
    StreamUtils.fillBuffer(in, header);

    final byte[] headerMarkerBytes = OPENSSL_EVP_HEADER_MARKER.getBytes(StandardCharsets.US_ASCII);

    if (!Arrays.equals(headerMarkerBytes, header)) {
        // No salt present
        salt = new byte[0];
        // Reset the stream because we skipped 8 bytes of cipher text
        in.reset();
    }

    StreamUtils.fillBuffer(in, salt);
    return salt;
}

From source file:core.com.qiniu.http.AmazonHttpClient.java

/**
 * Internal method to execute the HTTP method given.
 *
 * @see AmazonHttpClient#execute(Request, HttpResponseHandler,
 *      HttpResponseHandler)
 * @see AmazonHttpClient#execute(Request, HttpResponseHandler,
 *      HttpResponseHandler, ExecutionContext)
 */
<T> Response<T> executeHelper(Request<?> request,
        HttpResponseHandler<AmazonWebServiceResponse<T>> responseHandler,
        HttpResponseHandler<AmazonServiceException> errorResponseHandler, ExecutionContext executionContext)
        throws AmazonClientException, AmazonServiceException {
    // Overview: sends the request in a loop. Each pass is one attempt; the loop is left only by returning
    // a successful Response or by throwing. Before every retry the request's parameters, headers and
    // content are restored to the copies taken before the loop, and the content stream is reset to the
    // mark set before the first attempt (when the stream supports mark/reset).

    /*
     * Depending on which response handler we end up choosing to handle the
     * HTTP response, it might require us to leave the underlying HTTP
     * connection open, depending on whether or not it reads the complete
     * HTTP response stream from the HTTP connection, or if delays reading
     * any of the content until after a response is returned to the caller.
     */
    boolean leaveHttpConnectionOpen = false;
    AWSRequestMetrics awsRequestMetrics = executionContext.getAwsRequestMetrics();
    /*
     * add the service endpoint to the logs. You can infer service name from
     * service endpoint
     */
    awsRequestMetrics.addProperty(AWSRequestMetrics.Field.ServiceName, request.getServiceName());
    awsRequestMetrics.addProperty(AWSRequestMetrics.Field.ServiceEndpoint, request.getEndpoint());

    // Apply whatever request options we know how to handle, such as
    // user-agent.
    setUserAgent(request);
    request.addHeader(HEADER_SDK_TRANSACTION_ID, UUID.randomUUID().toString());
    int requestCount = 0;
    long lastBackoffDelay = 0;
    URI redirectedURI = null;
    AmazonClientException retriedException = null;

    // Make a copy of the original request params and headers so that we can
    // permute it in this loop and start over with the original every time.
    Map<String, String> originalParameters = new LinkedHashMap<String, String>(request.getParameters());
    Map<String, String> originalHeaders = new HashMap<String, String>(request.getHeaders());
    // mark input stream if supported
    // NOTE(review): a readlimit of -1 is passed; how much can be re-read after reset() then depends on
    // the concrete stream implementation - confirm the content streams used here ignore the readlimit.
    InputStream originalContent = request.getContent();
    if (originalContent != null && originalContent.markSupported()) {
        originalContent.mark(-1);
    }

    final AWSCredentials credentials = executionContext.getCredentials();
    Signer signer = null;
    // Declared outside the loop, so both keep the value of the previous attempt until reassigned
    HttpResponse httpResponse = null;
    HttpRequest httpRequest = null;

    while (true) {
        ++requestCount;
        awsRequestMetrics.setCounter(AWSRequestMetrics.Field.RequestCount, requestCount);
        if (requestCount > 1) { // retry
            // Undo whatever the previous attempt changed on the request
            request.setParameters(originalParameters);
            request.setHeaders(originalHeaders);
            request.setContent(originalContent);
        }
        if (redirectedURI != null) {
            // A previous attempt was answered with a temporary redirect: retarget the request
            request.setEndpoint(URI.create(redirectedURI.getScheme() + "://" + redirectedURI.getAuthority()));
            request.setResourcePath(redirectedURI.getPath());
        }

        try {
            if (requestCount > 1) { // retry
                awsRequestMetrics.startEvent(AWSRequestMetrics.Field.RetryPauseTime);
                try {
                    // retriedException is null here when the retry was caused by a redirect
                    lastBackoffDelay = pauseBeforeNextRetry(request.getOriginalRequest(), retriedException,
                            requestCount, config.getRetryPolicy());
                } finally {
                    awsRequestMetrics.endEvent(AWSRequestMetrics.Field.RetryPauseTime);
                }
                // Rewind the payload to the mark set before the first attempt
                InputStream content = request.getContent();
                if (content != null && content.markSupported()) {
                    content.reset();
                }
            }
            request.addHeader(HEADER_SDK_RETRY_INFO, (requestCount - 1) + "/" + lastBackoffDelay);

            // Sign the request if a signer was provided
            if (signer == null)
                signer = executionContext.getSignerByURI(request.getEndpoint());
            if (signer != null && credentials != null) {
                awsRequestMetrics.startEvent(AWSRequestMetrics.Field.RequestSigningTime);
                try {
                    signer.sign(request, credentials);
                } finally {
                    awsRequestMetrics.endEvent(AWSRequestMetrics.Field.RequestSigningTime);
                }
            }

            if (requestLog.isDebugEnabled()) {
                requestLog.debug("Sending Request: " + request.toString());
            }

            httpRequest = requestFactory.createHttpRequest(request, config, executionContext);

            // Cleared before sending; set again only if this attempt fails with a retryable exception
            retriedException = null;
            awsRequestMetrics.startEvent(AWSRequestMetrics.Field.HttpRequestTime);
            try {
                httpResponse = httpClient.execute(httpRequest);
            } finally {
                awsRequestMetrics.endEvent(AWSRequestMetrics.Field.HttpRequestTime);
            }

            if (isRequestSuccessful(httpResponse)) {
                awsRequestMetrics.addProperty(AWSRequestMetrics.Field.StatusCode, httpResponse.getStatusCode());
                /*
                 * If we get back any 2xx status code, then we know we
                 * should treat the service call as successful.
                 */
                leaveHttpConnectionOpen = responseHandler.needsConnectionLeftOpen();
                T response = handleResponse(request, responseHandler, httpResponse, executionContext);
                return new Response<T>(response, httpResponse);
            } else if (isTemporaryRedirect(httpResponse)) {
                /*
                 * S3 sends 307 Temporary Redirects if you try to delete an
                 * EU bucket from the US endpoint. If we get a 307, we'll
                 * point the HTTP method to the redirected location, and let
                 * the next retry deliver the request to the right location.
                 */
                String redirectedLocation = httpResponse.getHeaders().get("Location");
                log.debug("Redirecting to: " + redirectedLocation);
                // set redirect uri and retry
                // NOTE(review): no null check on the Location header before URI.create - confirm it is always present
                redirectedURI = URI.create(redirectedLocation);
                awsRequestMetrics.addProperty(AWSRequestMetrics.Field.StatusCode, httpResponse.getStatusCode());
                awsRequestMetrics.addProperty(AWSRequestMetrics.Field.RedirectLocation, redirectedLocation);
                awsRequestMetrics.addProperty(AWSRequestMetrics.Field.AWSRequestID, null);
            } else {
                leaveHttpConnectionOpen = errorResponseHandler.needsConnectionLeftOpen();
                AmazonServiceException ase = handleErrorResponse(request, errorResponseHandler, httpResponse);
                awsRequestMetrics.addProperty(AWSRequestMetrics.Field.AWSRequestID, ase.getRequestId());
                awsRequestMetrics.addProperty(AWSRequestMetrics.Field.AWSErrorCode, ase.getErrorCode());
                awsRequestMetrics.addProperty(AWSRequestMetrics.Field.StatusCode, ase.getStatusCode());

                if (!shouldRetry(request.getOriginalRequest(), httpRequest.getContent(), ase, requestCount,
                        config.getRetryPolicy())) {
                    throw ase;
                }

                // Cache the retryable exception
                retriedException = ase;
                /*
                 * Checking for clock skew error again because we don't want
                 * to set the global time offset for every service
                 * exception.
                 */
                if (RetryUtils.isClockSkewError(ase)) {
                    int timeOffset = parseClockSkewOffset(httpResponse, ase);
                    SDKGlobalConfiguration.setGlobalTimeOffset(timeOffset);
                }
                resetRequestAfterError(request, ase);
            }
        } catch (IOException ioe) {
            if (log.isDebugEnabled()) {
                log.debug("Unable to execute HTTP request: " + ioe.getMessage(), ioe);
            }
            awsRequestMetrics.incrementCounter(AWSRequestMetrics.Field.Exception);
            awsRequestMetrics.addProperty(AWSRequestMetrics.Field.Exception, ioe);
            awsRequestMetrics.addProperty(AWSRequestMetrics.Field.AWSRequestID, null);

            AmazonClientException ace = new AmazonClientException(
                    "Unable to execute HTTP request: " + ioe.getMessage(), ioe);
            // NOTE(review): httpRequest is dereferenced here; it is still null if an IOException is raised
            // before createHttpRequest has run on the first attempt - confirm that cannot happen.
            if (!shouldRetry(request.getOriginalRequest(), httpRequest.getContent(), ace, requestCount,
                    config.getRetryPolicy())) {
                throw ace;
            }

            // Cache the retryable exception
            retriedException = ace;
            resetRequestAfterError(request, ioe);
        } catch (RuntimeException e) {
            throw handleUnexpectedFailure(e, awsRequestMetrics);
        } catch (Error e) {
            throw handleUnexpectedFailure(e, awsRequestMetrics);
        } finally {
            /*
             * Some response handlers need to manually manage the HTTP
             * connection and will take care of releasing the connection on
             * their own, but if this response handler doesn't need the
             * connection left open, we go ahead and release it to free
             * up resources.
             */
            // NOTE(review): httpResponse and leaveHttpConnectionOpen are not cleared between attempts, so
            // after a failed attempt this may act on the response of an earlier attempt.
            if (!leaveHttpConnectionOpen && httpResponse != null) {
                try {
                    if (httpResponse.getRawContent() != null) {
                        httpResponse.getRawContent().close();
                    }
                } catch (IOException e) {
                    log.warn("Cannot close the response content.", e);
                }
            }
        }
    } /* end while (true) */
}

From source file:org.apache.axis.attachments.DimeBodyPart.java

/**
 * Determines the number of bytes provided by the given data handler.
 * <p>
 * File-backed sources are sized by asking the file system. Any other source is sized by reading its
 * input stream to the end. If that stream supports mark/reset it is marked before reading and reset
 * afterwards so it is left open at its first byte; otherwise it is closed.
 * <p>
 * Failures are logged, not propagated: the method then returns whatever value had been determined up to
 * that point ({@code -1} if nothing had been determined yet).
 *
 * @param dh the data handler whose content is measured
 * @return the size in bytes, or {@code -1} / a partial count if an error occurred
 */
protected long getDataSize(DataHandler dh) {
    long dataSize = -1L;

    try {
        DataSource ds = dh.getDataSource();

        // Do files ourselves since this is costly to read in. Ask the file system.
        // This is 90% of the use of attachments.
        if (ds instanceof javax.activation.FileDataSource) {
            javax.activation.FileDataSource fdh = (javax.activation.FileDataSource) ds;
            java.io.File df = fdh.getFile();

            if (!df.exists()) {
                throw new RuntimeException(Messages.getMessage("noFile", df.getAbsolutePath()));
            }
            dataSize = df.length();
        } else {
            dataSize = 0;
            java.io.InputStream in = ds.getInputStream();
            final boolean resettable = in.markSupported();
            try {
                if (resettable) {
                    // reset() is only defined after a mark(); without it streams such as
                    // BufferedInputStream throw. The limit must cover the whole content because the
                    // stream is read to its end before being rewound (the stream may buffer all of it).
                    in.mark(Integer.MAX_VALUE);
                }

                byte[] readbuf = new byte[64 * 1024];
                int bytesread;
                while ((bytesread = in.read(readbuf)) > -1) {
                    dataSize += bytesread;
                }

                if (resettable) {
                    // Leave the stream open for future reading
                    // and reset the stream pointer to the first byte
                    in.reset();
                }
            } finally {
                if (!resettable) {
                    //FIXME: bug http://nagoya.apache.org/jira/secure/ViewIssue.jspa?key=AXIS-1126
                    //if we close this then how can we read the file? eh?
                    // Closed in finally so the stream is not leaked when reading fails.
                    in.close();
                }
            }
        }
    } catch (Exception e) {
        //TODO: why are exceptions swallowed here?
        log.error(Messages.getMessage("exception00"), e);
    }
    return dataSize;
}

From source file:com.smartitengineering.event.hub.core.ChannelEventsResource.java

/**
 * Assembles an Atom feed of this channel's events, positioned relative to a placeholder.
 * <p>
 * {@code count} events (10 when {@code count} has not been set) are requested from the storer; the count
 * is negated when {@code isBefore} is set. If events are returned, the feed receives two paging links and
 * one entry per event. Each entry carries the event content as a string (served from
 * {@code contentCache} when already cached) and an alternate link to the event resource.
 *
 * @param placeholderId placeholder handed to the storer as the reference point
 * @param isBefore      whether the count handed to the storer is negated
 * @return an OK response whose entity is the assembled feed
 */
public Response get(String placeholderId, boolean isBefore) {
    if (count == null) {
        count = 10;
    }
    ResponseBuilder responseBuilder = Response.ok();
    Feed atomFeed = getFeed("Events", new Date());

    final int requestedCount = isBefore ? count * -1 : count;

    Collection<Event> events = HubPersistentStorerSPI.getInstance().getStorer().getEvents(placeholderId,
            channelId, requestedCount);

    if (events == null || events.isEmpty()) {
        // No events: answer with the bare feed
        responseBuilder.entity(atomFeed);
        return responseBuilder.build();
    }

    MultivaluedMap<String, String> queryParams = getUriInfo().getQueryParameters();
    List<Event> orderedEvents = new ArrayList<Event>(events);

    // Link towards AFTER_METHOD, anchored at the first event of the result
    Link afterLink = getAbderaFactory().newLink();
    afterLink.setRel(Link.REL_PREVIOUS);
    Event headEvent = orderedEvents.get(0);
    final UriBuilder afterUri = getRelativeURIBuilder().path(ChannelEventsResource.class).path(AFTER_METHOD);
    final UriBuilder beforeUri = getRelativeURIBuilder().path(ChannelEventsResource.class)
            .path(BEFORE_METHOD);

    // Both paging links inherit every query parameter of the current request
    for (String paramName : queryParams.keySet()) {
        final Object[] paramValues = queryParams.get(paramName).toArray();
        afterUri.queryParam(paramName, paramValues);
        beforeUri.queryParam(paramName, paramValues);
    }

    afterLink.setHref(afterUri.build(channelId, headEvent.getPlaceholderId()).toString());
    atomFeed.addLink(afterLink);

    // Link towards BEFORE_METHOD, anchored at the last event of the result
    Link beforeLink = getAbderaFactory().newLink();
    beforeLink.setRel(Link.REL_NEXT);
    Event tailEvent = orderedEvents.get(orderedEvents.size() - 1);
    beforeLink.setHref(beforeUri.build(channelId, tailEvent.getPlaceholderId()).toString());
    atomFeed.addLink(beforeLink);

    for (Event event : events) {
        Entry feedEntry = getAbderaFactory().newEntry();

        feedEntry.setId(event.getPlaceholderId());
        feedEntry.setTitle(event.getPlaceholderId().toString());

        InputStream payload = event.getEventContent().getContent();
        String payloadText = "";

        if (payload != null && contentCache.containsKey(event)) {
            payloadText = contentCache.get(event);
        } else if (payload != null) {
            try {
                // Mark before and reset after reading so the event's stream is rewound when it supports that
                final boolean rewindable = payload.markSupported();
                if (rewindable) {
                    payload.mark(Integer.MAX_VALUE);
                }
                payloadText = IOUtils.toString(payload);
                contentCache.put(event, payloadText);
                if (rewindable) {
                    payload.reset();
                }
            } catch (IOException ex) {
                // NOTE(review): read/reset failures are silently ignored; the entry keeps whatever text was obtained
            }
        }

        feedEntry.setContent(payloadText);
        feedEntry.setUpdated(event.getCreationDate());

        Link alternateLink = getAbderaFactory().newLink();

        alternateLink.setHref(getRelativeURIBuilder().path(EventResource.class).build(event.getPlaceholderId())
                .toString());
        alternateLink.setRel(Link.REL_ALTERNATE);
        alternateLink.setMimeType(MediaType.APPLICATION_JSON);

        feedEntry.addLink(alternateLink);
        atomFeed.addEntry(feedEntry);
    }

    responseBuilder.entity(atomFeed);
    return responseBuilder.build();
}

From source file:com.smartitengineering.event.hub.core.AllEventsResource.java

/**
 * Assembles an Atom feed of events across all channels, positioned relative to a placeholder.
 * <p>
 * {@code count} events (10 when {@code count} has not been set) are requested from the storer with a
 * {@code null} channel; the count is negated when {@code isBefore} is set. The feed always carries a
 * "root" link. If events are returned, it additionally receives two paging links and one entry per event.
 * Each entry carries the event content as a string (served from {@code contentCache} when already cached)
 * and an alternate link to the event resource.
 *
 * @param placeholderId placeholder handed to the storer as the reference point
 * @param isBefore      whether the count handed to the storer is negated
 * @return an OK response whose entity is the assembled feed
 */
public Response get(String placeholderId, boolean isBefore) {
    if (count == null) {
        count = 10;
    }
    final int requestedCount = isBefore ? count * -1 : count;

    ResponseBuilder responseBuilder = Response.ok();
    Feed atomFeed = getFeed("Events", new Date());

    // Every feed links back to the root resource
    Link rootLink = getAbderaFactory().newLink();
    rootLink.setHref(getRelativeURIBuilder().path(RootResource.class).build().toString());
    rootLink.setRel("root");
    atomFeed.addLink(rootLink);

    Collection<Event> events = HubPersistentStorerSPI.getInstance().getStorer().getEvents(placeholderId, null,
            requestedCount);

    if (events == null || events.isEmpty()) {
        // No events: answer with the feed as built so far
        responseBuilder.entity(atomFeed);
        return responseBuilder.build();
    }

    MultivaluedMap<String, String> queryParams = getUriInfo().getQueryParameters();
    List<Event> orderedEvents = new ArrayList<Event>(events);

    // Link towards AFTER_METHOD, anchored at the first event of the result
    Link afterLink = getAbderaFactory().newLink();
    afterLink.setRel(Link.REL_PREVIOUS);
    Event headEvent = orderedEvents.get(0);
    final UriBuilder afterUri = getRelativeURIBuilder().path(AllEventsResource.class).path(AFTER_METHOD);
    final UriBuilder beforeUri = getRelativeURIBuilder().path(AllEventsResource.class)
            .path(BEFORE_METHOD);

    // Both paging links inherit every query parameter of the current request
    for (String paramName : queryParams.keySet()) {
        final Object[] paramValues = queryParams.get(paramName).toArray();
        afterUri.queryParam(paramName, paramValues);
        beforeUri.queryParam(paramName, paramValues);
    }

    afterLink.setHref(afterUri.build(headEvent.getPlaceholderId()).toString());
    atomFeed.addLink(afterLink);

    // Link towards BEFORE_METHOD, anchored at the last event of the result
    Link beforeLink = getAbderaFactory().newLink();
    beforeLink.setRel(Link.REL_NEXT);
    Event tailEvent = orderedEvents.get(orderedEvents.size() - 1);
    beforeLink.setHref(beforeUri.build(tailEvent.getPlaceholderId()).toString());
    atomFeed.addLink(beforeLink);

    for (Event event : events) {
        Entry feedEntry = getAbderaFactory().newEntry();

        feedEntry.setId(event.getPlaceholderId());
        feedEntry.setTitle(event.getPlaceholderId().toString());

        InputStream payload = event.getEventContent().getContent();
        String payloadText = "";

        if (payload != null && contentCache.containsKey(event)) {
            payloadText = contentCache.get(event);
        } else if (payload != null) {
            try {
                // Mark before and reset after reading so the event's stream is rewound when it supports that
                final boolean rewindable = payload.markSupported();
                if (rewindable) {
                    payload.mark(Integer.MAX_VALUE);
                }
                payloadText = IOUtils.toString(payload);
                contentCache.put(event, payloadText);
                if (rewindable) {
                    payload.reset();
                }
            } catch (IOException ex) {
                // NOTE(review): read/reset failures are silently ignored; the entry keeps whatever text was obtained
            }
        }

        feedEntry.setContent(payloadText);
        feedEntry.setUpdated(event.getCreationDate());

        Link alternateLink = getAbderaFactory().newLink();

        alternateLink.setHref(getRelativeURIBuilder().path(EventResource.class).build(event.getPlaceholderId())
                .toString());
        alternateLink.setRel(Link.REL_ALTERNATE);
        alternateLink.setMimeType(MediaType.APPLICATION_JSON);

        feedEntry.addLink(alternateLink);
        atomFeed.addEntry(feedEntry);
    }

    responseBuilder.entity(atomFeed);
    return responseBuilder.build();
}

From source file:com.gargoylesoftware.htmlunit.WebResponseData.java

/**
 * Returns the content stream, wrapped in a decompressing stream when the {@code content-encoding}
 * header names {@code gzip} or {@code deflate}.
 * <p>
 * For {@code deflate} the first two bytes are inspected (when the stream supports mark/reset) to decide
 * between zlib-wrapped data and raw deflate data. Any valid zlib header is recognised: compression method
 * 8, a window size field of at most 7, and a header check value that makes the 16-bit header a multiple
 * of 31. This covers all compression levels, not just the default-level header {@code 0x789c}.
 *
 * @param downloadedContent the content whose emptiness decides whether decoding is attempted
 * @param headers the response headers consulted for {@code content-encoding}
 * @return the (possibly decompressing) stream, or {@code null} if there is no stream
 * @throws IOException if the stream cannot be read, reset or wrapped
 */
private InputStream getStream(final DownloadedContent downloadedContent, final List<NameValuePair> headers)
        throws IOException {

    // NOTE(review): the stream comes from the downloadedContent_ field while emptiness is checked on the
    // downloadedContent parameter - confirm both always refer to the same content.
    InputStream stream = downloadedContent_.getInputStream();
    if (stream == null) {
        return null;
    }

    if (downloadedContent.isEmpty()) {
        return stream;
    }

    final String encoding = getHeader(headers, "content-encoding");
    if (encoding != null) {
        if (StringUtils.contains(encoding, "gzip")) {
            stream = new GZIPInputStream(stream);
        } else if (StringUtils.contains(encoding, "deflate")) {
            boolean zlibHeader = false;
            if (stream.markSupported()) { // should be always the case as the content is in a byte[] or in a file
                stream.mark(2);
                final byte[] buffer = new byte[2];
                // read() may deliver fewer bytes than requested, so keep going until both bytes are in
                int filled = 0;
                while (filled < buffer.length) {
                    final int chunk = stream.read(buffer, filled, buffer.length - filled);
                    if (chunk <= 0) {
                        break;
                    }
                    filled += chunk;
                }
                stream.reset();

                if (filled == buffer.length) {
                    final int cmf = buffer[0] & 0xff;
                    final int flg = buffer[1] & 0xff;
                    // zlib header: CM (low nibble) == 8, CINFO (high nibble) <= 7, (CMF * 256 + FLG) % 31 == 0
                    zlibHeader = (cmf & 0x0f) == 8 && (cmf >> 4) <= 7 && ((cmf << 8) | flg) % 31 == 0;
                }
            }
            if (zlibHeader) {
                stream = new InflaterInputStream(stream);
            } else {
                // no zlib wrapper detected: treat as raw deflate data
                stream = new InflaterInputStream(stream, new Inflater(true));
            }
        }
    }
    return stream;
}

From source file:org.globus.gsi.X509Credential.java

/**
 * Reads every PEM certificate found in the given stream and stores them as this credential's chain.
 * <p>
 * If the stream supports mark/reset it is reset first, i.e. reading starts at whatever position was last
 * marked on it. Each line containing "BEGIN CERTIFICATE" starts one certificate whose body is decoded via
 * {@code getDecodedPEMObject}. The reader wrapping the stream is closed before returning. The chain field
 * is only assigned when at least one certificate was found.
 *
 * @param input the stream to read certificates from
 * @throws CredentialException if reading the stream or parsing a certificate fails
 * @throws IllegalArgumentException if {@code input} is null
 */
protected void loadCertificate(InputStream input) throws CredentialException {

    if (input == null) {
        throw new IllegalArgumentException("Input stream to load X509Credential is null");
    }

    final Vector<X509Certificate> collected = new Vector<X509Certificate>();
    BufferedReader pemReader = null;

    try {
        // NOTE(review): relies on the caller having marked the stream beforehand - confirm
        if (input.markSupported()) {
            input.reset();
        }
        pemReader = new BufferedReader(new InputStreamReader(input));

        for (String current = pemReader.readLine(); current != null; current = pemReader.readLine()) {
            if (current.contains("BEGIN CERTIFICATE")) {
                final byte[] decoded = getDecodedPEMObject(pemReader);
                collected.addElement(CertificateLoadUtil.loadCertificate(new ByteArrayInputStream(decoded)));
            }
        }
    } catch (IOException e) {
        throw new CredentialException(e);
    } catch (GeneralSecurityException e) {
        throw new CredentialException(e);
    } finally {
        if (pemReader != null) {
            try {
                pemReader.close();
            } catch (IOException e) {
                // Failing to close is not fatal for the load itself
                logger.debug("error closing reader", e);
            }
        }
    }

    if (!collected.isEmpty()) {
        this.certChain = collected.toArray(new X509Certificate[collected.size()]);
    }
}

From source file:org.alfresco.rm.rest.api.impl.FilePlanComponentsApiUtils.java

/**
 * Write content to file/*from   w w  w . j a va 2 s.c o  m*/
 *
 * @param nodeRef  the node to write the content to
 * @param fileName  the name of the file (used for guessing the file's mimetype)
 * @param stream  the input stream to write
 * @param guessEncoding  whether to guess stream encoding
 */
public void writeContent(NodeRef nodeRef, String fileName, InputStream stream, boolean guessEncoding) {
    try {
        ContentWriter writer = contentService.getWriter(nodeRef, ContentModel.PROP_CONTENT, true);

        String mimeType = mimetypeService.guessMimetype(fileName);
        if ((mimeType != null) && (!mimeType.equals(MimetypeMap.MIMETYPE_BINARY))) {
            // quick/weak guess based on file extension
            writer.setMimetype(mimeType);
        } else {
            // stronger guess based on file stream
            writer.guessMimetype(fileName);
        }

        InputStream is = null;

        if (guessEncoding) {
            is = new BufferedInputStream(stream);
            is.mark(1024);
            writer.setEncoding(guessEncoding(is, mimeType, false));
            try {
                is.reset();
            } catch (IOException ioe) {
                if (LOGGER.isWarnEnabled()) {
                    LOGGER.warn("Failed to reset stream after trying to guess encoding: " + ioe.getMessage());
                }
            }
        } else {
            is = stream;
        }

        writer.putContent(is);
    } catch (ContentQuotaException cqe) {
        throw new InsufficientStorageException();
    } catch (ContentLimitViolationException clv) {
        throw new RequestEntityTooLargeException(clv.getMessage());
    } catch (ContentIOException cioe) {
        if (cioe.getCause() instanceof NodeLockedException) {
            throw (NodeLockedException) cioe.getCause();
        }
        throw cioe;
    }
}

From source file:uk.bl.wa.analyser.payload.WARCPayloadAnalysers.java

/**
 * Runs the configured payload analysers over one archive record and adds their findings to the Solr record.
 * <p>
 * Steps, in order: Tika extraction; capture of the first bytes of the payload as hex; optional DROID
 * format detection; ARC name rules; and finally a content-type specific analyser (HTML, image, PDF or
 * XML). Each step logs its own failure and does not stop the following steps. Between steps the input
 * stream is rewound with {@code reset()}.
 *
 * @param header    header of the record being analysed (URL and offset are used for logging)
 * @param tikainput the payload stream; NOTE(review): must support reset() back to the payload start,
 *                  which presumably requires the caller to have marked it - confirm
 * @param solr      the Solr record that receives the extracted fields
 */
public void analyse(ArchiveRecordHeader header, InputStream tikainput, SolrRecord solr) {
    log.debug("Analysing " + header.getUrl());

    final long start = System.nanoTime();
    // Analyse with tika:
    try {
        if (passUriToFormatTools) {
            solr = tika.extract(solr, tikainput, header.getUrl());
        } else {
            solr = tika.extract(solr, tikainput, null);
        }
    } catch (Exception i) {
        log.error(i + ": " + i.getMessage() + ";tika; " + header.getUrl() + "@" + header.getOffset());
    }
    Instrument.timeRel("WARCPayloadAnalyzers.analyze#total", "WARCPayloadAnalyzers.analyze#tikasolrextract",
            start);

    final long firstBytesStart = System.nanoTime();
    // Pull out the first few bytes, to hunt for new format by magic:
    try {
        tikainput.reset();
        byte[] ffb = new byte[this.firstBytesLength];
        // A single read() may return fewer bytes than are available, so fill the buffer in a loop
        int read = 0;
        while (read < ffb.length) {
            final int chunk = tikainput.read(ffb, read, ffb.length - read);
            if (chunk <= 0) {
                break;
            }
            read += chunk;
        }
        if (read >= 4) {
            // Only encode the bytes actually read; the rest of the buffer is zero padding, not payload
            String hexBytes = Hex.encodeHexString(ffb).substring(0, 2 * read);
            solr.addField(SolrFields.CONTENT_FFB, hexBytes.substring(0, 2 * 4));
            StringBuilder separatedHexBytes = new StringBuilder();
            for (String hexByte : Splitter.fixedLength(2).split(hexBytes)) {
                separatedHexBytes.append(hexByte);
                separatedHexBytes.append(" ");
            }
            if (this.extractContentFirstBytes) {
                solr.addField(SolrFields.CONTENT_FIRST_BYTES, separatedHexBytes.toString().trim());
            }
        }
    } catch (Exception i) {
        log.error(i + ": " + i.getMessage() + ";ffb; " + header.getUrl() + "@" + header.getOffset());
    }
    Instrument.timeRel("WARCPayloadAnalyzers.analyze#total", "WARCPayloadAnalyzers.analyze#firstbytes",
            firstBytesStart);

    // Also run DROID (restricted range):
    if (dd != null && runDroid) {
        final long droidStart = System.nanoTime();
        try {
            tikainput.reset();
            // Pass the URL in so DROID can fall back on that:
            Metadata metadata = new Metadata();
            if (passUriToFormatTools) {
                UsableURI uuri = UsableURIFactory.getInstance(header.getUrl());
                // Droid seems unhappy about spaces in filenames, so hack to avoid:
                String cleanUrl = uuri.getName().replace(" ", "+");
                metadata.set(Metadata.RESOURCE_NAME_KEY, cleanUrl);
            }
            // Run Droid:
            MediaType mt = dd.detect(tikainput, metadata);
            solr.addField(SolrFields.CONTENT_TYPE_DROID, mt.toString());
        } catch (Exception i) {
            // Note that DROID complains about some URLs with an IllegalArgumentException.
            log.error(i + ": " + i.getMessage() + ";dd; " + header.getUrl() + " @" + header.getOffset());
        }
        Instrument.timeRel("WARCPayloadAnalyzers.analyze#total", "WARCPayloadAnalyzers.analyze#droid",
                droidStart);
    }

    // Parse ARC name
    if (!arcname.getRules().isEmpty()) {
        final long nameStart = System.nanoTime();
        arcname.analyse(header, tikainput, solr);
        Instrument.timeRel("WARCPayloadAnalyzers.analyze#total", "WARCPayloadAnalyzers.analyze#arcname",
                nameStart);
    }

    // Dispatch to the analyser matching the content type recorded on the Solr record
    try {
        tikainput.reset();
        // NOTE(review): a missing content-type field ends up as a logged exception via the catch below
        String mime = (String) solr.getField(SolrFields.SOLR_CONTENT_TYPE).getValue();
        if (mime.startsWith("text") || mime.startsWith("application/xhtml+xml")) {
            html.analyse(header, tikainput, solr);

        } else if (mime.startsWith("image")) {
            if (this.extractImageFeatures) {
                image.analyse(header, tikainput, solr);
            }

        } else if (mime.startsWith("application/pdf")) {
            if (extractApachePreflightErrors) {
                pdf.analyse(header, tikainput, solr);
            }

        } else if (mime.startsWith("application/xml") || mime.startsWith("text/xml")) {
            // NOTE(review): "text/xml" already matches the "text" branch above, so only application/xml reaches here
            xml.analyse(header, tikainput, solr);

        } else {
            log.debug("No specific additional parser for: " + mime);
        }
    } catch (Exception i) {
        log.error(i + ": " + i.getMessage() + ";x; " + header.getUrl() + "@" + header.getOffset());
    }
    Instrument.timeRel("WARCIndexer.extract#analyzetikainput", "WARCPayloadAnalyzers.analyze#total", start);

}