List of usage examples for the java.io.InputStream.mark(int) method
public synchronized void mark(int readlimit)
From source file: org.sakaiproject.tool.assessment.facade.AssessmentGradingFacadeQueries.java
private byte[] getMediaStream(Long mediaId) { byte[] b = new byte[4000]; Session session = null;//from w ww .j av a 2 s .com Connection conn = null; InputStream in = null; ResultSet rs = null; PreparedStatement statement = null; try { session = getSessionFactory().openSession(); conn = session.connection(); log.debug("****Connection=" + conn); String query = "select MEDIA from SAM_MEDIA_T where MEDIAID=?"; statement = conn.prepareStatement(query); statement.setLong(1, mediaId.longValue()); rs = statement.executeQuery(); if (rs.next()) { java.lang.Object o = rs.getObject("MEDIA"); if (o != null) { in = rs.getBinaryStream("MEDIA"); in.mark(0); int ch; int len = 0; while ((ch = in.read()) != -1) { len++; } b = new byte[len]; in.reset(); in.read(b, 0, len); } } } catch (Exception e) { log.warn(e.getMessage()); } finally { if (session != null) { try { session.close(); } catch (Exception e1) { e1.printStackTrace(); } } if (rs != null) { try { rs.close(); } catch (Exception e1) { e1.printStackTrace(); } } if (statement != null) { try { statement.close(); } catch (Exception e1) { e1.printStackTrace(); } } if (in != null) { try { in.close(); } catch (Exception e1) { e1.printStackTrace(); } } if (conn != null) { try { conn.close(); } catch (Exception e1) { e1.printStackTrace(); } } } return b; }
From source file: Main.java
/**
 * Sniffs the character encoding of an XML stream by inspecting its first
 * bytes (byte-order marks and the byte pattern of a leading {@code "<?xm"}).
 *
 * <p>NOTE(review): this method consumes between 1 and 4 bytes from
 * {@code stream} (the exact count depends on which branch matches) and does
 * NOT rewind, except that the UTF-8 BOM branch calls {@code mark(1024)} to
 * pin the position just past the BOM (i.e. the BOM is deliberately
 * stripped). Callers presumably re-mark/reset around this call — TODO
 * confirm against call sites.
 *
 * @param stream the input to sniff; must be positioned at byte 0
 * @return one of the encoding constants ({@code UCS_4BE}, {@code UCS_4LE},
 *         {@code UTF_16BE}, {@code UTF_16LE}, {@code UTF_8},
 *         {@code EBCDIC}, {@code UNKNOWN}) or {@code null} if no
 *         signature matched
 * @throws IOException if reading from the stream fails
 */
public static String getStreamEncoding(InputStream stream) throws IOException {
    String encoding = null;
    boolean DEBUG = false;
    if (DEBUG) {
        // Debug aid only: dump every charset the JVM supports.
        SortedMap map = Charset.availableCharsets();
        Object[] keys = map.keySet().toArray();
        for (int i = 0; i < keys.length; i++) {
            System.out.println("Key = " + keys[i] + " Value = " + map.get(keys[i]));
        }
    }
    int ch = stream.read();
    if (DEBUG)
        System.out.print("[" + ch + "]");
    if (ch == 0x00) {
        // First byte 0x00: candidates are UCS-4 big-endian variants or
        // big-endian UTF-16 without BOM ("<?" as 00 3C 00 3F).
        ch = stream.read();
        if (DEBUG)
            System.out.print("[" + ch + "]");
        if (ch == 0x00) {
            ch = stream.read();
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0xFE) {
                // 00 00 FE FF: UCS-4 big-endian (1234 order) BOM.
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0xFF) {
                    encoding = UCS_4BE;
                }
            } else if (ch == 0xFF) {
                // 00 00 FF FE: unusual UCS-4 byte order (2143) — not supported.
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0xFE) {
                    encoding = UNKNOWN;
                }
            } else if (ch == 0x00) {
                // 00 00 00 3C: '<' in UCS-4 big-endian without a BOM.
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x3C) {
                    encoding = UCS_4BE;
                }
            } else if (ch == 0x3C) {
                // 00 00 3C 00: '<' in UCS-4 unusual order (2143) — not supported.
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x00) {
                    encoding = UNKNOWN;
                }
            }
        } else if (ch == 0x3C) {
            ch = stream.read();
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0x00) {
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x00) {
                    // 00 3C 00 00: '<' in UCS-4 unusual order (3412) — not supported.
                    encoding = UNKNOWN;
                } else if (ch == 0x3F) {
                    // 00 3C 00 3F: "<?" in UTF-16 big-endian without a BOM.
                    encoding = UTF_16BE;
                }
            }
        }
    } else if (ch == 0x3C) {
        // First byte '<': little-endian UTF-16/UCS-4 without BOM, or plain UTF-8.
        ch = stream.read();
        if (DEBUG)
            System.out.print("[" + ch + "]");
        if (ch == 0x00) {
            ch = stream.read();
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0x00) {
                // 3C 00 00 00: '<' in UCS-4 little-endian.
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x00) {
                    encoding = UCS_4LE;
                }
            } else if (ch == 0x3F) {
                // 3C 00 3F 00: "<?" in UTF-16 little-endian.
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x00) {
                    encoding = UTF_16LE;
                }
            }
        } else if (ch == 0x3F) {
            // 3C 3F 78 6D: ASCII "<?xm" — treat as UTF-8 (or any ASCII superset).
            ch = stream.read();
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0x78) {
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x6D) {
                    encoding = UTF_8;
                }
            }
        }
    } else if (ch == 0xFF) {
        // FF FE: UTF-16 little-endian BOM; FF FE 00 00: UCS-4 little-endian BOM.
        ch = stream.read();
        if (DEBUG)
            System.out.print("[" + ch + "]");
        if (ch == 0xFE) {
            ch = stream.read();
            encoding = UTF_16LE;
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0x00) {
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x00) {
                    // Overrides the UTF-16LE guess made above.
                    encoding = UCS_4LE;
                }
            }
        }
    } else if (ch == 0xFE) {
        // FE FF: UTF-16 big-endian BOM; FE FF 00 00: unusual UCS-4 (3412) — unsupported.
        ch = stream.read();
        if (DEBUG)
            System.out.print("[" + ch + "]");
        if (ch == 0xFF) {
            ch = stream.read();
            encoding = UTF_16BE;
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0x00) {
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x00) {
                    // Overrides the UTF-16BE guess made above.
                    encoding = UNKNOWN;
                }
            }
        }
    } else if (ch == 0xEF) {
        // EF BB BF: UTF-8 BOM.
        ch = stream.read();
        if (DEBUG)
            System.out.print("[" + ch + "]");
        if (ch == 0xBB) {
            ch = stream.read();
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0xBF) {
                // Mark just past the BOM so a later reset() lands after it
                // (i.e. the byte order mark is stripped from the content).
                stream.mark(1024);
                encoding = UTF_8;
            }
        }
    } else if (ch == 0x4C) {
        // 4C 6F A7 94: "<?xm" in EBCDIC.
        ch = stream.read();
        if (DEBUG)
            System.out.print("[" + ch + "]");
        if (ch == 0x6F) {
            ch = stream.read();
            if (DEBUG)
                System.out.print("[" + ch + "]");
            if (ch == 0xA7) {
                ch = stream.read();
                if (DEBUG)
                    System.out.print("[" + ch + "]");
                if (ch == 0x94) {
                    encoding = EBCDIC;
                }
            }
        }
    }
    if (DEBUG)
        System.out.println("getStreamEncoding() [" + encoding + "]");
    return encoding;
}
From source file: uk.bl.wa.indexer.WARCIndexer.java
/** * This extracts metadata from the ArchiveRecord and creates a suitable SolrRecord. * Removes the text field if flag set./* ww w .j ava 2 s.co m*/ * * @param archiveName * @param record * @param isTextIncluded * @return * @throws IOException */ public SolrRecord extract(String archiveName, ArchiveRecord record, boolean isTextIncluded) throws IOException { final long start = System.nanoTime(); ArchiveRecordHeader header = record.getHeader(); SolrRecord solr = solrFactory.createRecord(archiveName, header); if (!header.getHeaderFields().isEmpty()) { if (header.getHeaderFieldKeys().contains(HEADER_KEY_TYPE)) { log.debug("Looking at " + header.getHeaderValue(HEADER_KEY_TYPE)); if (!checkRecordType((String) header.getHeaderValue(HEADER_KEY_TYPE))) { return null; } // Store WARC record type: solr.setField(SolrFields.SOLR_RECORD_TYPE, (String) header.getHeaderValue(HEADER_KEY_TYPE)); //Store WARC-Record-ID solr.setField(SolrFields.WARC_KEY_ID, (String) header.getHeaderValue(HEADER_KEY_ID)); solr.setField(SolrFields.WARC_IP, (String) header.getHeaderValue(HEADER_KEY_IP)); } else { // else we're processing ARCs so nothing to filter and no // revisits solr.setField(SolrFields.SOLR_RECORD_TYPE, "arc"); } if (header.getUrl() == null) return null; // Get the URL: String targetUrl = Normalisation.sanitiseWARCHeaderValue(header.getUrl()); // Strip down very long URLs to avoid // "org.apache.commons.httpclient.URIException: Created (escaped) // uuri > 2083" // Trac #2271: replace string-splitting with URI-based methods. 
if (targetUrl.length() > 2000) targetUrl = targetUrl.substring(0, 2000); log.debug( "Current heap usage: " + FileUtils.byteCountToDisplaySize(Runtime.getRuntime().totalMemory())); log.debug("Processing " + targetUrl + " from " + archiveName); // Check the filters: if (this.checkProtocol(targetUrl) == false) return null; if (this.checkUrl(targetUrl) == false) return null; if (this.checkExclusionFilter(targetUrl) == false) return null; // ----------------------------------------------------- // Add user supplied Archive-It Solr fields and values: // ----------------------------------------------------- solr.setField(SolrFields.INSTITUTION, WARCIndexerCommand.institution); solr.setField(SolrFields.COLLECTION, WARCIndexerCommand.collection); solr.setField(SolrFields.COLLECTION_ID, WARCIndexerCommand.collection_id); // --- Basic headers --- // Basic metadata: solr.setField(SolrFields.SOURCE_FILE, archiveName); solr.setField(SolrFields.SOURCE_FILE_OFFSET, "" + header.getOffset()); String filePath = header.getReaderIdentifier();//Full path of file //Will convert windows path to linux path. Linux paths will not be modified. String linuxFilePath = FilenameUtils.separatorsToUnix(filePath); solr.setField(SolrFields.SOURCE_FILE_PATH, linuxFilePath); byte[] url_md5digest = md5 .digest(Normalisation.sanitiseWARCHeaderValue(header.getUrl()).getBytes("UTF-8")); // String url_base64 = // Base64.encodeBase64String(fullUrl.getBytes("UTF-8")); String url_md5hex = Base64.encodeBase64String(url_md5digest); solr.setField(SolrFields.SOLR_URL, Normalisation.sanitiseWARCHeaderValue(header.getUrl())); if (addNormalisedURL) { solr.setField(SolrFields.SOLR_URL_NORMALISED, Normalisation.canonicaliseURL(targetUrl)); } // Get the length, but beware, this value also includes the HTTP headers (i.e. 
it is the payload_length): long content_length = header.getLength(); // Also pull out the file extension, if any: String resourceName = parseResourceName(targetUrl); solr.addField(SolrFields.RESOURCE_NAME, resourceName); solr.addField(SolrFields.CONTENT_TYPE_EXT, parseExtension(resourceName)); // Add URL-based fields: URI saneURI = parseURL(solr, targetUrl); // Prepare crawl date information: String waybackDate = (header.getDate().replaceAll("[^0-9]", "")); Date crawlDate = getWaybackDate(waybackDate); // Store the dates: solr.setField(SolrFields.CRAWL_DATE, formatter.format(crawlDate)); solr.setField(SolrFields.CRAWL_YEAR, getYearFromDate(crawlDate)); // Use the current value as the waybackDate: solr.setField(SolrFields.WAYBACK_DATE, waybackDate); Instrument.timeRel("WARCIndexer.extract#total", "WARCIndexer.extract#archeaders", start); // ----------------------------------------------------- // Now consume record and HTTP headers (only) // ----------------------------------------------------- InputStream tikainput = null; // Only parse HTTP headers for HTTP URIs if (targetUrl.startsWith("http")) { // Parse HTTP headers: String statusCode = null; if (record instanceof WARCRecord) { statusCode = this.processWARCHeaders(record, header, targetUrl, solr); tikainput = record; } else if (record instanceof ARCRecord) { ARCRecord arcr = (ARCRecord) record; statusCode = "" + arcr.getStatusCode(); this.processHeaders(solr, statusCode, arcr.getHttpHeaders(), targetUrl); arcr.skipHttpHeader(); tikainput = arcr; } else { log.error("FAIL! Unsupported archive record type."); return solr; } solr.setField(SolrFields.SOLR_STATUS_CODE, statusCode); // Skip recording non-content URLs (i.e. 
2xx responses only please): if (!checkResponseCode(statusCode)) { log.debug("Skipping this record based on status code " + statusCode + ": " + targetUrl); return null; } } else { log.info("Skipping header parsing as URL does not start with 'http'"); } // ----------------------------------------------------- // Headers have been processed, payload ready to cache: // ----------------------------------------------------- // Update the content_length based on what's available: content_length = tikainput.available(); // Record the length: solr.setField(SolrFields.CONTENT_LENGTH, "" + content_length); // Create an appropriately cached version of the payload, to allow analysis. final long hashStreamStart = System.nanoTime(); HashedCachedInputStream hcis = new HashedCachedInputStream(header, tikainput, content_length); tikainput = hcis.getInputStream(); String hash = hcis.getHash(); Instrument.timeRel("WARCIndexer.extract#total", "WARCIndexer.extract#hashstreamwrap", hashStreamStart); // Use an ID that ensures every URL+timestamp gets a separate // record: String id = waybackDate + "/" + url_md5hex; // Set these last: solr.setField(SolrFields.ID, id); solr.setField(SolrFields.HASH, hash); // ----------------------------------------------------- // Apply any annotations: // ----------------------------------------------------- if (ant != null) { try { ant.applyAnnotations(saneURI, solr.getSolrDocument()); } catch (URISyntaxException e) { e.printStackTrace(); log.error("Failed to annotate " + saneURI + " : " + e); } } // ----------------------------------------------------- // WARC revisit record handling: // ----------------------------------------------------- // If this is a revisit record, we should just return an update to the crawl_dates (when using hashUrlId) if (WARCConstants.WARCRecordType.revisit.name() .equalsIgnoreCase((String) header.getHeaderValue(HEADER_KEY_TYPE))) { solr.removeField(SolrFields.CONTENT_LENGTH); //It is 0 and would mess with statistics //Copy 
content_type_served to content_type (no tika/droid for revisits) solr.addField(SolrFields.SOLR_CONTENT_TYPE, (String) solr.getFieldValue(SolrFields.CONTENT_TYPE_SERVED)); return solr; } // ----------------------------------------------------- // Payload duplication has been checked, ready to parse: // ----------------------------------------------------- final long analyzeStart = System.nanoTime(); // Mark the start of the payload, with a readLimit corresponding to // the payload size: tikainput.mark((int) content_length); // Pass on to other extractors as required, resetting the stream before each: this.wpa.analyse(archiveName, header, tikainput, solr, content_length); Instrument.timeRel("WARCIndexer.extract#total", "WARCIndexer.extract#analyzetikainput", analyzeStart); // Clear up the caching of the payload: hcis.cleanup(); // ----------------------------------------------------- // Payload analysis complete, now performing text analysis: // ----------------------------------------------------- this.txa.analyse(solr); // Remove the Text Field if required if (!isTextIncluded) { solr.removeField(SolrFields.SOLR_EXTRACTED_TEXT); } else { // Otherwise, decide whether to store or both store and index // the text: if (storeText == false) { // Copy the text into the indexed (but not stored) field: solr.setField(SolrFields.SOLR_EXTRACTED_TEXT_NOT_STORED, (String) solr.getField(SolrFields.SOLR_EXTRACTED_TEXT).getFirstValue()); // Take the text out of the original (stored) field. solr.removeField(SolrFields.SOLR_EXTRACTED_TEXT); } } } Instrument.timeRel("WARCIndexerCommand.parseWarcFiles#solrdocCreation", "WARCIndexer.extract#total", start); String servedType = "" + solr.getField(SolrFields.CONTENT_TYPE_SERVED); Instrument.timeRel("WARCIndexer#content_types", "WARCIndexer#" + (servedType.contains(";") ? servedType.split(";")[0] : servedType), start); Instrument.timeRel("WARCIndexer#content_types", start); return solr; }
From source file: org.regenstrief.util.Util.java
/** * Retrieves an InputStream that can be marked and marks it * /*from w w w . ja va2s . c o m*/ * @param in the InputStream * @return the markable InputStream **/ public static final InputStream getMarkableInputStream(InputStream in) { in = in.markSupported() ? in : new BufferedInputStream(in); in.mark(MAX_MARK_BUFFER); return in; }
From source file: org.regenstrief.util.Util.java
/**
 * Returns a mark-capable view of the given stream and marks its current
 * position with the supplied read-ahead limit.
 *
 * <p>If the stream already supports mark/reset it is returned unchanged;
 * otherwise it is wrapped in a {@link BufferedInputStream}.
 *
 * @param in the InputStream
 * @param buffer the read-ahead limit passed to {@link InputStream#mark(int)}
 * @return the markable InputStream, already marked at its current position
 **/
public static final InputStream getMarkableInputStream(InputStream in, final int buffer) {
    if (!in.markSupported()) {
        in = new BufferedInputStream(in);
    }
    in.mark(buffer);
    return in;
}