Usage examples for java.lang.InterruptedException#getClass()
@HotSpotIntrinsicCandidate public final native Class<?> getClass();
From source file:com.searchcode.app.jobs.IndexGitRepoJob.java
/** * Indexes all the documents in the path provided. Will also remove anything from the index if not on disk * Generally this is a slow update used only for the inital clone of a repository * NB this can be used for updates but it will be much slower as it needs to to walk the contents of the disk *///from w w w .j av a 2 s. c o m public void indexDocsByPath(Path path, String repoName, String repoLocations, String repoRemoteLocation, boolean existingRepo) { SearchcodeLib scl = Singleton.getSearchCodeLib(); // Should have data object by this point List<String> fileLocations = new ArrayList<>(); Queue<CodeIndexDocument> codeIndexDocumentQueue = Singleton.getCodeIndexQueue(); // Convert once outside the main loop String fileRepoLocations = FilenameUtils.separatorsToUnix(repoLocations); boolean lowMemory = this.LOWMEMORY; boolean useSystemGit = this.USESYSTEMGIT; try { Files.walkFileTree(path, new SimpleFileVisitor<Path>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { while (CodeIndexer.shouldPauseAdding()) { Singleton.getLogger().info("Pausing parser."); try { Thread.sleep(SLEEPTIME); } catch (InterruptedException ex) { } } // Convert Path file to unix style that way everything is easier to reason about String fileParent = FilenameUtils.separatorsToUnix(file.getParent().toString()); String fileToString = FilenameUtils.separatorsToUnix(file.toString()); String fileName = file.getFileName().toString(); String md5Hash = Values.EMPTYSTRING; if (fileParent.endsWith("/.git") || fileParent.contains("/.git/")) { return FileVisitResult.CONTINUE; } List<String> codeLines; try { codeLines = Helpers.readFileLines(fileToString, MAXFILELINEDEPTH); } catch (IOException ex) { return FileVisitResult.CONTINUE; } try { FileInputStream fis = new FileInputStream(new File(fileToString)); md5Hash = org.apache.commons.codec.digest.DigestUtils.md5Hex(fis); fis.close(); } catch (IOException ex) { Singleton.getLogger().warning("Unable 
to generate MD5 for " + fileToString); } // is the file minified? if (scl.isMinified(codeLines)) { Singleton.getLogger().info("Appears to be minified will not index " + fileToString); return FileVisitResult.CONTINUE; } String languageName = scl.languageGuesser(fileName, codeLines); String fileLocation = fileToString.replace(fileRepoLocations, Values.EMPTYSTRING) .replace(fileName, Values.EMPTYSTRING); String fileLocationFilename = fileToString.replace(fileRepoLocations, Values.EMPTYSTRING); String repoLocationRepoNameLocationFilename = fileToString; String newString = getBlameFilePath(fileLocationFilename); List<CodeOwner> owners; if (useSystemGit) { owners = getBlameInfoExternal(codeLines.size(), repoName, fileRepoLocations, newString); } else { owners = getBlameInfo(codeLines.size(), repoName, fileRepoLocations, newString); } String codeOwner = scl.codeOwner(owners); // If low memory don't add to the queue, just index it directly if (lowMemory) { CodeIndexer.indexDocument(new CodeIndexDocument(repoLocationRepoNameLocationFilename, repoName, fileName, fileLocation, fileLocationFilename, md5Hash, languageName, codeLines.size(), StringUtils.join(codeLines, " "), repoRemoteLocation, codeOwner)); } else { Singleton.incrementCodeIndexLinesCount(codeLines.size()); codeIndexDocumentQueue.add(new CodeIndexDocument(repoLocationRepoNameLocationFilename, repoName, fileName, fileLocation, fileLocationFilename, md5Hash, languageName, codeLines.size(), StringUtils.join(codeLines, " "), repoRemoteLocation, codeOwner)); } fileLocations.add(fileLocationFilename); return FileVisitResult.CONTINUE; } }); } catch (IOException ex) { Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + "\n with message: " + ex.getMessage()); } if (existingRepo) { CodeSearcher cs = new CodeSearcher(); List<String> indexLocations = cs.getRepoDocuments(repoName); for (String file : indexLocations) { if (!fileLocations.contains(file)) { 
Singleton.getLogger().info("Missing from disk, removing from index " + file); try { CodeIndexer.deleteByFileLocationFilename(file); } catch (IOException ex) { Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + "\n with message: " + ex.getMessage()); } } } } }
From source file:org.apache.manifoldcf.crawler.connectors.sharepoint.SharePointRepository.java
/** Method that fetches and indexes a file fetched from a SharePoint URL, with appropriate error handling * etc.// w w w. j av a 2 s . c o m */ protected void fetchAndIndexFile(IProcessActivity activities, String documentIdentifier, String version, String fileUrl, String fetchUrl, String[] accessTokens, String[] denyTokens, Date createdDate, Date modifiedDate, Map<String, String> metadataValues, String guid, SystemMetadataDescription sDesc) throws ManifoldCFException, ServiceInterruption { String errorCode = null; String errorDesc = null; long startTime = System.currentTimeMillis(); Long fileLengthLong = null; try { // Before we fetch, confirm that the output connector will accept the document if (!activities.checkURLIndexable(fileUrl)) { // URL failed errorCode = activities.EXCLUDED_URL; errorDesc = "Document rejected because of URL (" + fileUrl + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says URL '" + fileUrl + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Also check mime type String contentType = mapExtensionToMimeType(documentIdentifier); if (!activities.checkMimeTypeIndexable(contentType)) { // Mime type failed errorCode = activities.EXCLUDED_MIMETYPE; errorDesc = "Document rejected because of mime type (" + contentType + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says mime type '" + ((contentType == null) ? 
"null" : contentType) + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Now check date stamp if (!activities.checkDateIndexable(modifiedDate)) { // Date failed errorCode = activities.EXCLUDED_DATE; errorDesc = "Document rejected because of date (" + modifiedDate + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Skipping document '" + documentIdentifier + "' because output connector says date '" + ((modifiedDate == null) ? "null" : modifiedDate) + "' is not indexable"); activities.noDocument(documentIdentifier, version); return; } // Set stuff up for fetch activity logging try { // Read the document into a local temporary file, so I get a reliable length. File tempFile = File.createTempFile("__shp__", ".tmp"); try { // Open the output stream OutputStream os = new FileOutputStream(tempFile); try { // Catch all exceptions having to do with reading the document try { ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os); emt.start(); int returnCode = emt.finishUp(); if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415) { // Well, sharepoint thought the document was there, but it really isn't, so delete it. 
errorCode = "DOCUMENTNOTFOUND"; errorDesc = "Document not found; HTTP code " + returnCode; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "SharePoint: Document at '" + fileUrl + "' failed to fetch with code " + Integer.toString(returnCode) + ", deleting"); activities.noDocument(documentIdentifier, version); return; } else if (returnCode != 200) { errorCode = "UNKNOWNHTTPCODE"; errorDesc = "Unknown HTTP return code " + returnCode; throw new ManifoldCFException("Error fetching document '" + fileUrl + "': " + Integer.toString(returnCode)); } } catch (InterruptedException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (java.net.SocketTimeoutException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (org.apache.http.conn.ConnectTimeoutException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IllegalArgumentException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.error("SharePoint: Illegal argument: " + e.getMessage(), 
e); throw new ManifoldCFException("SharePoint: Illegal argument: " + e.getMessage(), e); } catch (org.apache.http.HttpException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: HttpException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } catch (IOException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); Logging.connectors.warn("SharePoint: IOException thrown: " + e.getMessage(), e); long currentTime = System.currentTimeMillis(); throw new ServiceInterruption( "SharePoint is down attempting to read '" + fileUrl + "', retrying: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true); } } finally { os.close(); } // Ingest the document long documentLength = tempFile.length(); if (!activities.checkLengthIndexable(documentLength)) { // Document too long errorCode = activities.EXCLUDED_LENGTH; errorDesc = "Document excluded due to length (" + documentLength + ")"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("SharePoint: Document '" + documentIdentifier + "' was too long, according to output connector"); activities.noDocument(documentIdentifier, version); return; } InputStream is = new FileInputStream(tempFile); try { RepositoryDocument data = new RepositoryDocument(); data.setBinary(is, documentLength); data.setFileName(mapToFileName(documentIdentifier)); if (contentType != null) data.setMimeType(contentType); setDataACLs(data, accessTokens, denyTokens); setPathAttribute(data, sDesc, documentIdentifier); if (modifiedDate != null) data.setModifiedDate(modifiedDate); if (createdDate != null) data.setCreatedDate(createdDate); if (metadataValues != null) { 
Iterator<String> iter = metadataValues.keySet().iterator(); while (iter.hasNext()) { String fieldName = iter.next(); String fieldData = metadataValues.get(fieldName); data.addField(fieldName, fieldData); } } data.addField("GUID", guid); try { activities.ingestDocumentWithException(documentIdentifier, version, fileUrl, data); errorCode = "OK"; fileLengthLong = new Long(documentLength); } catch (IOException e) { handleIOException(e, "reading document"); } return; } finally { try { is.close(); } catch (java.net.SocketTimeoutException e) { // This is not fatal Logging.connectors.debug("SharePoint: Timeout before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } catch (org.apache.http.conn.ConnectTimeoutException e) { // This is not fatal Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { // This is not fatal Logging.connectors .debug("SharePoint: Server closed connection before read could finish for '" + fileUrl + "': " + e.getMessage(), e); } } } finally { tempFile.delete(); } } catch (java.net.SocketTimeoutException e) { throw new ManifoldCFException( "Socket timeout error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } catch (org.apache.http.conn.ConnectTimeoutException e) { throw new ManifoldCFException( "Connect timeout error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } catch (InterruptedIOException e) { throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { throw new ManifoldCFException( "IO error writing '" + fileUrl + "' to temporary file: " + e.getMessage(), e); } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) errorCode = null; throw e; } finally 
{ if (errorCode != null) activities.recordActivity(new Long(startTime), ACTIVITY_FETCH, fileLengthLong, documentIdentifier, errorCode, errorDesc, null); } }
From source file:org.apache.manifoldcf.crawler.connectors.livelink.LivelinkConnector.java
/** * Connects to the specified Livelink document using HTTP protocol * @param documentIdentifier is the document identifier (as far as the crawler knows). * @param activities is the process activity structure, so we can ingest *//* www .j a va2s. com*/ protected void ingestFromLiveLink(LivelinkContext llc, String documentIdentifier, String version, String[] actualAcls, String[] denyAcls, String[] categoryPaths, IProcessActivity activities, MetadataDescription desc, SystemMetadataDescription sDesc) throws ManifoldCFException, ServiceInterruption { String contextMsg = "for '" + documentIdentifier + "'"; // Fetch logging long startTime = System.currentTimeMillis(); String resultCode = null; String resultDescription = null; Long readSize = null; int objID; int vol; int colonPos = documentIdentifier.indexOf(":", 1); if (colonPos == -1) { objID = new Integer(documentIdentifier.substring(1)).intValue(); vol = LLENTWK_VOL; } else { objID = new Integer(documentIdentifier.substring(colonPos + 1)).intValue(); vol = new Integer(documentIdentifier.substring(1, colonPos)).intValue(); } // Try/finally for fetch logging try { String viewHttpAddress = convertToViewURI(documentIdentifier); if (viewHttpAddress == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: No view URI " + contextMsg + " - not ingesting"); resultCode = "NOVIEWURI"; resultDescription = "Document had no view URI"; activities.noDocument(documentIdentifier, version); return; } // Check URL first if (!activities.checkURLIndexable(viewHttpAddress)) { // Document not ingestable due to URL resultCode = activities.EXCLUDED_URL; resultDescription = "URL (" + viewHttpAddress + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its URL (" + viewHttpAddress + ") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } // Add general 
metadata ObjectInformation objInfo = llc.getObjectInformation(vol, objID); VersionInformation versInfo = llc.getVersionInformation(vol, objID, 0); if (!objInfo.exists()) { resultCode = "OBJECTNOTFOUND"; resultDescription = "Object was not found in Livelink"; Logging.connectors.debug("Livelink: No object " + contextMsg + ": not ingesting"); activities.noDocument(documentIdentifier, version); return; } if (!versInfo.exists()) { resultCode = "VERSIONNOTFOUND"; resultDescription = "Version was not found in Livelink"; Logging.connectors.debug("Livelink: No version data " + contextMsg + ": not ingesting"); activities.noDocument(documentIdentifier, version); return; } String mimeType = versInfo.getMimeType(); if (!activities.checkMimeTypeIndexable(mimeType)) { // Document not indexable because of its mime type resultCode = activities.EXCLUDED_MIMETYPE; resultDescription = "Mime type (" + mimeType + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its mime type (" + mimeType + ") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } Long dataSize = versInfo.getDataSize(); if (dataSize == null) { // Document had no length resultCode = "DOCUMENTNOLENGTH"; resultDescription = "Document had no length in Livelink"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug( "Livelink: Excluding document " + documentIdentifier + " because it had no length"); activities.noDocument(documentIdentifier, version); return; } if (!activities.checkLengthIndexable(dataSize.longValue())) { // Document not indexable because of its length resultCode = activities.EXCLUDED_LENGTH; resultDescription = "Document length (" + dataSize + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its length (" + dataSize + 
") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } Date modifyDate = versInfo.getModifyDate(); if (!activities.checkDateIndexable(modifyDate)) { // Document not indexable because of its date resultCode = activities.EXCLUDED_DATE; resultDescription = "Document date (" + modifyDate + ") was rejected by output connector"; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Excluding document " + documentIdentifier + " because its date (" + modifyDate + ") was rejected by output connector"); activities.noDocument(documentIdentifier, version); return; } String fileName = versInfo.getFileName(); Date creationDate = objInfo.getCreationDate(); Integer parentID = objInfo.getParentId(); RepositoryDocument rd = new RepositoryDocument(); // Add general data we need for the output connector if (mimeType != null) rd.setMimeType(mimeType); if (fileName != null) rd.setFileName(fileName); if (creationDate != null) rd.setCreatedDate(creationDate); if (modifyDate != null) rd.setModifiedDate(modifyDate); rd.addField(GENERAL_NAME_FIELD, objInfo.getName()); rd.addField(GENERAL_DESCRIPTION_FIELD, objInfo.getComments()); if (creationDate != null) rd.addField(GENERAL_CREATIONDATE_FIELD, DateParser.formatISO8601Date(creationDate)); if (modifyDate != null) rd.addField(GENERAL_MODIFYDATE_FIELD, DateParser.formatISO8601Date(modifyDate)); if (parentID != null) rd.addField(GENERAL_PARENTID, parentID.toString()); UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue()); UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue()); UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue()); if (owner != null) rd.addField(GENERAL_OWNER, owner.getName()); if (creator != null) rd.addField(GENERAL_CREATOR, creator.getName()); if (modifier != null) rd.addField(GENERAL_MODIFIER, modifier.getName()); // Iterate over the metadata items. 
These are organized by category // for speed of lookup. Iterator<MetadataItem> catIter = desc.getItems(categoryPaths); while (catIter.hasNext()) { MetadataItem item = catIter.next(); MetadataPathItem pathItem = item.getPathItem(); if (pathItem != null) { int catID = pathItem.getCatID(); // grab the associated catversion LLValue catVersion = getCatVersion(objID, catID); if (catVersion != null) { // Go through attributes now Iterator<String> attrIter = item.getAttributeNames(); while (attrIter.hasNext()) { String attrName = attrIter.next(); // Create a unique metadata name String metadataName = pathItem.getCatName() + ":" + attrName; // Fetch the metadata and stuff it into the RepositoryData structure String[] metadataValue = getAttributeValue(catVersion, attrName); if (metadataValue != null) rd.addField(metadataName, metadataValue); else Logging.connectors.warn("Livelink: Metadata attribute '" + metadataName + "' does not seem to exist; please correct the job"); } } } } if (actualAcls != null && denyAcls != null) rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, actualAcls, denyAcls); // Add the path metadata item into the mix, if enabled String pathAttributeName = sDesc.getPathAttributeName(); if (pathAttributeName != null && pathAttributeName.length() > 0) { String pathString = sDesc.getPathAttributeValue(documentIdentifier); if (pathString != null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Path attribute name is '" + pathAttributeName + "'" + contextMsg + ", value is '" + pathString + "'"); rd.addField(pathAttributeName, pathString); } } if (ingestProtocol != null) { // Use HTTP to fetch document! 
String ingestHttpAddress = convertToIngestURI(documentIdentifier); if (ingestHttpAddress == null) { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: No fetch URI " + contextMsg + " - not ingesting"); resultCode = "NOURI"; resultDescription = "Document had no fetch URI"; activities.noDocument(documentIdentifier, version); return; } // Set up connection HttpClient client = getInitializedClient(contextMsg); long currentTime; if (Logging.connectors.isInfoEnabled()) Logging.connectors.info("Livelink: " + ingestHttpAddress); HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress); method.setHeader(new BasicHeader("Accept", "*/*")); boolean wasInterrupted = false; ExecuteMethodThread methodThread = new ExecuteMethodThread(client, method); methodThread.start(); try { int statusCode = methodThread.getResponseCode(); switch (statusCode) { case 500: case 502: Logging.connectors.warn("Livelink: Service interruption during fetch " + contextMsg + " with Livelink HTTP Server, retrying..."); resultCode = "FETCHFAILED"; resultDescription = "HTTP error code " + statusCode + " fetching document"; throw new ServiceInterruption("Service interruption during fetch", new ManifoldCFException(Integer.toString(statusCode) + " error while fetching"), System.currentTimeMillis() + 60000L, System.currentTimeMillis() + 600000L, -1, true); case HttpStatus.SC_UNAUTHORIZED: Logging.connectors.warn("Livelink: Document fetch unauthorized for " + ingestHttpAddress + " (" + contextMsg + ")"); // Since we logged in, we should fail here if the ingestion user doesn't have access to the // the document, but if we do, don't fail hard. 
resultCode = "UNAUTHORIZED"; resultDescription = "Document fetch was unauthorized by IIS"; activities.noDocument(documentIdentifier, version); return; case HttpStatus.SC_OK: if (Logging.connectors.isDebugEnabled()) Logging.connectors .debug("Livelink: Created http document connection to Livelink " + contextMsg); // A non-existent content length will cause a value of -1 to be returned. This seems to indicate that the session login did not work right. if (methodThread.getResponseContentLength() < 0) { resultCode = "SESSIONLOGINFAILED"; resultDescription = "Response content length was -1, which usually means session login did not succeed"; activities.noDocument(documentIdentifier, version); return; } try { InputStream is = methodThread.getSafeInputStream(); try { rd.setBinary(is, dataSize); activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd); resultCode = "OK"; readSize = dataSize; if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Livelink: Ingesting done " + contextMsg); } finally { // Close stream via thread, since otherwise this can hang is.close(); } } catch (InterruptedException e) { wasInterrupted = true; throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (HttpException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleHttpException(contextMsg, e); } catch (IOException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleIOException(contextMsg, e); } break; case HttpStatus.SC_BAD_REQUEST: case HttpStatus.SC_USE_PROXY: case HttpStatus.SC_GONE: resultCode = "HTTPERROR"; resultDescription = "Http request returned status " + Integer.toString(statusCode); throw new ManifoldCFException( "Unrecoverable request failure; error = " + Integer.toString(statusCode)); default: resultCode = "UNKNOWNHTTPCODE"; resultDescription = "Http request 
returned status " + Integer.toString(statusCode); Logging.connectors.warn("Livelink: Attempt to retrieve document from '" + ingestHttpAddress + "' received a response of " + Integer.toString(statusCode) + "; retrying in one minute"); currentTime = System.currentTimeMillis(); throw new ServiceInterruption("Fetch failed; retrying in 1 minute", new ManifoldCFException( "Fetch failed with unknown code " + Integer.toString(statusCode)), currentTime + 60000L, currentTime + 600000L, -1, true); } } catch (InterruptedException e) { // Drop the connection on the floor methodThread.interrupt(); methodThread = null; throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (HttpException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleHttpException(contextMsg, e); } catch (IOException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleIOException(contextMsg, e); } finally { if (methodThread != null) { methodThread.abort(); try { if (!wasInterrupted) methodThread.finishUp(); } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(), e, ManifoldCFException.INTERRUPTED); } } } } else { // Use FetchVersion instead long currentTime; // Fire up the document reading thread DocumentReadingThread t = new DocumentReadingThread(vol, objID, 0); boolean wasInterrupted = false; t.start(); try { try { InputStream is = t.getSafeInputStream(); try { // Can only index while background thread is running! rd.setBinary(is, dataSize); activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd); resultCode = "OK"; readSize = dataSize; } finally { is.close(); } } catch (java.net.SocketTimeoutException e) { throw e; } catch (InterruptedIOException e) { wasInterrupted = true; throw e; } finally { if (!wasInterrupted) t.finishUp(); } // No errors. 
Record the fact that we made it. } catch (InterruptedException e) { t.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleIOException(contextMsg, e); } catch (RuntimeException e) { resultCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); resultDescription = e.getMessage(); handleLivelinkRuntimeException(e, 0, true); } } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) resultCode = null; throw e; } finally { if (resultCode != null) activities.recordActivity(new Long(startTime), ACTIVITY_FETCH, readSize, vol + ":" + objID, resultCode, resultDescription, null); } }
From source file:org.apache.manifoldcf.crawler.connectors.dropbox.DropboxRepositoryConnector.java
/** Process a set of documents. * This is the method that should cause each document to be fetched, processed, and the results either added * to the queue of documents for the current job, and/or entered into the incremental ingestion manager. * The document specification allows this class to filter what is done based on the job. * The connector will be connected before this method can be called. *@param documentIdentifiers is the set of document identifiers to process. *@param statuses are the currently-stored document versions for each document in the set of document identifiers * passed in above.// w w w .j av a2s . c o m *@param activities is the interface this method should use to queue up new document references * and ingest documents. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one. */ @Override public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { Logging.connectors.debug("DROPBOX: Inside processDocuments"); // Forced acls String[] acls = getAcls(spec); // Sort it, java.util.Arrays.sort(acls); for (String documentIdentifier : documentIdentifiers) { getSession(); String versionString; GetObjectThread objt = new GetObjectThread(documentIdentifier); objt.start(); try { objt.finishUp(); } catch (InterruptedException e) { objt.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (DropboxException e) { Logging.connectors.warn("DROPBOX: Error getting object: " + e.getMessage(), e); handleDropboxException(e); } DropboxAPI.Entry dropboxObject = objt.getResponse(); if (dropboxObject.isDir) { //a folder will always be processed versionString = StringUtils.EMPTY; // 
adding all the children + subdirs for a folder List<DropboxAPI.Entry> children = dropboxObject.contents; for (DropboxAPI.Entry child : children) { activities.addDocumentReference(child.path, documentIdentifier, RELATIONSHIP_CHILD); } activities.noDocument(documentIdentifier, versionString); continue; } if (dropboxObject.isDeleted) { activities.deleteDocument(documentIdentifier); continue; } if (StringUtils.isEmpty(dropboxObject.rev)) { //a document that doesn't contain versioning information will never be processed activities.deleteDocument(documentIdentifier); continue; } StringBuilder sb = new StringBuilder(); // Acls packList(sb, acls, '+'); if (acls.length > 0) { sb.append('+'); pack(sb, defaultAuthorityDenyToken, '+'); } else sb.append('-'); sb.append(dropboxObject.rev); versionString = sb.toString(); if (!activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) continue; long startTime = System.currentTimeMillis(); String errorCode = null; String errorDesc = null; Long fileSize = null; String nodeId = documentIdentifier; String version = versionString; try { // Length in bytes long fileLength = dropboxObject.bytes; if (!activities.checkLengthIndexable(fileLength)) { errorCode = activities.EXCLUDED_LENGTH; errorDesc = "Document excluded because of length (" + fileLength + ")"; activities.noDocument(documentIdentifier, versionString); continue; } //documentURI String documentURI = dropboxObject.path; if (!activities.checkURLIndexable(documentURI)) { errorCode = activities.EXCLUDED_URL; errorDesc = "Document excluded because of URL ('" + documentURI + "')"; activities.noDocument(documentIdentifier, versionString); continue; } //Modified date Date modifiedDate; if (dropboxObject.modified != null) modifiedDate = com.dropbox.client2.RESTUtility.parseDate(dropboxObject.modified); else modifiedDate = null; if (!activities.checkDateIndexable(modifiedDate)) { errorCode = activities.EXCLUDED_DATE; errorDesc = "Document excluded because of date (" + 
modifiedDate + ")"; activities.noDocument(documentIdentifier, versionString); continue; } // Mime type String mimeType = dropboxObject.mimeType; if (!activities.checkMimeTypeIndexable(mimeType)) { errorCode = activities.EXCLUDED_MIMETYPE; errorDesc = "Document excluded because of mime type ('" + mimeType + "')"; activities.noDocument(documentIdentifier, versionString); continue; } // content ingestion RepositoryDocument rd = new RepositoryDocument(); if (acls.length > 0) { rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, acls); String[] denyAclArray = new String[] { defaultAuthorityDenyToken }; rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, denyAclArray); } if (dropboxObject.path != null) rd.setFileName(dropboxObject.path); if (dropboxObject.mimeType != null) rd.setMimeType(dropboxObject.mimeType); if (dropboxObject.modified != null) rd.setModifiedDate(modifiedDate); // There doesn't appear to be a created date... rd.addField("Modified", dropboxObject.modified); rd.addField("Size", dropboxObject.size); rd.addField("Path", dropboxObject.path); rd.addField("Root", dropboxObject.root); rd.addField("ClientMtime", dropboxObject.clientMtime); rd.addField("mimeType", dropboxObject.mimeType); rd.addField("rev", dropboxObject.rev); getSession(); BackgroundStreamThread t = new BackgroundStreamThread(nodeId); t.start(); try { boolean wasInterrupted = false; try { InputStream is = t.getSafeInputStream(); try { rd.setBinary(is, fileLength); activities.ingestDocumentWithException(nodeId, version, documentURI, rd); // No errors. Record the fact that we made it. 
errorCode = "OK"; fileSize = new Long(fileLength); } finally { is.close(); } } catch (java.net.SocketTimeoutException e) { throw e; } catch (InterruptedIOException e) { wasInterrupted = true; throw e; } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) wasInterrupted = true; throw e; } finally { if (!wasInterrupted) // This does a join t.finishUp(); } } catch (InterruptedException e) { // We were interrupted out of the join, most likely. Before we abandon the thread, // send a courtesy interrupt. t.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (java.net.SocketTimeoutException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleIOException(e); } catch (InterruptedIOException e) { t.interrupt(); throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); } catch (IOException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleIOException(e); } catch (DropboxException e) { Logging.connectors.warn("DROPBOX: Error getting stream: " + e.getMessage(), e); errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleDropboxException(e); } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) errorCode = null; throw e; } finally { if (errorCode != null) activities.recordActivity(new Long(startTime), ACTIVITY_READ, fileSize, nodeId, errorCode, errorDesc, null); } } }