actions.DownloadSPDX.java Source code

Java tutorial

Introduction

Here is the source code for actions.DownloadSPDX.java

Source

/**
 * SPDXVersion: SPDX-1.1
 * Creator: Person: Nuno Brito (nuno.brito@triplecheck.de)
 * Creator: Organization: TripleCheck (contact@triplecheck.de)
 * Created: 2014-10-07T13:36:54Z
 * LicenseName: EUPL-1.1-without-appendix
 * FileName: DownloadSPDX.java  
 * FileType: SOURCE
 * FileCopyrightText: <text> Copyright 2014 Nuno Brito, TripleCheck </text>
 * FileComment: <text>
 * 
 * This class takes a text file containing a list of repositories on GitHub
 * and adds the source code files of each of these repositories to one big
 * archive.
 * 
 * The first step is getting the list of repositories, which initially will be
 * scoped to a specific programming language. The second step is getting the 
 * files from the repository onto a folder on disk. From here we add them up
 * to the big archive, mark the repository as processed and move forward to the
 * next one on our list.
 * </text> 
 */

package actions;

import big.BigZip;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import main.Repositories;
import main.engine;
import org.apache.commons.io.FileDeleteStrategy;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import utils.files;

/**
 *
 * @author Nuno Brito, 12th of July 2014 in Darmstadt, Germany
 */
public class DownloadSPDX {

    // Log file listing the repositories already processed. It is assigned in
    // checkLogFile() and read back in startOperation() to resume a run.
    private File fileLog;
    // Readers over the repository list file; opened in startOperation() and
    // closed in concludeOperation()
    private BufferedReader reader;
    private FileReader fileReader;

    // Path of the big archive that receives the generated SPDX documents
    final String testArchive = "./storage/spdx.big";
    // fileArchive: the archive file on disk
    // fileExceptionHappened: text file collecting the repositories that failed to download
    // folderDownload: root folder under which each slot gets its own sub-folder
    final File fileArchive = new File(testArchive), fileExceptionHappened = new File("./failed-spdx.txt"),
            folderDownload = new File("./download-spdx/");

    // The archive object, created in start() from fileArchive
    BigZip bigArchive;

    // repositories processed with success on the current session
    private long repCounter = 0;

    // the database where we store the ongoing-downloads
    DB dbQueue;

    // One flag per worker slot: true while a download thread occupies the slot.
    // NOTE(review): the array is read and written by several threads without
    // any synchronization - confirm that this is acceptable.
    private final boolean[] slots = new boolean[10];

    /**
     * Entry point of this action: prepares the log file, the SPDX engine, the
     * big archive and the queue database, then processes every repository
     * listed on the given text file.
     *
     * @param args command line arguments, at least two are required; args[1]
     *             is the path of the text file listing the repositories
     */
    public void start(String args[]) {
        // without the second argument there is no repository list to read
        final boolean enoughParameters = args.length >= 2;
        if (!enoughParameters) {
            System.out.println("DS82 - Error, not enough parameters!");
            return;
        }

        // the second argument points to the text file with the repositories
        final File fileRepository = new File(args[1]);

        // the log file is named after the repository list
        checkLogFile(fileRepository.getName());

        // warm up the SPDX resources, then create the big archive
        addSPDX();
        bigArchive = new BigZip(fileArchive);

        // open the list, skipping what was already logged as processed
        startOperation(fileRepository);

        // bring up the persistent queue and relaunch what was left there
        initializeQueueDB();

        // process the remaining lines of the list
        readLines();

        // release the file handles
        concludeOperation();
    }

    /**
     * Gets the persistent storage working. This is necessary for keeping track
     * of what is currently being processed. If for some reason processing is
     * interrupted abruptly, on the next restart the repositories still listed
     * on the queue are launched again.
     *
     * Note that an entry is removed from the queue right after its download
     * thread was launched, not when that download has finished.
     */
    private void initializeQueueDB() {
        // the queue database lives inside the "queue" folder
        final File folder = new File(".", "queue");
        utils.files.mkdirs(folder);
        // get the database started
        dbQueue = DBMaker.newFileDB(new File(folder, "queue.db")).closeOnJvmShutdown().make();
        System.out.println("Queue database initialized: " + folder.getAbsolutePath());

        // now check for repositories on the queue that we are missing to launch
        final ConcurrentNavigableMap<String, Long> map = dbQueue.getTreeMap("queue");
        for (String repository : map.descendingKeySet()) {
            System.out.println(repository);
            // wait for a free slot
            while (!hasFreeSlots()) {
                utils_deprecated.time.wait(5);
            }
            // launch the repository download
            System.out.println("Resuming: " + repository);
            launchNewDownload(repository, "github.com");
            queueRemove(repository);
        }
    }

    /**
     * Registers a repository on the persistent queue together with the
     * current time, then commits the database. An entry that already exists
     * for the same repository gets overwritten.
     * @param repository    A repository line as extracted from the text file
     */
    private void queueAdd(final String repository) {
        final ConcurrentNavigableMap<String, Long> pending = dbQueue.getTreeMap("queue");
        // the stored value is the moment when the repository was queued
        final long queuedAt = System.currentTimeMillis();
        pending.put(repository, queuedAt);
        // make the recovery point durable
        dbQueue.commit();
    }

    /**
     * Takes a repository out of the persistent queue and commits the
     * database.
     * @param repository    A repository line as extracted from the text file
     */
    private void queueRemove(final String repository) {
        final ConcurrentNavigableMap<String, Long> pending = dbQueue.getTreeMap("queue");
        // drop the entry of this repository from the queue
        pending.remove(repository);
        // make the change durable
        dbQueue.commit();
    }

    /**
     * Makes sure that the log file, where all processed repositories get
     * written, is available. The file is created when missing; when it can't
     * be created the program exits.
     *
     * @param logId prefix of the log file name, "-log.txt" is appended to it
     */
    private void checkLogFile(final String logId) {
        fileLog = new File(logId + "-log.txt");
        // nothing else to do when the log is already there
        if (fileLog.exists()) {
            return;
        }
        // create a fresh log file
        utils_deprecated.files.touch(fileLog);
        if (fileLog.exists()) {
            return;
        }
        // still missing, something wrong happened and we can't continue
        System.out.println("DR73 - Unable to create a log file: " + fileLog.getAbsolutePath());
        System.exit(-1);
    }

    /**
     * Marks all slots as free and opens the repository file for reading. The
     * header line is skipped and, when the log file mentions a repository
     * that was already processed, the reader is moved past that repository.
     *
     * @param fileRepository the text file listing the repositories
     */
    private void startOperation(final File fileRepository) {
        // initialize the slots, none is in use at this point
        java.util.Arrays.fill(slots, false);

        // open the repository file stream
        try {
            fileReader = new FileReader(fileRepository);
            reader = new BufferedReader(fileReader);
            // avoid the header line
            reader.readLine();
        } catch (FileNotFoundException ex) {
            Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
            System.out.println("DR104 - Failed to open streams");
            System.exit(-1);
        } catch (IOException ex) {
            Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
        }

        // the file is initialized, now get the last line processed (if any)
        final String lastRepository = utils.files.getLastLine(fileLog);
        // a missing value is handled the same way as an empty log: no resume
        if (lastRepository != null && !lastRepository.isEmpty()) {
            System.out.println("Resuming the indexing after " + lastRepository);
            utils_deprecated.time.wait(3);
            moveToLastLine(lastRepository);
        }
    }

    /**
     * Moves the buffered reader pointer to the next line after the last
     * repository that was processed and logged with success. The program
     * exits when that repository is not found on the remaining lines.
     *
     * @param lastRep name of the last repository written on the log file
     */
    private void moveToLastLine(final String lastRep) {
        // A line holds the repository name followed by a space and further
        // details. The whole name has to match, otherwise "user/repo" would
        // also stop at an earlier line such as "user/repo2 ...".
        final String namePrefix = lastRep + " ";
        String line;
        try {
            // read through each line until the repository is found
            while ((line = reader.readLine()) != null) {
                if (line.equals(lastRep) || line.startsWith(namePrefix)) {
                    // we found a match
                    break;
                }
            }
            // we can't have a null value, this means that the rep doesn't exist
            if (line == null) {
                System.out.println("DR - Error, last mentioned repository " + "doesn't exist: " + lastRep);
                System.exit(-1);
            }

        } catch (IOException ex) {
            Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Closes the readers of the repository file. A failure while closing one
     * of them is logged and doesn't prevent closing the other.
     */
    private void concludeOperation() {
        // the buffered reader goes first, followed by the file reader
        final java.io.Closeable[] openStreams = { reader, fileReader };
        for (java.io.Closeable stream : openStreams) {
            // this one was never opened
            if (stream == null) {
                continue;
            }
            try {
                stream.close();
            } catch (IOException ex) {
                Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    /**
     * Reads each remaining line of the repository file and launches a
     * download for it as soon as a slot is free. Blank lines are skipped.
     *
     * Note that a repository is removed from the queue right after its
     * download thread was launched, not when that download has finished.
     */
    private void readLines() {
        String repository;
        try {
            // go through each line until null
            while ((repository = reader.readLine()) != null) {
                // A blank line (e.g. at the end of the file) has no repository
                // name; handing it over to a worker would stop the whole
                // program with error DS364.
                if (repository.trim().isEmpty()) {
                    continue;
                }
                // wait for a free slot
                while (!hasFreeSlots()) {
                    utils_deprecated.time.wait(5);
                }
                queueAdd(repository);
                launchNewDownload(repository, "github.com");
                queueRemove(repository);
            }

        } catch (IOException ex) {
            Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Looks for a slot that is not in use right now.
     *
     * @return true when at least one slot is free, false when all of them
     * are being used
     */
    private boolean hasFreeSlots() {
        for (int index = 0; index < slots.length; index++) {
            final boolean inUse = slots[index];
            if (!inUse) {
                return true;
            }
        }
        return false;
    }

    /**
     * Takes the first free slot, marks it as used and starts a new thread
     * that downloads the repository on that slot. The program exits when no
     * slot is free.
     *
     * @param repositoryDetails A repository line from our text file
     * @param specificFolder    Sub-folder of the slot where the repository is placed
     */
    private void launchNewDownload(final String repositoryDetails, final String specificFolder) {
        for (int index = 0; index < slots.length; index++) {
            // this one is busy, try the next
            if (slots[index]) {
                continue;
            }
            // reserve the slot before the thread gets started
            slots[index] = true;
            final int id = index;
            final Runnable job = new Runnable() {
                @Override
                public void run() {
                    getRepository(repositoryDetails, id, specificFolder);
                }
            };
            new Thread(job).start();
            return;
        }

        // no free slots? This shouldn't happen, signal a serious error
        System.out.println("No empty slots, operation failed!");
        System.exit(-1);
    }

    /**
     * Downloads a repository from the Internet, creates an SPDX document for
     * it, adds that document to the big archive and cleans up the slot folder.
     * Runs on the worker thread of the given slot.
     *
     * The slot is released when this method is done, also when the processing
     * after the download fails with an exception. When the download itself
     * fails, the slot was already released by download().
     *
     * @param repositoryDetails A repository line from our text file, the
     *                          repository name is the text before the first space
     * @param slotId            The slot (and download sub-folder) used by this thread
     * @param specificFolder    Sub-folder of the slot where the repository is placed
     */
    private void getRepository(final String repositoryDetails, final int slotId, final String specificFolder) {
        // there always needs to be a space, otherwise exit the processing
        final int i1 = repositoryDetails.indexOf(" ");
        if (i1 == -1) {
            System.out.println("DS364 - Error, unable to process: " + repositoryDetails);
            System.exit(-1);
        }
        // now get the repository name
        final String repository = repositoryDetails.substring(0, i1);

        System.out.println(slotId + "-> Downloading: " + repository);

        // Create the path where the files will be downloaded onto
        final File localPath = new File(folderDownload + "/" + slotId + "/" + specificFolder, repository);
        utils.files.mkdirs(localPath);

        final String packageName = repository.replace("/", "-");

        // create the file where the SPDX document will be placed
        final File localSPDX = new File(folderDownload + "/" + slotId + "/", packageName + ".spdx");

        // delete the older file if existing already
        localSPDX.delete();

        // do the download of files
        if (!download(slotId, localPath, repository)) {
            // download() released the slot, it must not be released a second
            // time here since it might have been handed to another thread
            return;
        }

        try {
            // delete the files that are not needed
            files.deleteHiddenFilesAndFolders(localPath, 25);

            // create an SPDX document
            main.cmdLine.createCmdLineSPDX(localSPDX, localPath, packageName, "https://github.com/" + repository,
                    "Nuno Brito");

            // now archive these files
            writeBigFile(localSPDX, slotId);

            // now delete the whole folder of this slot
            try {
                final File localDelete = new File(folderDownload.getCanonicalPath(), slotId + "");
                files.deleteHiddenFilesAndFolders(localDelete, 25);
                FileDeleteStrategy.FORCE.deleteQuietly(localDelete);
                // delete the SPDX document
                localSPDX.delete();
            } catch (IOException ex) {
                Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
            }

            // all done with success, write this name on our log of actions
            utils.files.addTextToFile(fileLog, "\n" + repository);

            // several worker threads get here, the counter update is guarded
            // so that no increment gets lost
            final long processed;
            synchronized (this) {
                repCounter++;
                processed = repCounter;
            }
            System.out.println(slotId + "--> #" + processed + ": " + repository);
        } finally {
            // Always clean up the slot. Without this, an exception above
            // would keep the slot marked as used forever.
            slots[slotId] = false;
        }
    }

    /**
     * The synchronised method that adds a file to the big archive. The base
     * path of the archive is set to the download folder of the given slot
     * before the file is added.
     * @param localFile The file on disk that we want to add up
     * @param slotId    The slot whose download folder is used as base path
     */
    private synchronized void writeBigFile(final File localFile, int slotId) {
        final String slotFolder = folderDownload.getAbsolutePath() + "/" + slotId;
        bigArchive.setBasePath(slotFolder);
        bigArchive.addFile(localFile);
    }

    /**
     * Get the files from a given repository on github. On failure the
     * repository is written on the list of failed downloads, the partial
     * files are deleted and the slot is released.
     *
     * @param slotId The thread where the download is happening
     * @param localPath The folder on disk where the files will be placed
     * @param location The username/repository identification. We'd
     * expect something like triplecheck/reporter
     * @return True if everything went ok, false if something went wrong
     */
    public Boolean download(final int slotId, final File localPath, final String location) {
        // we can't have any older files
        files.deleteDir(localPath);
        final String remoteUrl = "https://github.com/" + location + ".git";
        try {
            final Git git = Git.cloneRepository().setURI(remoteUrl).setDirectory(localPath).call();
            try {
                // now open the created repository
                final FileRepositoryBuilder builder = new FileRepositoryBuilder();
                final Repository repository = builder.setGitDir(localPath).readEnvironment() // scan environment GIT_* variables
                        .findGitDir() // scan up the file system tree
                        .build();
                try {
                    System.out.println(slotId + "-> Downloaded repository: " + repository.getDirectory());
                } finally {
                    repository.close();
                }
            } finally {
                // The clone keeps its own repository open. Close it so that no
                // handles stay open on the files that get deleted later on.
                git.getRepository().close();
            }

        } catch (Exception ex) {
            Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
            // we need to add this event on the exception list
            utils_deprecated.files.addTextToFile(fileExceptionHappened, "\n" + location);
            // delete the files if possible
            files.deleteDir(localPath);
            System.out.println(slotId + " !!!! Failed to download: " + location);
            // clean up the slot
            slots[slotId] = false;
            return false;
        }

        System.out.println(slotId + "-> Downloaded: " + location);
        return true;
    }

    /**
     * Use the SVN binary installed on a Linux/OSX machine to export the contents
     * from an online repository. The arguments are handed over one by one, so
     * a path with spaces is kept together as a single argument. Failures are
     * logged or printed, nothing is thrown to the caller.
     *
     * @param userRepository    A combination of user and repository. e.g. userA/repB
     * @param exportPath        To where we want to download the files
     */
    private void svnExport(final String userRepository, final File exportPath) {
        final String url = "https://github.com/" + userRepository + "/trunk";
        final ProcessBuilder builder = new ProcessBuilder("svn", "export", "--force", url,
                exportPath.getAbsolutePath());
        // a single stream to read, error messages included
        builder.redirectErrorStream(true);

        try {
            final Process p = builder.start();
            // The output needs to be consumed, otherwise the process can get
            // stuck once the pipe is full and waitFor() would never return.
            final java.io.InputStream output = p.getInputStream();
            try {
                final byte[] buffer = new byte[4096];
                while (output.read(buffer) != -1) {
                    // the output itself is not used
                }
            } finally {
                output.close();
            }

            final int exitVal = p.waitFor();
            if (exitVal != 0) {
                System.out.println("svn export failed with exit code " + exitVal + ": " + userRepository);
            }

        } catch (IOException ex) {
            Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
        } catch (InterruptedException ex) {
            // keep the interruption visible to the code that called us
            Thread.currentThread().interrupt();
            Logger.getLogger(DownloadSPDX.class.getName()).log(Level.SEVERE, null, ex);
        }

    }

    /**
     * The routine that will prepare our SPDX engine to generate documents.
     * It only delegates to engine.warmUp(); what the warm up involves is
     * defined by that method.
     */
    private void addSPDX() {
        // NOTE(review): presumably loads the SPDX resources - confirm in main.engine
        engine.warmUp();
    }

}