List of usage examples for java.util.Collection.size()
int size();
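size() returns the number of elements in the collection, or Integer.MAX_VALUE if the collection holds more than Integer.MAX_VALUE elements. A minimal, self-contained sketch of that contract (not drawn from the source files below):

import java.util.ArrayList;
import java.util.Collection;

public class SizeExample {
    public static void main(String[] args) {
        Collection<String> names = new ArrayList<>();
        System.out.println(names.size()); // 0: a fresh collection is empty
        names.add("alice");
        names.add("bob");
        System.out.println(names.size()); // 2
        names.remove("alice");
        System.out.println(names.size()); // 1: size() reflects removals immediately
    }
}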
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step6HITPreparator.java
public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers (step5-linguistic-annotation/)
    System.err.println("Starting step 6 HIT Preparation");
    File inputDir = new File(args[0]);

    // output dir (note: File.delete() fails silently on a non-empty directory)
    File outputDir = new File(args[1]);
    if (outputDir.exists()) {
        outputDir.delete();
    }
    outputDir.mkdir();

    // never populated below, so the size() == 0 check admits every file
    List<String> queries = new ArrayList<>();

    // iterate over query containers
    int countClueWeb = 0;
    int countSentence = 0;
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        if (queries.contains(f.getName()) || queries.size() == 0) {
            // groups contain only non-empty documents
            Map<Integer, List<QueryResultContainer.SingleRankedResult>> groups = new HashMap<>();

            // split into groups according to the number of sentences (one group per 40 sentences)
            for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
                if (rankedResult.originalXmi != null) {
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
                    int groupId = sentences.size() / 40;

                    if (!groups.containsKey(groupId)) {
                        groups.put(groupId, new ArrayList<>());
                    }
                    groups.get(groupId).add(rankedResult);
                    countClueWeb++;
                } else {
                    System.err.println("Empty document: " + rankedResult.clueWebID);
                }
            }

            for (Map.Entry<Integer, List<QueryResultContainer.SingleRankedResult>> entry : groups.entrySet()) {
                Integer groupId = entry.getKey();
                List<QueryResultContainer.SingleRankedResult> rankedResults = entry.getValue();

                // make sure the results are sorted by rank
                // DEBUG: for (QueryResultContainer.SingleRankedResult r : rankedResults) System.out.print(r.rank + "\t");
                Collections.sort(rankedResults, (o1, o2) -> o1.rank.compareTo(o2.rank));

                // iterate over results for one query and group
                for (int i = 0; i < rankedResults.size() && i < TOP_RESULTS_PER_GROUP; i++) {
                    QueryResultContainer.SingleRankedResult rankedResult = rankedResults.get(i);
                    int rank = rankedResult.rank;

                    MustacheFactory mf = new DefaultMustacheFactory();
                    Mustache mustache = mf.compile("template/template.html");

                    String queryId = queryResultContainer.qID;
                    String query = queryResultContainer.query;
                    // make the first letter uppercase
                    query = query.substring(0, 1).toUpperCase() + query.substring(1);

                    List<String> relevantInformationExamples = queryResultContainer.relevantInformationExamples;
                    List<String> irrelevantInformationExamples = queryResultContainer.irrelevantInformationExamples;

                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    // collect sentences and paragraph boundaries; force a boundary
                    // every 120 sentences so no chunk grows unbounded
                    List<generators.Sentence> sentences = new ArrayList<>();
                    List<Integer> paragraphs = new ArrayList<>();
                    paragraphs.add(0);
                    for (WebParagraph webParagraph : JCasUtil.select(jCas, WebParagraph.class)) {
                        for (Sentence s : JCasUtil.selectCovered(Sentence.class, webParagraph)) {
                            String sentenceBegin = String.valueOf(s.getBegin());
                            generators.Sentence sentence = new generators.Sentence(s.getCoveredText(),
                                    sentenceBegin);
                            sentences.add(sentence);
                            countSentence++;
                        }
                        int sentenceID = paragraphs.get(paragraphs.size() - 1);
                        if (sentences.size() > 120) {
                            while (sentenceID < sentences.size()) {
                                if (!paragraphs.contains(sentenceID)) {
                                    paragraphs.add(sentenceID);
                                }
                                sentenceID = sentenceID + 120;
                            }
                        }
                        paragraphs.add(sentences.size());
                    }

                    System.err.println("Output dir: " + outputDir);

                    // write each chunk of at most 120 sentences to its own HTML file,
                    // bucketed into subdirectories 40/, 80/ and 120/ by chunk length
                    int startID = 0;
                    int endID;
                    for (int j = 0; j < paragraphs.size(); j++) {
                        endID = paragraphs.get(j);
                        int sentLength = endID - startID;
                        if (sentLength > 120 || j == paragraphs.size() - 1) {
                            if (sentLength > 120) {
                                endID = paragraphs.get(j - 1);
                                j--;
                            }
                            sentLength = endID - startID;
                            if (sentLength <= 40) {
                                groupId = 40;
                            } else if (sentLength <= 80) {
                                groupId = 80;
                            } else {
                                groupId = 120;
                            }

                            File folder = new File(outputDir + "/" + groupId);
                            if (!folder.exists()) {
                                System.err.println("creating directory: " + outputDir + "/" + groupId);
                                boolean result = false;
                                try {
                                    folder.mkdir();
                                    result = true;
                                } catch (SecurityException se) {
                                    // leave result == false; the write below will then fail visibly
                                }
                                if (result) {
                                    System.out.println("DIR created");
                                }
                            }

                            String newHtmlFile = folder.getAbsolutePath() + "/" + f.getName() + "_"
                                    + rankedResult.clueWebID + "_" + sentLength + ".html";
                            System.err.println("Printing a file: " + newHtmlFile);
                            File newHTML = new File(newHtmlFile);
                            int t = 0;
                            while (newHTML.exists()) {
                                newHTML = new File(folder.getAbsolutePath() + "/" + f.getName() + "_"
                                        + rankedResult.clueWebID + "_" + sentLength + "." + t + ".html");
                                t++;
                            }

                            mustache.execute(new PrintWriter(new FileWriter(newHTML)),
                                    new generators(query, relevantInformationExamples,
                                            irrelevantInformationExamples,
                                            sentences.subList(startID, endID), queryId, rank))
                                    .flush();
                            startID = endID;
                        }
                    }
                }
            }
        }
    }
    System.out.println("Printed " + countClueWeb + " documents with " + countSentence + " sentences");
}
From source file:com.tamingtext.classifier.bayes.ExtractTrainingData.java
public static void main(String[] args) {
    log.info("Command-line arguments: " + Arrays.toString(args));

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("Lucene index directory containing input data").withShortName("d").create();

    Option categoryOpt = obuilder.withLongName("categories").withRequired(true)
            .withArgument(abuilder.withName("file").withMinimum(1).withMaximum(1).create())
            .withDescription("File containing a list of categories").withShortName("c").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Output directory").withShortName("o").create();

    Option categoryFieldsOpt = obuilder.withLongName("category-fields").withRequired(true)
            .withArgument(abuilder.withName("fields").withMinimum(1).withMaximum(1).create())
            .withDescription("Fields to match categories against (comma-delimited)").withShortName("cf")
            .create();

    Option textFieldsOpt = obuilder.withLongName("text-fields").withRequired(true)
            .withArgument(abuilder.withName("fields").withMinimum(1).withMaximum(1).create())
            .withDescription("Fields from which to extract training text (comma-delimited)").withShortName("tf")
            .create();

    Option useTermVectorsOpt = obuilder.withLongName("use-term-vectors")
            .withDescription("Extract term vectors containing preprocessed data "
                    + "instead of unprocessed, stored text values")
            .withShortName("tv").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(categoryOpt)
            .withOption(outputOpt).withOption(categoryFieldsOpt).withOption(textFieldsOpt)
            .withOption(useTermVectorsOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File inputDir = new File(cmdLine.getValue(inputOpt).toString());
        if (!inputDir.isDirectory()) {
            throw new IllegalArgumentException(inputDir + " does not exist or is not a directory");
        }

        File categoryFile = new File(cmdLine.getValue(categoryOpt).toString());
        if (!categoryFile.isFile()) {
            throw new IllegalArgumentException(categoryFile + " does not exist or is not a file");
        }

        File outputDir = new File(cmdLine.getValue(outputOpt).toString());
        outputDir.mkdirs();
        if (!outputDir.isDirectory()) {
            throw new IllegalArgumentException(outputDir + " is not a directory or could not be created");
        }

        Collection<String> categoryFields = stringToList(cmdLine.getValue(categoryFieldsOpt).toString());
        if (categoryFields.size() < 1) {
            throw new IllegalArgumentException("At least one category field must be specified.");
        }

        Collection<String> textFields = stringToList(cmdLine.getValue(textFieldsOpt).toString());
        if (textFields.size() < 1) {
            throw new IllegalArgumentException("At least one text field must be specified.");
        }

        boolean useTermVectors = cmdLine.hasOption(useTermVectorsOpt);

        extractTraininingData(inputDir, categoryFile, categoryFields, textFields, outputDir, useTermVectors);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (IOException e) {
        log.error("IOException", e);
    } finally {
        closeWriters();
    }
}
From source file:io.apiman.tools.i18n.TemplateScanner.java
public static void main(String[] args) throws IOException {
    if (args == null || args.length != 1) {
        System.out.println("Template directory not provided (no path provided).");
        System.exit(1);
    }
    File templateDir = new File(args[0]);
    if (!templateDir.isDirectory()) {
        System.out.println("Template directory not provided (provided path is not a directory).");
        System.exit(1);
    }
    if (!new File(templateDir, "dash.html").isFile()) {
        System.out.println("Template directory not provided (dash.html not found).");
        System.exit(1);
    }
    File outputDir = new File(templateDir, "../../../../../../tools/i18n/target");
    if (!outputDir.isDirectory()) {
        System.out.println("Output directory not found: " + outputDir);
        System.exit(1);
    }
    File outputFile = new File(outputDir, "scanner-messages.properties");
    if (outputFile.isFile() && !outputFile.delete()) {
        System.out.println("Couldn't delete the old messages.properties: " + outputFile);
        System.exit(1);
    }

    System.out.println("Starting scan.");
    System.out.println("Scanning template directory: " + templateDir.getAbsolutePath());
    String[] extensions = { "html", "include" };
    Collection<File> files = FileUtils.listFiles(templateDir, extensions, true);
    TreeMap<String, String> strings = new TreeMap<>();
    for (File file : files) {
        System.out.println("\tScanning file: " + file);
        scanFile(file, strings);
    }
    outputMessages(strings, outputFile);
    System.out.println("Scan complete. Scanned " + files.size() + " files and discovered " + strings.size()
            + " translation strings.");
}
From source file:eu.annocultor.utils.OntologySubtractor.java
public static void main(String[] args) throws Exception {
    boolean copy = checkNoCopyOption(args);

    if (args.length == 2 || args.length == 3) {
        File sourceDir = new File(args[0]);
        File destinationDir = new File(args[1]);
        checkSrcAndDstDirs(sourceDir, destinationDir);

        Collection<String> filesWithDeletedStatements = listNameStamsForFilesWithDeletedStatements(sourceDir);
        if (filesWithDeletedStatements.isEmpty()) {
            System.out.println(
                    "Did not find any file *.*.*.deleted.rdf with statements to be deleted. Doing nothing and exiting.");
        } else {
            System.out.println(
                    "Found " + filesWithDeletedStatements.size() + " files with statements to be deleted");
            System.out.println(
                    "Copying all RDF files from " + sourceDir.getName() + " to " + destinationDir.getName());
            if (copy) {
                copyRdfFiles(sourceDir, destinationDir);
            }
            sutractAll(sourceDir, destinationDir, filesWithDeletedStatements);
        }
    } else {
        // wrong argument count: print usage from the bundled readme
        for (Object string : IOUtils.readLines(new AutoCloseInputStream(
                OntologySubtractor.class.getResourceAsStream("/subtractor/readme.txt")))) {
            System.out.println(string.toString());
        }
    }
}
From source file:com.evolveum.midpoint.testing.model.client.sample.Main.java
/**
 * @param args
 */
public static void main(String[] args) {
    try {
        ModelPortType modelPort = createModelPort(args);

        SystemConfigurationType configurationType = getConfiguration(modelPort);
        System.out.println("Got system configuration");
        // System.out.println(configurationType);

        UserType userAdministrator = searchUserByName(modelPort, "administrator");
        System.out.println("Got administrator user: " + userAdministrator.getOid());
        // System.out.println(userAdministrator);

        RoleType sailorRole = searchRoleByName(modelPort, "Sailor");
        System.out.println("Got Sailor role");
        // System.out.println(sailorRole);

        Collection<ResourceType> resources = listResources(modelPort);
        System.out.println("Resources (" + resources.size() + ")");
        // dump(resources);

        Collection<UserType> users = listUsers(modelPort);
        System.out.println("Users (" + users.size() + ")");
        // dump(users);

        Collection<TaskType> tasks = listTasks(modelPort);
        System.out.println("Tasks (" + tasks.size() + ")");
        // dump(tasks);
        // System.out.println("Next scheduled times: ");
        // for (TaskType taskType : tasks) {
        //     System.out.println(" - " + getOrig(taskType.getName()) + ": "
        //             + taskType.getNextRunStartTimestamp());
        // }

        String userGuybrushoid = createUserGuybrush(modelPort, sailorRole);
        System.out.println("Created user guybrush, OID: " + userGuybrushoid);

        UserType userGuybrush = getUser(modelPort, userGuybrushoid);
        System.out.println("Fetched user guybrush:");
        // System.out.println(userGuybrush);
        System.out.println("Users fullName: " + ModelClientUtil.getOrig(userGuybrush.getFullName()));

        String userLeChuckOid = createUserFromSystemResource(modelPort, "user-lechuck.xml");
        System.out.println("Created user lechuck, OID: " + userLeChuckOid);

        changeUserPassword(modelPort, userGuybrushoid, "MIGHTYpirate");
        System.out.println("Changed user password");

        changeUserGivenName(modelPort, userLeChuckOid, "CHUCK");
        System.out.println("Changed user given name");

        assignRoles(modelPort, userGuybrushoid, ROLE_PIRATE_OID, ROLE_CAPTAIN_OID);
        System.out.println("Assigned roles");

        unAssignRoles(modelPort, userGuybrushoid, ROLE_CAPTAIN_OID);
        System.out.println("Unassigned roles");

        Collection<RoleType> roles = listRequestableRoles(modelPort);
        System.out.println("Found " + roles.size() + " requestable roles");
        // System.out.println(roles);

        String seaSuperuserRole = createRoleFromSystemResource(modelPort, "role-sea-superuser.xml");
        System.out.println("Created role Sea Superuser, OID: " + seaSuperuserRole);

        assignRoles(modelPort, userLeChuckOid, seaSuperuserRole);
        System.out.println("Assigned role Sea Superuser to LeChuck");

        modifyRoleModifyInducement(modelPort, seaSuperuserRole);
        System.out.println("Modified role Sea Superuser - modified resource inducement");

        modifyRoleReplaceInducement(modelPort, seaSuperuserRole, 2, ROLE_CAPTAIN_OID);
        System.out.println("Modified role Sea Superuser - changed role inducement");

        reconcileUser(modelPort, userLeChuckOid);
        System.out.println("LeChuck reconciled.");

        // Comment out the following lines if you want to see what midPoint really did
        // ... because deleting the user will delete also all the traces (except logs and audit, of course).
        deleteUser(modelPort, userGuybrushoid);
        deleteUser(modelPort, userLeChuckOid);
        deleteRole(modelPort, seaSuperuserRole);
        System.out.println("Deleted user(s)");

    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
From source file:br.com.autonomiccs.cloudTraces.main.CloudTracesSimulator.java
public static void main(String[] args) {
    validateInputFile(args);

    String cloudTracesFile = args[0];
    Collection<VirtualMachine> virtualMachines = getAllVirtualMachinesFromCloudTraces(cloudTracesFile);
    logger.info(String.format("#VirtualMachines [%d] found on [%s].", virtualMachines.size(),
            cloudTracesFile));

    Map<Integer, List<VirtualMachine>> mapVirtualMachinesTaskExecutionByTime = createMapVirtualMachinesTaskExecutionByTime(
            virtualMachines);
    logger.info(String.format("#Times [%d] that have tasks being executed by VMs ",
            mapVirtualMachinesTaskExecutionByTime.size()));

    Cloud cloud = createCloudEnvirtonmentToStartsimulation();
    logger.info("Cloud configuration: " + cloud);

    List<Integer> timesToExecuteTasks = new ArrayList<>(mapVirtualMachinesTaskExecutionByTime.keySet());
    Collections.sort(timesToExecuteTasks);

    Integer firstTimeInTimeUnitOfUsedCloudData = timesToExecuteTasks.get(0);
    Integer lastTimeInTimeUnitOfUserCloudData = timesToExecuteTasks.get(timesToExecuteTasks.size() - 1);
    logger.info("First time: " + firstTimeInTimeUnitOfUsedCloudData);
    logger.info("Last time: " + lastTimeInTimeUnitOfUserCloudData);

    double timeUnitPerLoopIteration = getTimeUnitPerLoopIteration(firstTimeInTimeUnitOfUsedCloudData,
            lastTimeInTimeUnitOfUserCloudData);
    logger.info("The time unit converted to trace time: " + timeUnitPerLoopIteration);

    double currentTime = firstTimeInTimeUnitOfUsedCloudData;
    long highetResourceAllocation = Long.MIN_VALUE;
    String cloudStateHighestMemoryAllocation = "";
    while (currentTime < lastTimeInTimeUnitOfUserCloudData + 2 * timeUnitPerLoopIteration) {
        logger.debug("Current time of iteration: " + currentTime);
        // remember the cloud state at the peak of memory allocation
        if (cloud.getMemoryAllocatedInBytes() > highetResourceAllocation) {
            highetResourceAllocation = cloud.getMemoryAllocatedInBytes();
            cloudStateHighestMemoryAllocation = cloud.toString();
        }
        applyLoadOnCloudForCurrentTime(mapVirtualMachinesTaskExecutionByTime, cloud, currentTime);
        destroyVirtualMachinesIfNeeded(cloud, currentTime);
        logger.info(String.format("Time [%.3f], cloud state [%s] ", currentTime, cloud));
        executeManagement(cloud, currentTime);
        logClustersConfigurationsAndStdAtTime(cloud.getClusters(), currentTime);
        currentTime += timeUnitPerLoopIteration;
    }
    logger.info("Cloud configuration after simulation: " + cloud);
    logger.info("Cloud highestResourceUsage: " + cloudStateHighestMemoryAllocation);
}
From source file:edu.umn.msi.tropix.proteomics.tools.DTAToMzXML.java
public static void main(final String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        System.exit(0);
    }
    Collection<File> files = null;
    if (args[0].equals("-files")) {
        if (args.length < 2) {
            out.println("No files specified.");
            usage();
            exit(-1);
        } else {
            files = new ArrayList<File>(args.length - 1);
            for (int i = 1; i < args.length; i++) {
                files.add(new File(args[i]));
            }
        }
    } else if (args[0].equals("-directory")) {
        File directory;
        if (args.length < 2) {
            directory = new File(System.getProperty("user.dir"));
        } else {
            directory = new File(args[1]);
        }
        files = FileUtilsFactory.getInstance().listFiles(directory, new String[] { "dta" }, false);
    } else {
        usage();
        exit(-1);
    }

    final InMemoryDTAListImpl dtaList = new InMemoryDTAListImpl();
    File firstFile = null;
    if (files.size() == 0) {
        out.println("No files found.");
        exit(-1);
    } else {
        firstFile = files.iterator().next();
    }
    for (final File file : files) {
        dtaList.add(FileUtils.readFileToByteArray(file), file.getName());
    }
    final DTAToMzXMLConverter dtaToMzXMLConverter = new DTAToMzXMLConverterImpl();
    final MzXML mzxml = dtaToMzXMLConverter.dtaToMzXML(dtaList, null);
    // name the output after the first input file, up to its first dot
    final String mzxmlName = firstFile.getName().substring(0, firstFile.getName().indexOf(".")) + ".mzXML";
    new MzXMLUtility().serialize(mzxml, mzxmlName);
}
From source file:de.unileipzig.ub.indexer.App.java
public static void main(String[] args) throws IOException {

    // create Options object
    Options options = new Options();

    options.addOption("h", "help", false, "display this help");
    options.addOption("f", "filename", true, "name of the JSON file whose content should be indexed");
    options.addOption("i", "index", true, "the name of the target index");
    options.addOption("d", "doctype", true, "the name of the doctype (title, local, ...)");
    options.addOption("t", "host", true, "elasticsearch hostname (default: 0.0.0.0)");
    options.addOption("p", "port", true, "transport port (that's NOT the http port, default: 9300)");
    options.addOption("c", "cluster", true, "cluster name (default: elasticsearch_mdma)");
    options.addOption("b", "bulksize", true, "number of docs sent in one request (default: 3000)");
    options.addOption("v", "verbose", false, "show processing speed while indexing");
    options.addOption("s", "status", false, "only show status of index for file");
    options.addOption("r", "repair", false, "attempt to repair recoverable inconsistencies on the go");
    options.addOption("e", "debug", false, "set logging level to debug");
    options.addOption("l", "logfile", true, "logfile - if not specified only log to stdout");
    options.addOption("m", "memcached", true, "host and port of memcached (default: localhost:11211)");
    options.addOption("z", "latest-flag-on", true,
            "enable latest flag according to field (within content, e.g. 001)");
    options.addOption("a", "flat", false, "flat-mode: do not check for inconsistencies");

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException ex) {
        logger.error(ex);
        System.exit(1);
    }

    // setup logging
    Properties systemProperties = System.getProperties();
    systemProperties.put("net.spy.log.LoggerImpl", "net.spy.memcached.compat.log.Log4JLogger");
    System.setProperties(systemProperties);
    Logger.getLogger("net.spy.memcached").setLevel(Level.ERROR);

    Properties props = new Properties();
    props.load(props.getClass().getResourceAsStream("/log4j.properties"));

    if (cmd.hasOption("debug")) {
        props.setProperty("log4j.logger.de.unileipzig", "DEBUG");
    }

    if (cmd.hasOption("logfile")) {
        props.setProperty("log4j.rootLogger", "INFO, stdout, F");
        props.setProperty("log4j.appender.F", "org.apache.log4j.FileAppender");
        props.setProperty("log4j.appender.F.File", cmd.getOptionValue("logfile"));
        props.setProperty("log4j.appender.F.layout", "org.apache.log4j.PatternLayout");
        props.setProperty("log4j.appender.F.layout.ConversionPattern", "%5p | %d | %F | %L | %m%n");
    }

    PropertyConfigurator.configure(props);

    InetAddress addr = InetAddress.getLocalHost();
    String memcachedHostAndPort = addr.getHostAddress() + ":11211";
    if (cmd.hasOption("m")) {
        memcachedHostAndPort = cmd.getOptionValue("m");
    }

    // setup caching (note: the client below connects to 0.0.0.0:11211 regardless
    // of memcachedHostAndPort)
    try {
        if (memcachedClient == null) {
            memcachedClient = new MemcachedClient(
                    new ConnectionFactoryBuilder().setFailureMode(FailureMode.Cancel).build(),
                    AddrUtil.getAddresses("0.0.0.0:11211"));
            try {
                // give client and server a moment (300ms) to connect
                Thread.sleep(300);
            } catch (InterruptedException ex) {
            }

            Collection availableServers = memcachedClient.getAvailableServers();
            logger.info(availableServers);
            if (availableServers.size() == 0) {
                logger.info("no memcached servers found");
                memcachedClient.shutdown();
                memcachedClient = null;
            } else {
                logger.info(availableServers.size() + " memcached server(s) detected, fine.");
            }
        }
    } catch (IOException ex) {
        logger.warn("couldn't create a connection, bailing out: " + ex.getMessage());
    }

    // process options
    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("indexer", options, true);
        quit(0);
    }

    boolean verbose = false;
    if (cmd.hasOption("verbose")) {
        verbose = true;
    }

    // ES options
    String[] hosts = new String[] { "0.0.0.0" };
    int port = 9300;
    String clusterName = "elasticsearch_mdma";
    int bulkSize = 3000;

    if (cmd.hasOption("host")) {
        hosts = cmd.getOptionValues("host");
    }
    if (cmd.hasOption("port")) {
        port = Integer.parseInt(cmd.getOptionValue("port"));
    }
    if (cmd.hasOption("cluster")) {
        clusterName = cmd.getOptionValue("cluster");
    }
    if (cmd.hasOption("bulksize")) {
        bulkSize = Integer.parseInt(cmd.getOptionValue("bulksize"));
        if (bulkSize < 1 || bulkSize > 100000) {
            logger.error("bulksize must be between 1 and 100,000");
            quit(1);
        }
    }

    // ES Client
    final Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", "elasticsearch_mdma")
            .build();
    final TransportClient client = new TransportClient(settings);
    for (String host : hosts) {
        client.addTransportAddress(new InetSocketTransportAddress(host, port));
    }

    if (cmd.hasOption("filename") && cmd.hasOption("index") && cmd.hasOption("doctype")) {

        final String filename = cmd.getOptionValue("filename");

        final File _file = new File(filename);
        if (_file.length() == 0) {
            logger.info(_file.getAbsolutePath() + " is empty, skipping");
            quit(0); // file is empty
        }

        // for flat mode: leave a stampfile beside the json to
        // indicate previous successful processing
        File directory = new File(filename).getParentFile();
        File stampfile = new File(directory, DigestUtils.shaHex(filename) + ".indexed");

        long start = System.currentTimeMillis();
        long lineCount = 0;

        final String indexName = cmd.getOptionValue("index");
        final String docType = cmd.getOptionValue("doctype");
        BulkRequestBuilder bulkRequest = client.prepareBulk();

        try {
            if (cmd.hasOption("flat")) {
                // flat mode
                if (stampfile.exists()) {
                    logger.info("SKIPPING, since it seems this file has already been imported (found: "
                            + stampfile.getAbsolutePath() + ")");
                    quit(0);
                }
            } else {
                final String srcSHA1 = extractSrcSHA1(filename);
                logger.debug(filename + " srcsha1: " + srcSHA1);

                long docsInIndex = getIndexedRecordCount(client, indexName, srcSHA1);
                logger.debug(filename + " indexed: " + docsInIndex);

                long docsInFile = getLineCount(filename);
                logger.debug(filename + " lines: " + docsInFile);

                // in non-flat-mode, indexing would take care of inconsistencies
                if (docsInIndex == docsInFile) {
                    logger.info("UP-TO-DATE: " + filename + " (" + docsInIndex + ", " + srcSHA1 + ")");
                    client.close();
                    quit(0);
                }

                if (docsInIndex > 0) {
                    logger.warn("INCONSISTENCY DETECTED: " + filename + ": indexed:" + docsInIndex
                            + " lines:" + docsInFile);

                    if (!cmd.hasOption("r")) {
                        logger.warn("Please re-run indexer with --repair flag or delete residues first with: "
                                + "$ curl -XDELETE " + hosts[0] + ":9200/" + indexName
                                + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'");
                        client.close();
                        quit(1);
                    } else {
                        logger.info("Attempting to clear residues...");
                        // attempt to repair once
                        DeleteByQueryResponse dbqr = client.prepareDeleteByQuery(indexName)
                                .setQuery(termQuery("meta.srcsha1", srcSHA1)).execute().actionGet();

                        Iterator<IndexDeleteByQueryResponse> it = dbqr.iterator();
                        long deletions = 0;
                        while (it.hasNext()) {
                            IndexDeleteByQueryResponse response = it.next();
                            deletions += 1;
                        }

                        logger.info("Deleted residues of " + filename);
                        logger.info("Refreshing [" + indexName + "]");
                        RefreshResponse refreshResponse = client.admin().indices()
                                .refresh(new RefreshRequest(indexName)).actionGet();

                        long indexedAfterDelete = getIndexedRecordCount(client, indexName, srcSHA1);
                        logger.info(indexedAfterDelete + " docs remained");
                        if (indexedAfterDelete > 0) {
                            logger.warn("Not all residues cleaned. Try to fix this manually: $ curl -XDELETE "
                                    + hosts[0] + ":9200/" + indexName
                                    + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1
                                    + "\" }}'");
                            quit(1);
                        } else {
                            logger.info("Residues are gone. Now trying to reindex: " + filename);
                        }
                    }
                }
            }

            logger.info("INDEXING-REQUIRED: " + filename);
            if (cmd.hasOption("status")) {
                quit(0);
            }

            HashSet idsInBatch = new HashSet();
            String idField = null;
            if (cmd.hasOption("z")) {
                idField = cmd.getOptionValue("z");
            }

            final FileReader fr = new FileReader(filename);
            final BufferedReader br = new BufferedReader(fr);

            String line;
            // one line is one document
            while ((line = br.readLine()) != null) {

                // "Latest-Flag" machine
                // This gets obsolete with a "flat" index
                if (cmd.hasOption("z")) {
                    // flag that indicates whether the document
                    // about to be indexed will be the latest
                    boolean willBeLatest = true;

                    // check if there is a previous (lower meta.timestamp) document with
                    // the same identifier (whatever that may be - queried under "content")
                    final String contentIdentifier = getContentIdentifier(line, idField);
                    idsInBatch.add(contentIdentifier);

                    // assumed in meta.timestamp
                    final Long timestamp = Long.parseLong(getTimestamp(line));

                    logger.debug("Checking whether record is latest (line: " + lineCount + ")");
                    logger.debug(contentIdentifier + ", " + timestamp);

                    // get all docs which match the contentIdentifier
                    // by filter, which doesn't score
                    final TermFilterBuilder idFilter = new TermFilterBuilder("content." + idField,
                            contentIdentifier);
                    final TermFilterBuilder kindFilter = new TermFilterBuilder("meta.kind", docType);
                    final AndFilterBuilder afb = new AndFilterBuilder();
                    afb.add(idFilter).add(kindFilter);
                    final FilteredQueryBuilder fb = filteredQuery(matchAllQuery(), afb);

                    final SearchResponse searchResponse = client.prepareSearch(indexName)
                            .setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(fb).setFrom(0)
                            .setSize(1200) // 3 years and 105 days assuming daily updates at the most
                            .setExplain(false).execute().actionGet();

                    final SearchHits searchHits = searchResponse.getHits();

                    logger.debug("docs with this id in the index: " + searchHits.getTotalHits());

                    for (final SearchHit hit : searchHits.getHits()) {
                        final String docId = hit.id();
                        final Map<String, Object> source = hit.sourceAsMap();
                        final Map meta = (Map) source.get("meta");
                        final Long docTimestamp = Long.parseLong(meta.get("timestamp").toString());

                        // if the indexed doc timestamp is lower than the current one,
                        // remove any latest flag
                        if (timestamp >= docTimestamp) {
                            source.remove("latest");
                            final ObjectMapper mapper = new ObjectMapper();
                            // put the updated doc back
                            IndexResponse response = client.prepareIndex(indexName, docType)
                                    .setCreate(false).setId(docId)
                                    .setSource(mapper.writeValueAsBytes(source))
                                    .execute(new ActionListener<IndexResponse>() {
                                        public void onResponse(IndexResponse rspns) {
                                            logger.debug("Removed latest flag from " + contentIdentifier
                                                    + ", " + docTimestamp + ", " + hit.id() + " since ("
                                                    + timestamp + " > " + docTimestamp + ")");
                                        }

                                        public void onFailure(Throwable thrwbl) {
                                            logger.error("Could not remove flag from " + hit.id() + ", "
                                                    + contentIdentifier);
                                        }
                                    });
                        } else {
                            logger.debug("Doc " + hit.id() + " is newer (" + docTimestamp + ")");
                            willBeLatest = false;
                        }
                    }

                    if (willBeLatest) {
                        line = setLatestFlag(line);
                        logger.info("Setting latest flag on " + contentIdentifier + ", " + timestamp);
                    }

                    // end of latest-flag machine
                    // beware - this will be correct as long as there
                    // are no dups within one bulk!
                }

                bulkRequest.add(client.prepareIndex(indexName, docType).setSource(line));
                lineCount++;
                logger.debug("Added line " + lineCount + " to BULK");
                logger.debug(line);

                if (lineCount % bulkSize == 0) {

                    if (idsInBatch.size() != bulkSize && cmd.hasOption("z")) {
                        logger.error("This batch has duplications in the ID. That's not bad for the index, "
                                + "just makes the latest flag fuzzy");
                        logger.error("Bulk size was: " + bulkSize + ", but " + idsInBatch.size()
                                + " IDs (only)");
                    }
                    idsInBatch.clear();

                    logger.debug("Issuing BULK request");

                    final long actionCount = bulkRequest.numberOfActions();
                    final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                    final long tookInMillis = bulkResponse.getTookInMillis();

                    if (bulkResponse.hasFailures()) {
                        logger.fatal("FAILED, bulk not indexed. exiting now.");
                        Iterator<BulkItemResponse> it = bulkResponse.iterator();
                        while (it.hasNext()) {
                            BulkItemResponse bir = it.next();
                            if (bir.isFailed()) {
                                Failure failure = bir.getFailure();
                                logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                        + ", type: " + failure.getType() + ", index: "
                                        + failure.getIndex());
                            }
                        }
                        quit(1);
                    } else {
                        if (verbose) {
                            final double elapsed = System.currentTimeMillis() - start;
                            final double speed = (lineCount / elapsed * 1000);
                            logger.info("OK (" + filename + ") " + lineCount + " docs indexed ("
                                    + actionCount + "/" + tookInMillis + "ms" + "/"
                                    + String.format("%.2f", speed) + "r/s)");
                        }
                    }
                    bulkRequest = client.prepareBulk();
                }
            }

            // handle the remaining items
            final long actionCount = bulkRequest.numberOfActions();
            if (actionCount > 0) {
                final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                final long tookInMillis = bulkResponse.getTookInMillis();

                if (bulkResponse.hasFailures()) {
                    logger.fatal("FAILED, bulk not indexed. exiting now.");
                    Iterator<BulkItemResponse> it = bulkResponse.iterator();
                    while (it.hasNext()) {
                        BulkItemResponse bir = it.next();
                        if (bir.isFailed()) {
                            Failure failure = bir.getFailure();
                            logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                    + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                        }
                    }
                    quit(1);
                } else {
                    // trigger update now
                    RefreshResponse refreshResponse = client.admin().indices()
                            .refresh(new RefreshRequest(indexName)).actionGet();

                    if (verbose) {
                        final double elapsed = System.currentTimeMillis() - start;
                        final double speed = (lineCount / elapsed * 1000);
                        logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount
                                + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                    }
                }
            }

            br.close();
            client.close();
            final double elapsed = (System.currentTimeMillis() - start) / 1000;
            final double speed = (lineCount / elapsed);
            logger.info("indexing (" + filename + ") " + lineCount + " docs took " + elapsed + "s (speed: "
                    + String.format("%.2f", speed) + "r/s)");

            if (cmd.hasOption("flat")) {
                try {
                    FileUtils.touch(stampfile);
                } catch (IOException ioe) {
                    logger.warn(".indexed files not created. Will reindex everything every time.");
                }
            }
        } catch (IOException e) {
            client.close();
            logger.error(e);
            quit(1);
        } finally {
            client.close();
        }
    }
    quit(0);
}
From source file:kindleclippings.quizlet.QuizletSync.java
public static void main(String[] args)
        throws IOException, JSONException, URISyntaxException, InterruptedException, BackingStoreException {

    ProgressMonitor progress = new ProgressMonitor(null, "QuizletSync", "loading Kindle clippings file", 0,
            100);
    progress.setMillisToPopup(0);
    progress.setMillisToDecideToPopup(0);
    progress.setProgress(0);

    try {
        Map<String, List<Clipping>> books = readClippingsFile();

        if (books == null)
            return;

        if (books.isEmpty()) {
            JOptionPane.showMessageDialog(null, "no clippings to be uploaded", "QuizletSync",
                    JOptionPane.OK_OPTION);
            return;
        }

        progress.setNote("checking Quizlet account");
        progress.setProgress(5);

        Preferences prefs = getPrefs();

        QuizletAPI api = new QuizletAPI(prefs.get("access_token", null));

        Collection<TermSet> sets = null;
        try {
            progress.setNote("checking Quizlet library");
            progress.setProgress(10);
            sets = api.getSets(prefs.get("user_id", null));
        } catch (IOException e) {
            if (e.toString().contains("401")) {
                // Not Authorized => Token has been revoked
                clearPrefs();
                prefs = getPrefs();
                api = new QuizletAPI(prefs.get("access_token", null));
                sets = api.getSets(prefs.get("user_id", null));
            } else {
                throw e;
            }
        }

        progress.setProgress(15);
        progress.setMaximum(15 + books.size());
        progress.setNote("uploading new notes");

        Map<String, TermSet> indexedSets = new HashMap<String, TermSet>(sets.size());
        for (TermSet t : sets) {
            indexedSets.put(t.getTitle(), t);
        }

        int pro = 15;
        int createdSets = 0;
        int createdTerms = 0;
        int updatedTerms = 0;
        for (List<Clipping> c : books.values()) {
            String book = c.get(0).getBook();
            progress.setNote(book);
            progress.setProgress(pro++);

            TermSet termSet = indexedSets.get(book);
            if (termSet == null) {
                if (c.size() < 2) {
                    System.err.println("ignored [" + book + "] (need at least two notes)");
                    continue;
                }
                addSet(api, book, c);
                createdSets++;
                createdTerms += c.size();
                continue;
            }
            // compare against existing terms
            for (Clipping cl : c) {
                if (!checkExistingTerm(cl, termSet)) {
                    addTerm(api, termSet, cl);
                    updatedTerms++;
                }
            }
        }
        progress.setProgress(pro++);

        if (createdSets == 0 && updatedTerms == 0) {
            JOptionPane.showMessageDialog(null, "Done.\nNo new data was uploaded", "QuizletSync",
                    JOptionPane.OK_OPTION);
        } else if (createdSets > 0) {
            JOptionPane.showMessageDialog(null,
                    String.format(
                            "Done.\nCreated %d new sets with %d cards, and added %d cards to existing sets",
                            createdSets, createdTerms, updatedTerms),
                    "QuizletSync", JOptionPane.OK_OPTION);
        } else {
            JOptionPane.showMessageDialog(null,
                    String.format("Done.\nAdded %d cards to existing sets", updatedTerms), "QuizletSync",
                    JOptionPane.OK_OPTION);
        }
    } finally {
        progress.close();
    }

    System.exit(0);
}
From source file:gov.nih.nci.ncicb.cadsr.common.persistence.dao.jdbc.JDBCAdminComponentDAO.java
public static void main(String[] args) {
    ServiceLocator locator = new SimpleServiceLocator();
    JDBCAdminComponentDAO jdbcAdminComponentDAO = new JDBCAdminComponentDAO(locator);

    /*
    int res = jdbcAdminComponentDAO.assignClassification(
        "99BA9DC8-2357-4E69-E034-080020C9C0E0",
        "29A8FB30-0AB1-11D6-A42F-0010A4C1E842"); // acId, csCsiId
    System.out.println("res = " + res);
    */

    /*
    int deleteRes = jdbcAdminComponentDAO.removeClassification("D66B85B6-4EDA-469B-E034-0003BA0B1A09");
    System.out.println("deleteRes = " + deleteRes);

    Collection csito = jdbcAdminComponentDAO.retrieveClassifications(
        "29A8FB19-0AB1-11D6-A42F-0010A4C1E842");
    */

    Collection contacts = jdbcAdminComponentDAO.getContacts("0B244855-6696-5A67-E044-0003BA8EB8F1");
    System.out.println(contacts.size());
}
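A pattern that recurs in several of the examples above is testing for emptiness with size() == 0 or size() < 1. A short sketch (not taken from any of the sources above) of the equivalent isEmpty() idiom, which states the intent directly and avoids a full element count on implementations whose size() is not O(1):

import java.util.ArrayList;
import java.util.Collection;

public class EmptyCheck {
    public static void main(String[] args) {
        Collection<String> items = new ArrayList<>();
        // the two checks below are equivalent for any Collection
        if (items.size() == 0) {
            System.out.println("empty (via size() == 0)");
        }
        if (items.isEmpty()) {
            System.out.println("empty (via isEmpty())");
        }
    }
}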