Example usage for java.util Collection size

List of usage examples for java.util Collection size

Introduction

On this page you can find example usages of java.util.Collection.size().

Prototype

int size();

Source Link

Document

Returns the number of elements in this collection.

Usage

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step6HITPreparator.java

/**
 * Step 6 (HIT preparation): reads every XML query container from the input
 * directory and renders HTML pages from the Mustache template
 * {@code template/template.html} into the output directory.
 *
 * <p>Documents are first grouped by {@code sentenceCount / 40}; within each
 * group at most {@code TOP_RESULTS_PER_GROUP} results (ordered by rank) are
 * rendered. Each rendered document is cut into pieces of at most 120
 * sentences, and every piece is written to a sub-folder named 40, 80 or 120
 * according to its length.
 *
 * @param args {@code args[0]} input directory with {@code *.xml} query
 *             containers, {@code args[1]} output directory
 * @throws Exception if reading, deserializing or rendering fails
 */
public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // step5-linguistic-annotation/
    System.err.println("Starting step 6 HIT Preparation");

    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (outputDir.exists()) {
        // NOTE: File.delete() only removes an empty directory; a non-empty one is kept as is
        outputDir.delete();
    }
    outputDir.mkdir();

    // Optional filter of container file names. Nothing is added to it in this
    // method, so every container is processed.
    List<String> queries = new ArrayList<>();

    // iterate over query containers
    int countClueWeb = 0;
    int countSentence = 0;
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        if (queries.contains(f.getName()) || queries.size() == 0) {
            // groups contain only non-empty documents
            Map<Integer, List<QueryResultContainer.SingleRankedResult>> groups = new HashMap<>();

            // split to groups according to number of sentences
            for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
                if (rankedResult.originalXmi != null) {
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);

                    int groupId = sentences.size() / 40;
                    groups.computeIfAbsent(groupId, key -> new ArrayList<>()).add(rankedResult);
                    countClueWeb++;
                } else {
                    // previously this report sat inside the non-null branch and could never fire
                    System.err.println("Empty document: " + rankedResult.clueWebID);
                }
            }

            // the group key is only needed for the split above; rendering works on the lists
            for (List<QueryResultContainer.SingleRankedResult> rankedResults : groups.values()) {
                // make sure the results are sorted
                Collections.sort(rankedResults, (o1, o2) -> o1.rank.compareTo(o2.rank));

                // iterate over results for one query and group
                for (int i = 0; i < rankedResults.size() && i < TOP_RESULTS_PER_GROUP; i++) {
                    QueryResultContainer.SingleRankedResult rankedResult = rankedResults.get(i);

                    int rank = rankedResult.rank;
                    MustacheFactory mf = new DefaultMustacheFactory();
                    Mustache mustache = mf.compile("template/template.html");
                    String queryId = queryResultContainer.qID;
                    String query = queryResultContainer.query;
                    // make the first letter uppercase (an empty query is left untouched)
                    if (!query.isEmpty()) {
                        query = query.substring(0, 1).toUpperCase() + query.substring(1);
                    }

                    List<String> relevantInformationExamples = queryResultContainer.relevantInformationExamples;
                    List<String> irrelevantInformationExamples = queryResultContainer.irrelevantInformationExamples;
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));

                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    List<generators.Sentence> sentences = new ArrayList<>();
                    // sentence indices at which a piece may end; always starts with 0
                    List<Integer> paragraphs = new ArrayList<>();
                    paragraphs.add(0);

                    for (WebParagraph webParagraph : JCasUtil.select(jCas, WebParagraph.class)) {
                        for (Sentence s : JCasUtil.selectCovered(Sentence.class, webParagraph)) {

                            String sentenceBegin = String.valueOf(s.getBegin());
                            generators.Sentence sentence = new generators.Sentence(s.getCoveredText(),
                                    sentenceBegin);
                            sentences.add(sentence);
                            countSentence++;
                        }
                        int sentenceId = paragraphs.get(paragraphs.size() - 1);
                        // once more than 120 sentences are collected, add extra
                        // boundaries in steps of 120 from the last boundary
                        if (sentences.size() > 120) {
                            while (sentenceId < sentences.size()) {
                                if (!paragraphs.contains(sentenceId)) {
                                    paragraphs.add(sentenceId);
                                }
                                sentenceId = sentenceId + 120;
                            }
                        }
                        paragraphs.add(sentences.size());

                    }
                    System.err.println("Output dir: " + outputDir);
                    int startID = 0;
                    int endID;

                    for (int j = 0; j < paragraphs.size(); j++) {

                        endID = paragraphs.get(j);
                        int sentLength = endID - startID;
                        if (sentLength > 120 || j == paragraphs.size() - 1) {
                            if (sentLength > 120) {
                                // too long: cut at the previous boundary and revisit this one
                                endID = paragraphs.get(j - 1);
                                j--;
                            }
                            sentLength = endID - startID;

                            // output folder is named after the length bucket of the piece
                            int lengthGroup;
                            if (sentLength <= 40) {
                                lengthGroup = 40;
                            } else if (sentLength <= 80) {
                                lengthGroup = 80;
                            } else {
                                lengthGroup = 120;
                            }

                            File folder = new File(outputDir + "/" + lengthGroup);
                            if (!folder.exists()) {
                                System.err.println("creating directory: " + outputDir + "/" + lengthGroup);
                                try {
                                    // report success only when the directory was really created
                                    if (folder.mkdir()) {
                                        System.out.println("DIR created");
                                    }
                                } catch (SecurityException se) {
                                    System.err.println(
                                            "Cannot create directory " + folder + ": " + se.getMessage());
                                }
                            }

                            String newHtmlFile = folder.getAbsolutePath() + "/" + f.getName() + "_"
                                    + rankedResult.clueWebID + "_" + sentLength + ".html";
                            System.err.println("Printing a file: " + newHtmlFile);
                            File newHTML = new File(newHtmlFile);
                            // never overwrite: append a running number until the name is free
                            int t = 0;
                            while (newHTML.exists()) {
                                newHTML = new File(folder.getAbsolutePath() + "/" + f.getName() + "_"
                                        + rankedResult.clueWebID + "_" + sentLength + "." + t + ".html");
                                t++;
                            }
                            // close the writer after rendering so the file handle is not leaked
                            try (PrintWriter writer = new PrintWriter(new FileWriter(newHTML))) {
                                mustache.execute(writer,
                                        new generators(query, relevantInformationExamples,
                                                irrelevantInformationExamples, sentences.subList(startID, endID),
                                                queryId, rank))
                                        .flush();
                            }
                            startID = endID;
                        }
                    }
                }
            }

        }
    }
    System.out.println("Printed " + countClueWeb + " documents with " + countSentence + " sentences");
}

From source file:com.tamingtext.classifier.bayes.ExtractTrainingData.java

/**
 * Command-line entry point: parses and validates the options, then calls
 * {@code extractTraininingData}. Writers are closed via {@code closeWriters()}
 * in all cases.
 *
 * @param args command-line arguments (see the option definitions below)
 * @throws IllegalArgumentException if an input path is invalid, the output
 *         directory is missing or cannot be created, or a field list is empty
 */
public static void main(String[] args) {

    log.info("Command-line arguments: " + Arrays.toString(args));

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("Lucene index directory containing input data").withShortName("d").create();

    Option categoryOpt = obuilder.withLongName("categories").withRequired(true)
            .withArgument(abuilder.withName("file").withMinimum(1).withMaximum(1).create())
            .withDescription("File containing a list of categories").withShortName("c").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Output directory").withShortName("o").create();

    Option categoryFieldsOpt = obuilder.withLongName("category-fields").withRequired(true)
            .withArgument(abuilder.withName("fields").withMinimum(1).withMaximum(1).create())
            .withDescription("Fields to match categories against (comma-delimited)").withShortName("cf")
            .create();

    Option textFieldsOpt = obuilder.withLongName("text-fields").withRequired(true)
            .withArgument(abuilder.withName("fields").withMinimum(1).withMaximum(1).create())
            .withDescription("Fields from which to extract training text (comma-delimited)").withShortName("tf")
            .create();

    Option useTermVectorsOpt = obuilder.withLongName("use-term-vectors").withDescription(
            "Extract term vectors containing preprocessed data " + "instead of unprocessed, stored text values")
            .withShortName("tv").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    // NOTE(review): helpOpt is not part of this group, so "--help" is probably
    // rejected by the parser before the hasOption(helpOpt) check - confirm.
    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(categoryOpt)
            .withOption(outputOpt).withOption(categoryFieldsOpt).withOption(textFieldsOpt)
            .withOption(useTermVectorsOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File inputDir = new File(cmdLine.getValue(inputOpt).toString());

        if (!inputDir.isDirectory()) {
            throw new IllegalArgumentException(inputDir + " does not exist or is not a directory");
        }

        File categoryFile = new File(cmdLine.getValue(categoryOpt).toString());

        if (!categoryFile.isFile()) {
            throw new IllegalArgumentException(categoryFile + " does not exist or is not a file");
        }

        // --output is declared optional but there is no default, so fail with
        // a clear message instead of a NullPointerException on the missing value
        if (!cmdLine.hasOption(outputOpt)) {
            throw new IllegalArgumentException("An output directory must be specified (--output)");
        }

        File outputDir = new File(cmdLine.getValue(outputOpt).toString());

        outputDir.mkdirs();

        if (!outputDir.isDirectory()) {
            throw new IllegalArgumentException(outputDir + " is not a directory or could not be created");
        }

        Collection<String> categoryFields = stringToList(cmdLine.getValue(categoryFieldsOpt).toString());

        if (categoryFields.size() < 1) {
            throw new IllegalArgumentException("At least one category field must be spcified.");
        }

        Collection<String> textFields = stringToList(cmdLine.getValue(textFieldsOpt).toString());

        // validate the text fields (this check used to re-test categoryFields)
        if (textFields.size() < 1) {
            throw new IllegalArgumentException("At least one text field must be spcified.");
        }

        boolean useTermVectors = cmdLine.hasOption(useTermVectorsOpt);

        extractTraininingData(inputDir, categoryFile, categoryFields, textFields, outputDir, useTermVectors);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (IOException e) {
        log.error("IOException", e);
    } finally {
        closeWriters();
    }
}

From source file:io.apiman.tools.i18n.TemplateScanner.java

/**
 * Scans a template directory for translation strings and writes them to
 * {@code scanner-messages.properties} in the i18n tool's target directory.
 * Any failed precondition prints a message and exits with status 1.
 *
 * @param args exactly one element: the path of the template directory
 * @throws IOException propagated from scanning or from writing the output
 */
public static void main(String[] args) throws IOException {
    final boolean hasSinglePath = args != null && args.length == 1;
    if (!hasSinglePath) {
        System.out.println("Template directory not provided (no path provided).");
        System.exit(1);
    }

    final File templates = new File(args[0]);
    if (!templates.isDirectory()) {
        System.out.println("Template directory not provided (provided path is not a directory).");
        System.exit(1);
    }

    // dash.html is used as the marker of a valid template directory
    final File marker = new File(templates, "dash.html");
    if (!marker.isFile()) {
        System.out.println("Template directory not provided (dash.html not found).");
        System.exit(1);
    }

    final File targetDir = new File(templates, "../../../../../../tools/i18n/target");
    if (!targetDir.isDirectory()) {
        System.out.println("Output directory not found: " + targetDir);
        System.exit(1);
    }

    // a stale result file must be removed before a new one is written
    final File messagesFile = new File(targetDir, "scanner-messages.properties");
    if (messagesFile.isFile()) {
        if (!messagesFile.delete()) {
            System.out.println("Couldn't delete the old messages.properties: " + messagesFile);
            System.exit(1);
        }
    }

    System.out.println("Starting scan.");
    System.out.println("Scanning template directory: " + templates.getAbsolutePath());

    final Collection<File> candidates = FileUtils.listFiles(templates, new String[] { "html", "include" },
            true);
    final TreeMap<String, String> discovered = new TreeMap<>();

    for (File candidate : candidates) {
        System.out.println("\tScanning file: " + candidate);
        scanFile(candidate, discovered);
    }

    outputMessages(discovered, messagesFile);

    final int fileCount = candidates.size();
    final int stringCount = discovered.size();
    System.out.println("Scan complete.  Scanned " + fileCount + " files and discovered " + stringCount
            + " translation strings.");
}

From source file:eu.annocultor.utils.OntologySubtractor.java

/**
 * Entry point of the ontology subtractor. With two or three arguments it
 * treats {@code args[0]} as the source directory and {@code args[1]} as the
 * destination directory, and subtracts the statements listed in the
 * "deleted" files. With any other argument count it prints the bundled
 * readme.
 *
 * @param args source directory, destination directory and an optional flag
 *             evaluated by {@code checkNoCopyOption}
 * @throws Exception propagated from the helper methods
 */
public static void main(String[] args) throws Exception {

    final boolean copyRequested = checkNoCopyOption(args);

    final boolean usableArgCount = args.length == 2 || args.length == 3;
    if (!usableArgCount) {
        // wrong usage: show the readme shipped on the classpath
        for (Object line : IOUtils.readLines(new AutoCloseInputStream(
                OntologySubtractor.class.getResourceAsStream("/subtractor/readme.txt")))) {
            System.out.println(line.toString());
        }
        return;
    }

    final File source = new File(args[0]);
    final File destination = new File(args[1]);

    checkSrcAndDstDirs(source, destination);

    final Collection<String> deletedStems = listNameStamsForFilesWithDeletedStatements(source);

    if (deletedStems.isEmpty()) {
        System.out.println(
                "Did not found any file *.*.*.deleted.rdf with statements to be deleted. Do nothing and exit.");
        return;
    }

    System.out.println("Found " + deletedStems.size() + " files with statements to be deleted");
    System.out.println("Copying all RDF files from " + source.getName() + " to " + destination.getName());

    if (copyRequested) {
        copyRdfFiles(source, destination);
    }

    sutractAll(source, destination, deletedStems);
}

From source file:com.evolveum.midpoint.testing.model.client.sample.Main.java

/**
 * Sample client run against the model web service: creates the port, reads
 * the configuration, users, roles, resources and tasks, then creates,
 * modifies, reconciles and finally deletes two sample users and one sample
 * role. Progress is printed to standard output. On any exception the stack
 * trace is printed and the JVM exits with status -1.
 *
 * @param args command-line arguments, passed on to {@code createModelPort}
 */
public static void main(String[] args) {
    try {

        ModelPortType modelPort = createModelPort(args);

        // --- read-only part: fetch and count existing objects ---
        SystemConfigurationType configurationType = getConfiguration(modelPort);
        System.out.println("Got system configuration");
        //         System.out.println(configurationType);

        UserType userAdministrator = searchUserByName(modelPort, "administrator");
        System.out.println("Got administrator user: " + userAdministrator.getOid());
        //         System.out.println(userAdministrator);

        RoleType sailorRole = searchRoleByName(modelPort, "Sailor");
        System.out.println("Got Sailor role");
        //         System.out.println(sailorRole);

        Collection<ResourceType> resources = listResources(modelPort);
        System.out.println("Resources (" + resources.size() + ")");
        //         dump(resources);

        Collection<UserType> users = listUsers(modelPort);
        System.out.println("Users (" + users.size() + ")");
        //            dump(users);

        Collection<TaskType> tasks = listTasks(modelPort);
        System.out.println("Tasks (" + tasks.size() + ")");
        //            dump(tasks);
        //            System.out.println("Next scheduled times: ");
        //            for (TaskType taskType : tasks) {
        //                System.out.println(" - " + getOrig(taskType.getName()) + ": " + taskType.getNextRunStartTimestamp());
        //            }

        // --- modifying part: create sample users and change them ---
        // the returned strings are the OIDs of the created objects and are
        // used to address those objects in all later calls
        String userGuybrushoid = createUserGuybrush(modelPort, sailorRole);
        System.out.println("Created user guybrush, OID: " + userGuybrushoid);

        UserType userGuybrush = getUser(modelPort, userGuybrushoid);
        System.out.println("Fetched user guybrush:");
        //         System.out.println(userGuybrush);
        System.out.println("Users fullName: " + ModelClientUtil.getOrig(userGuybrush.getFullName()));

        String userLeChuckOid = createUserFromSystemResource(modelPort, "user-lechuck.xml");
        System.out.println("Created user lechuck, OID: " + userLeChuckOid);

        changeUserPassword(modelPort, userGuybrushoid, "MIGHTYpirate");
        System.out.println("Changed user password");

        changeUserGivenName(modelPort, userLeChuckOid, "CHUCK");
        System.out.println("Changed user given name");

        // assign two roles, then take one of them away again
        assignRoles(modelPort, userGuybrushoid, ROLE_PIRATE_OID, ROLE_CAPTAIN_OID);
        System.out.println("Assigned roles");

        unAssignRoles(modelPort, userGuybrushoid, ROLE_CAPTAIN_OID);
        System.out.println("Unassigned roles");

        Collection<RoleType> roles = listRequestableRoles(modelPort);
        System.out.println("Found " + roles.size() + " requestable roles");
        //         System.out.println(roles);

        // --- role handling: create a role, assign it and modify its inducements ---
        String seaSuperuserRole = createRoleFromSystemResource(modelPort, "role-sea-superuser.xml");
        System.out.println("Created role Sea Superuser, OID: " + seaSuperuserRole);

        assignRoles(modelPort, userLeChuckOid, seaSuperuserRole);
        System.out.println("Assigned role Sea Superuser to LeChuck");

        modifyRoleModifyInducement(modelPort, seaSuperuserRole);
        System.out.println("Modified role Sea Superuser - modified resource inducement");

        modifyRoleReplaceInducement(modelPort, seaSuperuserRole, 2, ROLE_CAPTAIN_OID);
        System.out.println("Modified role Sea Superuser - changed role inducement");

        reconcileUser(modelPort, userLeChuckOid);
        System.out.println("LeChuck reconciled.");

        // --- cleanup ---
        // Comment out the following lines if you want to see what midPoint really did
        // ... because deleting the user will delete also all the traces (except logs and audit of course).
        deleteUser(modelPort, userGuybrushoid);
        deleteUser(modelPort, userLeChuckOid);
        deleteRole(modelPort, seaSuperuserRole);
        System.out.println("Deleted user(s)");

    } catch (Exception e) {
        // any failure aborts the whole sample run
        e.printStackTrace();
        System.exit(-1);
    }
}

From source file:br.com.autonomiccs.cloudTraces.main.CloudTracesSimulator.java

/**
 * Runs the cloud simulation for the trace file given as {@code args[0]}.
 * Virtual machines are loaded from the trace and grouped by execution time.
 * Simulated time then advances from the first trace time until two time
 * steps past the last one, applying load, destroying VMs and executing
 * management at every step. The cloud state with the highest memory
 * allocation seen at the start of a step is logged at the end.
 *
 * @param args command-line arguments; {@code args[0]} is the cloud traces
 *             file (checked by {@code validateInputFile})
 */
public static void main(String[] args) {
    validateInputFile(args);

    final String tracesFile = args[0];
    final Collection<VirtualMachine> vms = getAllVirtualMachinesFromCloudTraces(tracesFile);
    logger.info(String.format("#VirtualMachines [%d] found on [%s].", vms.size(), tracesFile));

    final Map<Integer, List<VirtualMachine>> vmsByTime = createMapVirtualMachinesTaskExecutionByTime(vms);
    logger.info(String.format("#Times [%d] that have tasks being executed by VMs ", vmsByTime.size()));

    final Cloud cloud = createCloudEnvirtonmentToStartsimulation();
    logger.info("Cloud configuration: " + cloud);

    // ascending list of all times at which some task runs
    final List<Integer> sortedTimes = new ArrayList<>(vmsByTime.keySet());
    Collections.sort(sortedTimes);

    final Integer firstTime = sortedTimes.get(0);
    final Integer lastTime = sortedTimes.get(sortedTimes.size() - 1);

    logger.info("First time: " + firstTime);
    logger.info("Last time: " + lastTime);

    final double step = getTimeUnitPerLoopIteration(firstTime, lastTime);
    logger.info("The time unit converted to trace time: " + step);

    long peakMemory = Long.MIN_VALUE;
    String peakCloudState = "";

    // run two extra steps past the last trace time
    for (double now = firstTime; now < lastTime + 2 * step; now += step) {
        logger.debug("Current time of iteration: " + now);

        // remember the cloud state at the highest memory allocation seen so far
        if (cloud.getMemoryAllocatedInBytes() > peakMemory) {
            peakMemory = cloud.getMemoryAllocatedInBytes();
            peakCloudState = cloud.toString();
        }

        applyLoadOnCloudForCurrentTime(vmsByTime, cloud, now);
        destroyVirtualMachinesIfNeeded(cloud, now);

        logger.info(String.format("Time [%.3f], cloud state [%s] ", now, cloud));

        executeManagement(cloud, now);
        logClustersConfigurationsAndStdAtTime(cloud.getClusters(), now);
    }

    logger.info("Cloud configuration after simulation: " + cloud);
    logger.info("Cloud highestResourceUsage: " + peakCloudState);
}

From source file:edu.umn.msi.tropix.proteomics.tools.DTAToMzXML.java

/**
 * Converts a set of DTA files into a single mzXML file. The output file is
 * named after the first input file, cut at its first dot, with the
 * extension {@code .mzXML}.
 *
 * <p>Supported invocations:
 * <ul>
 *   <li>{@code -files f1 [f2 ...]} converts the listed files</li>
 *   <li>{@code -directory [dir]} converts all {@code *.dta} files directly
 *       inside {@code dir}, or inside the working directory when no
 *       directory is given</li>
 * </ul>
 *
 * @param args command-line arguments as described above
 * @throws Exception if reading, converting or serializing fails
 */
public static void main(final String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        System.exit(0);
    }
    Collection<File> files = null;

    if (args[0].equals("-files")) {
        if (args.length < 2) {
            out.println("No files specified.");
            usage();
            exit(-1);
        } else {
            files = new ArrayList<File>(args.length - 1);
            for (int i = 1; i < args.length; i++) {
                files.add(new File(args[i]));
            }
        }
    } else if (args[0].equals("-directory")) {
        File directory;
        if (args.length < 2) {
            directory = new File(System.getProperty("user.dir"));
        } else {
            // the directory follows the switch directly; reading args[2] here
            // failed with ArrayIndexOutOfBoundsException for "-directory <dir>"
            directory = new File(args[1]);
        }
        files = FileUtilsFactory.getInstance().listFiles(directory, new String[] { "dta" }, false);
    } else {
        usage();
        exit(-1);
    }

    final InMemoryDTAListImpl dtaList = new InMemoryDTAListImpl();
    File firstFile = null;
    if (files.size() == 0) {
        out.println("No files found.");
        exit(-1);
    } else {
        firstFile = files.iterator().next();
    }
    for (final File file : files) {
        dtaList.add(FileUtils.readFileToByteArray(file), file.getName());
    }

    final DTAToMzXMLConverter dtaToMzXMLConverter = new DTAToMzXMLConverterImpl();
    final MzXML mzxml = dtaToMzXMLConverter.dtaToMzXML(dtaList, null);

    // cut the name at its first dot; a name without any dot is used unchanged
    final String firstName = firstFile.getName();
    final int firstDot = firstName.indexOf(".");
    final String baseName = firstDot < 0 ? firstName : firstName.substring(0, firstDot);
    final String mzxmlName = baseName + ".mzXML";
    new MzXMLUtility().serialize(mzxml, mzxmlName);
}

From source file:de.unileipzig.ub.indexer.App.java

public static void main(String[] args) throws IOException {

    // create Options object
    Options options = new Options();

    options.addOption("h", "help", false, "display this help");

    options.addOption("f", "filename", true, "name of the JSON file whose content should be indexed");
    options.addOption("i", "index", true, "the name of the target index");
    options.addOption("d", "doctype", true, "the name of the doctype (title, local, ...)");

    options.addOption("t", "host", true, "elasticsearch hostname (default: 0.0.0.0)");
    options.addOption("p", "port", true, "transport port (that's NOT the http port, default: 9300)");
    options.addOption("c", "cluster", true, "cluster name (default: elasticsearch_mdma)");

    options.addOption("b", "bulksize", true, "number of docs sent in one request (default: 3000)");
    options.addOption("v", "verbose", false, "show processing speed while indexing");
    options.addOption("s", "status", false, "only show status of index for file");

    options.addOption("r", "repair", false, "attempt to repair recoverable inconsistencies on the go");
    options.addOption("e", "debug", false, "set logging level to debug");
    options.addOption("l", "logfile", true, "logfile - in not specified only log to stdout");

    options.addOption("m", "memcached", true, "host and port of memcached (default: localhost:11211)");
    options.addOption("z", "latest-flag-on", true,
            "enable latest flag according to field (within content, e.g. 001)");
    options.addOption("a", "flat", false, "flat-mode: do not check for inconsistencies");

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;//from  w w  w  . ja  va2 s.  c  o  m

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException ex) {
        logger.error(ex);
        System.exit(1);
    }

    // setup logging
    Properties systemProperties = System.getProperties();
    systemProperties.put("net.spy.log.LoggerImpl", "net.spy.memcached.compat.log.Log4JLogger");
    System.setProperties(systemProperties);
    Logger.getLogger("net.spy.memcached").setLevel(Level.ERROR);

    Properties props = new Properties();
    props.load(props.getClass().getResourceAsStream("/log4j.properties"));

    if (cmd.hasOption("debug")) {
        props.setProperty("log4j.logger.de.unileipzig", "DEBUG");
    }

    if (cmd.hasOption("logfile")) {
        props.setProperty("log4j.rootLogger", "INFO, stdout, F");
        props.setProperty("log4j.appender.F", "org.apache.log4j.FileAppender");
        props.setProperty("log4j.appender.F.File", cmd.getOptionValue("logfile"));
        props.setProperty("log4j.appender.F.layout", "org.apache.log4j.PatternLayout");
        props.setProperty("log4j.appender.F.layout.ConversionPattern", "%5p | %d | %F | %L | %m%n");
    }

    PropertyConfigurator.configure(props);

    InetAddress addr = InetAddress.getLocalHost();
    String memcachedHostAndPort = addr.getHostAddress() + ":11211";
    if (cmd.hasOption("m")) {
        memcachedHostAndPort = cmd.getOptionValue("m");
    }

    // setup caching
    try {
        if (memcachedClient == null) {
            memcachedClient = new MemcachedClient(
                    new ConnectionFactoryBuilder().setFailureMode(FailureMode.Cancel).build(),
                    AddrUtil.getAddresses("0.0.0.0:11211"));
            try {
                // give client and server 500ms
                Thread.sleep(300);
            } catch (InterruptedException ex) {
            }

            Collection availableServers = memcachedClient.getAvailableServers();
            logger.info(availableServers);
            if (availableServers.size() == 0) {
                logger.info("no memcached servers found");
                memcachedClient.shutdown();
                memcachedClient = null;
            } else {
                logger.info(availableServers.size() + " memcached server(s) detected, fine.");
            }
        }
    } catch (IOException ex) {
        logger.warn("couldn't create a connection, bailing out: " + ex.getMessage());
    }

    // process options

    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("indexer", options, true);
        quit(0);
    }

    boolean verbose = false;
    if (cmd.hasOption("verbose")) {
        verbose = true;
    }

    // ES options
    String[] hosts = new String[] { "0.0.0.0" };
    int port = 9300;
    String clusterName = "elasticsearch_mdma";
    int bulkSize = 3000;

    if (cmd.hasOption("host")) {
        hosts = cmd.getOptionValues("host");
    }
    if (cmd.hasOption("port")) {
        port = Integer.parseInt(cmd.getOptionValue("port"));
    }
    if (cmd.hasOption("cluster")) {
        clusterName = cmd.getOptionValue("cluster");
    }
    if (cmd.hasOption("bulksize")) {
        bulkSize = Integer.parseInt(cmd.getOptionValue("bulksize"));
        if (bulkSize < 1 || bulkSize > 100000) {
            logger.error("bulksize must be between 1 and 100,000");
            quit(1);
        }
    }

    // ES Client
    final Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", "elasticsearch_mdma")
            .build();
    final TransportClient client = new TransportClient(settings);
    for (String host : hosts) {
        client.addTransportAddress(new InetSocketTransportAddress(host, port));
    }

    if (cmd.hasOption("filename") && cmd.hasOption("index") && cmd.hasOption("doctype")) {

        final String filename = cmd.getOptionValue("filename");

        final File _file = new File(filename);
        if (_file.length() == 0) {
            logger.info(_file.getAbsolutePath() + " is empty, skipping");
            quit(0); // file is empty
        }

        // for flat mode: leave a stampfile beside the json to 
        // indicate previous successful processing
        File directory = new File(filename).getParentFile();
        File stampfile = new File(directory, DigestUtils.shaHex(filename) + ".indexed");

        long start = System.currentTimeMillis();
        long lineCount = 0;

        final String indexName = cmd.getOptionValue("index");
        final String docType = cmd.getOptionValue("doctype");
        BulkRequestBuilder bulkRequest = client.prepareBulk();

        try {
            if (cmd.hasOption("flat")) {
                // flat mode
                // .........
                if (stampfile.exists()) {
                    logger.info("SKIPPING, since it seems this file has already " + "been imported (found: "
                            + stampfile.getAbsolutePath() + ")");
                    quit(0);
                }
            } else {

                final String srcSHA1 = extractSrcSHA1(filename);

                logger.debug(filename + " srcsha1: " + srcSHA1);

                long docsInIndex = getIndexedRecordCount(client, indexName, srcSHA1);
                logger.debug(filename + " indexed: " + docsInIndex);

                long docsInFile = getLineCount(filename);
                logger.debug(filename + " lines: " + docsInFile);

                // in non-flat-mode, indexing would take care
                // of inconsistencies
                if (docsInIndex == docsInFile) {
                    logger.info("UP-TO DATE: " + filename + " (" + docsInIndex + ", " + srcSHA1 + ")");
                    client.close();
                    quit(0);
                }

                if (docsInIndex > 0) {
                    logger.warn("INCONSISTENCY DETECTED: " + filename + ": indexed:" + docsInIndex + " lines:"
                            + docsInFile);

                    if (!cmd.hasOption("r")) {
                        logger.warn(
                                "Please re-run indexer with --repair flag or delete residues first with: $ curl -XDELETE "
                                        + hosts[0] + ":9200/" + indexName
                                        + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1
                                        + "\" }}'");
                        client.close();
                        quit(1);
                    } else {
                        logger.info("Attempting to clear residues...");
                        // attempt to repair once
                        DeleteByQueryResponse dbqr = client.prepareDeleteByQuery(indexName)
                                .setQuery(termQuery("meta.srcsha1", srcSHA1)).execute().actionGet();

                        Iterator<IndexDeleteByQueryResponse> it = dbqr.iterator();
                        long deletions = 0;
                        while (it.hasNext()) {
                            IndexDeleteByQueryResponse response = it.next();
                            deletions += 1;
                        }
                        logger.info("Deleted residues of " + filename);
                        logger.info("Refreshing [" + indexName + "]");
                        RefreshResponse refreshResponse = client.admin().indices()
                                .refresh(new RefreshRequest(indexName)).actionGet();

                        long indexedAfterDelete = getIndexedRecordCount(client, indexName, srcSHA1);
                        logger.info(indexedAfterDelete + " docs remained");
                        if (indexedAfterDelete > 0) {
                            logger.warn("Not all residues cleaned. Try to fix this manually: $ curl -XDELETE "
                                    + hosts[0] + ":9200/" + indexName
                                    + "/_query -d ' {\"term\" : { \"meta.srcsha1\" : \"" + srcSHA1 + "\" }}'");
                            quit(1);
                        } else {
                            logger.info("Residues are gone. Now trying to reindex: " + filename);
                        }
                    }
                }
            }

            logger.info("INDEXING-REQUIRED: " + filename);
            if (cmd.hasOption("status")) {
                quit(0);
            }

            HashSet idsInBatch = new HashSet();

            String idField = null;
            if (cmd.hasOption("z")) {
                idField = cmd.getOptionValue("z");
            }

            final FileReader fr = new FileReader(filename);
            final BufferedReader br = new BufferedReader(fr);

            String line;
            // one line is one document
            while ((line = br.readLine()) != null) {

                // "Latest-Flag" machine
                // This gets obsolete with a "flat" index
                if (cmd.hasOption("z")) {
                    // flag that indicates, whether the document
                    // about to be indexed will be the latest
                    boolean willBeLatest = true;

                    // check if there is a previous (lower meta.timestamp) document with 
                    // the same identifier (whatever that may be - queried under "content")
                    final String contentIdentifier = getContentIdentifier(line, idField);
                    idsInBatch.add(contentIdentifier);

                    // assumed in meta.timestamp
                    final Long timestamp = Long.parseLong(getTimestamp(line));

                    logger.debug("Checking whether record is latest (line: " + lineCount + ")");
                    logger.debug(contentIdentifier + ", " + timestamp);

                    // get all docs, which match the contentIdentifier
                    // by filter, which doesn't score
                    final TermFilterBuilder idFilter = new TermFilterBuilder("content." + idField,
                            contentIdentifier);
                    final TermFilterBuilder kindFilter = new TermFilterBuilder("meta.kind", docType);
                    final AndFilterBuilder afb = new AndFilterBuilder();
                    afb.add(idFilter).add(kindFilter);
                    final FilteredQueryBuilder fb = filteredQuery(matchAllQuery(), afb);

                    final SearchResponse searchResponse = client.prepareSearch(indexName)
                            .setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(fb).setFrom(0)
                            .setSize(1200) // 3 years and 105 days assuming daily updates at the most
                            .setExplain(false).execute().actionGet();

                    final SearchHits searchHits = searchResponse.getHits();

                    logger.debug("docs with this id in the index: " + searchHits.getTotalHits());

                    for (final SearchHit hit : searchHits.getHits()) {
                        final String docId = hit.id();
                        final Map<String, Object> source = hit.sourceAsMap();
                        final Map meta = (Map) source.get("meta");
                        final Long docTimestamp = Long.parseLong(meta.get("timestamp").toString());
                        // if the indexed doc timestamp is lower the the current one, 
                        // remove any latest flag
                        if (timestamp >= docTimestamp) {
                            source.remove("latest");
                            final ObjectMapper mapper = new ObjectMapper();
                            // put the updated doc back
                            // IndexResponse response = 
                            client.prepareIndex(indexName, docType).setCreate(false).setId(docId)
                                    .setSource(mapper.writeValueAsBytes(source))
                                    .execute(new ActionListener<IndexResponse>() {
                                        public void onResponse(IndexResponse rspns) {
                                            logger.debug("Removed latest flag from " + contentIdentifier + ", "
                                                    + docTimestamp + ", " + hit.id() + " since (" + timestamp
                                                    + " > " + docTimestamp + ")");
                                        }

                                        public void onFailure(Throwable thrwbl) {
                                            logger.error("Could not remove flag from " + hit.id() + ", "
                                                    + contentIdentifier);
                                        }
                                    });
                            // .execute()
                            //.actionGet();
                        } else {
                            logger.debug("Doc " + hit.id() + " is newer (" + docTimestamp + ")");
                            willBeLatest = false;
                        }
                    }

                    if (willBeLatest) {
                        line = setLatestFlag(line);
                        logger.info("Setting latest flag on " + contentIdentifier + ", " + timestamp);
                    }

                    // end of latest-flag machine
                    // beware - this will be correct as long as there
                    // are no dups within one bulk!
                }

                bulkRequest.add(client.prepareIndex(indexName, docType).setSource(line));
                lineCount++;
                logger.debug("Added line " + lineCount + " to BULK");
                logger.debug(line);

                if (lineCount % bulkSize == 0) {

                    if (idsInBatch.size() != bulkSize && cmd.hasOption("z")) {
                        logger.error(
                                "This batch has duplications in the ID. That's not bad for the index, just makes the latest flag fuzzy");
                        logger.error(
                                "Bulk size was: " + bulkSize + ", but " + idsInBatch.size() + " IDs (only)");
                    }
                    idsInBatch.clear();

                    logger.debug("Issuing BULK request");

                    final long actionCount = bulkRequest.numberOfActions();
                    final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                    final long tookInMillis = bulkResponse.getTookInMillis();

                    if (bulkResponse.hasFailures()) {
                        logger.fatal("FAILED, bulk not indexed. exiting now.");
                        Iterator<BulkItemResponse> it = bulkResponse.iterator();
                        while (it.hasNext()) {
                            BulkItemResponse bir = it.next();
                            if (bir.isFailed()) {
                                Failure failure = bir.getFailure();
                                logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                        + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                            }
                        }
                        quit(1);
                    } else {
                        if (verbose) {
                            final double elapsed = System.currentTimeMillis() - start;
                            final double speed = (lineCount / elapsed * 1000);
                            logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount
                                    + "/" + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                        }
                    }
                    bulkRequest = client.prepareBulk();
                }
            }

            // handle the remaining items
            final long actionCount = bulkRequest.numberOfActions();
            if (actionCount > 0) {
                final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
                final long tookInMillis = bulkResponse.getTookInMillis();

                if (bulkResponse.hasFailures()) {
                    logger.fatal("FAILED, bulk not indexed. exiting now.");
                    Iterator<BulkItemResponse> it = bulkResponse.iterator();
                    while (it.hasNext()) {
                        BulkItemResponse bir = it.next();
                        if (bir.isFailed()) {
                            Failure failure = bir.getFailure();
                            logger.fatal("id: " + failure.getId() + ", message: " + failure.getMessage()
                                    + ", type: " + failure.getType() + ", index: " + failure.getIndex());
                        }
                    }
                    quit(1);
                } else {

                    // trigger update now
                    RefreshResponse refreshResponse = client.admin().indices()
                            .refresh(new RefreshRequest(indexName)).actionGet();

                    if (verbose) {
                        final double elapsed = System.currentTimeMillis() - start;
                        final double speed = (lineCount / elapsed * 1000);
                        logger.info("OK (" + filename + ") " + lineCount + " docs indexed (" + actionCount + "/"
                                + tookInMillis + "ms" + "/" + String.format("%.2f", speed) + "r/s)");
                    }

                }

            }

            br.close();
            client.close();
            final double elapsed = (System.currentTimeMillis() - start) / 1000;
            final double speed = (lineCount / elapsed);
            logger.info("indexing (" + filename + ") " + lineCount + " docs took " + elapsed + "s (speed: "
                    + String.format("%.2f", speed) + "r/s)");
            if (cmd.hasOption("flat")) {
                try {
                    FileUtils.touch(stampfile);
                } catch (IOException ioe) {
                    logger.warn(".indexed files not created. Will reindex everything everytime.");
                }
            }
        } catch (IOException e) {
            client.close();
            logger.error(e);
            quit(1);
        } finally {
            client.close();
        }
    }
    quit(0);
}

From source file:kindleclippings.quizlet.QuizletSync.java

/**
 * Synchronises the Kindle clippings file with the user's Quizlet library.
 *
 * <p>Clippings are read grouped per book. For a book that has no Quizlet set with the same
 * title, a new set is created (only when the book has at least two clippings). For a book whose
 * set already exists, only those clippings for which {@code checkExistingTerm} reports no match
 * are added as new terms. Progress is shown in a Swing {@code ProgressMonitor}, and a summary
 * dialog is displayed at the end.
 *
 * <p>When the sync runs to completion the JVM is terminated with exit status 0. The two early
 * returns (no clippings map, or an empty one) leave the method without calling
 * {@code System.exit}.
 *
 * @param args command line arguments; not used
 * @throws IOException if listing the Quizlet sets fails for a reason other than a "401"
 *         response, if the retry after a "401" fails, or if another called helper raises it
 * @throws JSONException propagated from the helpers called here
 * @throws URISyntaxException propagated from the helpers called here
 * @throws InterruptedException propagated from the helpers called here
 * @throws BackingStoreException propagated from the helpers called here
 */
public static void main(String[] args)
        throws IOException, JSONException, URISyntaxException, InterruptedException, BackingStoreException {

    ProgressMonitor progress = new ProgressMonitor(null, "QuizletSync", "loading Kindle clippings file", 0,
            100);
    // show the progress window immediately instead of waiting for the default delays
    progress.setMillisToPopup(0);
    progress.setMillisToDecideToPopup(0);
    progress.setProgress(0);
    try {

        Map<String, List<Clipping>> books = readClippingsFile();

        if (books == null)
            return;

        if (books.isEmpty()) {
            // The 4th argument of showMessageDialog is a message type, not an option constant.
            // OK_OPTION has the same value (0) as ERROR_MESSAGE and would show an error icon.
            JOptionPane.showMessageDialog(null, "no clippings to be uploaded", "QuizletSync",
                    JOptionPane.INFORMATION_MESSAGE);
            return;
        }
        progress.setNote("checking Quizlet account");
        progress.setProgress(5);

        Preferences prefs = getPrefs();

        QuizletAPI api = new QuizletAPI(prefs.get("access_token", null));

        Collection<TermSet> sets = null;
        try {
            progress.setNote("checking Quizlet library");
            progress.setProgress(10);
            sets = api.getSets(prefs.get("user_id", null));
        } catch (IOException e) {
            if (e.toString().contains("401")) {
                // Not Authorized => Token has been revoked.
                // Drop the stored preferences, fetch them again and retry exactly once;
                // a failure of the retry propagates to the caller.
                clearPrefs();
                prefs = getPrefs();
                api = new QuizletAPI(prefs.get("access_token", null));
                sets = api.getSets(prefs.get("user_id", null));
            } else {
                throw e;
            }
        }

        // the first 15 progress units are the setup steps above; each book adds one more
        progress.setProgress(15);
        progress.setMaximum(15 + books.size());
        progress.setNote("uploading new notes");

        // index the existing sets by title so each book can be matched by name
        Map<String, TermSet> indexedSets = new HashMap<String, TermSet>(sets.size());

        for (TermSet t : sets) {
            indexedSets.put(t.getTitle(), t);
        }

        int pro = 15;
        int createdSets = 0;
        int createdTerms = 0;
        int updatedTerms = 0;
        for (List<Clipping> c : books.values()) {

            String book = c.get(0).getBook();
            progress.setNote(book);
            progress.setProgress(pro++);

            TermSet termSet = indexedSets.get(book);
            if (termSet == null) {
                // no set with this title yet: create one, unless there are too few notes
                if (c.size() < 2) {
                    System.err.println("ignored [" + book + "] (need at least two notes)");
                    continue;
                }

                addSet(api, book, c);
                createdSets++;
                createdTerms += c.size();
                continue;
            }
            // compare against existing terms
            for (Clipping cl : c) {
                if (!checkExistingTerm(cl, termSet)) {
                    addTerm(api, termSet, cl);
                    updatedTerms++;
                }
            }
        }
        progress.setProgress(pro);

        // summary dialogs are informational, so use INFORMATION_MESSAGE as the message type
        if (createdSets == 0 && updatedTerms == 0) {
            JOptionPane.showMessageDialog(null, "Done.\nNo new data was uploaded", "QuizletSync",
                    JOptionPane.INFORMATION_MESSAGE);
        } else if (createdSets > 0) {
            JOptionPane.showMessageDialog(null,
                    String.format(
                            "Done.\nCreated %d new sets with %d cards, and added %d cards to existing sets",
                            createdSets, createdTerms, updatedTerms),
                    "QuizletSync", JOptionPane.INFORMATION_MESSAGE);
        } else {
            JOptionPane.showMessageDialog(null,
                    String.format("Done.\nAdded %d cards to existing sets", updatedTerms), "QuizletSync",
                    JOptionPane.INFORMATION_MESSAGE);
        }
    } finally {
        // always dismiss the progress window, also on early return or exception
        progress.close();
    }

    System.exit(0);
}

From source file:gov.nih.nci.ncicb.cadsr.common.persistence.dao.jdbc.JDBCAdminComponentDAO.java

/**
 * Manual smoke test: builds a {@code JDBCAdminComponentDAO} on top of a
 * {@code SimpleServiceLocator}, looks up the contacts for one hard-coded id and prints how
 * many were returned.
 *
 * @param args command line arguments; not used
 */
public static void main(String[] args) {
    ServiceLocator serviceLocator = new SimpleServiceLocator();
    JDBCAdminComponentDAO dao = new JDBCAdminComponentDAO(serviceLocator);

    /*
     * Disabled manual checks, kept for reference:
     *
     * int res = dao.assignClassification(
     *   "99BA9DC8-2357-4E69-E034-080020C9C0E0",
     *   "29A8FB30-0AB1-11D6-A42F-0010A4C1E842"); // acId, csCsiId
     * System.out.println ("res = " + res);
     *
     * int deleteRes = dao.removeClassification
     *   ("D66B85B6-4EDA-469B-E034-0003BA0B1A09");
     * System.out.println ("deleteRes = " + deleteRes);
     * Collection csito = dao.retrieveClassifications(
     *   "29A8FB19-0AB1-11D6-A42F-0010A4C1E842");
     */

    // only the number of contacts is of interest here
    int contactCount = dao.getContacts("0B244855-6696-5A67-E044-0003BA8EB8F1").size();
    System.out.println(contactCount);
}