Example usage for java.util HashSet add

List of usage examples for java.util HashSet add

Introduction

In this page you can find the example usage for java.util HashSet add.

Prototype

public boolean add(E e) 

Source Link

Document

Adds the specified element to this set if it is not already present.

Usage

From source file:com.yahoo.ycsb.db.AsyncHBaseClient.java

public static void main(String[] args) {
    if (args.length != 4) {
        System.out.println("usage: ahc zkquorum zkpath threadcount operation_count");
        System.exit(0);//from   w  w  w.j  a v a2  s  .c  om
    }

    final int keyspace = 10000; //120000000;
    final String zkQuorum = args[0];
    final String zkPath = args[1];
    final int threadcount = Integer.parseInt(args[2]);
    final int opcount = Integer.parseInt(args[3]) / threadcount;

    Vector<Thread> allthreads = new Vector<Thread>();

    for (int i = 0; i < threadcount; i++) {
        Thread t = new Thread() {
            public void run() {
                try {
                    Random random = new Random();

                    AsyncHBaseClient cli = new AsyncHBaseClient();

                    Properties props = new Properties();
                    props.setProperty("columnfamily", "f1");
                    props.setProperty("zkquorum", zkQuorum);
                    props.setProperty("zkpath", zkPath);
                    props.setProperty("debug", "true");
                    cli.setProperties(props);

                    cli.init();

                    HashMap<String, ByteIterator> result = Maps.newHashMap();

                    long accum = 0;

                    for (int i = 0; i < opcount; i++) {
                        int keynum = random.nextInt(keyspace);
                        String key = "user" + keynum;
                        long st = System.currentTimeMillis();
                        int rescode;

                        HashMap<String, ByteIterator> hm = Maps.newHashMap();
                        hm.put("field1", new ByteArrayByteIterator("value1".getBytes("UTF-8")));
                        hm.put("field2", new ByteArrayByteIterator("value2".getBytes("UTF-8")));
                        hm.put("field3", new ByteArrayByteIterator("value3".getBytes("UTF-8")));
                        hm.put("efield", new ByteArrayByteIterator(HBaseClient.EMPTY_ARRAY));
                        rescode = cli.insert("bench", key, hm);
                        HashSet<String> s = Sets.newHashSet();
                        s.add("field1");
                        s.add("field2");

                        rescode = cli.read("bench", key, s, result);
                        rescode = cli.delete("bench", key);
                        rescode = cli.read("bench", key, s, result);

                        HashSet<String> scanFields = Sets.newHashSet();
                        scanFields.add("field1");
                        scanFields.add("field3");
                        Vector<HashMap<String, ByteIterator>> scanResults = new Vector<HashMap<String, ByteIterator>>();
                        rescode = cli.scan("bench", "user2", 20, null, scanResults);

                        long en = System.currentTimeMillis();

                        accum += (en - st);

                        if (rescode != Ok) {
                            System.out.println("Error " + rescode + " for " + key);
                        }

                        if (i % 10 == 0) {
                            System.out.println(
                                    i + " operations, average latency: " + (((double) accum) / ((double) i)));
                        }
                    }

                    System.out.println(new ToStringBuilder(cli._client.stats(), ToStringStyle.MULTI_LINE_STYLE)
                            .toString());
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        };
        allthreads.add(t);
    }

    long st = System.currentTimeMillis();
    for (Thread t : allthreads) {
        t.start();
    }

    for (Thread t : allthreads) {
        try {
            t.join();
        } catch (InterruptedException e) {
        }
    }
    long en = System.currentTimeMillis();

    System.out.println("Throughput: " + ((1000.0) * (((double) (opcount * threadcount)) / ((double) (en - st))))
            + " ops/sec");

}

From source file:edu.umass.cs.reconfiguration.deprecated.ReconfigurableClient.java

/**
 * Simple test client for the reconfiguration package. Clients only know the
 * set of all reconfigurators, not active replicas for any name. All
 * information about active replicas for a name is obtained from
 * reconfigurators. Any request can be sent to any reconfigurator and it
 * will forward to the appropriate reconfigurator if necessary and relay
 * back the response.//  ww  w.  j  a v  a  2s .  c  o  m
 * 
 * @param args
 */
public static void main(String[] args) {
    ReconfigurableClient client = null;
    try {
        /*
         * Client can only send/receive clear text or do server-only
         * authentication
         */
        JSONMessenger<?> messenger = new JSONMessenger<String>((new MessageNIOTransport<String, JSONObject>(
                null, null, new PacketDemultiplexerDefault(), true, ReconfigurationConfig.getClientSSLMode())));
        client = new ReconfigurableClient(ReconfigurationConfig.getReconfiguratorAddresses(), messenger);
        int numRequests = 2;
        String requestValuePrefix = "request_value";
        long nameReqInterArrivalTime = 200;
        long NCReqInterArrivalTime = 1000;
        String initValue = "initial_value";
        int numIterations = 10000;
        boolean testReconfigureRC = true;

        for (int j = 0; j < numIterations; j++) {
            String namePrefix = "name" + (int) (Math.random() * Integer.MAX_VALUE);
            String reconfiguratorID = "RC" + (int) (Math.random() * 64000);
            long t0 = System.currentTimeMillis();

            // /////////////request active replicas////////////////////
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForFailure(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);

            // active replicas for name initially don't exist
            assert (client.getActiveReplicas() == null || client.getActiveReplicas().isEmpty());
            // ////////////////////////////////////////////////////////

            // ////////////////////create name/////////////////////////
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeCreateNameRequest(namePrefix, initValue));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("createName", t0);
            // ////////////////////////////////////////////////////////

            /*
             * Verify that active replicas for name now exist. The only
             * reason the query is repeated is because it is possible to
             * find the name non-existent briefly if the query is sent to a
             * different reconfigurator that hasn't yet caught up with the
             * creation (but will eventually do so).
             */
            // ////////////////////////////////////////////////////////
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);

            assert (client.getActiveReplicas() != null && !client.getActiveReplicas().isEmpty());
            // ////////////////////////////////////////////////////////

            // ///////send a stream of app requests sequentially///////
            for (int i = 0; i < numRequests; i++) {
                t0 = System.currentTimeMillis();
                do
                    client.sendRequest(client.makeRequest(namePrefix, requestValuePrefix + i));
                while (!client.rcvdAppReply(namePrefix));
                DelayProfiler.updateDelay("appPaxosRequest", t0);
                Thread.sleep(nameReqInterArrivalTime);
            }
            // ////////////////////////////////////////////////////////

            // ////////////////////////////////////////////////////////
            // request current active replicas (possibly reconfigured)
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);
            // ////////////////////////////////////////////////////////

            // ///////////////delete name, retransmit if error////////////
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeDeleteNameRequest(namePrefix));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("deleteName", t0);

            Thread.sleep(nameReqInterArrivalTime);
            // ////////////////////////////////////////////////////////

            // ////////////////////////////////////////////////////////
            // verify that active replicas for name now don't exist. The
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForFailure(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);

            assert (client.getActiveReplicas() == null || client.getActiveReplicas().isEmpty());
            // ////////////////////////////////////////////////////////

            if (!testReconfigureRC)
                continue;

            // ////////////////////////////////////////////////////////
            // add RC node; the port below does not matter in this test
            t0 = System.currentTimeMillis();
            // do
            client.sendRequest(new ReconfigureRCNodeConfig<String>(null, reconfiguratorID,
                    new InetSocketAddress(InetAddress.getByName("localhost"), TEST_PORT)));
            while (!client
                    .waitForReconfigureRCSuccess(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString()))
                ;
            DelayProfiler.updateDelay("addReconfigurator", t0);
            // ////////////////////////////////////////////////////////

            Thread.sleep(NCReqInterArrivalTime);

            // //////////////// delete just added RC node//////////////////
            HashSet<String> deleted = new HashSet<String>();
            deleted.add(reconfiguratorID);
            t0 = System.currentTimeMillis();
            // do
            client.sendRequest(new ReconfigureRCNodeConfig<String>(null, null, deleted));
            while (!client
                    .waitForReconfigureRCSuccess(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString())) {
            }
            DelayProfiler.updateDelay("removeReconfigurator", t0);
            // ////////////////////////////////////////////////////////

            Thread.sleep(NCReqInterArrivalTime);

            client.log.info("\n\n\n\n==================Successfully completed iteration " + j + ":\n"
                    + DelayProfiler.getStats() + "\n\n\n\n");
        }

        // client.messenger.stop();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    } catch (JSONException je) {
        je.printStackTrace();
    } catch (InterruptedException ie) {
        ie.printStackTrace();
    } catch (RequestParseException e) {
        e.printStackTrace();
    }
}

From source file:eu.fbk.dkm.sectionextractor.WikipediaSectionTitlesExtractor.java

public static void main(String args[]) throws IOException {

    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("wikipedia xml dump file").isRequired().withLongOpt("wikipedia-dump").create("d"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Filter file")
            .withLongOpt("filter").create("f"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("output file")
            .isRequired().withLongOpt("output-file").create("o"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("max depth (default " + MAX_DEPTH + ")").withLongOpt("max-depth").create("m"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("max num of sections").withLongOpt("max-num").create("n"));
    commandLineWithLogger.addOption(new Option("l", "print titles"));

    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription(// ww w  . ja v a2  s.  co m
                    "number of threads (default " + AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER + ")")
            .withLongOpt("num-threads").create("t"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("number of pages to process (default all)").withLongOpt("num-pages").create("p"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("receive notification every n pages (default "
                    + AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT + ")")
            .withLongOpt("notification-point").create("b"));

    CommandLine commandLine = null;
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        System.exit(1);
    }

    int numThreads = Integer.parseInt(commandLine.getOptionValue("num-threads",
            Integer.toString(AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER)));
    int numPages = Integer.parseInt(commandLine.getOptionValue("num-pages",
            Integer.toString(AbstractWikipediaExtractor.DEFAULT_NUM_PAGES)));
    int notificationPoint = Integer.parseInt(commandLine.getOptionValue("notification-point",
            Integer.toString(AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT)));

    int configuredDepth = Integer
            .parseInt(commandLine.getOptionValue("max-depth", Integer.toString(MAX_DEPTH)));
    int maxNum = Integer.parseInt(commandLine.getOptionValue("max-num", "0"));
    boolean printTitles = commandLine.hasOption("l");

    HashSet<String> pagesToConsider = null;
    String filterFileName = commandLine.getOptionValue("filter");
    if (filterFileName != null) {
        File filterFile = new File(filterFileName);
        if (filterFile.exists()) {
            pagesToConsider = new HashSet<>();
            List<String> lines = Files.readLines(filterFile, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }

                line = line.replaceAll("\\s+", "_");

                pagesToConsider.add(line);
            }
        }
    }

    File outputFile = new File(commandLine.getOptionValue("output-file"));
    ExtractorParameters extractorParameters = new ExtractorParameters(
            commandLine.getOptionValue("wikipedia-dump"), outputFile.getAbsolutePath());

    WikipediaExtractor wikipediaPageParser = new WikipediaSectionTitlesExtractor(numThreads, numPages,
            extractorParameters.getLocale(), outputFile, configuredDepth, maxNum, printTitles, pagesToConsider);
    wikipediaPageParser.setNotificationPoint(notificationPoint);
    wikipediaPageParser.start(extractorParameters);

    logger.info("extraction ended " + new Date());

}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.CollectionDiffer.java

public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i1", null, true, "Input file 1");
    options.addOption("i2", null, true, "Input file 2");
    options.addOption("o", null, true, "Output file");

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    try {/* w  w  w  .  j  a va  2s  .c om*/
        CommandLine cmd = parser.parse(options, args);

        InputStream input1 = null, input2 = null;

        if (cmd.hasOption("i1")) {
            input1 = CompressUtils.createInputStream(cmd.getOptionValue("i1"));
        } else {
            Usage("Specify 'Input file 1'");
        }
        if (cmd.hasOption("i2")) {
            input2 = CompressUtils.createInputStream(cmd.getOptionValue("i2"));
        } else {
            Usage("Specify 'Input file 2'");
        }

        HashSet<String> hSubj = new HashSet<String>();

        BufferedWriter out = null;

        if (cmd.hasOption("o")) {
            String outFile = cmd.getOptionValue("o");

            out = new BufferedWriter(new OutputStreamWriter(CompressUtils.createOutputStream(outFile)));
        } else {
            Usage("Specify 'Output file'");
        }

        XmlIterator inpIter2 = new XmlIterator(input2, YahooAnswersReader.DOCUMENT_TAG);

        int docNum = 1;
        for (String oneRec = inpIter2.readNext(); !oneRec.isEmpty(); oneRec = inpIter2.readNext(), ++docNum) {
            if (docNum % 10000 == 0) {
                System.out.println(String.format(
                        "Loaded and memorized questions for %d documents from the second input file", docNum));
            }
            ParsedQuestion q = YahooAnswersParser.parse(oneRec, false);
            hSubj.add(q.mQuestion);
        }

        XmlIterator inpIter1 = new XmlIterator(input1, YahooAnswersReader.DOCUMENT_TAG);

        System.out.println("=============================================");
        System.out.println("Memoization is done... now let's diff!!!");
        System.out.println("=============================================");

        docNum = 1;
        int skipOverlapQty = 0, skipErrorQty = 0;
        for (String oneRec = inpIter1.readNext(); !oneRec.isEmpty(); ++docNum, oneRec = inpIter1.readNext()) {
            if (docNum % 10000 == 0) {
                System.out.println(String.format("Processed %d documents from the first input file", docNum));
            }

            oneRec = oneRec.trim() + System.getProperty("line.separator");

            ParsedQuestion q = null;
            try {
                q = YahooAnswersParser.parse(oneRec, false);
            } catch (Exception e) {
                // If <bestanswer>...</bestanswer> is missing we may end up here...
                // This is a bit funny, because this element is supposed to be mandatory,
                // but it's not.
                System.err.println("Skipping due to parsing error, exception: " + e);
                skipErrorQty++;
                continue;
            }
            if (hSubj.contains(q.mQuestion.trim())) {
                //System.out.println(String.format("Skipping uri='%s', question='%s'", q.mQuestUri, q.mQuestion));
                skipOverlapQty++;
                continue;
            }

            out.write(oneRec);
        }
        System.out.println(
                String.format("Processed %d documents, skipped because of overlap/errors %d/%d documents",
                        docNum - 1, skipOverlapQty, skipErrorQty));
        out.close();
    } catch (ParseException e) {
        Usage("Cannot parse arguments");
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}

From source file:InlineSchemaValidator.java

/** Main program entry point. */
public static void main(String[] argv) {

    // is there anything to do?
    if (argv.length == 0) {
        printUsage();/*from   w w  w  . j av a 2  s.co m*/
        System.exit(1);
    }

    // variables
    Vector schemas = null;
    Vector instances = null;
    HashMap prefixMappings = null;
    HashMap uriMappings = null;
    String docURI = argv[argv.length - 1];
    String schemaLanguage = DEFAULT_SCHEMA_LANGUAGE;
    int repetition = DEFAULT_REPETITION;
    boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING;
    boolean honourAllSchemaLocations = DEFAULT_HONOUR_ALL_SCHEMA_LOCATIONS;
    boolean validateAnnotations = DEFAULT_VALIDATE_ANNOTATIONS;
    boolean generateSyntheticAnnotations = DEFAULT_GENERATE_SYNTHETIC_ANNOTATIONS;
    boolean memoryUsage = DEFAULT_MEMORY_USAGE;

    // process arguments
    for (int i = 0; i < argv.length - 1; ++i) {
        String arg = argv[i];
        if (arg.startsWith("-")) {
            String option = arg.substring(1);
            if (option.equals("l")) {
                // get schema language name
                if (++i == argv.length) {
                    System.err.println("error: Missing argument to -l option.");
                } else {
                    schemaLanguage = argv[i];
                }
                continue;
            }
            if (option.equals("x")) {
                if (++i == argv.length) {
                    System.err.println("error: Missing argument to -x option.");
                    continue;
                }
                String number = argv[i];
                try {
                    int value = Integer.parseInt(number);
                    if (value < 1) {
                        System.err.println("error: Repetition must be at least 1.");
                        continue;
                    }
                    repetition = value;
                } catch (NumberFormatException e) {
                    System.err.println("error: invalid number (" + number + ").");
                }
                continue;
            }
            if (arg.equals("-a")) {
                // process -a: xpath expressions for schemas
                if (schemas == null) {
                    schemas = new Vector();
                }
                while (i + 1 < argv.length - 1 && !(arg = argv[i + 1]).startsWith("-")) {
                    schemas.add(arg);
                    ++i;
                }
                continue;
            }
            if (arg.equals("-i")) {
                // process -i: xpath expressions for instance documents
                if (instances == null) {
                    instances = new Vector();
                }
                while (i + 1 < argv.length - 1 && !(arg = argv[i + 1]).startsWith("-")) {
                    instances.add(arg);
                    ++i;
                }
                continue;
            }
            if (arg.equals("-nm")) {
                String prefix;
                String uri;
                while (i + 2 < argv.length - 1 && !(prefix = argv[i + 1]).startsWith("-")
                        && !(uri = argv[i + 2]).startsWith("-")) {
                    if (prefixMappings == null) {
                        prefixMappings = new HashMap();
                        uriMappings = new HashMap();
                    }
                    prefixMappings.put(prefix, uri);
                    HashSet prefixes = (HashSet) uriMappings.get(uri);
                    if (prefixes == null) {
                        prefixes = new HashSet();
                        uriMappings.put(uri, prefixes);
                    }
                    prefixes.add(prefix);
                    i += 2;
                }
                continue;
            }
            if (option.equalsIgnoreCase("f")) {
                schemaFullChecking = option.equals("f");
                continue;
            }
            if (option.equalsIgnoreCase("hs")) {
                honourAllSchemaLocations = option.equals("hs");
                continue;
            }
            if (option.equalsIgnoreCase("va")) {
                validateAnnotations = option.equals("va");
                continue;
            }
            if (option.equalsIgnoreCase("ga")) {
                generateSyntheticAnnotations = option.equals("ga");
                continue;
            }
            if (option.equalsIgnoreCase("m")) {
                memoryUsage = option.equals("m");
                continue;
            }
            if (option.equals("h")) {
                printUsage();
                continue;
            }
            System.err.println("error: unknown option (" + option + ").");
            continue;
        }
    }

    try {
        // Create new instance of inline schema validator.
        InlineSchemaValidator inlineSchemaValidator = new InlineSchemaValidator(prefixMappings, uriMappings);

        // Parse document containing schemas and validation roots
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        DocumentBuilder db = dbf.newDocumentBuilder();
        db.setErrorHandler(inlineSchemaValidator);
        Document doc = db.parse(docURI);

        // Create XPath factory for selecting schema and validation roots
        XPathFactory xpf = XPathFactory.newInstance();
        XPath xpath = xpf.newXPath();
        xpath.setNamespaceContext(inlineSchemaValidator);

        // Select schema roots from the DOM
        NodeList[] schemaNodes = new NodeList[schemas != null ? schemas.size() : 0];
        for (int i = 0; i < schemaNodes.length; ++i) {
            XPathExpression xpathSchema = xpath.compile((String) schemas.elementAt(i));
            schemaNodes[i] = (NodeList) xpathSchema.evaluate(doc, XPathConstants.NODESET);
        }

        // Select validation roots from the DOM
        NodeList[] instanceNodes = new NodeList[instances != null ? instances.size() : 0];
        for (int i = 0; i < instanceNodes.length; ++i) {
            XPathExpression xpathInstance = xpath.compile((String) instances.elementAt(i));
            instanceNodes[i] = (NodeList) xpathInstance.evaluate(doc, XPathConstants.NODESET);
        }

        // Create SchemaFactory and configure
        SchemaFactory factory = SchemaFactory.newInstance(schemaLanguage);
        factory.setErrorHandler(inlineSchemaValidator);

        try {
            factory.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking);
        } catch (SAXNotRecognizedException e) {
            System.err.println("warning: SchemaFactory does not recognize feature ("
                    + SCHEMA_FULL_CHECKING_FEATURE_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println("warning: SchemaFactory does not support feature ("
                    + SCHEMA_FULL_CHECKING_FEATURE_ID + ")");
        }
        try {
            factory.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, honourAllSchemaLocations);
        } catch (SAXNotRecognizedException e) {
            System.err.println("warning: SchemaFactory does not recognize feature ("
                    + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println(
                    "warning: SchemaFactory does not support feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")");
        }
        try {
            factory.setFeature(VALIDATE_ANNOTATIONS_ID, validateAnnotations);
        } catch (SAXNotRecognizedException e) {
            System.err.println(
                    "warning: SchemaFactory does not recognize feature (" + VALIDATE_ANNOTATIONS_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println(
                    "warning: SchemaFactory does not support feature (" + VALIDATE_ANNOTATIONS_ID + ")");
        }
        try {
            factory.setFeature(GENERATE_SYNTHETIC_ANNOTATIONS_ID, generateSyntheticAnnotations);
        } catch (SAXNotRecognizedException e) {
            System.err.println("warning: SchemaFactory does not recognize feature ("
                    + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println("warning: SchemaFactory does not support feature ("
                    + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")");
        }

        // Build Schema from sources
        Schema schema;
        {
            DOMSource[] sources;
            int size = 0;
            for (int i = 0; i < schemaNodes.length; ++i) {
                size += schemaNodes[i].getLength();
            }
            sources = new DOMSource[size];
            if (size == 0) {
                schema = factory.newSchema();
            } else {
                int count = 0;
                for (int i = 0; i < schemaNodes.length; ++i) {
                    NodeList nodeList = schemaNodes[i];
                    int nodeListLength = nodeList.getLength();
                    for (int j = 0; j < nodeListLength; ++j) {
                        sources[count++] = new DOMSource(nodeList.item(j));
                    }
                }
                schema = factory.newSchema(sources);
            }
        }

        // Setup validator and input source.
        Validator validator = schema.newValidator();
        validator.setErrorHandler(inlineSchemaValidator);

        try {
            validator.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking);
        } catch (SAXNotRecognizedException e) {
            System.err.println(
                    "warning: Validator does not recognize feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println(
                    "warning: Validator does not support feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")");
        }
        try {
            validator.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, honourAllSchemaLocations);
        } catch (SAXNotRecognizedException e) {
            System.err.println(
                    "warning: Validator does not recognize feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println(
                    "warning: Validator does not support feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")");
        }
        try {
            validator.setFeature(VALIDATE_ANNOTATIONS_ID, validateAnnotations);
        } catch (SAXNotRecognizedException e) {
            System.err
                    .println("warning: Validator does not recognize feature (" + VALIDATE_ANNOTATIONS_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println("warning: Validator does not support feature (" + VALIDATE_ANNOTATIONS_ID + ")");
        }
        try {
            validator.setFeature(GENERATE_SYNTHETIC_ANNOTATIONS_ID, generateSyntheticAnnotations);
        } catch (SAXNotRecognizedException e) {
            System.err.println("warning: Validator does not recognize feature ("
                    + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")");
        } catch (SAXNotSupportedException e) {
            System.err.println(
                    "warning: Validator does not support feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")");
        }

        // Validate instance documents
        for (int i = 0; i < instanceNodes.length; ++i) {
            NodeList nodeList = instanceNodes[i];
            int nodeListLength = nodeList.getLength();
            for (int j = 0; j < nodeListLength; ++j) {
                DOMSource source = new DOMSource(nodeList.item(j));
                source.setSystemId(docURI);
                inlineSchemaValidator.validate(validator, source, docURI, repetition, memoryUsage);
            }
        }
    } catch (SAXParseException e) {
        // ignore
    } catch (Exception e) {
        System.err.println("error: Parse error occurred - " + e.getMessage());
        if (e instanceof SAXException) {
            Exception nested = ((SAXException) e).getException();
            if (nested != null) {
                e = nested;
            }
        }
        e.printStackTrace(System.err);
    }
}

From source file:eu.fbk.dkm.sectionextractor.pantheon.WikipediaGoodTextExtractor.java

public static void main(String args[]) throws IOException {

    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("wikipedia xml dump file").isRequired().withLongOpt("wikipedia-dump").create("d"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("dir").hasArg()
            .withDescription("output directory in which to store output files").isRequired()
            .withLongOpt("output-dir").create("o"));
    commandLineWithLogger//from  ww w.  j  av  a  2s.c om
            .addOption(OptionBuilder.withDescription("use NAF format").withLongOpt("naf").create("n"));
    commandLineWithLogger.addOption(OptionBuilder.withDescription("tokenize and ssplit with Stanford")
            .withLongOpt("stanford").create("s"));

    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Filter file")
            .withLongOpt("filter").create("f"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("ID and category file").withLongOpt("idcat").create("i"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Redirect file")
            .withLongOpt("redirect").create("r"));

    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription(
                    "number of threads (default " + AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER + ")")
            .withLongOpt("num-threads").create("t"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("number of pages to process (default all)").withLongOpt("num-pages").create("p"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("receive notification every n pages (default "
                    + AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT + ")")
            .withLongOpt("notification-point").create("b"));
    commandLineWithLogger.addOption(new Option("n", "NAF format"));

    CommandLine commandLine = null;
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        System.exit(1);
    }

    int numThreads = Integer.parseInt(commandLine.getOptionValue("num-threads",
            Integer.toString(AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER)));
    int numPages = Integer.parseInt(commandLine.getOptionValue("num-pages",
            Integer.toString(AbstractWikipediaExtractor.DEFAULT_NUM_PAGES)));
    int notificationPoint = Integer.parseInt(commandLine.getOptionValue("notification-point",
            Integer.toString(AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT)));

    boolean nafFormat = commandLine.hasOption("n");
    boolean useStanford = commandLine.hasOption("s");

    HashMap<Integer, String> idCategory = new HashMap<>();
    String idcatFileName = commandLine.getOptionValue("idcat");
    if (idcatFileName != null) {
        logger.info("Loading categories");
        File idcatFile = new File(idcatFileName);
        if (idcatFile.exists()) {
            List<String> lines = Files.readLines(idcatFile, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }

                String[] parts = line.split("\\s+");
                if (parts.length < 3) {
                    continue;
                }

                idCategory.put(Integer.parseInt(parts[1]), parts[2]);
            }
        }
    }

    HashMap<String, String> redirects = new HashMap<>();
    String redirectFileName = commandLine.getOptionValue("redirect");
    if (redirectFileName != null) {
        logger.info("Loading redirects");
        File redirectFile = new File(redirectFileName);
        if (redirectFile.exists()) {
            List<String> lines = Files.readLines(redirectFile, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }

                String[] parts = line.split("\\t+");
                if (parts.length < 2) {
                    continue;
                }

                redirects.put(parts[0], parts[1]);
            }
        }
    }

    HashSet<String> pagesToConsider = null;
    String filterFileName = commandLine.getOptionValue("filter");
    if (filterFileName != null) {
        logger.info("Loading file list");
        File filterFile = new File(filterFileName);
        if (filterFile.exists()) {
            pagesToConsider = new HashSet<>();
            List<String> lines = Files.readLines(filterFile, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }

                line = line.replaceAll("\\s+", "_");

                pagesToConsider.add(line);

                addRedirects(pagesToConsider, redirects, line, 0);
            }
        }
    }

    ExtractorParameters extractorParameters = new ExtractorParameters(
            commandLine.getOptionValue("wikipedia-dump"), commandLine.getOptionValue("output-dir"));

    File outputFolder = new File(commandLine.getOptionValue("output-dir"));
    if (!outputFolder.exists()) {
        boolean mkdirs = outputFolder.mkdirs();
        if (!mkdirs) {
            throw new IOException("Unable to create folder " + outputFolder.getAbsolutePath());
        }
    }

    WikipediaExtractor wikipediaPageParser = new WikipediaGoodTextExtractor(numThreads, numPages,
            extractorParameters.getLocale(), outputFolder, nafFormat, pagesToConsider, useStanford, idCategory);
    wikipediaPageParser.setNotificationPoint(notificationPoint);
    wikipediaPageParser.start(extractorParameters);

    logger.info("extraction ended " + new Date());

}

From source file:edu.ku.brc.web.ParsePaleo.java

/**
 * @param args/*from  w  ww.ja  v a  2s . c  o m*/
 */
public static void main(String[] args) {
    if (true) {
        ParsePaleo pp = new ParsePaleo();
        pp.processAll();
        return;
    }
    try {
        HashSet<String> set = new HashSet<String>();
        for (String line : FileUtils.readLines(new File("/Users/rods/Downloads/ages.txt"))) {
            //Pattern p = Pattern.compile("\"([^\"\\]*(\\.[^\"\\]*)*)\"|\'([^\'\\]*(\\.[^\'\\]*)*)\'");
            //Pattern p = Pattern.compile("\"([^\"]*)\"|(\\S+)");
            Pattern p = Pattern.compile("\"([^\"]*)\"");
            Matcher m = p.matcher(line);

            //List<String> animals = new ArrayList()<String>();
            while (m.find()) {
                //System.out.println(m.group());
                set.add(replace(m.group(), "\"", ""));
                //animals.add(m.group());
            }
        }
        for (String str : set) {
            System.out.println(str);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:metaTile.Main.java

/**
 * @param args/*from  w  w w.  j a va2s  .  com*/
 * @throws IOException 
 */
public static void main(String[] args) throws IOException {
    try {
        /* parse the command line arguments */
        // create the command line parser
        CommandLineParser parser = new PosixParser();

        // create the Options
        Options options = new Options();
        options.addOption("i", "input", true, "File to read original tile list from.");
        options.addOption("o", "output", true, "File to write shorter meta-tile list to.");
        options.addOption("m", "metatiles", true,
                "Number of tiles in x and y direction to group into one meta-tile.");

        // parse the command line arguments
        CommandLine commandLine = parser.parse(options, args);

        if (!commandLine.hasOption("input") || !commandLine.hasOption("output")
                || !commandLine.hasOption("metatiles"))
            printUsage(options);

        String inputFileName = commandLine.getOptionValue("input");
        String outputFileName = commandLine.getOptionValue("output");
        int metaTileSize = Integer.parseInt(commandLine.getOptionValue("metatiles"));

        ArrayList<RenderingTile> tiles = new ArrayList<RenderingTile>();

        BufferedReader tileListReader = new BufferedReader(new FileReader(new File(inputFileName)));

        BufferedWriter renderMetatileListWriter = new BufferedWriter(new FileWriter(new File(outputFileName)));

        String line = tileListReader.readLine();
        while (line != null) {
            String[] columns = line.split("/");

            if (columns.length == 3)
                tiles.add(new RenderingTile(Integer.parseInt(columns[0]), Integer.parseInt(columns[1]),
                        Integer.parseInt(columns[2])));

            line = tileListReader.readLine();
        }

        tileListReader.close();

        int hits = 0;

        // tiles which we are already rendering as the top left corner of 4x4 metatiles
        HashSet<RenderingTile> whitelist = new HashSet<RenderingTile>();

        // for each tile in the list see if it has a meta-tile in the whitelist already
        for (int i = 0; i < tiles.size(); i++) {
            boolean hit = false; // by default we aren't already rendering this tile as part of another metatile
            for (int dx = 0; dx < metaTileSize; dx++) {
                for (int dy = 0; dy < metaTileSize; dy++) {
                    RenderingTile candidate = new RenderingTile(tiles.get(i).z, tiles.get(i).x - dx,
                            tiles.get(i).y - dy);
                    if (whitelist.contains(candidate)) {
                        hit = true;
                        // now exit the two for loops iterating over tiles inside a meta-tile
                        dx = metaTileSize;
                        dy = metaTileSize;
                    }
                }
            }

            // if this tile doesn't already have a meta-tile in the whitelist, add it
            if (hit == false) {
                hits++;
                renderMetatileListWriter.write(tiles.get(i).toString() + "/" + metaTileSize + "\n");
                whitelist.add(tiles.get(i));
            }
        }
        renderMetatileListWriter.close();
        System.out.println(
                "Reduced " + tiles.size() + " tiles into " + hits + " metatiles of size " + metaTileSize);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:module.entities.UsernameChecker.CheckOpengovUsernames.java

/**
 * @param args the command line arguments
 *//*from  w w  w .  j a  v a 2 s  .  c  om*/
public static void main(String[] args) throws SQLException, IOException {
    //        args = new String[1];
    //        args[0] = "searchConf.txt";
    Date d = new Date();
    long milTime = d.getTime();
    long execStart = System.nanoTime();
    Timestamp startTime = new Timestamp(milTime);
    long lStartTime;
    long lEndTime = 0;
    int status_id = 1;
    JSONObject obj = new JSONObject();
    if (args.length != 1) {
        System.out.println("None or too many argument parameters where defined! "
                + "\nPlease provide ONLY the configuration file name as the only argument.");
    } else {
        try {
            configFile = args[0];
            initLexicons();
            Database.init();
            lStartTime = System.currentTimeMillis();
            System.out.println("Opengov username identification process started at: " + startTime);
            usernameCheckerId = Database.LogUsernameChecker(lStartTime);
            TreeMap<Integer, String> OpenGovUsernames = Database.GetOpenGovUsers();
            HashSet<ReportEntry> report_names = new HashSet<>();
            if (OpenGovUsernames.size() > 0) {
                for (int userID : OpenGovUsernames.keySet()) {
                    String DBusername = Normalizer
                            .normalize(OpenGovUsernames.get(userID).toUpperCase(locale), Normalizer.Form.NFD)
                            .replaceAll("\\p{M}", "");
                    String username = "";
                    int type;
                    String[] splitUsername = DBusername.split(" ");
                    if (checkNameInLexicons(splitUsername)) {
                        for (String splText : splitUsername) {
                            username += splText + " ";
                        }
                        type = 1;
                    } else if (checkOrgInLexicons(splitUsername)) {
                        for (String splText : splitUsername) {
                            username += splText + " ";
                        }
                        type = 2;
                    } else {
                        username = DBusername;
                        type = -1;
                    }
                    ReportEntry cerEntry = new ReportEntry(userID, username.trim(), type);
                    report_names.add(cerEntry);
                }
                status_id = 2;
                obj.put("message", "Opengov username checker finished with no errors");
                obj.put("details", "");
                Database.UpdateOpengovUsersReportName(report_names);
                lEndTime = System.currentTimeMillis();
            } else {
                status_id = 2;
                obj.put("message", "Opengov username checker finished with no errors");
                obj.put("details", "No usernames needed to be checked");
                lEndTime = System.currentTimeMillis();
            }
        } catch (Exception ex) {
            System.err.println(ex.getMessage());
            status_id = 3;
            obj.put("message", "Opengov username checker encountered an error");
            obj.put("details", ex.getMessage().toString());
            lEndTime = System.currentTimeMillis();
        }
    }
    long execEnd = System.nanoTime();
    long executionTime = (execEnd - execStart);
    System.out.println("Total process time: " + (((executionTime / 1000000) / 1000) / 60) + " minutes.");
    Database.UpdateLogUsernameChecker(lEndTime, status_id, usernameCheckerId, obj);
    Database.closeConnection();
}

From source file:eu.fbk.dkm.sectionextractor.PageClassMerger.java

public static void main(String args[]) throws IOException {

    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("WikiData ID file").isRequired().withLongOpt("wikidata-id").create("i"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("Airpedia Person file").isRequired().withLongOpt("airpedia").create("a"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Output file")
            .isRequired().withLongOpt("output").create("o"));
    CommandLine commandLine = null;/*  www.  ja va2  s. c  o m*/
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        System.exit(1);
    }

    String wikiIDFileName = commandLine.getOptionValue("wikidata-id");
    String airpediaFileName = commandLine.getOptionValue("airpedia");
    String outputFileName = commandLine.getOptionValue("output");

    HashMap<Integer, String> wikiIDs = new HashMap<>();
    HashSet<Integer> airpediaClasses = new HashSet<>();

    List<String> strings;

    logger.info("Loading file " + wikiIDFileName);
    strings = Files.readLines(new File(wikiIDFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }

        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }

        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        wikiIDs.put(id, parts[1]);
    }

    logger.info("Loading file " + airpediaFileName);
    strings = Files.readLines(new File(airpediaFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }

        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }

        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        airpediaClasses.add(id);
    }

    logger.info("Saving information");
    BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName));
    for (int i : wikiIDs.keySet()) {
        if (!airpediaClasses.contains(i)) {
            continue;
        }

        writer.append(wikiIDs.get(i)).append("\n");
    }
    writer.close();
}