Example usage for the java.util.HashSet constructor HashSet()

List of usage examples for the java.util.HashSet constructor HashSet()

Introduction

On this page you can find example usages of the java.util.HashSet constructor HashSet().

Prototype

public HashSet() 

Source Link

Document

Constructs a new, empty set; the backing HashMap instance has default initial capacity (16) and load factor (0.75).

Usage

From source file:TwitterClustering.java

/**
 * Clusters tweets around seed centroids using Jaccard distance and writes the
 * cluster memberships to a file and to standard output.
 *
 * <p>Arguments read by this method: {@code args[1]} is a comma-delimited file of
 * seed tweet ids, {@code args[2]} is a JSON array of tweet objects (each providing
 * {@code "text"} and {@code "id"}), and {@code args[3]} is the output file.
 * {@code args[0]} is not read here.
 *
 * <p>Failures while parsing, clustering or writing are caught and their stack
 * trace is printed; failures while opening the seed file or the output file
 * propagate to the caller.
 *
 * @param args command-line arguments as described above
 * @throws FileNotFoundException if the seed file cannot be opened
 * @throws IOException if the output file cannot be opened or closed
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    File outFile = new File(args[3]);
    JSONParser parser = new JSONParser();
    Set<Cluster> clusterSet = new HashSet<Cluster>();
    HashMap<String, Tweet> tweets = new HashMap<String, Tweet>();

    // try-with-resources closes the seed scanner and the writer on every exit path.
    try (Scanner s = new Scanner(new File(args[1])).useDelimiter(",");
            BufferedWriter bw = new BufferedWriter(new FileWriter(outFile.getAbsoluteFile()))) {
        try {
            // load all tweets, keyed by their id
            JSONArray jsonArray;
            try (FileReader tweetReader = new FileReader(args[2])) {
                jsonArray = (JSONArray) parser.parse(tweetReader);
            }

            for (int i = 0; i < jsonArray.size(); i++) {
                JSONObject jObj = (JSONObject) jsonArray.get(i);
                String text = jObj.get("text").toString();

                // attribute value = sum of the character codes of the tweet text
                long sum = 0;
                for (char c : text.toCharArray()) {
                    sum += c;
                }

                String tID = jObj.get("id").toString();
                Set<String> words = new HashSet<String>(Arrays.asList(text.split(" ")));

                Tweet twt = new Tweet();
                twt.setAttributeValue(sum);
                twt.setText(words);
                twt.setTweetID(tID);
                tweets.put(tID, twt);
            }

            // preparing initial clusters: one cluster per seed id, numbered from 1
            int clusterId = 1;
            while (s.hasNext()) {
                Tweet seed = tweets.get(s.next().trim());
                clusterSet.add(new Cluster(clusterId, seed, new LinkedList<Tweet>()));
                clusterId++;
            }

            for (int pass = 0; pass < 2; pass++) { // two assignment/update passes

                // Start every pass with empty clusters so that each tweet ends up in
                // exactly one cluster. (Previously a single iterator created outside
                // this loop was reused, so the second pass never reassigned anything.)
                for (Cluster clust : clusterSet) {
                    clust.getMembers().clear();
                }

                for (Tweet p : tweets.values()) {
                    // calculate distance to each centroid
                    HashMap<Cluster, Float> distMap = new HashMap<Cluster, Float>();
                    for (Cluster clust : clusterSet) {
                        distMap.put(clust, jaccardDistance(p.getText(), clust.getCentroid().getText()));
                    }

                    HashMap<Cluster, Float> sorted = (HashMap<Cluster, Float>) sortByValue(distMap);

                    // the first key of the sorted map is the cluster the tweet joins
                    sorted.keySet().iterator().next().getMembers().add(p);
                }

                // calculate new centroid and update clusterSet
                for (Cluster clust : clusterSet) {
                    if (clust.getMembers().isEmpty()) {
                        // nothing to average; keep the current centroid
                        continue;
                    }

                    TreeMap<String, Long> tDistMap = new TreeMap<String, Long>();
                    long avgSumDist = 0L;
                    for (Tweet member : clust.getMembers()) {
                        avgSumDist += member.getAttributeValue();
                        tDistMap.put(member.getTweetID(), member.getAttributeValue());
                    }
                    avgSumDist /= clust.getMembers().size();

                    ArrayList<Long> listValues = new ArrayList<Long>(tDistMap.values());

                    // the member whose attribute value is closest to the mean becomes centroid
                    if (tDistMap.containsValue(findClosestNumber(listValues, avgSumDist))) {
                        Tweet newCentroid = tweets
                                .get(getKeyByValue(tDistMap, findClosestNumber(listValues, avgSumDist)));
                        clust.setCentroid(newCentroid);
                    }
                }
            }

            // report every cluster id followed by the ids of its members
            for (Cluster c : clusterSet) {
                bw.write(c.getId() + "\t");
                System.out.print(c.getId() + "\t");

                for (Tweet t : c.getMembers()) {
                    bw.write(t.getTweetID() + ", ");
                    System.out.print(t.getTweetID() + ",");
                }
                bw.write("\n");
                System.out.println("");
            }

            System.out.println("");

            System.out.println("SSE " + sumSquaredErrror(clusterSet));

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:com.music.tools.ScaleTester.java

/**
 * Derives a chromatic scale from the overtones of a fundamental frequency, picks a
 * scale from it by walking the circle of perfect (3/2) intervals, prints the
 * intermediate results and plays the scale followed by a generated melody.
 *
 * <p>Arguments are read through {@code getArgument} at positions 0, 1 and 3 and
 * through {@code getBooleanArgument} at position 4. Any exception is caught and its
 * stack trace is printed.
 *
 * <p>NOTE(review): the usage text lists four arguments (positions 0-3) while the
 * code reads positions 3 and 4 for the scale size and the ET flag; confirm against
 * {@code getArgument} which of the two is intended.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    System.out.println(
            "Usage: java ScaleTester <fundamental frequency> <chromatic scale size> <scale size> <use ET>");
    final AudioFormat af = new AudioFormat(sampleRate, 16, 1, true, true);
    try {
        fundamentalFreq = getArgument(args, 0, FUNDAMENTAL_FREQUENCY, Double.class);
        int pitchesInChromaticScale = getArgument(args, 1, CHROMATIC_SCALE_SILZE, Integer.class);

        List<Double> harmonicFrequencies = new ArrayList<>();
        List<String> ratios = new ArrayList<>();
        Set<Double> frequencies = new HashSet<Double>();
        frequencies.add(fundamentalFreq);
        int octaveMultiplier = 2;
        for (int i = 2; i < 100; i++) {
            // Exclude the 7th harmonic TODO exclude the 11th as well?
            // http://www.phy.mtu.edu/~suits/badnote.html
            if (i % 7 == 0) {
                continue;
            }
            double actualFreq = fundamentalFreq * i;
            double closestTonicRatio = actualFreq / (fundamentalFreq * octaveMultiplier);
            if (closestTonicRatio < 1 || closestTonicRatio > 2) {
                octaveMultiplier *= 2;
            }
            // transpose the overtone down relative to the tonic below it
            double closestTonic = actualFreq - actualFreq % (fundamentalFreq * octaveMultiplier);
            double normalizedFreq = fundamentalFreq * (actualFreq / closestTonic);

            harmonicFrequencies.add(actualFreq);
            frequencies.add(normalizedFreq);
            if (frequencies.size() == pitchesInChromaticScale) {
                break;
            }
        }

        System.out.println("Harmonic (overtone) frequencies: " + harmonicFrequencies);
        System.out.println("Transposed harmonic frequencies: " + frequencies);

        List<Double> chromaticScale = new ArrayList<>(frequencies);
        Collections.sort(chromaticScale);

        // find the "perfect" interval (e.g. perfect fifth)
        int perfectIntervalIndex = 0;
        int idx = 0;
        for (Double noteFreq : chromaticScale) {
            long[] fraction = findCommonFraction(noteFreq / fundamentalFreq);
            fractionCache.put(noteFreq, fraction);
            if (fraction[0] == 3 && fraction[1] == 2) {
                perfectIntervalIndex = idx;
                System.out.println("Perfect interval (3/2) idx: " + perfectIntervalIndex);
            }
            idx++;
            ratios.add(Arrays.toString(fraction));
        }
        System.out.println("Ratios to fundemental frequency: " + ratios);

        if (getBooleanArgument(args, 4, USE_ET)) {
            chromaticScale = temper(chromaticScale);
        }

        System.out.println();
        System.out.println("Chromatic scale: " + chromaticScale);

        // the scale always starts with the lowest pitch of the chromatic scale
        List<Double> scale = new ArrayList<Double>();
        scale.add(chromaticScale.get(0));

        idx = 0;
        List<Double> orderedInCircle = new ArrayList<>();
        // now go around the circle of perfect intervals and put the notes
        // in order
        while (orderedInCircle.size() < chromaticScale.size()) {
            orderedInCircle.add(chromaticScale.get(idx));
            idx += perfectIntervalIndex;
            idx = idx % chromaticScale.size();
        }
        System.out.println("Pitches Ordered in circle of perfect intervals: " + orderedInCircle);

        int scaleSize = getArgument(args, 3, SCALE_SIZE, Integer.class);

        // The loop below only ever adds pitches taken from the circle, so asking for
        // more distinct pitches than the circle holds would never terminate normally.
        int distinctPitches = new HashSet<Double>(orderedInCircle).size();
        if (scaleSize > distinctPitches) {
            System.out.println("Requested scale size " + scaleSize + " exceeds the " + distinctPitches
                    + " distinct pitches available; using " + distinctPitches);
            scaleSize = distinctPitches;
        }

        // start with the last note in the circle
        int currentIdxInCircle = orderedInCircle.size() - 1;
        while (scale.size() < scaleSize) {
            double pitch = orderedInCircle.get(currentIdxInCircle % orderedInCircle.size());
            if (!scale.contains(pitch)) {
                scale.add(pitch);
            }
            currentIdxInCircle++;
        }
        Collections.sort(scale);

        System.out.println("Scale: " + scale);

        Double[] scaleFrequencies = scale.toArray(new Double[scale.size()]);

        // try-with-resources releases the audio line even when playback fails
        try (SourceDataLine line = AudioSystem.getSourceDataLine(af)) {
            line.open(af);
            line.start();

            // first play the whole scale
            WaveMelodyGenerator.playScale(line, scaleFrequencies);
            // then generate a random melody in the scale
            WaveMelodyGenerator.playMelody(line, scaleFrequencies);

            line.drain();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.glaf.mail.MailSenderImpl.java

/**
 * Manual smoke test: assembles a mail message with fixed template data and an empty
 * attachment collection, then sends it through the {@code "mailSender"} bean.
 *
 * @param args not used
 * @throws Exception if obtaining the sender bean or sending the message fails
 */
public static void main(String[] args) throws Exception {
    // values made available to the message as its data map
    Map<String, Object> templateData = new java.util.HashMap<String, Object>();
    templateData.put("taskDescription", "?");
    templateData.put("processStarterName", "?");
    templateData.put("serviceUrl", "http://127.0.0.1:8080/glaf");
    templateData.put("callback", "http://127.0.0.1:8080/glaf/task.jsp");

    MailMessage message = new MailMessage();
    message.setFrom("joy@127.0.0.1");
    message.setTo("joy@127.0.0.1");
    message.setSubject("");
    message.setDataMap(templateData);
    message.setContent("");
    // message.setTemplateId(args[0]);
    message.setSupportExpression(false);

    // no attachments
    Collection<Object> attachments = new HashSet<Object>();
    message.setFiles(attachments);
    message.setSaveMessage(false);

    MailSender sender = ContextFactory.getBean("mailSender");
    sender.send(message);
}

From source file:org.switchyard.quickstarts.demo.policy.security.wss.signencrypt.WorkServiceMain.java

/**
 * Parses the requested policy flags and invokes the work service accordingly.
 *
 * <p>Blank arguments are ignored. Any argument other than {@code CONFIDENTIALITY},
 * {@code SIGNENCRYPT} or {@code HELP} is logged as an error and rejected. When
 * {@code HELP} is present only the usage text is logged.
 *
 * @param args policy flags
 * @throws Exception if an unknown argument is given or the invocation fails
 */
public static void main(String... args) throws Exception {
    Set<String> requested = new HashSet<String>();
    for (String rawArg : args) {
        String option = Strings.trimToNull(rawArg);
        if (option == null) {
            continue;
        }
        boolean recognised = option.equals(CONFIDENTIALITY) || option.equals(SIGNENCRYPT)
                || option.equals(HELP);
        if (!recognised) {
            LOGGER.error(MAVEN_USAGE);
            throw new Exception(MAVEN_USAGE);
        }
        requested.add(option);
    }

    if (requested.contains(HELP)) {
        LOGGER.info(MAVEN_USAGE);
        return;
    }

    final boolean confidential = requested.contains(CONFIDENTIALITY);
    final String scheme = confidential ? "https" : "http";
    final int port = getPort(confidential ? 8443 : 8080);
    if (confidential) {
        SSLContext tls = SSLContext.getInstance("TLS");
        tls.init(null, null, null);
        SSLSocketFactory socketFactory = new SSLSocketFactory(tls, SSLSocketFactory.STRICT_HOSTNAME_VERIFIER);
        // NOTE(review): this registry is local and is not passed on anywhere in this
        // method - confirm whether the https scheme registration has any effect.
        SchemeRegistry registry = new SchemeRegistry();
        registry.register(new Scheme(scheme, port, socketFactory));
    }

    boolean signencrypt = requested.contains(SIGNENCRYPT);
    invokeWorkService(scheme, port, getContext(), signencrypt);
}

From source file:TaxReturn.java

/**
 * Creates three tax returns, prints the hash code of the second one, adds all three
 * to a {@link HashSet} and prints the set.
 *
 * @param pArgs not used
 * @throws Exception declared for compatibility with the original signature
 */
public static void main(String[] pArgs) throws Exception {
    // BigDecimal is built from a String rather than a double; the literals keep the
    // same value and scale (0) that the former double-based constructor produced.
    TaxReturn return1 = new TaxReturn("012-68-3242", 1998, "O'Brien", new BigDecimal("43000"));
    TaxReturn return2 = new TaxReturn("012-68-3242", 1999, "O'Brien", new BigDecimal("45000"));
    TaxReturn return3 = new TaxReturn("012-68-3242", 1999, "O'Brien", new BigDecimal("53222"));

    System.out.println("HashCodeBuilder: " + return2.hashCode());

    Set<TaxReturn> set = new HashSet<TaxReturn>();
    set.add(return1);
    set.add(return2);
    set.add(return3);
    System.out.println(set);
}

From source file:com.glaf.jbpm.action.MultiPooledTaskInstanceAction.java

/**
 * Demo of the actor-id parsing: splits a fixed string on {@code ';'}, and for every
 * token that is wrapped in braces prints the set of comma-separated ids found
 * between the first <code>{</code> and the first <code>}</code>.
 *
 * <p>NOTE(review): the sample string separates its groups with {@code ','} while the
 * outer tokenizer splits on {@code ';'}, so the whole string is a single token and
 * only the first group is printed - confirm whether that is intended.
 *
 * @param args not used
 * @throws Exception declared for compatibility with the original signature
 */
public static void main(String[] args) throws Exception {
    String actorIdxy = "{joy,sam},{pp,qq},{kit,cora},{eyb2000,huangcw}";
    StringTokenizer groups = new StringTokenizer(actorIdxy, ";");
    while (groups.hasMoreTokens()) {
        String group = groups.nextToken();
        if (!StringUtils.isNotEmpty(group)) {
            continue;
        }
        group = group.trim();

        boolean braced = group.length() > 0 && group.charAt(0) == '{' && group.endsWith("}");
        if (!braced) {
            continue;
        }

        // keep only what lies between the first '{' and the first '}'
        int from = group.indexOf("{") + 1;
        int to = group.indexOf("}");
        StringTokenizer ids = new StringTokenizer(group.substring(from, to), ",");

        Set<String> actorIds = new HashSet<String>();
        while (ids.hasMoreTokens()) {
            String id = ids.nextToken().trim();
            if (id.length() > 0) {
                actorIds.add(id);
            }
        }
        System.out.println(actorIds);
    }
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step4MTurkOutputCollector.java

/**
 * Merges MTurk result records into previously prepared argument pairs and writes the
 * annotated pairs to an output directory.
 *
 * <p>Arguments: {@code args[0]} is the directory holding the argument-pair XML files;
 * {@code args[1]} names the MTurk result files, either as a path containing
 * {@code *} or as a comma-separated list of files; {@code args[2]} is the output
 * directory, which is created if missing and must be empty.
 *
 * <p>The result records are iterated several times: twice for statistics that are
 * printed (or only used by commented-out code), and once per argument pair whose
 * {@code "Answer.<id>"} column exists.
 *
 * @param args command-line arguments as described above
 * @throws Exception on I/O failure, on a non-empty output directory, on an
 *         assignment status other than "Approved"/"Rejected", or on any other
 *         failure not handled inside the per-record try block
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDirWithArgumentPairs = args[0];

    File[] resultFiles;

    if (args[1].contains("*")) {
        // wildcard form: list the parent directory, using the file name with all '*'
        // removed as the extension filter
        // NOTE(review): the remaining name is passed as a file *extension*; verify
        // that patterns such as "*.csv" select the intended files
        File path = new File(args[1]);
        File directory = path.getParentFile();
        String regex = path.getName().replaceAll("\\*", "");

        List<File> files = new ArrayList<>(FileUtils.listFiles(directory, new String[] { regex }, false));
        resultFiles = new File[files.size()];
        for (int i = 0; i < files.size(); i++) {
            resultFiles[i] = files.get(i);
        }
    } else {
        // result file is a comma-separated list of CSV files from MTurk
        String[] split = args[1].split(",");
        resultFiles = new File[split.length];
        for (int i = 0; i < split.length; i++) {
            resultFiles[i] = new File(split[i]);
        }
    }

    File outputDir = new File(args[2]);

    if (!outputDir.exists()) {
        if (!outputDir.mkdirs()) {
            throw new IOException("Cannot create directory " + outputDir);
        }
    }

    // error if output folder not empty to prevent any confusion by mixing files
    if (!FileUtils.listFiles(outputDir, null, false).isEmpty()) {
        throw new IllegalArgumentException("Output dir " + outputDir + " is not empty");
    }

    // collected assignments with empty reason for rejections
    Set<String> assignmentsWithEmptyReason = new HashSet<>();

    // parse with first line as header
    MTurkOutputReader mTurkOutputReader = new MTurkOutputReader(resultFiles);

    Collection<File> files = FileUtils.listFiles(new File(inputDirWithArgumentPairs), new String[] { "xml" },
            false);

    if (files.isEmpty()) {
        throw new IOException("No xml files found in " + inputDirWithArgumentPairs);
    }

    // statistics: how many hits with how many assignments ; hit ID / assignments
    // (outer key: HIT type id, inner key: HIT id, value: number of non-rejected
    // assignments; only read by the commented-out code further below)
    Map<String, Map<String, Integer>> assignmentsPerHits = new HashMap<>();

    // collect accept/reject statistics
    for (Map<String, String> record : mTurkOutputReader) {
        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");
        String hitTypeId = record.get("hittypeid");

        if (!wasRejected) {
            // update statistics
            if (!assignmentsPerHits.containsKey(hitTypeId)) {
                assignmentsPerHits.put(hitTypeId, new HashMap<String, Integer>());
            }

            if (!assignmentsPerHits.get(hitTypeId).containsKey(hitID)) {
                assignmentsPerHits.get(hitTypeId).put(hitID, 0);
            }

            assignmentsPerHits.get(hitTypeId).put(hitID, assignmentsPerHits.get(hitTypeId).get(hitID) + 1);
        }
    }

    // statistics: how many hits with how many assignments ; hit ID / assignments
    Map<String, Integer> approvedAssignmentsPerHit = new HashMap<>();
    Map<String, Integer> rejectedAssignmentsPerHit = new HashMap<>();

    // collect accept/reject statistics
    for (Map<String, String> record : mTurkOutputReader) {
        boolean approved = "Approved".equals(record.get("assignmentstatus"));
        boolean rejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");

        if (approved) {
            // update statistics
            if (!approvedAssignmentsPerHit.containsKey(hitID)) {
                approvedAssignmentsPerHit.put(hitID, 0);
            }

            approvedAssignmentsPerHit.put(hitID, approvedAssignmentsPerHit.get(hitID) + 1);
        } else if (rejected) {
            // update statistics
            if (!rejectedAssignmentsPerHit.containsKey(hitID)) {
                rejectedAssignmentsPerHit.put(hitID, 0);
            }

            rejectedAssignmentsPerHit.put(hitID, rejectedAssignmentsPerHit.get(hitID) + 1);
        } else {
            // any status other than "Approved" or "Rejected" aborts the run
            throw new IllegalStateException(
                    "Unknown state: " + record.get("assignmentstatus") + " HITID: " + hitID);
        }
    }

    //        System.out.println("Approved: " + approvedAssignmentsPerHit);
    //        System.out.println("Rejected: " + rejectedAssignmentsPerHit);

    // print only the distinct per-HIT counts, not the full maps
    System.out.println("Approved (values): " + new HashSet<>(approvedAssignmentsPerHit.values()));
    System.out.println("Rejected (values): " + new HashSet<>(rejectedAssignmentsPerHit.values()));
    // rejection statistics
    int totalRejected = 0;
    for (Map.Entry<String, Integer> rejectionEntry : rejectedAssignmentsPerHit.entrySet()) {
        totalRejected += rejectionEntry.getValue();
    }

    System.out.println("Total rejections: " + totalRejected);

    /*
    // generate .success files for adding more annotations
    for (File resultFile : resultFiles) {
    String hitTypeID = mTurkOutputReader.getHitTypeIdForFile().get(resultFile);
            
    // assignments for that hittypeid (= file)
    Map<String, Integer> assignments = assignmentsPerHits.get(hitTypeID);
            
    prepareUpdateHITsFiles(assignments, hitTypeID, resultFile);
    }
    */

    int totalSavedPairs = 0;

    // load all previously prepared argument pairs
    for (File file : files) {
        List<ArgumentPair> argumentPairs = (List<ArgumentPair>) XStreamTools.getXStream().fromXML(file);

        List<AnnotatedArgumentPair> annotatedArgumentPairs = new ArrayList<>();

        for (ArgumentPair argumentPair : argumentPairs) {
            AnnotatedArgumentPair annotatedArgumentPair = new AnnotatedArgumentPair(argumentPair);

            // is there such an answer?
            String key = "Answer." + argumentPair.getId();

            // iterate only if there is such column to save time
            if (mTurkOutputReader.getColumnNames().contains(key)) {
                // now find the results
                for (Map<String, String> record : mTurkOutputReader) {
                    if (record.containsKey(key)) {
                        // extract the values
                        AnnotatedArgumentPair.MTurkAssignment assignment = new AnnotatedArgumentPair.MTurkAssignment();

                        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));

                        // only non-rejected (if required)
                        if (!wasRejected) {
                            String hitID = record.get("hitid");
                            String workerID = record.get("workerid");
                            String assignmentId = record.get("assignmentid");
                            try {
                                assignment.setAssignmentAcceptTime(
                                        DATE_FORMAT.parse(record.get("assignmentaccepttime")));
                                assignment.setAssignmentSubmitTime(
                                        DATE_FORMAT.parse(record.get("assignmentsubmittime")));
                                assignment.setHitComment(record.get("Answer.feedback"));
                                assignment.setHitID(hitID);
                                assignment.setTurkID(workerID);
                                assignment.setAssignmentId(assignmentId);

                                // and answer specific fields
                                String valueRaw = record.get(key);

                                // so far the label has had format aXXX_aYYY_a1, aXXX_aYYY_a2, or aXXX_aYYY_equal
                                // strip now only true label
                                // NOTE(review): a value with fewer than three '_'-separated parts
                                // throws ArrayIndexOutOfBoundsException, which the catch below
                                // does not cover
                                String label = valueRaw.split("_")[2];

                                assignment.setValue(label);
                                String reason = record.get(key + "_reason");

                                // missing reason
                                if (reason == null) {
                                    // remembered for the report at the end; the assignment is
                                    // not added to the pair
                                    assignmentsWithEmptyReason.add(assignmentId);
                                } else {
                                    assignment.setReason(reason);

                                    // get worker's stance
                                    String stanceRaw = record.get(key + "_stance");
                                    if (stanceRaw != null) {
                                        // parse stance
                                        String stance = stanceRaw.split("_stance_")[1];
                                        assignment.setWorkerStance(stance);
                                    }

                                    // we take maximal 5 assignments
                                    // (the already collected assignments are re-sorted by accept
                                    // time before the new one is appended, if there is room)
                                    Collections.sort(annotatedArgumentPair.mTurkAssignments,
                                            new Comparator<AnnotatedArgumentPair.MTurkAssignment>() {
                                                @Override
                                                public int compare(AnnotatedArgumentPair.MTurkAssignment o1,
                                                        AnnotatedArgumentPair.MTurkAssignment o2) {
                                                    return o1.getAssignmentAcceptTime()
                                                            .compareTo(o2.getAssignmentAcceptTime());
                                                }
                                            });

                                    if (annotatedArgumentPair.mTurkAssignments
                                            .size() < MAXIMUM_ASSIGNMENTS_PER_HIT) {
                                        annotatedArgumentPair.mTurkAssignments.add(assignment);
                                    }
                                }
                            } catch (IllegalArgumentException | NullPointerException ex) {
                                // report the malformed record and continue with the next one
                                System.err.println("Malformed annotations for HIT " + hitID + ", worker "
                                        + workerID + ", assignment " + assignmentId + "; " + ex.getMessage()
                                        + ", full record: " + record);
                            }
                        }
                    }
                }
            }

            // and if there are some annotations, add it to the result set
            if (!annotatedArgumentPair.mTurkAssignments.isEmpty()) {
                annotatedArgumentPairs.add(annotatedArgumentPair);
            }
        }

        // one output file per input file, written only when it has annotated pairs
        if (!annotatedArgumentPairs.isEmpty()) {
            File outputFile = new File(outputDir, file.getName());
            XStreamTools.toXML(annotatedArgumentPairs, outputFile);

            System.out.println("Saved " + annotatedArgumentPairs.size() + " annotated pairs to " + outputFile);
            totalSavedPairs += annotatedArgumentPairs.size();
        }
    }

    System.out.println("Total saved " + totalSavedPairs + " pairs");

    // print assignments with empty reasons
    if (!assignmentsWithEmptyReason.isEmpty()) {
        System.out.println(
                "== Assignments with empty reason:\nassignmentIdToReject\tassignmentIdToRejectComment");
        for (String assignmentId : assignmentsWithEmptyReason) {
            System.out.println(
                    assignmentId + "\t\"Dear worker, you did not fill the required field with a reason.\"");
        }
    }

}

From source file:de.citec.sc.matoll.process.Matoll_CreateMax.java

/**
 * Builds bag-of-words statistics for the sentences whose reference is a property of
 * the gold-standard lexicon and writes them to three TSV files in the working
 * directory.
 *
 * <p>Arguments: {@code args[1]} is the directory scanned for {@code .ttl} files when
 * the configuration lists no files, {@code args[2]} is the configuration file.
 * {@code args[0]} is not read here. With fewer than three arguments only the usage
 * text is printed.
 *
 * <p>Output files: {@code mapping_sentences_to_ids_goldstandard.tsv} (appended to,
 * one line per sentence), {@code bag_of_words_only_goldstandard.tsv} and
 * {@code mapping_words_to_sentenceids_goldstandard.tsv} (both overwritten).
 *
 * @param args command-line arguments as described above
 * @throws IOException if the input directory cannot be listed or an output file
 *         cannot be created
 * @throws Exception on any other failure while loading the configuration, the
 *         lexicon or a model
 */
public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException,
        InstantiationException, IllegalAccessException, ClassNotFoundException, Exception {

    Stopwords stopwords = new Stopwords();

    if (args.length < 3) {
        System.out.print("Usage: Matoll --mode=train/test <DIRECTORY> <CONFIG>\n");
        return;
    }

    String directory = args[1];
    String configFile = args[2];

    final Config config = new Config();
    config.loadFromFile(configFile);

    String gold_standard_lexicon = config.getGoldStandardLexicon();
    Language language = config.getLanguage();

    LexiconLoader loader = new LexiconLoader();
    Lexicon gold = loader.loadFromFile(gold_standard_lexicon);

    Set<String> uris = new HashSet<>();
    Map<Integer, Set<Integer>> mapping_words_sentences = new HashMap<>();

    //consider only properties
    for (LexicalEntry entry : gold.getEntries()) {
        try {
            for (Sense sense : entry.getSenseBehaviours().keySet()) {
                String uri = sense.getReference().getURI();
                String tmp_uri = uri.replace("http://dbpedia.org/ontology/", "");
                // a local name starting with an upper-case letter is not treated as a property
                if (!Character.isUpperCase(tmp_uri.charAt(0))) {
                    uris.add(uri);
                }
            }
        } catch (Exception e) {
            // best effort: an entry that cannot be inspected is reported and skipped
            System.err.println("Skipping lexical entry: " + e);
        }
    }

    ModelPreprocessor preprocessor = new ModelPreprocessor(language);
    preprocessor.setCoreferenceResolution(false);
    Set<String> dep = new HashSet<>();
    dep.add("prep");
    dep.add("appos");
    dep.add("nn");
    dep.add("dobj");
    dep.add("pobj");
    dep.add("num");
    preprocessor.setDEP(dep);

    List<File> list_files = new ArrayList<>();

    if (config.getFiles().isEmpty()) {
        File[] files = new File(directory).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a readable directory
            throw new IOException("Cannot list files in directory " + directory);
        }
        for (File file : files) {
            if (file.toString().contains(".ttl")) {
                list_files.add(file);
            }
        }
    } else {
        list_files.addAll(config.getFiles());
    }
    System.out.println(list_files.size());

    int sentence_counter = 0;
    Map<String, Set<Integer>> bag_words_uri = new HashMap<>();
    Map<String, Integer> mapping_word_id = new HashMap<>();
    for (File file : list_files) {
        Model model = RDFDataMgr.loadModel(file.toString());
        try {
            for (Model sentence : getSentences(model)) {
                String reference = getReference(sentence);
                reference = reference.replace("http://dbpedia/", "http://dbpedia.org/");
                if (!uris.contains(reference)) {
                    continue;
                }

                sentence_counter += 1;
                Set<Integer> words_ids = getBagOfWords(sentence, stopwords, mapping_word_id);
                //TODO: add sentence preprocessing
                String obj = getObject(sentence);
                String subj = getSubject(sentence);
                preprocessor.preprocess(sentence, subj, obj, language);
                //TODO: also return marker if object or subject of property (in SPARQL this has to be optional of course)
                String parsed_sentence = getParsedSentence(sentence);
                try (FileWriter fw = new FileWriter("mapping_sentences_to_ids_goldstandard.tsv", true);
                        BufferedWriter bw = new BufferedWriter(fw);
                        PrintWriter out = new PrintWriter(bw)) {
                    out.println(sentence_counter + "\t" + parsed_sentence);
                } catch (IOException e) {
                    e.printStackTrace();
                }

                // word id -> ids of the sentences containing it
                for (Integer word_id : words_ids) {
                    Set<Integer> sentence_ids = mapping_words_sentences.get(word_id);
                    if (sentence_ids == null) {
                        sentence_ids = new HashSet<>();
                        mapping_words_sentences.put(word_id, sentence_ids);
                    }
                    sentence_ids.add(sentence_counter);
                }

                // reference URI -> ids of all words seen in its sentences
                Set<Integer> bag = bag_words_uri.get(reference);
                if (bag == null) {
                    bag = new HashSet<>();
                    bag_words_uri.put(reference, bag);
                }
                bag.addAll(words_ids);
            }
        } finally {
            // release the model even when processing one of its sentences fails
            model.close();
        }
    }

    // one line per reference: the URI followed by tab-separated word ids
    try (PrintWriter writer = new PrintWriter("bag_of_words_only_goldstandard.tsv")) {
        for (Map.Entry<String, Set<Integer>> bagEntry : bag_words_uri.entrySet()) {
            StringBuilder line = new StringBuilder();
            line.append(bagEntry.getKey());
            for (Integer wordId : bagEntry.getValue()) {
                line.append("\t");
                line.append(wordId);
            }
            line.append("\n");
            writer.write(line.toString());
        }
    }

    // one line per word id: the id followed by tab-separated sentence ids
    try (PrintWriter writer = new PrintWriter("mapping_words_to_sentenceids_goldstandard.tsv")) {
        for (Map.Entry<Integer, Set<Integer>> wordEntry : mapping_words_sentences.entrySet()) {
            StringBuilder line = new StringBuilder();
            line.append(wordEntry.getKey());
            for (Integer sentenceId : wordEntry.getValue()) {
                line.append("\t");
                line.append(sentenceId);
            }
            line.append("\n");
            writer.write(line.toString());
        }
    }

}

From source file:dkpro.similarity.algorithms.vsm.store.convert.ConvertLuceneToVectorIndex.java

/**
 * Copies the term vectors of a Lucene-based index into a vector index, keeping only
 * the terms that pass the quality criteria printed at start-up.
 *
 * <p>{@code args[0]} is the source index directory, {@code args[1]} the destination
 * directory, which is deleted and recreated first.
 *
 * @param args source and destination paths
 * @throws Exception if reading the source index or writing the destination fails
 */
public static void main(String[] args) throws Exception {
    File inputPath = new File(args[0]);
    File outputPath = new File(args[1]);

    deleteQuietly(outputPath);
    outputPath.mkdirs();

    boolean ignoreNumerics = true;
    boolean ignoreCardinal = true;
    boolean ignoreMonetary = true;
    int minTermLength = 3;
    int minDocFreq = 5;

    System.out.println("Quality criteria");
    System.out.println("Minimum term length            : " + minTermLength);
    System.out.println("Minimum document frequency     : " + minDocFreq);
    System.out.println("Ignore numeric tokens          : " + ignoreNumerics);
    // report the cardinal flag itself (the numeric flag used to be printed here)
    System.out.println("Ignore cardinal numeric tokens : " + ignoreCardinal);
    System.out.println("Ignore money values            : " + ignoreMonetary);

    System.out.print("Fetching terms list... ");

    Set<String> terms = new HashSet<String>();
    int ignoredTerms = 0;
    IndexReader reader = IndexReader.open(FSDirectory.open(inputPath));
    try {
        TermEnum termEnum = reader.terms();
        while (termEnum.next()) {
            String term = termEnum.term().text();
            if (((minTermLength > 0) && (term.length() < minTermLength)) || (ignoreCardinal && isCardinal(term))
                    || (ignoreMonetary && isMonetary(term)) || (ignoreNumerics && isNumericSpace(term))
                    || ((minDocFreq > 0) && (termEnum.docFreq() < minDocFreq))) {
                ignoredTerms++;
                continue;
            }

            terms.add(term);
        }
    } finally {
        // close the reader even when enumerating the terms fails
        reader.close();
    }

    System.out.println(terms.size() + " terms found. " + ignoredTerms + " terms ignored.");

    System.out.println("Opening source ESA index " + inputPath);
    VectorReader source = new LuceneVectorReader(inputPath);
    // the destination is the output path (the input path used to be printed here)
    System.out.println("Opening destination ESA index " + outputPath);
    VectorIndexWriter esaWriter = new VectorIndexWriter(outputPath, source.getConceptCount());
    try {
        ProgressMeter p = new ProgressMeter(terms.size());
        for (String term : terms) {
            Vector vector = source.getVector(term);
            esaWriter.put(term, vector);

            p.next();
            System.out.println("[" + term + "] " + p);
        }
    } finally {
        // close the writer even when copying a vector fails
        esaWriter.close();
    }
}

From source file:org.switchyard.quickstarts.demo.security.propagation.jms.WorkServiceMain.java

/**
 * Parses the requested policy flags and invokes the work service accordingly.
 *
 * <p>Blank arguments are ignored. Any argument other than {@code CONFIDENTIALITY},
 * {@code CLIENT_AUTHENTICATION} or {@code HELP} is logged as an error and rejected.
 * When {@code HELP} is present only the usage text is logged. With
 * {@code CLIENT_AUTHENTICATION} a fixed user/password pair is passed to the
 * service call, otherwise {@code null}.
 *
 * @param args policy flags
 * @throws Exception if an unknown argument is given or the invocation fails
 */
public static void main(String... args) throws Exception {
    Set<String> requested = new HashSet<String>();
    for (String rawArg : args) {
        String option = Strings.trimToNull(rawArg);
        if (option == null) {
            continue;
        }
        boolean recognised = option.equals(CONFIDENTIALITY) || option.equals(CLIENT_AUTHENTICATION)
                || option.equals(HELP);
        if (!recognised) {
            LOGGER.error(MAVEN_USAGE);
            throw new Exception(MAVEN_USAGE);
        }
        requested.add(option);
    }

    if (requested.contains(HELP)) {
        LOGGER.info(MAVEN_USAGE);
        return;
    }

    final boolean confidential = requested.contains(CONFIDENTIALITY);
    final String scheme = confidential ? "https" : "http";
    final int port = getPort(confidential ? 8443 : 8080);
    if (confidential) {
        SSLContext tls = SSLContext.getInstance("TLS");
        tls.init(null, null, null);
        SSLSocketFactory socketFactory = new SSLSocketFactory(tls, SSLSocketFactory.STRICT_HOSTNAME_VERIFIER);
        // NOTE(review): this registry is local and is not passed on anywhere in this
        // method - confirm whether the https scheme registration has any effect.
        SchemeRegistry registry = new SchemeRegistry();
        registry.register(new Scheme(scheme, port, socketFactory));
    }

    String[] userPass = null;
    if (requested.contains(CLIENT_AUTHENTICATION)) {
        userPass = new String[] { "kermit", "the-frog-1" };
    }
    invokeWorkService(scheme, port, getContext(), userPass);
}