Example usage for the java.util.HashSet constructor HashSet()

Introduction

On this page you can find example usages of the java.util.HashSet no-argument constructor, HashSet().

Prototype

public HashSet()

Source Link

Document

Constructs a new, empty set; the backing HashMap instance has default initial capacity (16) and load factor (0.75).

Usage

From source file:TwitterClustering.java

/**
 * Clusters tweets around a set of seed tweets and writes the resulting clusters to a file.
 *
 * <p>Arguments as read by this method: {@code args[1]} is a comma-separated file of seed tweet
 * ids, {@code args[2]} is a JSON file holding an array of objects with {@code "text"} and
 * {@code "id"} fields, and {@code args[3]} is the output file. {@code args[0]} is not read here.
 *
 * <p>Each tweet is reduced to its set of space-separated tokens plus the sum of its character
 * codes. Every seed id becomes the centroid of one cluster. The method then runs a fixed number
 * of passes; in each pass every tweet joins the cluster that comes first in the map returned by
 * {@code sortByValue} over the Jaccard distances, and afterwards each cluster's centroid is
 * replaced by the member whose character sum is closest to the cluster average.
 *
 * <p>Failures while parsing or clustering are printed to standard error and do not propagate;
 * failures while opening or closing the files do.
 *
 * @param args command-line arguments, see above
 * @throws FileNotFoundException if the seed file or the output file cannot be opened
 * @throws IOException if closing one of the files fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    File outFile = new File(args[3]);
    JSONParser parser = new JSONParser();
    Set<Cluster> clusterSet = new HashSet<Cluster>();
    HashMap<String, Tweet> tweets = new HashMap<String, Tweet>();

    // Number of assign/re-centre passes.
    final int maxIterations = 2;

    // The seed file is opened before the output file so that a missing seed file does not
    // create or truncate the output.
    try (Scanner s = new Scanner(new File(args[1])).useDelimiter(",");
            FileWriter fw = new FileWriter(outFile.getAbsoluteFile());
            BufferedWriter bw = new BufferedWriter(fw)) {
        try {
            // Load every tweet from the JSON array.
            Object obj;
            try (FileReader jsonIn = new FileReader(args[2])) {
                obj = parser.parse(jsonIn);
            }
            JSONArray jsonArray = (JSONArray) obj;

            for (int i = 0; i < jsonArray.size(); i++) {
                JSONObject jObj = (JSONObject) jsonArray.get(i);
                String text = jObj.get("text").toString();
                String tID = jObj.get("id").toString();

                // Attribute value: sum of the character codes of the text.
                long sum = 0;
                for (char c : text.toCharArray()) {
                    sum += c;
                }

                Tweet twt = new Tweet();
                twt.setAttributeValue(sum);
                twt.setText(new HashSet<String>(Arrays.asList(text.split(" "))));
                twt.setTweetID(tID);
                tweets.put(tID, twt);
            }

            // Prepare the initial clusters: one per seed id, numbered from 1.
            int clusterId = 1;
            while (s.hasNext()) {
                String id = s.next();
                Tweet seed = tweets.get(id.trim());
                clusterSet.add(new Cluster(clusterId, seed, new LinkedList()));
                clusterId++;
            }

            for (int pass = 0; pass < maxIterations; pass++) {
                // Later passes reassign every tweet from scratch; without this the members
                // of the previous pass would be counted twice.
                if (pass > 0) {
                    for (Cluster clust : clusterSet) {
                        clust.getMembers().clear();
                    }
                }

                // Assign every tweet to a cluster. Iterating the map values afresh on each
                // pass matters: a single shared iterator would be exhausted after pass one.
                for (Tweet p : tweets.values()) {
                    HashMap<Cluster, Float> distMap = new HashMap<Cluster, Float>();
                    for (Cluster clust : clusterSet) {
                        distMap.put(clust, jaccardDistance(p.getText(), clust.getCentroid().getText()));
                    }

                    HashMap<Cluster, Float> sorted = (HashMap<Cluster, Float>) sortByValue(distMap);
                    sorted.keySet().iterator().next().getMembers().add(p);
                }

                // Calculate the new centroid of each cluster.
                for (Cluster clust : clusterSet) {
                    TreeMap<String, Long> tDistMap = new TreeMap<String, Long>();
                    long avgSumDist = 0L;
                    for (Tweet member : clust.getMembers()) {
                        avgSumDist += member.getAttributeValue();
                        tDistMap.put(member.getTweetID(), member.getAttributeValue());
                    }
                    if (clust.getMembers().size() != 0) {
                        avgSumDist /= clust.getMembers().size();
                    }

                    ArrayList<Long> listValues = new ArrayList<Long>(tDistMap.values());

                    if (tDistMap.containsValue(findClosestNumber(listValues, avgSumDist))) {
                        // The member closest to the average becomes the new centroid.
                        Tweet newCentroid = tweets
                                .get(getKeyByValue(tDistMap, findClosestNumber(listValues, avgSumDist)));
                        clust.setCentroid(newCentroid);
                    }
                }
            }

            // One line per cluster: cluster id, a tab, then the member tweet ids.
            for (Cluster c : clusterSet) {
                bw.write(c.getId() + "\t");
                System.out.print(c.getId() + "\t");

                for (Tweet t : c.getMembers()) {
                    bw.write(t.getTweetID() + ", ");
                    System.out.print(t.getTweetID() + ",");
                }
                bw.write("\n");
                System.out.println("");
            }

            System.out.println("");

            System.out.println("SSE " + sumSquaredErrror(clusterSet));

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:com.music.tools.ScaleTester.java

/**
 * Derives a chromatic scale from the harmonics of a fundamental frequency, picks a scale from it
 * by walking the circle of "perfect" (3/2) intervals, and plays the scale followed by a melody.
 *
 * <p>Steps, as implemented below: harmonics 2..99 (skipping multiples of 7) are folded back into
 * the octave above the fundamental until the requested number of distinct pitches is reached;
 * the sorted pitches are optionally tempered; the pitches are reordered by repeatedly stepping
 * by the index of the 3/2 ratio; finally the scale is filled from that ordering, starting at its
 * last element.
 *
 * <p>All exceptions are caught and printed to standard error.
 *
 * @param args command-line arguments, read through {@code getArgument} / {@code getBooleanArgument}
 */
public static void main(String[] args) {
    // NOTE(review): the usage text lists four arguments (indices 0-3), but the code below reads
    // the scale size from index 3 and the "use ET" flag from index 4 - confirm which is intended.
    System.out.println(
            "Usage: java ScaleTester <fundamental frequency> <chromatic scale size> <scale size> <use ET>");
    final AudioFormat af = new AudioFormat(sampleRate, 16, 1, true, true);
    try {
        fundamentalFreq = getArgument(args, 0, FUNDAMENTAL_FREQUENCY, Double.class);
        int pitchesInChromaticScale = getArgument(args, 1, CHROMATIC_SCALE_SILZE, Integer.class);

        List<Double> harmonicFrequencies = new ArrayList<>();
        List<String> ratios = new ArrayList<>();
        Set<Double> frequencies = new HashSet<Double>();
        frequencies.add(fundamentalFreq);
        int octaveMultiplier = 2;
        for (int i = 2; i < 100; i++) {
            // Exclude the 7th harmonic TODO exclude the 11th as well?
            // http://www.phy.mtu.edu/~suits/badnote.html
            if (i % 7 == 0) {
                continue;
            }
            double actualFreq = fundamentalFreq * i;
            double closestTonicRatio = actualFreq / (fundamentalFreq * octaveMultiplier);
            if (closestTonicRatio < 1 || closestTonicRatio > 2) {
                octaveMultiplier *= 2;
            }
            // Largest multiple of the current octave that does not exceed the harmonic.
            double closestTonic = actualFreq - actualFreq % (fundamentalFreq * octaveMultiplier);
            double normalizedFreq = fundamentalFreq * (actualFreq / closestTonic);

            harmonicFrequencies.add(actualFreq);
            frequencies.add(normalizedFreq);
            if (frequencies.size() == pitchesInChromaticScale) {
                break;
            }
        }

        System.out.println("Harmonic (overtone) frequencies: " + harmonicFrequencies);
        System.out.println("Transposed harmonic frequencies: " + frequencies);

        List<Double> chromaticScale = new ArrayList<>(frequencies);
        Collections.sort(chromaticScale);

        // find the "perfect" interval (e.g. perfect fifth)
        int perfectIntervalIndex = 0;
        int idx = 0;
        for (Double noteFreq : chromaticScale) {
            long[] fraction = findCommonFraction(noteFreq / fundamentalFreq);
            fractionCache.put(noteFreq, fraction);
            if (fraction[0] == 3 && fraction[1] == 2) {
                perfectIntervalIndex = idx;
                System.out.println("Perfect interval (3/2) idx: " + perfectIntervalIndex);
            }
            idx++;
            ratios.add(Arrays.toString(fraction));
        }
        System.out.println("Ratios to fundemental frequency: " + ratios);

        if (getBooleanArgument(args, 4, USE_ET)) {
            chromaticScale = temper(chromaticScale);
        }

        System.out.println();
        System.out.println("Chromatic scale: " + chromaticScale);

        // now go around the circle of perfect intervals and put the notes in order
        idx = 0;
        List<Double> orderedInCircle = new ArrayList<>();
        while (orderedInCircle.size() < chromaticScale.size()) {
            orderedInCircle.add(chromaticScale.get(idx));
            idx += perfectIntervalIndex;
            idx = idx % chromaticScale.size();
        }
        System.out.println("Pitches Ordered in circle of perfect intervals: " + orderedInCircle);

        // The scale always contains the lowest pitch of the chromatic scale.
        List<Double> scale = new ArrayList<Double>();
        scale.add(chromaticScale.get(0));

        int scaleSize = getArgument(args, 3, SCALE_SIZE, Integer.class);
        // Start with the last note in the circle and walk it once. One full lap visits every
        // position, so continuing past it cannot add a new pitch; bounding the walk prevents an
        // endless loop when the circle holds fewer distinct pitches than scaleSize.
        int circleSize = orderedInCircle.size();
        int start = circleSize - 1;
        for (int step = 0; step < circleSize && scale.size() < scaleSize; step++) {
            double pitch = orderedInCircle.get((start + step) % circleSize);
            if (!scale.contains(pitch)) {
                scale.add(pitch);
            }
        }
        if (scale.size() < scaleSize) {
            System.out.println("Only " + scale.size() + " distinct pitches available for a scale of size "
                    + scaleSize);
        }
        Collections.sort(scale);

        System.out.println("Scale: " + scale);

        Double[] scaleFrequencies = scale.toArray(new Double[scale.size()]);

        // try-with-resources releases the audio line even if playback fails.
        try (SourceDataLine line = AudioSystem.getSourceDataLine(af)) {
            line.open(af);
            line.start();

            // first play the whole scale
            WaveMelodyGenerator.playScale(line, scaleFrequencies);
            // then generate a random melody in the scale
            WaveMelodyGenerator.playMelody(line, scaleFrequencies);

            line.drain();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.glaf.mail.MailSenderImpl.java

/**
 * Sends one test message through the {@code mailSender} bean.
 *
 * <p>The message is addressed from and to {@code joy@127.0.0.1}, has an empty subject and
 * content, carries a four-entry data map and an empty set of files, and is sent with expression
 * support and message saving switched off.
 *
 * @param args command-line arguments; not read
 * @throws Exception whatever the bean lookup or the send call propagates
 */
public static void main(String[] args) throws Exception {
    Map<String, Object> templateData = new java.util.HashMap<String, Object>();
    templateData.put("taskDescription", "?");
    templateData.put("processStarterName", "?");
    templateData.put("serviceUrl", "http://127.0.0.1:8080/glaf");
    templateData.put("callback", "http://127.0.0.1:8080/glaf/task.jsp");

    // No attachments: the set stays empty.
    Collection<Object> attachments = new HashSet<Object>();

    MailMessage message = new MailMessage();
    message.setFrom("joy@127.0.0.1");
    message.setTo("joy@127.0.0.1");
    message.setSubject("");
    message.setDataMap(templateData);
    message.setContent("");
    // A template id could be supplied here: message.setTemplateId(args[0]);
    message.setSupportExpression(false);
    message.setFiles(attachments);
    message.setSaveMessage(false);

    MailSender sender = ContextFactory.getBean("mailSender");
    sender.send(message);
}

From source file:org.switchyard.quickstarts.demo.policy.security.wss.signencrypt.WorkServiceMain.java

/**
 * Parses the policy flags given on the command line and invokes the work service.
 *
 * <p>Blank arguments are skipped. Any argument other than {@code CONFIDENTIALITY},
 * {@code SIGNENCRYPT} or {@code HELP} is rejected. With {@code HELP} present only the usage text
 * is logged. Otherwise the service is called over https (default port 8443) when
 * {@code CONFIDENTIALITY} was requested and over http (default port 8080) when it was not.
 *
 * @param args policy flags
 * @throws Exception if an unknown argument is given, or propagated from SSL setup or the call
 */
public static void main(String... args) throws Exception {
    Set<String> policies = new HashSet<String>();
    for (String rawArg : args) {
        String arg = Strings.trimToNull(rawArg);
        if (arg == null) {
            continue;
        }
        boolean recognized = arg.equals(CONFIDENTIALITY) || arg.equals(SIGNENCRYPT) || arg.equals(HELP);
        if (!recognized) {
            LOGGER.error(MAVEN_USAGE);
            throw new Exception(MAVEN_USAGE);
        }
        policies.add(arg);
    }

    if (policies.contains(HELP)) {
        LOGGER.info(MAVEN_USAGE);
        return;
    }

    final boolean confidential = policies.contains(CONFIDENTIALITY);
    final String scheme = confidential ? "https" : "http";
    final int port = getPort(confidential ? 8443 : 8080);
    if (confidential) {
        SSLContext tlsContext = SSLContext.getInstance("TLS");
        tlsContext.init(null, null, null);
        SSLSocketFactory socketFactory = new SSLSocketFactory(tlsContext,
                SSLSocketFactory.STRICT_HOSTNAME_VERIFIER);
        // NOTE(review): the registry is populated but not passed on from here - confirm
        // whether invokeWorkService is expected to pick it up some other way.
        SchemeRegistry registry = new SchemeRegistry();
        registry.register(new Scheme(scheme, port, socketFactory));
    }

    boolean signencrypt = policies.contains(SIGNENCRYPT);
    invokeWorkService(scheme, port, getContext(), signencrypt);
}

From source file:TaxReturn.java

/**
 * Builds three tax returns for the same SSN and last name, prints the hash code of the second
 * one, then adds all three to a {@code HashSet} and prints the set.
 *
 * <p>How many elements the printed set holds depends on {@code TaxReturn}'s own
 * {@code equals}/{@code hashCode}, which are defined outside this method.
 *
 * @param pArgs command-line arguments; not read
 * @throws Exception declared by the original signature
 */
public static void main(String[] pArgs) throws Exception {
    TaxReturn[] returns = {
            new TaxReturn("012-68-3242", 1998, "O'Brien", new BigDecimal(43000.00)),
            new TaxReturn("012-68-3242", 1999, "O'Brien", new BigDecimal(45000.00)),
            new TaxReturn("012-68-3242", 1999, "O'Brien", new BigDecimal(53222.00)) };

    System.out.println("HashCodeBuilder: " + returns[1].hashCode());

    Set<TaxReturn> distinctReturns = new HashSet<TaxReturn>();
    for (TaxReturn taxReturn : returns) {
        distinctReturns.add(taxReturn);
    }
    System.out.println(distinctReturns);
}

From source file:com.glaf.jbpm.action.MultiPooledTaskInstanceAction.java

/**
 * Parses a sample actor-id string and prints the ids found in each brace-delimited group.
 *
 * <p>The string is split on {@code ';'}; every piece that starts with <code>{</code> and ends
 * with <code>}</code> has the text between its first <code>{</code> and first <code>}</code>
 * split on {@code ','}, and the trimmed, non-empty ids are collected into a set that is printed.
 *
 * @param args command-line arguments; not read
 * @throws Exception declared by the original signature
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): the groups in this sample are separated by ',' while the outer tokenizer
    // splits on ';', so the whole string is one piece and only its first group is printed -
    // confirm whether ';' separators were intended here.
    String actorIdxy = "{joy,sam},{pp,qq},{kit,cora},{eyb2000,huangcw}";

    StringTokenizer groups = new StringTokenizer(actorIdxy, ";");
    while (groups.hasMoreTokens()) {
        String group = groups.nextToken();
        if (!StringUtils.isNotEmpty(group)) {
            continue;
        }
        group = group.trim();
        if (!(group.startsWith("{") && group.endsWith("}"))) {
            continue;
        }

        String inner = group.substring(group.indexOf("{") + 1, group.indexOf("}"));
        Set<String> actorIds = new HashSet<String>();
        for (StringTokenizer ids = new StringTokenizer(inner, ","); ids.hasMoreTokens();) {
            String actorId = ids.nextToken().trim();
            if (!actorId.isEmpty()) {
                actorIds.add(actorId);
            }
        }
        System.out.println(actorIds);
    }
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step4MTurkOutputCollector.java

/**
 * Merges Mechanical Turk result files with previously prepared argument pairs and writes the
 * annotated pairs as XML.
 *
 * <p>Arguments: {@code args[0]} is the directory holding the argument-pair XML files;
 * {@code args[1]} is either a path containing {@code *} (its file name, with the asterisks
 * removed, is handed to {@code FileUtils.listFiles} as the extension filter for the parent
 * directory) or a comma-separated list of result files; {@code args[2]} is the output
 * directory, which is created when missing and must be empty.
 *
 * <p>The method prints approval/rejection statistics, attaches up to
 * {@code MAXIMUM_ASSIGNMENTS_PER_HIT} non-rejected assignments to each argument pair, saves
 * every input file that ends up with at least one annotated pair under the same name in the
 * output directory, and finally lists the assignments that lack a reason.
 *
 * @param args command-line arguments, see above
 * @throws IOException if the output directory cannot be created or no XML input is found
 * @throws IllegalArgumentException if the output directory is not empty
 * @throws IllegalStateException if a record is neither "Approved" nor "Rejected"
 * @throws Exception anything else propagated from the helpers called here
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String inputDirWithArgumentPairs = args[0];

    File[] resultFiles;
    if (args[1].contains("*")) {
        // wildcard form: list the parent directory, filtered by what is left of the file name
        File pattern = new File(args[1]);
        String suffix = pattern.getName().replaceAll("\\*", "");
        List<File> matched = new ArrayList<>(
                FileUtils.listFiles(pattern.getParentFile(), new String[] { suffix }, false));
        resultFiles = matched.toArray(new File[matched.size()]);
    } else {
        // result file is a comma-separated list of CSV files from MTurk
        String[] names = args[1].split(",");
        resultFiles = new File[names.length];
        for (int i = 0; i < names.length; i++) {
            resultFiles[i] = new File(names[i]);
        }
    }

    File outputDir = new File(args[2]);
    if (!outputDir.exists() && !outputDir.mkdirs()) {
        throw new IOException("Cannot create directory " + outputDir);
    }

    // error if output folder not empty to prevent any confusion by mixing files
    if (!FileUtils.listFiles(outputDir, null, false).isEmpty()) {
        throw new IllegalArgumentException("Output dir " + outputDir + " is not empty");
    }

    // collected assignments with empty reason for rejections
    Set<String> assignmentsWithEmptyReason = new HashSet<>();

    // parse with first line as header
    MTurkOutputReader mTurkOutputReader = new MTurkOutputReader(resultFiles);

    Collection<File> files = FileUtils.listFiles(new File(inputDirWithArgumentPairs), new String[] { "xml" },
            false);
    if (files.isEmpty()) {
        throw new IOException("No xml files found in " + inputDirWithArgumentPairs);
    }

    // statistics: non-rejected assignments per HIT, grouped by HIT type id
    Map<String, Map<String, Integer>> assignmentsPerHits = new HashMap<>();
    for (Map<String, String> record : mTurkOutputReader) {
        boolean wasRejected = "Rejected".equals(record.get("assignmentstatus"));
        String hitID = record.get("hitid");
        String hitTypeId = record.get("hittypeid");
        if (wasRejected) {
            continue;
        }

        Map<String, Integer> perHit = assignmentsPerHits.get(hitTypeId);
        if (perHit == null) {
            perHit = new HashMap<String, Integer>();
            assignmentsPerHits.put(hitTypeId, perHit);
        }
        Integer soFar = perHit.get(hitID);
        perHit.put(hitID, soFar == null ? 1 : soFar + 1);
    }

    // statistics: approved and rejected assignments per HIT id
    Map<String, Integer> approvedAssignmentsPerHit = new HashMap<>();
    Map<String, Integer> rejectedAssignmentsPerHit = new HashMap<>();
    for (Map<String, String> record : mTurkOutputReader) {
        String status = record.get("assignmentstatus");
        String hitID = record.get("hitid");

        Map<String, Integer> counter;
        if ("Approved".equals(status)) {
            counter = approvedAssignmentsPerHit;
        } else if ("Rejected".equals(status)) {
            counter = rejectedAssignmentsPerHit;
        } else {
            throw new IllegalStateException("Unknown state: " + status + " HITID: " + hitID);
        }

        Integer soFar = counter.get(hitID);
        counter.put(hitID, soFar == null ? 1 : soFar + 1);
    }

    System.out.println("Approved (values): " + new HashSet<>(approvedAssignmentsPerHit.values()));
    System.out.println("Rejected (values): " + new HashSet<>(rejectedAssignmentsPerHit.values()));

    // rejection statistics
    int totalRejected = 0;
    for (Integer rejectedForHit : rejectedAssignmentsPerHit.values()) {
        totalRejected += rejectedForHit;
    }
    System.out.println("Total rejections: " + totalRejected);

    /*
    // generate .success files for adding more annotations
    for (File resultFile : resultFiles) {
    String hitTypeID = mTurkOutputReader.getHitTypeIdForFile().get(resultFile);

    // assignments for that hittypeid (= file)
    Map<String, Integer> assignments = assignmentsPerHits.get(hitTypeID);

    prepareUpdateHITsFiles(assignments, hitTypeID, resultFile);
    }
    */

    // orders assignments by the time they were accepted
    Comparator<AnnotatedArgumentPair.MTurkAssignment> byAcceptTime = new Comparator<AnnotatedArgumentPair.MTurkAssignment>() {
        @Override
        public int compare(AnnotatedArgumentPair.MTurkAssignment o1, AnnotatedArgumentPair.MTurkAssignment o2) {
            return o1.getAssignmentAcceptTime().compareTo(o2.getAssignmentAcceptTime());
        }
    };

    int totalSavedPairs = 0;

    // load all previously prepared argument pairs
    for (File file : files) {
        List<ArgumentPair> argumentPairs = (List<ArgumentPair>) XStreamTools.getXStream().fromXML(file);
        List<AnnotatedArgumentPair> annotatedArgumentPairs = new ArrayList<>();

        for (ArgumentPair argumentPair : argumentPairs) {
            AnnotatedArgumentPair annotatedArgumentPair = new AnnotatedArgumentPair(argumentPair);

            // column that would hold the answer for this pair
            String key = "Answer." + argumentPair.getId();

            // iterate only if there is such column to save time
            if (mTurkOutputReader.getColumnNames().contains(key)) {
                for (Map<String, String> record : mTurkOutputReader) {
                    if (!record.containsKey(key)) {
                        continue;
                    }

                    AnnotatedArgumentPair.MTurkAssignment assignment = new AnnotatedArgumentPair.MTurkAssignment();

                    // only non-rejected records are used
                    if ("Rejected".equals(record.get("assignmentstatus"))) {
                        continue;
                    }

                    String hitID = record.get("hitid");
                    String workerID = record.get("workerid");
                    String assignmentId = record.get("assignmentid");
                    try {
                        assignment.setAssignmentAcceptTime(DATE_FORMAT.parse(record.get("assignmentaccepttime")));
                        assignment.setAssignmentSubmitTime(DATE_FORMAT.parse(record.get("assignmentsubmittime")));
                        assignment.setHitComment(record.get("Answer.feedback"));
                        assignment.setHitID(hitID);
                        assignment.setTurkID(workerID);
                        assignment.setAssignmentId(assignmentId);

                        // so far the label has had format aXXX_aYYY_a1, aXXX_aYYY_a2, or aXXX_aYYY_equal
                        // keep only the third part, the true label
                        String valueRaw = record.get(key);
                        assignment.setValue(valueRaw.split("_")[2]);

                        String reason = record.get(key + "_reason");
                        if (reason == null) {
                            // missing reason: remember the assignment and do not attach it
                            assignmentsWithEmptyReason.add(assignmentId);
                            continue;
                        }
                        assignment.setReason(reason);

                        // worker's stance, when the column is filled
                        String stanceRaw = record.get(key + "_stance");
                        if (stanceRaw != null) {
                            assignment.setWorkerStance(stanceRaw.split("_stance_")[1]);
                        }

                        // sort what has been attached so far, then attach this one while
                        // the per-HIT maximum has not been reached
                        Collections.sort(annotatedArgumentPair.mTurkAssignments, byAcceptTime);
                        if (annotatedArgumentPair.mTurkAssignments.size() < MAXIMUM_ASSIGNMENTS_PER_HIT) {
                            annotatedArgumentPair.mTurkAssignments.add(assignment);
                        }
                    } catch (IllegalArgumentException | NullPointerException ex) {
                        System.err.println("Malformed annotations for HIT " + hitID + ", worker " + workerID
                                + ", assignment " + assignmentId + "; " + ex.getMessage() + ", full record: "
                                + record);
                    }
                }
            }

            // and if there are some annotations, add it to the result set
            if (!annotatedArgumentPair.mTurkAssignments.isEmpty()) {
                annotatedArgumentPairs.add(annotatedArgumentPair);
            }
        }

        if (annotatedArgumentPairs.isEmpty()) {
            continue;
        }

        File outputFile = new File(outputDir, file.getName());
        XStreamTools.toXML(annotatedArgumentPairs, outputFile);

        System.out.println("Saved " + annotatedArgumentPairs.size() + " annotated pairs to " + outputFile);
        totalSavedPairs += annotatedArgumentPairs.size();
    }

    System.out.println("Total saved " + totalSavedPairs + " pairs");

    // print assignments with empty reasons
    if (assignmentsWithEmptyReason.isEmpty()) {
        return;
    }
    System.out.println("== Assignments with empty reason:\nassignmentIdToReject\tassignmentIdToRejectComment");
    for (String assignmentId : assignmentsWithEmptyReason) {
        System.out.println(assignmentId + "\t\"Dear worker, you did not fill the required field with a reason.\"");
    }
}

From source file:de.citec.sc.matoll.process.Matoll_CreateMax.java

/**
 * Builds bag-of-words files for the sentences whose reference is a property in the gold
 * standard lexicon.
 *
 * <p>Arguments: at least three are required; {@code args[1]} is the directory scanned for
 * files whose path contains {@code ".ttl"} (used only when the configuration lists no files)
 * and {@code args[2]} is the configuration file. {@code args[0]} is not read here.
 *
 * <p>Files written to the working directory:
 * <ul>
 * <li>{@code mapping_sentences_to_ids_goldstandard.tsv} - appended to, one line per accepted
 * sentence: sentence number, tab, parsed sentence;</li>
 * <li>{@code bag_of_words_only_goldstandard.tsv} - one line per reference URI followed by the
 * tab-separated word ids seen with it;</li>
 * <li>{@code mapping_words_to_sentenceids_goldstandard.tsv} - one line per word id followed by
 * the tab-separated numbers of the sentences containing it.</li>
 * </ul>
 *
 * @param args command-line arguments, see above
 * @throws IOException if the input directory cannot be listed or an output file cannot be opened
 * @throws Exception anything else propagated from the helpers called here
 */
public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException,
        InstantiationException, IllegalAccessException, ClassNotFoundException, Exception {

    Stopwords stopwords = new Stopwords();

    if (args.length < 3) {
        System.out.print("Usage: Matoll --mode=train/test <DIRECTORY> <CONFIG>\n");
        return;
    }

    String directory = args[1];
    String configFile = args[2];

    final Config config = new Config();
    config.loadFromFile(configFile);

    String gold_standard_lexicon = config.getGoldStandardLexicon();
    Language language = config.getLanguage();

    LexiconLoader loader = new LexiconLoader();
    Lexicon gold = loader.loadFromFile(gold_standard_lexicon);

    Set<String> uris = new HashSet<>();
    Map<Integer, Set<Integer>> mapping_words_sentences = new HashMap<>();

    // consider only properties: references whose name after the DBpedia ontology prefix does
    // not start with an upper-case letter
    for (LexicalEntry entry : gold.getEntries()) {
        try {
            for (Sense sense : entry.getSenseBehaviours().keySet()) {
                String uri = sense.getReference().getURI();
                String tmp_uri = uri.replace("http://dbpedia.org/ontology/", "");
                if (!Character.isUpperCase(tmp_uri.charAt(0))) {
                    uris.add(uri);
                }
            }
        } catch (Exception ignored) {
            // Best effort, as in the original: an entry that cannot be inspected is skipped.
            // NOTE(review): presumably guards against senses without a reference - confirm.
        }
    }

    ModelPreprocessor preprocessor = new ModelPreprocessor(language);
    preprocessor.setCoreferenceResolution(false);
    Set<String> dep = new HashSet<>();
    dep.add("prep");
    dep.add("appos");
    dep.add("nn");
    dep.add("dobj");
    dep.add("pobj");
    dep.add("num");
    preprocessor.setDEP(dep);

    List<File> list_files = new ArrayList<>();
    if (config.getFiles().isEmpty()) {
        // File.listFiles() returns null when the path is not a readable directory.
        File[] files = new File(directory).listFiles();
        if (files == null) {
            throw new IOException("Cannot list files in directory " + directory);
        }
        for (File file : files) {
            if (file.toString().contains(".ttl")) {
                list_files.add(file);
            }
        }
    } else {
        list_files.addAll(config.getFiles());
    }
    System.out.println(list_files.size());

    int sentence_counter = 0;
    Map<String, Set<Integer>> bag_words_uri = new HashMap<>();
    Map<String, Integer> mapping_word_id = new HashMap<>();
    for (File file : list_files) {
        Model model = RDFDataMgr.loadModel(file.toString());
        try {
            for (Model sentence : getSentences(model)) {
                String reference = getReference(sentence);
                reference = reference.replace("http://dbpedia/", "http://dbpedia.org/");
                if (!uris.contains(reference)) {
                    continue;
                }

                sentence_counter += 1;
                Set<Integer> words_ids = getBagOfWords(sentence, stopwords, mapping_word_id);
                //TODO: add sentence preprocessing
                String obj = getObject(sentence);
                String subj = getSubject(sentence);
                preprocessor.preprocess(sentence, subj, obj, language);
                //TODO: also return marker if object or subject of property (in SPARQL this has to be optional of course)
                String parsed_sentence = getParsedSentence(sentence);

                // Appended per sentence; a failed write is reported and does not stop the run.
                try (FileWriter fw = new FileWriter("mapping_sentences_to_ids_goldstandard.tsv", true);
                        BufferedWriter bw = new BufferedWriter(fw);
                        PrintWriter out = new PrintWriter(bw)) {
                    out.println(sentence_counter + "\t" + parsed_sentence);
                } catch (IOException e) {
                    e.printStackTrace();
                }

                // word id -> numbers of the sentences it occurs in
                for (Integer word_id : words_ids) {
                    Set<Integer> sentence_ids = mapping_words_sentences.get(word_id);
                    if (sentence_ids == null) {
                        sentence_ids = new HashSet<>();
                        mapping_words_sentences.put(word_id, sentence_ids);
                    }
                    sentence_ids.add(sentence_counter);
                }

                // reference URI -> all word ids seen with it
                Set<Integer> bag = bag_words_uri.get(reference);
                if (bag == null) {
                    bag = new HashSet<>();
                    bag_words_uri.put(reference, bag);
                }
                bag.addAll(words_ids);
            }
        } finally {
            // release the model even when a sentence fails
            model.close();
        }
    }

    try (PrintWriter writer = new PrintWriter("bag_of_words_only_goldstandard.tsv")) {
        StringBuilder string_builder = new StringBuilder();
        for (Map.Entry<String, Set<Integer>> row : bag_words_uri.entrySet()) {
            string_builder.append(row.getKey());
            for (Integer i : row.getValue()) {
                string_builder.append("\t");
                string_builder.append(i);
            }
            string_builder.append("\n");
        }
        writer.write(string_builder.toString());
    }

    try (PrintWriter writer = new PrintWriter("mapping_words_to_sentenceids_goldstandard.tsv")) {
        StringBuilder string_builder = new StringBuilder();
        for (Map.Entry<Integer, Set<Integer>> row : mapping_words_sentences.entrySet()) {
            string_builder.append(row.getKey());
            for (int i : row.getValue()) {
                string_builder.append("\t");
                string_builder.append(i);
            }
            string_builder.append("\n");
        }
        writer.write(string_builder.toString());
    }

}

From source file:dkpro.similarity.algorithms.vsm.store.convert.ConvertLuceneToVectorIndex.java

/**
 * Copies the term vectors of a Lucene-based index into a vector index, keeping only the terms
 * that pass the quality criteria printed at start-up.
 *
 * <p>{@code args[0]} is the source index directory and {@code args[1]} the destination
 * directory, which is deleted and recreated. A term is skipped when it is shorter than the
 * minimum length, is reported as cardinal, monetary or numeric by the corresponding helper, or
 * occurs in fewer documents than the minimum document frequency.
 *
 * @param args source and destination paths
 * @throws IllegalStateException if the destination directory cannot be created
 * @throws Exception anything propagated from reading the source or writing the destination
 */
public static void main(String[] args) throws Exception {
    File inputPath = new File(args[0]);
    File outputPath = new File(args[1]);

    deleteQuietly(outputPath);
    // mkdirs() also returns false when the directory already exists, hence the second check.
    if (!outputPath.mkdirs() && !outputPath.isDirectory()) {
        throw new IllegalStateException("Cannot create output directory " + outputPath);
    }

    boolean ignoreNumerics = true;
    boolean ignoreCardinal = true;
    boolean ignoreMonetary = true;
    int minTermLength = 3;
    int minDocFreq = 5;

    System.out.println("Quality criteria");
    System.out.println("Minimum term length            : " + minTermLength);
    System.out.println("Minimum document frequency     : " + minDocFreq);
    System.out.println("Ignore numeric tokens          : " + ignoreNumerics);
    System.out.println("Ignore cardinal numeric tokens : " + ignoreCardinal);
    System.out.println("Ignore money values            : " + ignoreMonetary);

    System.out.print("Fetching terms list... ");

    Set<String> terms = new HashSet<String>();
    int ignoredTerms = 0;
    IndexReader reader = IndexReader.open(FSDirectory.open(inputPath));
    try {
        TermEnum termEnum = reader.terms();
        while (termEnum.next()) {
            String term = termEnum.term().text();
            if (((minTermLength > 0) && (term.length() < minTermLength)) || (ignoreCardinal && isCardinal(term))
                    || (ignoreMonetary && isMonetary(term)) || (ignoreNumerics && isNumericSpace(term))
                    || ((minDocFreq > 0) && (termEnum.docFreq() < minDocFreq))) {
                ignoredTerms++;
                continue;
            }

            terms.add(term);
        }
    } finally {
        // close the reader even when enumerating the terms fails
        reader.close();
    }

    System.out.println(terms.size() + " terms found. " + ignoredTerms + " terms ignored.");

    System.out.println("Opening source ESA index " + inputPath);
    VectorReader source = new LuceneVectorReader(inputPath);
    System.out.println("Opening destination ESA index " + outputPath);
    VectorIndexWriter esaWriter = new VectorIndexWriter(outputPath, source.getConceptCount());

    try {
        ProgressMeter p = new ProgressMeter(terms.size());
        for (String term : terms) {
            Vector vector = source.getVector(term);
            esaWriter.put(term, vector);

            p.next();
            System.out.println("[" + term + "] " + p);
        }
    } finally {
        // close the writer even when copying a vector fails
        esaWriter.close();
    }
}

From source file:org.switchyard.quickstarts.demo.security.propagation.jms.WorkServiceMain.java

/**
 * Parses the policy flags given on the command line and invokes the work service.
 *
 * <p>Blank arguments are skipped. Any argument other than {@code CONFIDENTIALITY},
 * {@code CLIENT_AUTHENTICATION} or {@code HELP} is rejected. With {@code HELP} present only the
 * usage text is logged. Otherwise the service is called over https (default port 8443) when
 * {@code CONFIDENTIALITY} was requested and over http (default port 8080) when it was not; a
 * fixed user/password pair is passed along only when {@code CLIENT_AUTHENTICATION} was
 * requested, {@code null} otherwise.
 *
 * @param args policy flags
 * @throws Exception if an unknown argument is given, or propagated from SSL setup or the call
 */
public static void main(String... args) throws Exception {
    Set<String> policies = new HashSet<String>();
    for (String rawArg : args) {
        String arg = Strings.trimToNull(rawArg);
        if (arg == null) {
            continue;
        }
        boolean recognized = arg.equals(CONFIDENTIALITY) || arg.equals(CLIENT_AUTHENTICATION)
                || arg.equals(HELP);
        if (!recognized) {
            LOGGER.error(MAVEN_USAGE);
            throw new Exception(MAVEN_USAGE);
        }
        policies.add(arg);
    }

    if (policies.contains(HELP)) {
        LOGGER.info(MAVEN_USAGE);
        return;
    }

    final boolean confidential = policies.contains(CONFIDENTIALITY);
    final String scheme = confidential ? "https" : "http";
    final int port = getPort(confidential ? 8443 : 8080);
    if (confidential) {
        SSLContext tlsContext = SSLContext.getInstance("TLS");
        tlsContext.init(null, null, null);
        SSLSocketFactory socketFactory = new SSLSocketFactory(tlsContext,
                SSLSocketFactory.STRICT_HOSTNAME_VERIFIER);
        // NOTE(review): the registry is populated but not passed on from here - confirm
        // whether invokeWorkService is expected to pick it up some other way.
        SchemeRegistry registry = new SchemeRegistry();
        registry.register(new Scheme(scheme, port, socketFactory));
    }

    String[] userPass = null;
    if (policies.contains(CLIENT_AUTHENTICATION)) {
        userPass = new String[] { "kermit", "the-frog-1" };
    }
    invokeWorkService(scheme, port, getContext(), userPass);
}