Example usage for java.util Map.Entry get

List of usage examples for java.util Map.Entry get

Introduction

On this page you can find example usage for java.util Map.Entry get.

Prototype

V get(Object key);

Document

Returns the value to which the specified key is mapped, or null if this map contains no mapping for the key.
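
As a quick illustration of this contract, here is a minimal, self-contained sketch (not taken from the examples below): get returns the mapped value or null for an absent key, and Map.Entry is the usual way to walk all mappings.

import java.util.HashMap;
import java.util.Map;

public class MapGetSketch {
    public static void main(String[] args) {
        Map<String, Integer> counts = new HashMap<>();
        counts.put("alpha", 1);
        counts.put("beta", 2);

        // get returns the mapped value, or null if the key is absent
        Integer present = counts.get("alpha");   // 1
        Integer missing = counts.get("gamma");   // null

        // Map.Entry gives key and value together when iterating
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
        System.out.println(present + ", " + missing);
    }
}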

Usage

From source file:org.springframework.boot.cli.command.init.ProjectGenerationRequest.java

private static void filter(Map<String, ProjectType> projects, String tag, String tagValue) {
    for (Iterator<Map.Entry<String, ProjectType>> it = projects.entrySet().iterator(); it.hasNext();) {
        Map.Entry<String, ProjectType> entry = it.next();
        String value = entry.getValue().getTags().get(tag);
        if (!tagValue.equals(value)) {
            it.remove();
        }
    }
}
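
The same filter-while-iterating pattern can be reproduced with plain collections; a minimal sketch follows, in which ProjectType is replaced by a plain Map<String, String> of tags purely for illustration.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class TagFilterSketch {
    // Remove every entry whose tag map does not carry the expected value for the given tag
    static void filter(Map<String, Map<String, String>> projects, String tag, String tagValue) {
        for (Iterator<Map.Entry<String, Map<String, String>>> it = projects.entrySet().iterator(); it.hasNext();) {
            Map.Entry<String, Map<String, String>> entry = it.next();
            String value = entry.getValue().get(tag);
            if (!tagValue.equals(value)) {
                it.remove();
            }
        }
    }

    public static void main(String[] args) {
        Map<String, Map<String, String>> projects = new HashMap<>();
        Map<String, String> tags = new HashMap<>();
        tags.put("build", "maven");
        projects.put("demo", tags);
        projects.put("other", new HashMap<>());

        filter(projects, "build", "maven");
        System.out.println(projects.keySet()); // [demo]
    }
}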

From source file:io.Tools.java

/**
 * Create the test PDB and Chemcomp folders. All PDB files in resources are also copied there, so all tests can
 * use this folder.
 *
 * @return the path to the created test folder
 */
public static String createPermanentTestFolder() {

    String d = System.getProperty("user.home");
    String builtTestFolder = d + File.separator + "Documents" + File.separator + testFolderName
            + File.separator;
    final File baseDir = new File(builtTestFolder);

    String builttestPDBFolder = builtTestFolder + File.separator + "pdb";
    baseDir.mkdirs();
    final File pdbDir = new File(builttestPDBFolder);
    if (Files.exists(Paths.get(builttestPDBFolder))) {
        try {
            FileUtils.deleteDirectory(pdbDir);
        } catch (IOException e) {
            // ignored: the directory is recreated below
        }
    }
    pdbDir.mkdir();

    String builttestChemcompFolder = builtTestFolder + File.separator + "chemcomp";
    final File chemcompDir = new File(builttestChemcompFolder);
    if (Files.exists(Paths.get(builttestChemcompFolder))) {
        try {
            FileUtils.deleteDirectory(chemcompDir);
        } catch (IOException e) {
            // ignored: the directory is recreated below
        }
    }

    chemcompDir.mkdirs();

    pdbDir.mkdir();
    testChemcompFolder = builtTestFolder;
    testPDBFolder = builttestPDBFolder;

    String resourcesPDBFolder = null;
    try {
        URL url = BiojavaReaderFromPDBFolderTest.class.getClassLoader().getResource("pdb/1di9.cif.gz");
        File pdb1di9file = new File(url.toURI());
        resourcesPDBFolder = pdb1di9file.getParent();
        Map<String, List<MMcifFileInfos>> indexPDBFileInFolder = IOTools
                .indexPDBFileInFolder(new File(resourcesPDBFolder).toString());
        for (Map.Entry<String, List<MMcifFileInfos>> entry : indexPDBFileInFolder.entrySet()) {
            try {
                FileUtils.copyFileToDirectory(new File(entry.getValue().get(0).getPathToFile().toString()),
                        pdbDir);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    } catch (URISyntaxException e) {
        e.printStackTrace();
    }

    String resourcesChemcompFolder = null;
    try {
        URL url = BiojavaReaderFromPDBFolderTest.class.getClassLoader().getResource("chemcomp/0DY.cif.gz");
        File chemcomp0DY = new File(url.toURI());
        resourcesChemcompFolder = chemcomp0DY.getParent();
        Map<String, List<Path>> indexPDBFileInFolder = IOTools
                .indexChemcompFileInFolder(new File(resourcesChemcompFolder).toString());
        for (Map.Entry<String, List<Path>> entry : indexPDBFileInFolder.entrySet()) {
            try {
                FileUtils.copyFileToDirectory(new File(entry.getValue().get(0).toString()),
                        new File(builttestChemcompFolder));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
    return testChemcompFolder;
}

From source file:com.mirth.connect.server.util.AttachmentUtil.java

public static byte[] reAttachMessage(String raw, ImmutableConnectorMessage connectorMessage,
        String charsetEncoding, boolean binary) {
    try {
        Map<Integer, Map<Integer, Object>> replacementObjects = new TreeMap<Integer, Map<Integer, Object>>();
        // Determine the buffer size during the first pass for better memory performance
        int bufferSize = raw.length();
        int index = 0;
        int endIndex;
        // Initialize the objects here so only one retrieval of the attachment content is ever needed.
        byte[] dicomObject = null;
        Map<String, Attachment> attachmentMap = null;

        // Handle the special case if only a dicom message is requested. 
        // In this case we can skip any byte appending and thus do not need to base64 encode the dicom object
        // if the type is binary.
        if (raw.trim().equals(PREFIX + DICOM_KEY + SUFFIX)) {
            dicomObject = DICOMUtil.getDICOMRawBytes(connectorMessage);

            if (!binary) {
                dicomObject = Base64Util.encodeBase64(dicomObject);
            }

            return dicomObject;
        }

        // Check the raw string in one pass for any attachments.
        // Stores the start and end indices to replace, along with the attachment content.
        while ((index = raw.indexOf(PREFIX, index)) != -1) {
            if (raw.startsWith(DICOM_KEY + SUFFIX, index + PREFIX.length())) {
                if (dicomObject == null) {
                    // Unfortunately, if the dicom data needs to be appended to other base64 data, it must be done in base64.
                    dicomObject = Base64Util.encodeBase64(DICOMUtil.getDICOMRawBytes(connectorMessage));
                }

                endIndex = index + PREFIX.length() + DICOM_KEY.length() + SUFFIX.length();

                Map<Integer, Object> replacementMap = new HashMap<Integer, Object>();
                replacementMap.put(KEY_END_INDEX, endIndex);
                replacementMap.put(KEY_DATA, dicomObject);
                replacementObjects.put(index, replacementMap);

                bufferSize += dicomObject.length;
                index += endIndex - index;
            } else if (raw.startsWith(ATTACHMENT_KEY, index + PREFIX.length())) {
                if (attachmentMap == null) {
                    List<Attachment> list = getMessageAttachments(connectorMessage);

                    // Store the attachments in a map with the attachment's Id as the key
                    attachmentMap = new HashMap<String, Attachment>();
                    for (Attachment attachment : list) {
                        attachmentMap.put(attachment.getId(), attachment);
                    }
                }

                int attachmentIdStartIndex = index + PREFIX.length() + ATTACHMENT_KEY.length();
                int attachmentIdEndIndex = attachmentIdStartIndex + ATTACHMENT_ID_LENGTH;
                endIndex = attachmentIdEndIndex + SUFFIX.length();
                String attachmentId = raw.substring(attachmentIdStartIndex,
                        attachmentIdStartIndex + ATTACHMENT_ID_LENGTH);

                if (raw.substring(attachmentIdEndIndex, endIndex).equals(SUFFIX)) {
                    Map<Integer, Object> replacementMap = new HashMap<Integer, Object>();
                    replacementMap.put(KEY_END_INDEX, endIndex);

                    if (attachmentMap.containsKey(attachmentId)) {
                        Attachment attachment = attachmentMap.get(attachmentId);
                        replacementMap.put(KEY_DATA, attachment.getContent());

                        bufferSize += attachment.getContent().length;
                    } else {
                        replacementMap.put(KEY_DATA, new byte[0]);
                    }

                    replacementObjects.put(index, replacementMap);
                }
            } else {
                endIndex = index + PREFIX.length();
            }

            index += endIndex - index;
        }
        // Release the object pointers of the attachment content so they aren't held in memory for the entire method
        dicomObject = null;
        attachmentMap = null;

        // Initialize the stream's buffer size. The buffer size will always be slightly larger than needed,
        // because the template keys are never removed from the buffer size.
        // It is not worth doing any extra calculations for the amount of memory saved.
        ByteArrayOutputStream baos = new ByteArrayOutputStream(bufferSize);

        int segmentStartIndex = 0;
        for (Map.Entry<Integer, Map<Integer, Object>> entry : replacementObjects.entrySet()) {
            int startReplacementIndex = entry.getKey();
            int endReplacementIndex = (Integer) entry.getValue().get(KEY_END_INDEX);
            byte[] data = (byte[]) entry.getValue().get(KEY_DATA);

            // Allows the memory used by the attachments to be released at the end of the loop
            entry.getValue().clear();

            byte[] templateSegment;
            // If the data is binary, the content should be in base64, so using US-ASCII as the charset encoding should be sufficient.
            if (binary) {
                templateSegment = StringUtils
                        .getBytesUsAscii(raw.substring(segmentStartIndex, startReplacementIndex));
            } else {
                templateSegment = StringUtil.getBytesUncheckedChunked(
                        raw.substring(segmentStartIndex, startReplacementIndex), Constants.ATTACHMENT_CHARSET);
            }

            baos.write(templateSegment);
            baos.write(data);

            segmentStartIndex = endReplacementIndex;
        }

        byte[] templateSegment;
        if (binary) {
            templateSegment = StringUtils.getBytesUsAscii(raw.substring(segmentStartIndex));
        } else {
            templateSegment = StringUtil.getBytesUncheckedChunked(raw.substring(segmentStartIndex),
                    Constants.ATTACHMENT_CHARSET);
        }

        byte[] combined;
        // If there are no attachments, don't bother writing to the output stream.
        if (segmentStartIndex == 0) {
            combined = templateSegment;
        } else {
            // Write the segment after the last replacement.
            baos.write(templateSegment);

            combined = baos.toByteArray();
            // Release the memory used by the byte array stream. ByteArrayOutputStreams do not need to be closed. 
            baos = null;
        }

        templateSegment = null;

        // If binary, the content should be in base64 so it is necessary to decode the data.
        if (binary) {
            combined = Base64Util.decodeBase64(combined);
        } else if (charsetEncoding != null
                && !charsetEncoding.toUpperCase().equals(Constants.ATTACHMENT_CHARSET.toUpperCase())) {
            // Convert the byte array to a string using the internal encoding.
            String combinedString = StringUtils.newString(combined, Constants.ATTACHMENT_CHARSET);
            // First release the reference to the old byte data so it can be reallocated if necessary.
            combined = null;
            // Convert the string to a byte array using the requested encoding
            combined = StringUtil.getBytesUncheckedChunked(combinedString, charsetEncoding);
        }

        return combined;
    } catch (Exception e) {
        logger.error("Error reattaching attachments", e);
        return null;
    }
}

From source file:com.mirth.connect.server.util.MessageAttachmentUtil.java

public static byte[] reAttachMessage(String raw, ImmutableConnectorMessage connectorMessage,
        String charsetEncoding, boolean binary) {
    try {
        Map<Integer, Map<Integer, Object>> replacementObjects = new TreeMap<Integer, Map<Integer, Object>>();
        // Determine the buffer size during the first pass for better memory performance
        int bufferSize = raw.length();
        int index = 0;
        int endIndex;
        // Initialize the objects here so only one retrieval of the attachment content is ever needed.
        byte[] dicomObject = null;
        Map<String, Attachment> attachmentMap = null;

        // Handle the special case if only a dicom message is requested. 
        // In this case we can skip any byte appending and thus do not need to base64 encode the dicom object
        // if the type is binary.
        if (raw.trim().equals(PREFIX + DICOM_KEY + SUFFIX)) {
            dicomObject = DICOMMessageUtil.getDICOMRawBytes(connectorMessage);

            if (!binary) {
                dicomObject = Base64Util.encodeBase64(dicomObject);
            }

            return dicomObject;
        }

        // Check the raw string in one pass for any attachments.
        // Stores the start and end indices to replace, along with the attachment content.
        while ((index = raw.indexOf(PREFIX, index)) != -1) {
            if (raw.startsWith(DICOM_KEY + SUFFIX, index + PREFIX.length())) {
                if (dicomObject == null) {
                    // Unfortunately, if the dicom data needs to be appended to other base64 data, it must be done in base64.
                    dicomObject = Base64Util.encodeBase64(DICOMMessageUtil.getDICOMRawBytes(connectorMessage));
                }

                endIndex = index + PREFIX.length() + DICOM_KEY.length() + SUFFIX.length();

                Map<Integer, Object> replacementMap = new HashMap<Integer, Object>();
                replacementMap.put(KEY_END_INDEX, endIndex);
                replacementMap.put(KEY_DATA, dicomObject);
                replacementObjects.put(index, replacementMap);

                bufferSize += dicomObject.length;
                index += endIndex - index;
            } else if (raw.startsWith(ATTACHMENT_KEY, index + PREFIX.length())) {
                if (attachmentMap == null) {
                    List<Attachment> list = getMessageAttachments(connectorMessage);

                    // Store the attachments in a map with the attachment's Id as the key
                    attachmentMap = new HashMap<String, Attachment>();
                    for (Attachment attachment : list) {
                        attachmentMap.put(attachment.getId(), attachment);
                    }
                }

                int attachmentIdStartIndex = index + PREFIX.length() + ATTACHMENT_KEY.length();
                int attachmentIdEndIndex = attachmentIdStartIndex + ATTACHMENT_ID_LENGTH;
                endIndex = attachmentIdEndIndex + SUFFIX.length();
                String attachmentId = raw.substring(attachmentIdStartIndex,
                        attachmentIdStartIndex + ATTACHMENT_ID_LENGTH);

                if (raw.substring(attachmentIdEndIndex, endIndex).equals(SUFFIX)) {
                    Map<Integer, Object> replacementMap = new HashMap<Integer, Object>();
                    replacementMap.put(KEY_END_INDEX, endIndex);

                    if (attachmentMap.containsKey(attachmentId)) {
                        Attachment attachment = attachmentMap.get(attachmentId);
                        replacementMap.put(KEY_DATA, attachment.getContent());

                        bufferSize += attachment.getContent().length;
                    } else {
                        replacementMap.put(KEY_DATA, new byte[0]);
                    }

                    replacementObjects.put(index, replacementMap);
                }
            } else {
                endIndex = index + PREFIX.length();
            }

            index += endIndex - index;
        }
        // Release the object pointers of the attachment content so they aren't held in memory for the entire method
        dicomObject = null;
        attachmentMap = null;

        // Initialize the stream's buffer size. The buffer size will always be slightly larger than needed,
        // because the template keys are never removed from the buffer size.
        // It is not worth doing any extra calculations for the amount of memory saved.
        ByteArrayOutputStream baos = new ByteArrayOutputStream(bufferSize);

        int segmentStartIndex = 0;
        for (Map.Entry<Integer, Map<Integer, Object>> entry : replacementObjects.entrySet()) {
            int startReplacementIndex = entry.getKey();
            int endReplacementIndex = (Integer) entry.getValue().get(KEY_END_INDEX);
            byte[] data = (byte[]) entry.getValue().get(KEY_DATA);

            // Allows the memory used by the attachments to be released at the end of the loop
            entry.getValue().clear();

            byte[] templateSegment;
            // If the data is binary, the content should be in base64, so using US-ASCII as the charset encoding should be sufficient.
            if (binary) {
                templateSegment = StringUtils
                        .getBytesUsAscii(raw.substring(segmentStartIndex, startReplacementIndex));
            } else {
                templateSegment = StringUtil.getBytesUncheckedChunked(
                        raw.substring(segmentStartIndex, startReplacementIndex), Constants.ATTACHMENT_CHARSET);
            }

            baos.write(templateSegment);
            baos.write(data);

            segmentStartIndex = endReplacementIndex;
        }

        byte[] templateSegment;
        if (binary) {
            templateSegment = StringUtils.getBytesUsAscii(raw.substring(segmentStartIndex));
        } else {
            templateSegment = StringUtil.getBytesUncheckedChunked(raw.substring(segmentStartIndex),
                    Constants.ATTACHMENT_CHARSET);
        }

        byte[] combined;
        // If there are no attachments, don't bother writing to the output stream.
        if (segmentStartIndex == 0) {
            combined = templateSegment;
        } else {
            // Write the segment after the last replacement.
            baos.write(templateSegment);

            combined = baos.toByteArray();
            // Release the memory used by the byte array stream. ByteArrayOutputStreams do not need to be closed. 
            baos = null;
        }

        templateSegment = null;

        // If binary, the content should be in base64 so it is necessary to decode the data.
        if (binary) {
            combined = Base64Util.decodeBase64(combined);
        } else if (charsetEncoding != null
                && !charsetEncoding.toUpperCase().equals(Constants.ATTACHMENT_CHARSET.toUpperCase())) {
            // Convert the byte array to a string using the internal encoding.
            String combinedString = StringUtils.newString(combined, Constants.ATTACHMENT_CHARSET);
            // First release the reference to the old byte data so it can be reallocated if necessary.
            combined = null;
            // Convert the string to a byte array using the requested encoding
            combined = StringUtil.getBytesUncheckedChunked(combinedString, charsetEncoding);
        }

        return combined;
    } catch (Exception e) {
        logger.error("Error reattaching attachments", e);
        return null;
    }
}

From source file:eu.delving.sip.files.StorageHelper.java

static Collection<File> findLatestPrefixFiles(File dir, Storage.FileType fileType) {
    File[] files = dir.listFiles(new PrefixFileFilter(fileType));
    Map<String, List<File>> map = new TreeMap<String, List<File>>();
    for (File file : files) {
        String prefix = extractName(file, fileType);
        if (prefix == null)
            continue;
        List<File> list = map.get(prefix);
        if (list == null) {
            map.put(prefix, list = new ArrayList<File>());
        }
        list.add(file);
    }
    List<File> latestFiles = new ArrayList<File>();
    for (Map.Entry<String, List<File>> entry : map.entrySet()) {
        if (entry.getValue().size() == 1) {
            latestFiles.add(entry.getValue().get(0));
        } else {
            latestFiles
                    .add(getRecent(entry.getValue().toArray(new File[entry.getValue().size()]), 0, fileType));
        }
    }
    return latestFiles;
}

From source file:org.elasticsearch.client.sniff.ElasticsearchNodesSnifferTests.java

private static SniffResponse buildSniffResponse(ElasticsearchNodesSniffer.Scheme scheme) throws IOException {
    int numNodes = RandomNumbers.randomIntBetween(getRandom(), 1, 5);
    List<Node> nodes = new ArrayList<>(numNodes);
    JsonFactory jsonFactory = new JsonFactory();
    StringWriter writer = new StringWriter();
    JsonGenerator generator = jsonFactory.createGenerator(writer);
    generator.writeStartObject();
    if (getRandom().nextBoolean()) {
        generator.writeStringField("cluster_name", "elasticsearch");
    }
    if (getRandom().nextBoolean()) {
        generator.writeObjectFieldStart("bogus_object");
        generator.writeEndObject();
    }
    generator.writeObjectFieldStart("nodes");
    for (int i = 0; i < numNodes; i++) {
        String nodeId = RandomStrings.randomAsciiOfLengthBetween(getRandom(), 5, 10);
        String host = "host" + i;
        int port = RandomNumbers.randomIntBetween(getRandom(), 9200, 9299);
        HttpHost publishHost = new HttpHost(host, port, scheme.toString());
        Set<HttpHost> boundHosts = new HashSet<>();
        boundHosts.add(publishHost);

        if (randomBoolean()) {
            int bound = between(1, 5);
            for (int b = 0; b < bound; b++) {
                boundHosts.add(new HttpHost(host + b, port, scheme.toString()));
            }
        }

        int numAttributes = between(0, 5);
        Map<String, List<String>> attributes = new HashMap<>(numAttributes);
        for (int j = 0; j < numAttributes; j++) {
            int numValues = frequently() ? 1 : between(2, 5);
            List<String> values = new ArrayList<>();
            for (int v = 0; v < numValues; v++) {
                values.add(j + "value" + v);
            }
            attributes.put("attr" + j, values);
        }

        Node node = new Node(publishHost, boundHosts, randomAsciiAlphanumOfLength(5),
                randomAsciiAlphanumOfLength(5),
                new Node.Roles(randomBoolean(), randomBoolean(), randomBoolean()), attributes);

        generator.writeObjectFieldStart(nodeId);
        if (getRandom().nextBoolean()) {
            generator.writeObjectFieldStart("bogus_object");
            generator.writeEndObject();
        }
        if (getRandom().nextBoolean()) {
            generator.writeArrayFieldStart("bogus_array");
            generator.writeStartObject();
            generator.writeEndObject();
            generator.writeEndArray();
        }
        boolean isHttpEnabled = rarely() == false;
        if (isHttpEnabled) {
            nodes.add(node);
            generator.writeObjectFieldStart("http");
            generator.writeArrayFieldStart("bound_address");
            for (HttpHost bound : boundHosts) {
                generator.writeString(bound.toHostString());
            }
            generator.writeEndArray();
            if (getRandom().nextBoolean()) {
                generator.writeObjectFieldStart("bogus_object");
                generator.writeEndObject();
            }
            generator.writeStringField("publish_address", publishHost.toHostString());
            if (getRandom().nextBoolean()) {
                generator.writeNumberField("max_content_length_in_bytes", 104857600);
            }
            generator.writeEndObject();
        }

        List<String> roles = Arrays.asList(new String[] { "master", "data", "ingest" });
        Collections.shuffle(roles, getRandom());
        generator.writeArrayFieldStart("roles");
        for (String role : roles) {
            if ("master".equals(role) && node.getRoles().isMasterEligible()) {
                generator.writeString("master");
            }
            if ("data".equals(role) && node.getRoles().isData()) {
                generator.writeString("data");
            }
            if ("ingest".equals(role) && node.getRoles().isIngest()) {
                generator.writeString("ingest");
            }
        }
        generator.writeEndArray();

        generator.writeFieldName("version");
        generator.writeString(node.getVersion());
        generator.writeFieldName("name");
        generator.writeString(node.getName());

        if (numAttributes > 0) {
            generator.writeObjectFieldStart("attributes");
            for (Map.Entry<String, List<String>> entry : attributes.entrySet()) {
                if (entry.getValue().size() == 1) {
                    generator.writeStringField(entry.getKey(), entry.getValue().get(0));
                } else {
                    for (int v = 0; v < entry.getValue().size(); v++) {
                        generator.writeStringField(entry.getKey() + "." + v, entry.getValue().get(v));
                    }
                }
            }
            generator.writeEndObject();
        }
        generator.writeEndObject();
    }
    generator.writeEndObject();
    generator.writeEndObject();
    generator.close();
    return SniffResponse.buildResponse(writer.toString(), nodes);
}
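
The attributes block above follows a common pattern: single-valued lists become one JSON field, multi-valued lists become index-suffixed fields. Below is a minimal standalone sketch of that pattern with Jackson's JsonGenerator; the attribute names and values are made up for illustration.

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;

import java.io.StringWriter;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class AttributesJsonSketch {
    public static void main(String[] args) throws Exception {
        Map<String, List<String>> attributes = new LinkedHashMap<>();
        attributes.put("attr0", Arrays.asList("only"));
        attributes.put("attr1", Arrays.asList("first", "second"));

        StringWriter writer = new StringWriter();
        JsonGenerator generator = new JsonFactory().createGenerator(writer);
        generator.writeStartObject();
        for (Map.Entry<String, List<String>> entry : attributes.entrySet()) {
            if (entry.getValue().size() == 1) {
                // single value: plain field
                generator.writeStringField(entry.getKey(), entry.getValue().get(0));
            } else {
                // several values: one field per value, suffixed with its index
                for (int v = 0; v < entry.getValue().size(); v++) {
                    generator.writeStringField(entry.getKey() + "." + v, entry.getValue().get(v));
                }
            }
        }
        generator.writeEndObject();
        generator.close();
        System.out.println(writer); // {"attr0":"only","attr1.0":"first","attr1.1":"second"}
    }
}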

From source file:com.iti.request.NearbyService.java

public static List<String> getTrains(City from, City to) {

    Map<String, List<City>> trainsSchedule = getTrainsSchedule();
    List<String> result = new ArrayList<>();

    for (Map.Entry<String, List<City>> train : trainsSchedule.entrySet()) {
        int fromIndex = train.getValue().indexOf(from);
        int toIndex = train.getValue().indexOf(to);

        if (fromIndex == -1 || toIndex == -1) {
            continue;
        } else if (fromIndex >= toIndex) {
            continue;
        } else {
            result.add("  :" + train.getKey() + "   "
                    + train.getValue().get(fromIndex).getTime());
        }
    }

    return result;
}

From source file:com.incapture.rapgen.output.OutputWriter.java

/**
 * Some files are composed of multiple templates, so the map passed in here is filename to template order to
 * template. E.g. "file.txt"->1->"some code", "file.txt"->2->"other code", and so on.
 *
 * @param rootFolder
 * @param pathToTemplate
 */
public static void writeMultiPartTemplates(String rootFolder,
        Map<String, Map<String, StringTemplate>> pathToTemplate) {
    // For each file, dump the templates
    for (Map.Entry<String, Map<String, StringTemplate>> entry : pathToTemplate.entrySet()) {
        File file = new File(rootFolder, entry.getKey());
        file.getParentFile().mkdirs();

        BufferedWriter bow = null;
        try {
            bow = new BufferedWriter(new FileWriter(file));
            Set<String> sections = entry.getValue().keySet();
            SortedSet<String> sorted = new TreeSet<String>();
            sorted.addAll(sections);
            for (String sec : sorted) {
                bow.write(entry.getValue().get(sec).toString());
                bow.newLine();
            }
            bow.close();
        } catch (IOException e) {
            System.err.println(e.getMessage());
        } finally {
            if (bow != null) {
                try {
                    bow.close();
                } catch (IOException e) {
                    System.err.println("Error closing output stream: " + ExceptionToString.format(e));
                }
            }
        }
    }
}
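
A hypothetical caller would populate the nested map described in the Javadoc (filename to section key to template) before handing it over. The following is a minimal sketch, assuming the StringTemplate(String) constructor from the StringTemplate 3 library and a made-up output path.

import com.incapture.rapgen.output.OutputWriter;
import org.antlr.stringtemplate.StringTemplate;

import java.util.HashMap;
import java.util.Map;

public class MultiPartTemplateSketch {
    public static void main(String[] args) {
        // filename -> section key -> template; writeMultiPartTemplates sorts the section keys itself
        Map<String, Map<String, StringTemplate>> pathToTemplate = new HashMap<>();

        Map<String, StringTemplate> sections = new HashMap<>();
        sections.put("1", new StringTemplate("// header section"));
        sections.put("2", new StringTemplate("// body section"));
        pathToTemplate.put("file.txt", sections);

        // "/tmp/generated" is a placeholder root folder for this sketch
        OutputWriter.writeMultiPartTemplates("/tmp/generated", pathToTemplate);
    }
}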

From source file:rgu.jclos.foldbuilder.FoldBuilder.java

/**
 * Generates K folds and writes them to disk.
 * @param inputFile The CSV file from which the data comes.
 * @param outputDirectory The directory in which the folds will be written.
 * @param separator The separating character in the CSV file.
 * @param indexLabel The index of the labels in the CSV file. Used for stratification of the folds.
 * @param k The number of folds to generate.
 * @param speak Whether to print some status messages along the way.
 * @return A pair containing a list of folds with ids of documents, and a dictionary that allows the user to retrieve the aforementioned documents using the ids, in order to save space.
 * @throws IOException If something stops the program from reading or writing the files.
 */
public static Pair<List<Set<String>>, Map<String, Instance>> getFolds(String inputFile, String outputDirectory,
        String separator, int indexLabel, int k, boolean speak) throws IOException {
    Random rng = new Random();
    Map<String, Instance> dictionary = new HashMap<>();
    Map<String, Integer> classes = new HashMap<>();
    Map<String, List<String>> reversedDictionary = new HashMap<>();
    int id = 0;
    for (String line : Files.readAllLines(new File(inputFile).toPath())) {
        Instance inst = new Instance();
        String[] elements = line.split(separator);
        inst.content = line;
        inst.label = elements[indexLabel];
        String iid = "inst" + id;
        dictionary.put(iid, inst);
        classes.put(inst.label, classes.getOrDefault(inst.label, 0) + 1);
        if (reversedDictionary.containsKey(inst.label)) {
            reversedDictionary.get(inst.label).add(iid);
        } else {
            List<String> ids = new ArrayList<>();
            ids.add(iid);
            reversedDictionary.put(inst.label, ids);
        }
        id++;
    }

    int numberOfInstances = id;
    int sizeOfEachFold = (int) Math.floor(numberOfInstances / k);
    Map<String, Double> classRatios = new HashMap<>();
    for (Map.Entry<String, Integer> classFrequency : classes.entrySet()) {
        classRatios.put(classFrequency.getKey(),
                (double) classFrequency.getValue() / (double) numberOfInstances);
    }

    List<Set<String>> folds = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        Set<String> fold = new HashSet<>();
        for (Map.Entry<String, List<String>> c : reversedDictionary.entrySet()) {
            int currentSize = fold.size();
            int numberRequired = (int) Math.floor(classRatios.get(c.getKey()) * sizeOfEachFold);
            while (fold.size() < currentSize + numberRequired && c.getValue().size() > 0) {
                int nextPick = rng.nextInt(c.getValue().size());
                fold.add(c.getValue().get(nextPick));
                c.getValue().remove(nextPick);
            }
        }
        folds.add(fold);
        if (speak)
            System.out.println("Finished computing fold " + (i + 1) + " of size " + fold.size());
    }

    if (speak)
        System.out.println("Writing folds on disk");

    return Pair.of(folds, dictionary);
}
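
A hypothetical caller of the public overload above might consume the returned folds and dictionary as sketched below. The CSV path, the output directory, and the imports are assumptions: Pair is taken to be Apache Commons Lang's Pair (which matches the Pair.of call in the method), and Instance is assumed to live in the same package as FoldBuilder.

import org.apache.commons.lang3.tuple.Pair;
import rgu.jclos.foldbuilder.FoldBuilder;
import rgu.jclos.foldbuilder.Instance;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class FoldBuilderSketch {
    public static void main(String[] args) throws IOException {
        // Build 5 stratified folds from a CSV file whose column index 3 carries the class label
        // ("/tmp/data.csv" and "/tmp/folds" are placeholder paths for this sketch)
        Pair<List<Set<String>>, Map<String, Instance>> result =
                FoldBuilder.getFolds("/tmp/data.csv", "/tmp/folds", ",", 3, 5, false);

        List<Set<String>> folds = result.getLeft();
        Map<String, Instance> dictionary = result.getRight();

        for (int i = 0; i < folds.size(); i++) {
            System.out.println("Fold " + (i + 1) + " holds " + folds.get(i).size() + " instance ids");
        }
        // Individual instances are recovered from the dictionary by id
        for (String id : folds.get(0)) {
            System.out.println(id + " -> " + dictionary.get(id));
        }
    }
}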

From source file:rgu.jclos.foldbuilder.FoldBuilder.java

/**
 * Generates K folds and writes them to disk.
 * @param inputFile The CSV file from which the data comes.
 * @param outputDirectory The directory in which the folds will be written.
 * @param separator The separating character in the CSV file.
 * @param indexLabel The index of the labels in the CSV file. Used for stratification of the folds.
 * @param k The number of folds to generate.
 * @param speak Whether to print some status messages along the way.
 * @return A pair containing a list of folds with ids of documents, and a dictionary that allows the user to retrieve the aforementioned documents using the ids, in order to save space.
 * @throws IOException If something stops the program from reading or writing the files.
 */
private static Pair<List<Set<String>>, Map<String, Instance>> getFolds(String inputFile, String outputDirectory,
        String separator, String indexLabel, int k, boolean speak) throws IOException {
    Random rng = new Random();
    Map<String, Instance> dictionary = new HashMap<>();
    Map<String, Integer> classes = new HashMap<>();
    Map<String, List<String>> reversedDictionary = new HashMap<>();
    int id = 0;

    List<String> lines = Files.readAllLines(new File(inputFile).toPath());
    String[] elts = lines.get(0).split(separator);
    int labIndex = indexLabel.equals("first") ? 0
            : indexLabel.equals("last") ? elts.length - 1 : Integer.parseInt(indexLabel);

    for (String line : Files.readAllLines(new File(inputFile).toPath())) {
        Instance inst = new Instance();
        String[] elements = line.split(separator);
        inst.content = line;
        inst.label = elements[labIndex];
        String iid = "inst" + id;
        dictionary.put(iid, inst);
        classes.put(inst.label, classes.getOrDefault(inst.label, 0) + 1);
        if (reversedDictionary.containsKey(inst.label)) {
            reversedDictionary.get(inst.label).add(iid);
        } else {
            List<String> ids = new ArrayList<>();
            ids.add(iid);
            reversedDictionary.put(inst.label, ids);
        }
        id++;
    }

    int numberOfInstances = id;
    int sizeOfEachFold = (int) Math.floor(numberOfInstances / k);
    Map<String, Double> classRatios = new HashMap<>();
    for (Map.Entry<String, Integer> classFrequency : classes.entrySet()) {
        classRatios.put(classFrequency.getKey(),
                (double) classFrequency.getValue() / (double) numberOfInstances);
    }

    List<Set<String>> folds = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        Set<String> fold = new HashSet<>();
        for (Map.Entry<String, List<String>> c : reversedDictionary.entrySet()) {
            int currentSize = fold.size();
            int numberRequired = (int) Math.floor(classRatios.get(c.getKey()) * sizeOfEachFold);
            while (fold.size() < currentSize + numberRequired && c.getValue().size() > 0) {
                int nextPick = rng.nextInt(c.getValue().size());
                fold.add(c.getValue().get(nextPick));
                c.getValue().remove(nextPick);
            }
        }
        folds.add(fold);
        if (speak)
            System.out.println("Finished computing fold " + (i + 1) + " of size " + fold.size());
    }

    if (speak)
        System.out.println("Writing folds on disk");

    return Pair.of(folds, dictionary);
}