Example usage for java.util HashMap values

List of usage examples for java.util HashMap values

Introduction

On this page you can find usage examples for the java.util.HashMap values() method.

Prototype

public Collection<V> values() 

Document

Returns a Collection view of the values contained in this map. The collection is backed by the map, so changes to the map are reflected in the collection, and vice versa.
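
Before the full examples below, here is a minimal, self-contained sketch (hypothetical class name and sample data) illustrating that values() returns a live view rather than a copy: removing an element from the returned collection removes a matching entry from the map, and later additions to the map are visible through the view.

import java.util.Collection;
import java.util.HashMap;

public class HashMapValuesDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> scores = new HashMap<>(); // hypothetical sample data
        scores.put("alice", 90);
        scores.put("bob", 75);
        scores.put("carol", 75);

        // The returned Collection is a view backed by the map.
        Collection<Integer> values = scores.values();

        // Removing from the view removes one matching entry from the map.
        values.remove(75);
        System.out.println(scores.size()); // 2

        // Changes to the map are visible through the view.
        scores.put("dave", 60);
        System.out.println(values.size()); // 3
    }
}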

Usage

From source file:fr.cirad.mgdb.exporting.markeroriented.GFFExportHandler.java

@Override
public void exportData(OutputStream outputStream, String sModule, List<SampleId> sampleIDs,
        ProgressIndicator progress, DBCursor markerCursor, Map<Comparable, Comparable> markerSynonyms,
        int nMinimumGenotypeQuality, int nMinimumReadDepth, Map<String, InputStream> readyToExportFiles)
        throws Exception {
    MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    ZipOutputStream zos = new ZipOutputStream(outputStream);

    if (readyToExportFiles != null)
        for (String readyToExportFile : readyToExportFiles.keySet()) {
            zos.putNextEntry(new ZipEntry(readyToExportFile));
            InputStream inputStream = readyToExportFiles.get(readyToExportFile);
            byte[] dataBlock = new byte[1024];
            int count = inputStream.read(dataBlock, 0, 1024);
            while (count != -1) {
                zos.write(dataBlock, 0, count);
                count = inputStream.read(dataBlock, 0, 1024);
            }
        }

    File warningFile = File.createTempFile("export_warnings_", "");
    FileWriter warningFileWriter = new FileWriter(warningFile);

    int markerCount = markerCursor.count();

    List<Individual> individuals = getIndividualsFromSamples(sModule, sampleIDs);
    ArrayList<String> individualList = new ArrayList<String>();
    for (int i = 0; i < sampleIDs.size(); i++) {
        Individual individual = individuals.get(i);
        if (!individualList.contains(individual.getId())) {
            individualList.add(individual.getId());
        }
    }

    String exportName = sModule + "_" + markerCount + "variants_" + individualList.size() + "individuals";
    zos.putNextEntry(new ZipEntry(exportName + ".gff3"));
    String header = "##gff-version 3" + LINE_SEPARATOR;
    zos.write(header.getBytes());

    TreeMap<String, String> typeToOntology = new TreeMap<String, String>();
    typeToOntology.put(Type.SNP.toString(), "SO:0000694");
    typeToOntology.put(Type.INDEL.toString(), "SO:1000032");
    typeToOntology.put(Type.MIXED.toString(), "SO:0001059");
    typeToOntology.put(Type.SYMBOLIC.toString(), "SO:0000109");
    typeToOntology.put(Type.MNP.toString(), "SO:0001059");

    int avgObjSize = (Integer) mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantRunData.class)).getStats().get("avgObjSize");
    int nChunkSize = nMaxChunkSizeInMb * 1024 * 1024 / avgObjSize;
    short nProgress = 0, nPreviousProgress = 0;
    long nLoadedMarkerCount = 0;

    while (markerCursor.hasNext()) {
        int nLoadedMarkerCountInLoop = 0;
        Map<Comparable, String> markerChromosomalPositions = new LinkedHashMap<Comparable, String>();
        boolean fStartingNewChunk = true;
        markerCursor.batchSize(nChunkSize);
        while (markerCursor.hasNext() && (fStartingNewChunk || nLoadedMarkerCountInLoop % nChunkSize != 0)) {
            DBObject exportVariant = markerCursor.next();
            DBObject refPos = (DBObject) exportVariant.get(VariantData.FIELDNAME_REFERENCE_POSITION);
            markerChromosomalPositions.put((Comparable) exportVariant.get("_id"),
                    refPos.get(ReferencePosition.FIELDNAME_SEQUENCE) + ":"
                            + refPos.get(ReferencePosition.FIELDNAME_START_SITE));
            nLoadedMarkerCountInLoop++;
            fStartingNewChunk = false;
        }

        List<Comparable> currentMarkers = new ArrayList<Comparable>(markerChromosomalPositions.keySet());
        LinkedHashMap<VariantData, Collection<VariantRunData>> variantsAndRuns = MgdbDao.getSampleGenotypes(
                mongoTemplate, sampleIDs, currentMarkers, true,
                null /*new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_SEQUENCE).and(new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_START_SITE))*/); // query mongo db for matching genotypes
        for (VariantData variant : variantsAndRuns.keySet()) // read data and write results into temporary files (one per sample)
        {
            Comparable variantId = variant.getId();
            List<String> variantDataOrigin = new ArrayList<String>();

            Map<String, Integer> gqValueForSampleId = new LinkedHashMap<String, Integer>();
            Map<String, Integer> dpValueForSampleId = new LinkedHashMap<String, Integer>();
            Map<String, List<String>> individualGenotypes = new LinkedHashMap<String, List<String>>();
            List<String> chromAndPos = Helper.split(markerChromosomalPositions.get(variantId), ":");
            if (chromAndPos.size() == 0)
                LOG.warn("Chromosomal position not found for marker " + variantId);
            // LOG.debug(marker + "\t" + (chromAndPos.length == 0 ? "0" : chromAndPos[0]) + "\t" + 0 + "\t" + (chromAndPos.length == 0 ? 0l : Long.parseLong(chromAndPos[1])) + LINE_SEPARATOR);
            if (markerSynonyms != null) {
                Comparable syn = markerSynonyms.get(variantId);
                if (syn != null)
                    variantId = syn;
            }

            Collection<VariantRunData> runs = variantsAndRuns.get(variant);
            if (runs != null)
                for (VariantRunData run : runs)
                    for (Integer sampleIndex : run.getSampleGenotypes().keySet()) {
                        SampleGenotype sampleGenotype = run.getSampleGenotypes().get(sampleIndex);
                        String individualId = individuals
                                .get(sampleIDs.indexOf(new SampleId(run.getId().getProjectId(), sampleIndex)))
                                .getId();

                        Integer gq = null;
                        try {
                            gq = (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_GQ);
                        } catch (Exception ignored) {
                        }
                        if (gq != null && gq < nMinimumGenotypeQuality)
                            continue;

                        Integer dp = null;
                        try {
                            dp = (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_DP);
                        } catch (Exception ignored) {
                        }
                        if (dp != null && dp < nMinimumReadDepth)
                            continue;

                        String gtCode = sampleGenotype.getCode();
                        List<String> storedIndividualGenotypes = individualGenotypes.get(individualId);
                        if (storedIndividualGenotypes == null) {
                            storedIndividualGenotypes = new ArrayList<String>();
                            individualGenotypes.put(individualId, storedIndividualGenotypes);
                        }
                        storedIndividualGenotypes.add(gtCode);
                    }

            zos.write((chromAndPos.get(0) + "\t" + StringUtils.join(variantDataOrigin, ";") /*source*/ + "\t"
                    + typeToOntology.get(variant.getType()) + "\t" + Long.parseLong(chromAndPos.get(1)) + "\t"
                    + Long.parseLong(chromAndPos.get(1)) + "\t" + "." + "\t" + "+" + "\t" + "." + "\t")
                            .getBytes());
            Comparable syn = markerSynonyms == null ? null : markerSynonyms.get(variant.getId());
            zos.write(("ID=" + variant.getId() + ";" + (syn != null ? "Name=" + syn + ";" : "") + "alleles="
                    + StringUtils.join(variant.getKnownAlleleList(), "/") + ";" + "refallele="
                    + variant.getKnownAlleleList().get(0) + ";").getBytes());

            // we use individualList because it has the proper ordering
            for (int j = 0; j < individualList.size(); j++) {

                NumberFormat nf = NumberFormat.getInstance(Locale.US);
                nf.setMaximumFractionDigits(4);
                HashMap<String, Integer> compt1 = new HashMap<String, Integer>();
                int highestGenotypeCount = 0;
                int sum = 0;

                String individualId = individualList.get(j);
                List<String> genotypes = individualGenotypes.get(individualId);
                HashMap<Object, Integer> genotypeCounts = new HashMap<Object, Integer>(); // will help us to keep track of missing genotypes

                String mostFrequentGenotype = null;
                if (genotypes != null)
                    for (String genotype : genotypes) {
                        if (genotype.length() == 0)
                            continue; /* skip missing genotypes */

                        int count = 0;
                        for (String t : variant.getAllelesFromGenotypeCode(genotype)) {
                            for (String t1 : variant.getKnownAlleleList()) {
                                if (t.equals(t1) && !(compt1.containsKey(t1))) {
                                    count++;
                                    compt1.put(t1, count);
                                } else if (t.equals(t1) && compt1.containsKey(t1)) {
                                    if (compt1.get(t1) != 0) {
                                        count++;
                                        compt1.put(t1, count);
                                    } else
                                        compt1.put(t1, count);
                                } else if (!(compt1.containsKey(t1))) {
                                    compt1.put(t1, 0);
                                }
                            }
                        }
                        for (int countValue : compt1.values()) {
                            sum += countValue;
                        }

                        int gtCount = 1 + MgdbDao.getCountForKey(genotypeCounts, genotype);
                        if (gtCount > highestGenotypeCount) {
                            highestGenotypeCount = gtCount;
                            mostFrequentGenotype = genotype;
                        }
                        genotypeCounts.put(genotype, gtCount);
                    }

                List<String> alleles = mostFrequentGenotype == null ? new ArrayList<String>()
                        : variant.getAllelesFromGenotypeCode(mostFrequentGenotype);

                if (alleles.size() != 0) {
                    zos.write(("acounts=" + individualId + ":").getBytes());

                    for (String knowAllelesCompt : compt1.keySet()) {
                        zos.write(
                                (knowAllelesCompt + " " + nf.format(compt1.get(knowAllelesCompt) / (float) sum)
                                        + " " + compt1.get(knowAllelesCompt) + " ").getBytes());
                    }
                    zos.write((alleles.size() + ";").getBytes());
                }
                if (genotypeCounts.size() > 1) {
                    Comparable sVariantId = markerSynonyms != null ? markerSynonyms.get(variant.getId())
                            : variant.getId();
                    warningFileWriter.write("- Dissimilar genotypes found for variant "
                            + (sVariantId == null ? variant.getId() : sVariantId) + ", individual "
                            + individualId + ". Exporting most frequent: " + StringUtils.join(alleles, ",")
                            + "\n");
                }
            }
            zos.write((LINE_SEPARATOR).getBytes());
        }

        if (progress.hasAborted())
            return;

        nLoadedMarkerCount += nLoadedMarkerCountInLoop;
        nProgress = (short) (nLoadedMarkerCount * 100 / markerCount);
        if (nProgress > nPreviousProgress) {
            //            if (nProgress%5 == 0)
            //               LOG.info("========================= exportData: " + nProgress + "% =========================" + (System.currentTimeMillis() - before)/1000 + "s");
            progress.setCurrentStepProgress(nProgress);
            nPreviousProgress = nProgress;
        }
    }

    warningFileWriter.close();
    if (warningFile.length() > 0) {
        zos.putNextEntry(new ZipEntry(exportName + "-REMARKS.txt"));
        int nWarningCount = 0;
        BufferedReader in = new BufferedReader(new FileReader(warningFile));
        String sLine;
        while ((sLine = in.readLine()) != null) { // each warning occupies a single line, so read once per iteration
            zos.write((sLine + "\n").getBytes());
            nWarningCount++;
        }
        LOG.info("Number of Warnings for export (" + exportName + "): " + nWarningCount);
        in.close();
    }
    warningFile.delete();

    zos.close();
    progress.setCurrentStepProgress((short) 100);
}
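
In the GFF export above, values() appears in the allele-tallying step: for (int countValue : compt1.values()) sum += countValue; totals the per-allele counts before the frequencies are written out. A minimal sketch of that summation pattern (hypothetical map contents):

import java.util.HashMap;

public class SumValuesDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> alleleCounts = new HashMap<>(); // hypothetical sample data
        alleleCounts.put("A", 3);
        alleleCounts.put("T", 1);

        // Total all counts by iterating the values view.
        int sum = 0;
        for (int count : alleleCounts.values()) {
            sum += count;
        }
        System.out.println(sum); // 4
    }
}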

From source file:org.apache.hadoop.hbase.master.assignment.AssignmentManager.java

private void processAssignQueue() {
    final HashMap<HRegionInfo, RegionStateNode> regions = waitOnAssignQueue();
    if (regions == null || regions.size() == 0 || !isRunning()) {
        return;
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("PROCESS ASSIGN QUEUE regionCount=" + regions.size());
    }

    // TODO: Optimize balancer. pass a RegionPlan?
    final HashMap<HRegionInfo, ServerName> retainMap = new HashMap<HRegionInfo, ServerName>();
    final List<HRegionInfo> rrList = new ArrayList<HRegionInfo>();
    for (RegionStateNode regionNode : regions.values()) {
        if (regionNode.getRegionLocation() != null) {
            retainMap.put(regionNode.getRegionInfo(), regionNode.getRegionLocation());
        } else {
            rrList.add(regionNode.getRegionInfo());
        }
    }

    // TODO: connect with the listener to invalidate the cache
    final LoadBalancer balancer = getBalancer();

    // TODO use events
    List<ServerName> servers = master.getServerManager().createDestinationServersList();
    for (int i = 0; servers.size() < 1; ++i) {
        if (i % 4 == 0) {
            LOG.warn("no server available, unable to find a location for " + regions.size()
                    + " unassigned regions. waiting");
        }

        // was the AM killed?
        if (!isRunning()) {
            LOG.debug("aborting assignment-queue with " + regions.size() + " not assigned");
            return;
        }

        Threads.sleep(250);
        servers = master.getServerManager().createDestinationServersList();
    }

    final boolean isTraceEnabled = LOG.isTraceEnabled();
    if (isTraceEnabled) {
        LOG.trace("available servers count=" + servers.size() + ": " + servers);
    }

    // ask the balancer where to place regions
    if (!retainMap.isEmpty()) {
        if (isTraceEnabled) {
            LOG.trace("retain assign regions=" + retainMap);
        }
        try {
            acceptPlan(regions, balancer.retainAssignment(retainMap, servers));
        } catch (HBaseIOException e) {
            LOG.warn("unable to retain assignment", e);
            addToPendingAssignment(regions, retainMap.keySet());
        }
    }

    // TODO: Do we need to split retain and round-robin?
    // the retain seems to fallback to round-robin/random if the region is not in the map.
    if (!rrList.isEmpty()) {
        Collections.sort(rrList);
        if (isTraceEnabled) {
            LOG.trace("round robin regions=" + rrList);
        }
        try {
            acceptPlan(regions, balancer.roundRobinAssignment(rrList, servers));
        } catch (HBaseIOException e) {
            LOG.warn("unable to round-robin assignment", e);
            addToPendingAssignment(regions, rrList);
        }
    }
}
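
processAssignQueue uses values() to walk every queued RegionStateNode and split the set in two: regions with a known last location go into retainMap, the rest into the round-robin list. A minimal sketch of that partitioning pattern (hypothetical Node type standing in for RegionStateNode):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class PartitionValuesDemo {
    static class Node { // hypothetical stand-in for RegionStateNode
        final String name;
        final String location; // null means no previous assignment
        Node(String name, String location) { this.name = name; this.location = location; }
    }

    public static void main(String[] args) {
        HashMap<String, Node> regions = new HashMap<>();
        regions.put("r1", new Node("r1", "server-a"));
        regions.put("r2", new Node("r2", null));

        List<Node> retain = new ArrayList<>();
        List<Node> roundRobin = new ArrayList<>();
        // Partition the map's values into two groups, as in processAssignQueue().
        for (Node node : regions.values()) {
            if (node.location != null) {
                retain.add(node);
            } else {
                roundRobin.add(node);
            }
        }
        System.out.println(retain.size() + " retained, " + roundRobin.size() + " round-robin");
    }
}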

From source file:com.searchcode.app.jobs.repository.IndexGitRepoJob.java

/**
 * Only works if we have path to GIT
 */
public List<CodeOwner> getBlameInfoExternal(int codeLinesSize, String repoName, String repoLocations,
        String fileName) {
    List<CodeOwner> codeOwners = new ArrayList<>(codeLinesSize);

    // -w is to ignore whitespace bug
    ProcessBuilder processBuilder = new ProcessBuilder(this.GIT_BINARY_PATH, "blame", "-c", "-w", fileName);
    // The / part is required due to a CentOS bug in version 1.1.1
    processBuilder.directory(new File(repoLocations + "/" + repoName));

    Process process = null;
    BufferedReader bufferedReader = null;

    try {
        process = processBuilder.start();

        InputStream is = process.getInputStream();
        InputStreamReader isr = new InputStreamReader(is, Values.CHARSET_UTF8);
        bufferedReader = new BufferedReader(isr);
        String line;
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd kk:mm:ss"); // MM = month-of-year; lowercase mm would parse minutes here

        HashMap<String, CodeOwner> owners = new HashMap<>();

        boolean foundSomething = false;

        while ((line = bufferedReader.readLine()) != null) {
            Singleton.getLogger().info("Blame line " + repoName + fileName + ": " + line);
            String[] split = line.split("\t");

            if (split.length > 2 && split[1].length() != 0) {
                foundSomething = true;
                String author = split[1].substring(1);
                int commitTime = (int) (System.currentTimeMillis() / 1000);
                try {
                    commitTime = (int) (df.parse(split[2]).getTime() / 1000);
                } catch (ParseException ex) {
                    Singleton.getLogger().info("time parse expection for " + repoName + fileName);
                }

                if (owners.containsKey(author)) {
                    CodeOwner codeOwner = owners.get(author);
                    codeOwner.incrementLines();

                    int timestamp = codeOwner.getMostRecentUnixCommitTimestamp();

                    if (commitTime > timestamp) {
                        codeOwner.setMostRecentUnixCommitTimestamp(commitTime);
                    }
                    owners.put(author, codeOwner);
                } else {
                    owners.put(author, new CodeOwner(author, 1, commitTime));
                }
            }
        }

        if (!foundSomething) {
            // External call for CentOS issue
            String[] split = fileName.split("/");

            if (split.length != 1) {
                codeOwners = getBlameInfoExternal(codeLinesSize, repoName, repoLocations,
                        String.join("/", Arrays.asList(split).subList(1, split.length)));
            }

        } else {
            codeOwners = new ArrayList<>(owners.values());
        }

    } catch (IOException | StringIndexOutOfBoundsException ex) {
        Singleton.getLogger().info("getBlameInfoExternal repoloc: " + repoLocations + "/" + repoName);
        Singleton.getLogger().info("getBlameInfoExternal fileName: " + fileName);
        Singleton.getLogger()
                .warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass()
                        + " getBlameInfoExternal for " + repoName + " " + fileName + "\n with message: "
                        + ex.getMessage());
    } finally {
        Singleton.getHelpers().closeQuietly(process);
        Singleton.getHelpers().closeQuietly(bufferedReader);
    }

    return codeOwners;
}
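
Both searchcode variants of getBlameInfoExternal finish by materializing the accumulated map into a list with new ArrayList<>(owners.values()). A minimal sketch of that copy step (hypothetical map contents); unlike the values view itself, the copy is independent, so later changes to the map do not affect it:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class ValuesToListDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> linesByAuthor = new HashMap<>(); // hypothetical sample data
        linesByAuthor.put("alice", 120);
        linesByAuthor.put("bob", 45);

        // Copy the values view into an independent ArrayList.
        List<Integer> counts = new ArrayList<>(linesByAuthor.values());
        linesByAuthor.put("carol", 10);
        System.out.println(counts.size()); // still 2
    }
}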

From source file:com.logsniffer.event.h2.H2SnifferPersistence.java

@Override
public Map<Log, IncrementData> getIncrementDataByLog(final Sniffer sniffer, final LogSource<?> source)
        throws IOException {
    final List<Log> logs = source.getLogs();
    if (logs.size() > 0) {
        final HashMap<Log, IncrementData> incs = new HashMap<Log, IncrementData>();
        final HashMap<String, Log> logMapping = new HashMap<String, Log>();
        for (final Log log : logs) {
            logMapping.put(log.getPath(), log);
        }
        jdbcTemplate.query(
                "SELECT NEXT_POINTER, DATA, LOG FROM SNIFFERS_SCANNER_IDATA WHERE SNIFFER=? AND SOURCE=? AND LOG IN ("
                        + StringUtils.repeat("?", ",", logs.size()) + ") ORDER BY LOG",
                ArrayUtils.addAll(new Object[] { sniffer.getId(), source.getId() },
                        logMapping.keySet().toArray(new Object[logMapping.size()])),
                new RowCallbackHandler() {
                    @Override
                    public void processRow(final ResultSet rs) throws SQLException {
                        final String logPath = rs.getString("LOG");
                        final Log log = logMapping.get(logPath);
                        if (log != null) {
                            final IncrementData data = new IncrementData();
                            data.setData(JSONObject.fromObject(rs.getString("DATA")));
                            try {
                                final String jsonStr = rs.getString("NEXT_POINTER");
                                if (StringUtils.isNotBlank(jsonStr)) {
                                    data.setNextOffset(source.getLogAccess(log).getFromJSON(jsonStr));
                                }
                                incs.put(log, data);
                            } catch (final IOException e) {
                                throw new SQLException("Failed to construct pointer in log: " + log, e);
                            }
                        } else {
                            logger.error("Didn't find log '{}' for selected incrementdata", logPath);
                        }
                    }
                });
        // Create empty entries for not yet persisted
        for (final Log log : logMapping.values()) {
            if (!incs.containsKey(log)) {
                incs.put(log, new IncrementData());
            }
        }
        return incs;
    } else {
        return Collections.emptyMap();
    }
}
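
After the query, getIncrementDataByLog iterates logMapping.values() to backfill an empty IncrementData for every log that had no persisted row. A minimal sketch of that backfill pattern (hypothetical names and data):

import java.util.HashMap;

public class BackfillDemo {
    public static void main(String[] args) {
        HashMap<String, String> logMapping = new HashMap<>(); // hypothetical sample data
        logMapping.put("/var/log/a.log", "logA");
        logMapping.put("/var/log/b.log", "logB");

        HashMap<String, Integer> incs = new HashMap<>();
        incs.put("logA", 42); // only logA has persisted state

        // Create empty (zero) entries for logs not yet persisted.
        for (String log : logMapping.values()) {
            incs.putIfAbsent(log, 0);
        }
        System.out.println(incs); // {logA=42, logB=0} (order may vary)
    }
}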

From source file:gov.anl.cue.arcane.engine.matrix.MatrixModel.java

/**
 * Imports a matrix model from a template spreadsheet.
 *
 * @param matrixEngine the matrix engine
 * @param fileName            the file name
 * @return the results
 */
public static MatrixModel importTemplate(MatrixEngine matrixEngine, String fileName) {

    // Declare the results storage.
    MatrixModel matrixModel = new MatrixModel(matrixEngine);

    // Try to read the template spreadsheet.
    try {

        // Find the node request counts.
        HashMap<Integer, Integer> nodeCounts = MatrixModel.importTemplateDimensions(fileName);

        // Find the node base index counts.
        HashMap<Integer, Integer> nodeBases = MatrixModel.findNodeBases(nodeCounts);

        // Find the dimensions of the template spreadsheet.
        int nodeRequests = nodeCounts.size();
        int nodeCount = 0;
        for (Integer nodeRequest : nodeCounts.values()) {
            nodeCount += nodeRequest;
        }

        // Attempt to open the template spreadsheet.
        XSSFWorkbook workbook = new XSSFWorkbook(new FileInputStream(new File(fileName)));

        // Extract the fitness function.
        Iterator<XSSFSheet> sheets = MatrixModel.importTemplateExtractFitnessInformation(matrixModel,
                nodeCounts, nodeRequests, workbook);

        // Scan the variables.
        MatrixModel.importTemplateScanVariables(matrixModel, nodeCounts, nodeBases, nodeRequests, nodeCount,
                sheets);

        // Normalize the new model.
        matrixModel.normalize();

        // Catch errors.
    } catch (Exception e) {

        // Note an error.
        matrixModel = null;

    }

    // Return the results.
    return matrixModel;

}
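
importTemplate totals the requested node counts by looping over nodeCounts.values(). On Java 8 and later, the same total can be taken directly from the values view with a stream; a sketch of that alternative (not the project's code):

import java.util.HashMap;

public class StreamSumDemo {
    public static void main(String[] args) {
        HashMap<Integer, Integer> nodeCounts = new HashMap<>(); // hypothetical sample data
        nodeCounts.put(1, 4);
        nodeCounts.put(2, 6);

        // Sum the values view with a stream instead of an explicit loop.
        int nodeCount = nodeCounts.values().stream().mapToInt(Integer::intValue).sum();
        System.out.println(nodeCount); // 10
    }
}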

From source file:de.tor.tribes.ui.views.DSWorkbenchSOSRequestAnalyzer.java

private void copySelectionToClipboardAsBBCode() {
    HashMap<Tribe, SOSRequest> selectedRequests = new HashMap<>();
    List<DefenseInformation> selection = getSelectedRows();
    if (selection.isEmpty()) {
        showInfo("Keine SOS Anfragen eingelesen");
        return;
    }

    for (DefenseInformation info : selection) {
        Tribe defender = info.getTarget().getTribe();
        SOSRequest request = selectedRequests.get(defender);
        if (request == null) {
            request = new SOSRequest(defender);
            selectedRequests.put(defender, request);
        }
        TargetInformation targetInfo = request.addTarget(info.getTarget());
        targetInfo.merge(info.getTargetInformation());
    }

    try {
        boolean extended = (JOptionPaneHelper.showQuestionConfirmBox(this,
                "Erweiterte BB-Codes verwenden (nur für Forum und Notizen geeignet)?", "Erweiterter BB-Code",
                "Nein", "Ja") == JOptionPane.YES_OPTION);

        StringBuilder buffer = new StringBuilder();
        if (extended) {
            buffer.append("[u][size=12]SOS Anfragen[/size][/u]\n\n");
        } else {
            buffer.append("[u]SOS Anfragen[/u]\n\n");
        }

        List<SOSRequest> requests = new LinkedList<>();
        CollectionUtils.addAll(requests, selectedRequests.values());
        buffer.append(new SosListFormatter().formatElements(requests, extended));

        if (extended) {
            buffer.append("\n[size=8]Erstellt am ");
            buffer.append(
                    new SimpleDateFormat("dd.MM.yy 'um' HH:mm:ss").format(Calendar.getInstance().getTime()));
            buffer.append(" mit DS Workbench ");
            buffer.append(Constants.VERSION).append(Constants.VERSION_ADDITION + "[/size]\n");
        } else {
            buffer.append("\nErstellt am ");
            buffer.append(
                    new SimpleDateFormat("dd.MM.yy 'um' HH:mm:ss").format(Calendar.getInstance().getTime()));
            buffer.append(" mit DS Workbench ");
            buffer.append(Constants.VERSION).append(Constants.VERSION_ADDITION + "\n");
        }

        String b = buffer.toString();
        StringTokenizer t = new StringTokenizer(b, "[");
        int cnt = t.countTokens();
        if (cnt > 1000) {
            if (JOptionPaneHelper.showQuestionConfirmBox(this,
                    "Die momentan vorhandenen Anfragen bentigen mehr als 1000 BB-Codes\n"
                            + "und knnen daher im Spiel (Forum/IGM/Notizen) nicht auf einmal dargestellt werden.\nTrotzdem exportieren?",
                    "Zu viele BB-Codes", "Nein", "Ja") == JOptionPane.NO_OPTION) {
                return;
            }
        }

        Toolkit.getDefaultToolkit().getSystemClipboard().setContents(new StringSelection(b), null);
        showSuccess("Daten in Zwischenablage kopiert");
    } catch (Exception e) {
        logger.error("Failed to copy data to clipboard", e);
        showError("Fehler beim Kopieren in die Zwischenablage");
    }

}
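
copySelectionToClipboardAsBBCode builds selectedRequests with a get-or-create lookup per defender, then copies the aggregated requests out of the map via CollectionUtils.addAll(requests, selectedRequests.values()). A minimal sketch of that aggregate-then-collect pattern (hypothetical data, with a plain ArrayList copy instead of CollectionUtils):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class AggregateThenCollectDemo {
    public static void main(String[] args) {
        HashMap<String, List<String>> requestsByDefender = new HashMap<>();

        // Get-or-create per key, then aggregate into the stored value.
        requestsByDefender.computeIfAbsent("playerA", k -> new ArrayList<>()).add("village1");
        requestsByDefender.computeIfAbsent("playerA", k -> new ArrayList<>()).add("village2");
        requestsByDefender.computeIfAbsent("playerB", k -> new ArrayList<>()).add("village3");

        // Collect every aggregated request list via the values view.
        List<List<String>> allRequests = new ArrayList<>(requestsByDefender.values());
        System.out.println(allRequests.size()); // 2
    }
}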

From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java

/**
 * @param folds
 * @param minAgents
 * @param maxAgents
 * @param originalDatasetPath
 * @param outputDir
 * @param scenario
 * @param logger
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath, String outputDir,
        String scenario, Logger logger) {

    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {

            Instances originalData = this.getDataFromCSV(originalDatasetPath);

            originalData.randomize(new Random());
            originalData.stratify(folds);

            // TestDataSet
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // BayesCentralDataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(
                                outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file, new File(
                        outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio, scenario,
                        logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }
                Instances copy = new Instances(trainData);
                copy.delete();
                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }

                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }
                // Append essentials to all
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }

                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }

        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }

        logger.finest("Dataset for fold " + fold + " created.");
    }

    logger.finer("<-- splitDataset()");

}
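
splitDataset keeps one CsvWriter per agent in a HashMap and repeatedly fans work out over writers.values() and arffWriters.values(): once to append the essential records to every file, and again to close every writer. A minimal sketch of that fan-out/close pattern, using plain java.io writers instead of CsvWriter:

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashMap;

public class FanOutDemo {
    public static void main(String[] args) throws IOException {
        HashMap<String, Writer> writers = new HashMap<>(); // hypothetical writers
        writers.put("AGENT0", new StringWriter());
        writers.put("AGENT1", new StringWriter());

        // Fan the same record out to every writer in the map.
        for (Writer w : writers.values()) {
            w.write("essential-record\n");
        }

        // Close all writers via the same values view.
        for (Writer w : writers.values()) {
            w.close();
        }
        System.out.println(writers.get("AGENT0").toString()); // essential-record
    }
}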

From source file:com.nuxeo.intranet.jenkins.web.JenkinsJsonConverter.java

public List<Map<String, Serializable>> mergeData(List<Map<String, Serializable>> oldData,
        List<Map<String, Serializable>> newData) {
    // reset counters and merged data
    newFailingCount = 0;
    fixedCount = 0;
    unchangedCount = 0;
    mergedData = null;

    // gather up all old info, and use a map for easier reference
    HashMap<String, Map<String, Serializable>> res = new LinkedHashMap<String, Map<String, Serializable>>();
    if (oldData != null) {
        for (Map<String, Serializable> item : oldData) {
            res.put((String) item.get("job_id"), item);
        }
    }

    // add up new values and merge if already in the existing list
    if (newData != null) {
        for (Map<String, Serializable> item : newData) {
            String id = (String) item.get("job_id");
            String build_number = (String) item.get("build_number");
            if (res.containsKey(id)) {
                Map<String, Serializable> oldItem = res.get(id);
                String oldBuildNumber = String.valueOf(oldItem.get("build_number"));
                if (build_number != null && build_number.equals(oldBuildNumber)) {
                    // already the same job => update claimer and comment
                    // override claimer and comments
                    oldItem.put("claimer", item.get("claimer"));
                    oldItem.put("comment", item.get("comment"));
                    unchangedCount++;
                } else {
                    oldItem.put("updated_build_number", build_number);
                    String oldType = (String) oldItem.get("updated_type");
                    String newType = (String) item.get("type");
                    oldItem.put("updated_type", newType);
                    oldItem.put("updated_comment", item.get("comment"));
                    // only override claimer
                    oldItem.put("claimer", item.get("claimer"));
                    if ("SUCCESS".equals(newType) && !"SUCCESS".equals(oldType)) {
                        fixedCount++;
                    }
                }
                res.put(id, oldItem);
            } else {
                if (oldData != null && !oldData.isEmpty()) {
                    item.put("newly_failing", "true");
                }
                newFailingCount++;
                res.put(id, item);
            }
        }
    }

    unchangedCount = res.size() - (fixedCount + newFailingCount);

    mergedData = new ArrayList<Map<String, Serializable>>(res.values());
    return mergedData;
}
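
mergeData stores entries in a LinkedHashMap (assigned to a HashMap-typed variable, which is legal since LinkedHashMap extends HashMap), so the final new ArrayList<...>(res.values()) preserves insertion order: previously known jobs come first, newly failing ones last. A minimal sketch of that ordering behavior (hypothetical data):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;

public class OrderedValuesDemo {
    public static void main(String[] args) {
        // LinkedHashMap's values() iterates in insertion order.
        HashMap<String, String> jobs = new LinkedHashMap<>(); // hypothetical sample data
        jobs.put("job-old", "FAILURE");
        jobs.put("job-new", "SUCCESS");

        List<String> merged = new ArrayList<>(jobs.values());
        System.out.println(merged); // [FAILURE, SUCCESS] (insertion order preserved)
    }
}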

From source file:edu.ku.brc.specify.conversion.AgentConverter.java

/**
 * @param addressHash
 */
private void dumpInfo(final String fileName, final HashMap<Integer, AddressInfo> addressHash) {
    try {
        File file = new File(fileName);
        PrintWriter pw = new PrintWriter(file);
        pw.println("------- AgentInfo Dump-------- ");
        for (AgentInfo agentInfo : agentHash.values()) {
            pw.println(agentInfo.toString());
            for (Integer agtAdrId : agentInfo.getAddrs().keySet()) {
                pw.println(
                        "    AgentAddrId: " + agtAdrId + "    AddrId: " + agentInfo.getAddrs().get(agtAdrId));
            }
        }

        pw.println("\n------- AddressInfo Dump-------- ");
        for (AddressInfo addrInfo : addressHash.values()) {
            pw.println(addrInfo.toString());
        }
        pw.println("\n");
        pw.close();

        //System.out.println(FileUtils.readFileToString(file));

    } catch (Exception e1) {
        e1.printStackTrace();
    }
}
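
dumpInfo walks agentHash.values() and addressHash.values() to print every stored object, a natural fit for values() since the keys are not needed. A minimal sketch of that dump pattern (hypothetical data):

import java.util.HashMap;

public class DumpValuesDemo {
    public static void main(String[] args) {
        HashMap<Integer, String> addressHash = new HashMap<>(); // hypothetical sample data
        addressHash.put(1, "123 Main St");
        addressHash.put(2, "456 Oak Ave");

        // Iterate values directly when the keys are not needed.
        for (String address : addressHash.values()) {
            System.out.println(address);
        }
    }
}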

From source file:com.searchcode.app.jobs.IndexGitRepoJob.java

/**
 * Only works if we have path to GIT
 */
private List<CodeOwner> getBlameInfoExternal(int codeLinesSize, String repoName, String repoLocations,
        String fileName) {
    List<CodeOwner> codeOwners = new ArrayList<>(codeLinesSize);

    try {
        // -w is to ignore whitespace bug
        ProcessBuilder processBuilder = new ProcessBuilder(this.GITBINARYPATH, "blame", "-c", "-w", fileName);
        // The / part is required due to a CentOS bug in version 1.1.1
        processBuilder.directory(new File(repoLocations + "/" + repoName));

        Process process = processBuilder.start();

        InputStream is = process.getInputStream();
        InputStreamReader isr = new InputStreamReader(is);
        BufferedReader br = new BufferedReader(isr);
        String line;
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd kk:mm:ss"); // MM = month-of-year; lowercase mm would parse minutes here

        HashMap<String, CodeOwner> owners = new HashMap<>();

        boolean foundSomething = false;

        while ((line = br.readLine()) != null) {
            Singleton.getLogger().info("Blame line " + repoName + fileName + ": " + line);
            String[] split = line.split("\t");

            if (split.length > 2 && split[1].length() != 0) {
                foundSomething = true;
                String author = split[1].substring(1);
                int commitTime = (int) (System.currentTimeMillis() / 1000);
                try {
                    commitTime = (int) (df.parse(split[2]).getTime() / 1000);
                } catch (ParseException ex) {
                    Singleton.getLogger().info("time parse expection for " + repoName + fileName);
                }

                if (owners.containsKey(author)) {
                    CodeOwner codeOwner = owners.get(author);
                    codeOwner.incrementLines();

                    int timestamp = codeOwner.getMostRecentUnixCommitTimestamp();

                    if (commitTime > timestamp) {
                        codeOwner.setMostRecentUnixCommitTimestamp(commitTime);
                    }
                    owners.put(author, codeOwner);
                } else {
                    owners.put(author, new CodeOwner(author, 1, commitTime));
                }
            }
        }

        if (!foundSomething) {
            // External call for CentOS issue
            String[] split = fileName.split("/");

            if (split.length != 1) {
                codeOwners = getBlameInfoExternal(codeLinesSize, repoName, repoLocations,
                        String.join("/", Arrays.asList(split).subList(1, split.length)));
            }

        } else {
            codeOwners = new ArrayList<>(owners.values());
        }

    } catch (IOException | StringIndexOutOfBoundsException ex) {
        Singleton.getLogger().info("getBlameInfoExternal repoloc: " + repoLocations + "/" + repoName);
        Singleton.getLogger().info("getBlameInfoExternal fileName: " + fileName);
        Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass()
                + "\n with message: " + ex.getMessage());
    }

    return codeOwners;
}