org.icgc.dcc.download.server.config.RecordsStatsServiceConfig.java Source code

Java tutorial

Introduction

Here is the source code for org.icgc.dcc.download.server.config.RecordsStatsServiceConfig.java

Source

/*
 * Copyright (c) 2016 The Ontario Institute for Cancer Research. All rights reserved.                             
 *                                                                                                               
 * This program and the accompanying materials are made available under the terms of the GNU Public License v3.0.
 * You should have received a copy of the GNU General Public License along with                                  
 * this program. If not, see <http://www.gnu.org/licenses/>.                                                     
 *                                                                                                               
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY                           
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES                          
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT                           
 * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,                                
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED                          
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;                               
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER                              
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN                         
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.icgc.dcc.download.server.config;

import static com.google.common.base.Preconditions.checkState;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.zip.GZIPInputStream;

import lombok.Cleanup;
import lombok.SneakyThrows;
import lombok.val;
import lombok.extern.slf4j.Slf4j;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.icgc.dcc.common.core.util.Splitters;
import org.icgc.dcc.common.hadoop.fs.HadoopUtils;
import org.icgc.dcc.download.core.model.DownloadDataType;
import org.icgc.dcc.download.server.config.Properties.JobProperties;
import org.icgc.dcc.download.server.service.RecordStatsService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.collect.Table;

@Slf4j
@Configuration
public class RecordsStatsServiceConfig {

    @Autowired
    Properties properties;
    @Autowired
    FileSystem fileSystem;

    @Bean
    public RecordStatsService recordStatsService() {
        return new RecordStatsService(readStatsTable(properties.jobProperties(), fileSystem),
                resolveRecordWeights(properties.downloadServerProperties().getRecordWeightsFile()));
    }

    @SneakyThrows
    private Table<String, DownloadDataType, Long> readStatsTable(JobProperties jobProperties,
            FileSystem fileSystem) {
        val statsPath = new Path(jobProperties.getInputDir(), "stats");
        // TODO: filter only gz-compressed files.
        val statsFiles = HadoopUtils.lsFile(fileSystem, statsPath);
        Table<String, DownloadDataType, Long> statsTable = HashBasedTable.create();
        for (val file : statsFiles) {
            log.info("Reading record statistics file: {}", file);
            @Cleanup
            val reader = getBufferedReader(fileSystem, file);
            try {
                readRecordStats(reader, statsTable);
            } catch (IllegalArgumentException e) {
                log.error("Failed to read {} records statistics file.", file);
                throw e;
            }
        }

        return statsTable;
    }

    @SneakyThrows
    private static void readRecordStats(BufferedReader reader, Table<String, DownloadDataType, Long> statsTable) {
        String line = null;
        while ((line = reader.readLine()) != null) {
            log.info("Parsing line: {}", line);
            val parts = Splitters.TAB.splitToList(line);
            ensureStatsFileFormat(parts);

            val donorId = parts.get(0);
            val type = DownloadDataType.valueOf(parts.get(1));
            val count = Long.valueOf(parts.get(2));

            val currentValue = statsTable.get(donorId, type);
            checkState(currentValue == null, "Record stats for donor '%s' and type '%s' already exists(%s).",
                    donorId, type, currentValue);
            statsTable.put(donorId, type, count);
        }
    }

    private static void ensureStatsFileFormat(List<String> parts) {
        val partsNum = parts.size();
        checkState(partsNum == 3, "Malformed records statistics file. Expected 3 columns, but got {}", partsNum);
    }

    @SneakyThrows
    private static BufferedReader getBufferedReader(FileSystem fileSystem, Path input) {
        val gzip = new GZIPInputStream(fileSystem.open(input));

        return new BufferedReader(new InputStreamReader(gzip));
    }

    @SneakyThrows
    private static Map<DownloadDataType, Integer> resolveRecordWeights(String recordWeightsFile) {
        val fileReader = createrRecordWeightsFileReader(recordWeightsFile);
        String line = null;
        val recordWeights = ImmutableMap.<DownloadDataType, Integer>builder();
        while ((line = fileReader.readLine()) != null) {
            recordWeights.put(parseEntry(line));
        }

        return recordWeights.build();
    }

    private static Entry<? extends DownloadDataType, ? extends Integer> parseEntry(String line) {
        val parts = Splitters.TAB.splitToList(line);
        val partsNum = parts.size();
        checkState(partsNum == 2, "Malformed records weights file. Expected 2 columns, but got %s", partsNum);
        val type = DownloadDataType.valueOf(parts.get(0));
        val weight = Integer.valueOf(parts.get(1));

        return Maps.immutableEntry(type, weight);
    }

    @SneakyThrows
    private static BufferedReader createrRecordWeightsFileReader(String recordWeightsFile) {
        val file = new File(recordWeightsFile);
        checkState(file.canRead(), "Failed to read " + file.getAbsolutePath());

        return new BufferedReader(new FileReader(file));
    }

}