com.aliyun.fs.oss.nat.BufferReader.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.fs.oss.nat.BufferReader.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.aliyun.fs.oss.nat;

import com.aliyun.fs.oss.common.NativeFileSystemStore;
import com.aliyun.fs.oss.utils.Task;
import com.aliyun.fs.oss.utils.TaskEngine;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Sequential reader over a single OSS object identified by {@code key}.
 *
 * <p>Two strategies are supported, selected by {@code algorithmVersion}:
 * <ul>
 *   <li>version 1: a double-buffered prefetcher. The byte buffer is split into
 *       two halves; each half is split into {@code concurrentStreams} equal
 *       slots, one per {@link ConcurrentReader}. While the caller consumes one
 *       half, the readers refill the other one.</li>
 *   <li>any other version: a single stream obtained from the store.</li>
 * </ul>
 *
 * <pre>
 * thread1 thread2        thread2^m
 *  |        |                |
 * /        /                /
 * [0][1][2][3][4][5][6][7]...|...[2^n-6][2^n-5][2^n-4][2^n-3][2^n-2][2^n-1]
 * </pre>
 *
 * <p>The {@code read} and {@code seek} methods are {@code synchronized};
 * {@link #close()} and {@link #getPos()} are not.
 *
 * <p>NOTE(review): if a {@link ConcurrentReader} fails with an exception, the
 * ready counter of the half it was filling never reaches
 * {@code concurrentStreams} and {@code read} keeps polling. Whether
 * {@code TaskEngine} reports such failures is not visible from this file.
 */
public class BufferReader {
    public static final Log LOG = LogFactory.getLog(BufferReader.class);

    private NativeFileSystemStore store;
    private int concurrentStreams;
    private TaskEngine taskEngine;
    private Configuration conf;
    private int bufferSize;
    private String key;
    private byte[] buffer = null;
    private Task[] readers;
    // Bytes actually fetched per slot: [0, n) for half-0, [n, 2n) for half-1.
    private int[] splitContentSize;
    // Which half the consumer is currently reading: 0 or 1.
    private AtomicInteger halfReading = new AtomicInteger(0);
    // Number of readers that finished filling half-0 / half-1.
    private AtomicInteger ready0 = new AtomicInteger(0);
    private AtomicInteger ready1 = new AtomicInteger(0);
    // Read by the reader threads in their polling loop, hence volatile.
    private volatile boolean closed = false;
    private int cacheIdx = 0;
    private int splitSize = 0;
    private long fileContentLength;
    private long pos = 0;
    private boolean squeezed0 = false;
    private boolean squeezed1 = false;
    private int realContentSize = 0;
    private double lastProgress = 0.0d;
    // Incremented each time the consumer finishes a half; readers compare
    // their own halfFetched counter against it to know when to refill.
    private AtomicInteger halfConsuming = new AtomicInteger(1);
    private int algorithmVersion;
    private InputStream in;
    private long lengthToFetch;
    private long instreamStart = 0;

    /**
     * Creates a reader positioned at offset 0 of {@code key}.
     *
     * <p>Objects shorter than 5 MiB always use the single-stream strategy
     * (version 2), regardless of the requested {@code algorithmVersion}.
     *
     * @param store            store used to fetch metadata and object content
     * @param key              object key to read
     * @param conf             configuration; {@code fs.oss.reader.concurrent.number}
     *                         is read for the number of concurrent readers
     * @param algorithmVersion 1 for the concurrent prefetcher, otherwise single stream
     * @throws IOException if the metadata or the initial stream cannot be obtained
     */
    public BufferReader(NativeFileSystemStore store, String key, Configuration conf, int algorithmVersion)
            throws IOException {
        this.store = store;
        this.key = key;
        this.conf = conf;
        this.algorithmVersion = algorithmVersion;
        if (store.retrieveMetadata(key).getLength() < 5 * 1024 * 1024) {
            this.algorithmVersion = 2;
        }
        prepareBeforeFetch();
    }

    /**
     * Rounds {@code value} up to the nearest power of two using integer
     * arithmetic. Values below 1 yield 1; values above 2^30 yield 2^30.
     */
    private static int roundUpToPowerOfTwo(int value) {
        if (value <= 1) {
            return 1;
        }
        final int largest = 1 << 30;
        if (value >= largest) {
            return largest;
        }
        int highest = Integer.highestOneBit(value);
        return highest == value ? value : highest << 1;
    }

    /**
     * Sets up fetching from the current {@code pos}: sizes the buffer and
     * starts the concurrent readers (version 1) or opens a single stream.
     */
    private void prepareBeforeFetch() throws IOException {
        if (algorithmVersion != 1) {
            in = store.retrieve(key, pos);
            return;
        }

        this.fileContentLength = store.retrieveMetadata(key).getLength();
        this.lengthToFetch = fileContentLength - pos;

        // 1 MiB for short ranges, 64 MiB for ranges above 1 GiB, otherwise
        // one sixteenth of the range; always rounded up to a power of two.
        int wanted;
        if (lengthToFetch < 16 * 1024 * 1024) {
            wanted = 1024 * 1024;
        } else if (lengthToFetch > 1024 * 1024 * 1024) {
            wanted = 64 * 1024 * 1024;
        } else {
            wanted = (int) (lengthToFetch / 16);
        }
        this.bufferSize = roundUpToPowerOfTwo(wanted);

        // Reuse the buffer across seeks only when it is large enough.
        if (buffer == null || buffer.length < bufferSize) {
            buffer = new byte[bufferSize];
        }

        this.concurrentStreams = roundUpToPowerOfTwo(conf.getInt("fs.oss.reader.concurrent.number", 4));
        this.readers = new ConcurrentReader[concurrentStreams];
        this.splitContentSize = new int[concurrentStreams * 2];
        this.splitSize = bufferSize / concurrentStreams / 2;

        initializeTaskEngine();
    }

    /** Creates one reader per stream and hands them to a new task engine. */
    private void initializeTaskEngine() {
        for (int i = 0; i < concurrentStreams; i++) {
            try {
                readers[i] = new ConcurrentReader(i);
            } catch (FileNotFoundException e) {
                LOG.error(e);
            }
        }
        this.taskEngine = new TaskEngine(Arrays.asList(this.readers), concurrentStreams, concurrentStreams);
        this.taskEngine.executeTask();
    }

    /**
     * Stops fetching and releases the buffer. Failures while closing are
     * logged, not thrown.
     */
    public void close() {
        LOG.info("Closing input stream for '" + key + "'.");
        closed = true;
        try {
            if (algorithmVersion == 1) {
                taskEngine.shutdown();
            } else {
                if (in != null) {
                    in.close();
                    in = null;
                }
            }
        } catch (IOException e) {
            LOG.error("Failed to close input stream.", e);
        } finally {
            // Drop the reference first so the collection hint below can
            // actually reclaim the buffer.
            buffer = null;
            System.gc();
        }
    }

    /** Returns the buffer offset of the half the consumer is reading. */
    private int currentHalfOffset() {
        return halfReading.get() == 0 ? 0 : bufferSize / 2;
    }

    /**
     * Polls until every reader has filled the half currently being consumed,
     * then compacts that half once and records its content size.
     */
    private void awaitCurrentHalf() {
        boolean firstHalf = halfReading.get() == 0;
        AtomicInteger ready = firstHalf ? ready0 : ready1;
        int polls = 0;
        while (ready.get() != concurrentStreams) {
            polls++;
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                LOG.warn("Something wrong, keep waiting.");
            }
            if (polls % 100 == 0) {
                LOG.warn("waiting for fetching oss data at half-" + (firstHalf ? 0 : 1)
                        + ", has completed " + ready.get());
            }
        }

        boolean alreadySqueezed = firstHalf ? squeezed0 : squeezed1;
        if (!alreadySqueezed) {
            realContentSize = squeeze();
            squeezed0 = firstHalf;
            squeezed1 = !firstHalf;
            progressPrint();
        }
    }

    /**
     * Marks the current half as consumed and moves on to the other one.
     * The ready counter is cleared before {@code halfConsuming} is bumped so
     * that readers only start refilling after the counter is reset.
     */
    private void switchHalf() {
        if (halfReading.get() == 0) {
            ready0.set(0);
            halfReading.set(1);
        } else {
            ready1.set(0);
            halfReading.set(0);
        }
        cacheIdx = 0;
        halfConsuming.addAndGet(1);
    }

    /**
     * Reads one byte.
     *
     * @return the byte as a value in {@code 0..255}, or -1 at end of object
     * @throws IOException if the underlying stream fails
     */
    public synchronized int read() throws IOException {
        if (algorithmVersion != 1) {
            int result = in.read();
            if (result != -1) {
                pos++;
            }
            return result;
        }

        while (true) {
            awaitCurrentHalf();
            if (pos >= fileContentLength) {
                close();
                return -1;
            }
            if (cacheIdx < realContentSize) {
                // Mask to an unsigned value so 0xFF is not mistaken for EOF.
                int value = buffer[currentHalfOffset() + cacheIdx] & 0xFF;
                cacheIdx++;
                pos++;
                return value;
            }
            switchHalf();
        }
    }

    /**
     * Reads up to {@code len} bytes into {@code b} starting at {@code off}.
     * With the concurrent strategy a single call never crosses the boundary
     * between the two buffer halves, so it may return fewer bytes than asked.
     *
     * @return number of bytes copied, or -1 at end of object
     * @throws IOException if the underlying stream fails
     */
    public synchronized int read(byte[] b, int off, int len) throws IOException {
        if (algorithmVersion != 1) {
            int result = in.read(b, off, len);
            if (result > 0) {
                pos += result;
            }
            return result;
        }

        while (true) {
            awaitCurrentHalf();
            if (pos >= fileContentLength) {
                close();
                return -1;
            }
            if (cacheIdx < realContentSize) {
                int count = Math.max(0, Math.min(len, realContentSize - cacheIdx));
                if (count > 0) {
                    System.arraycopy(buffer, currentHalfOffset() + cacheIdx, b, off, count);
                    cacheIdx += count;
                    pos += count;
                }
                return count;
            }
            switchHalf();
        }
    }

    /**
     * Moves the read position to {@code newpos}, restarting the fetch from there.
     *
     * @throws EOFException if {@code newpos} is negative
     * @throws IOException  if fetching cannot be restarted
     */
    public synchronized void seek(long newpos) throws IOException {
        if (newpos < 0) {
            throw new EOFException("negative seek position: " + newpos);
        }

        // Seeking to the current position is a no-op; only restart the
        // fetch when the position actually changes.
        if (pos != newpos) {
            updateInnerStream(newpos);
        }
    }

    /** Tears down the current fetch and starts a new one at {@code newpos}. */
    private synchronized void updateInnerStream(long newpos) throws IOException {
        this.pos = newpos;
        this.instreamStart = newpos;
        try {
            if (algorithmVersion == 1) {
                // Readers poll `closed`; raise it while the engine shuts down.
                closed = true;
                taskEngine.shutdown();
                closed = false;
            } else {
                if (in != null) {
                    in.close();
                    in = null;
                }
            }
        } catch (IOException e) {
            LOG.error("Failed to close input stream.", e);
        }
        LOG.info("Closed previous input stream.");
        reset();
        LOG.info("Opening key '" + key + "' for reading at position '" + newpos + "'.");
        prepareBeforeFetch();
    }

    /** Restores the double-buffer bookkeeping to its initial state. */
    private void reset() {
        halfReading.set(0);
        ready0.set(0);
        ready1.set(0);
        cacheIdx = 0;
        squeezed0 = false;
        squeezed1 = false;
        realContentSize = 0;
        lastProgress = 0.0d;
        halfConsuming.set(1);
    }

    /**
     * Compacts the half currently being consumed so the bytes fetched by all
     * readers are contiguous from the start of that half.
     *
     * @return total number of bytes available in the half
     */
    private int squeeze() {
        boolean firstHalf = halfReading.get() == 0;
        int begin = firstHalf ? 0 : bufferSize / 2;
        int sizeBase = firstHalf ? 0 : concurrentStreams;

        int totalSize = 0;
        for (int i = 0; i < concurrentStreams; i++) {
            totalSize += splitContentSize[sizeBase + i];
        }

        // A completely filled half is already contiguous.
        if (totalSize != bufferSize / 2) {
            int writeIdx = splitContentSize[sizeBase];
            for (int i = 1; i < concurrentStreams; i++) {
                int chunk = splitContentSize[sizeBase + i];
                // arraycopy copes with the overlapping source/destination.
                System.arraycopy(buffer, begin + splitSize * i, buffer, begin + writeIdx, chunk);
                writeIdx += chunk;
            }
        }

        return totalSize;
    }

    /** Returns the current read position within the object. */
    public long getPos() {
        return pos;
    }

    /**
     * Logs the fetch progress when it advanced by at least 10% since the last
     * report or when the whole range has been fetched.
     */
    private void progressPrint() {
        long hasRead = pos + realContentSize - instreamStart;
        double currentProgress = hasRead >= lengthToFetch ? 1.0d : (double) hasRead / lengthToFetch;
        if (currentProgress - lastProgress >= 0.1 || currentProgress == 1.0d) {
            BigDecimal b = new BigDecimal(currentProgress);
            LOG.info("Current progress of reading '" + key + " [" + instreamStart + ":...]' is "
                    + b.setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue());
            lastProgress = currentProgress;
        }
    }

    /**
     * Task that fills one slot of each buffer half. Reader {@code readerId}
     * owns the slot starting at {@code readerId * length} in half-0 and the
     * slot at the same offset in half-1.
     */
    private class ConcurrentReader extends Task {
        private final Log LOG = LogFactory.getLog(ConcurrentReader.class);
        // Number of half-sized regions this reader has moved past; starts at 1
        // because the initial half-0 fetch is tracked by preRead instead.
        int halfFetched = 1;
        private boolean preRead = true;
        private int readerId = -1;
        private boolean half0Completed = false;
        private boolean half1Completed = false;
        private int half0StartPos = -1;
        private int half1StartPos = -1;
        private int length = -1;
        private boolean _continue = true;

        public ConcurrentReader(int readerId) throws FileNotFoundException {
            assert (bufferSize % 2 == 0);
            // A single stream is valid too, so check for a power of two
            // rather than for an even count.
            assert (Integer.bitCount(concurrentStreams) == 1);
            this.readerId = readerId;
            this.length = bufferSize / (2 * concurrentStreams);
            assert (concurrentStreams * length * 2 == bufferSize);

            this.half0StartPos = readerId * length;
            this.half1StartPos = bufferSize / 2 + readerId * length;
        }

        /**
         * Fills half-0 once, then alternates between half-1 and half-0 each
         * time the consumer has finished the half to be refilled. Returns when
         * the enclosing reader is closed or the last region was fetched.
         */
        @Override
        public void execute(TaskEngine engineRef) throws IOException {
            int i = 0;
            while (!closed && _continue) {
                if (preRead) {
                    // fetch oss data for half-0 at the first time, as there is
                    // no data in buffer.
                    _continue = fetchData(half0StartPos);
                    half0Completed = true;
                    half1Completed = false;
                    ready0.addAndGet(1);
                    preRead = false;
                } else if ((halfFetched <= halfConsuming.get()) && (halfFetched % 2 == 1) && !half1Completed) {
                    // fetch oss data for half-1
                    _continue = fetchData(half1StartPos);
                    half1Completed = true;
                    half0Completed = false;
                    ready1.addAndGet(1);
                    halfFetched++;
                } else if (halfFetched <= halfConsuming.get() && (halfFetched % 2 == 0) && !half0Completed) {
                    // fetch oss data for half-0
                    _continue = fetchData(half0StartPos);
                    half0Completed = true;
                    half1Completed = false;
                    ready0.addAndGet(1);
                    halfFetched++;
                } else {
                    i++;
                    // waiting for `halfReading` block data to be consumed
                    try {
                        Thread.sleep(100);
                    } catch (InterruptedException e) {
                        // Deliberately ignored: the loop re-checks `closed`.
                    }
                    if (i % 600 == 0) {
                        LOG.info("[ConcurrentReader-" + readerId + "] waiting for " + "consuming cached data.");
                    }
                }
            }
        }

        /** Opens a stream over {@code length} bytes of the object at {@code position}. */
        private InputStream openSlice(long position, int length) throws IOException {
            try {
                return store.retrieve(key, position, length);
            } catch (Exception e) {
                LOG.warn(e.getMessage(), e);
                throw new IOException("[ConcurrentReader-" + readerId + "] Cannot open oss input stream", e);
            }
        }

        /** Closes {@code stream} if it is non-null, ignoring any failure. */
        private void closeQuietly(InputStream stream) {
            if (stream != null) {
                try {
                    stream.close();
                } catch (Exception ignored) {
                    // best effort: the stream is being discarded anyway
                }
            }
        }

        /**
         * Fetches this reader's slice of the next half-sized region into the
         * slot beginning at {@code startPos} and records the byte count in
         * {@code splitContentSize}.
         *
         * @param startPos {@code half0StartPos} or {@code half1StartPos}
         * @return {@code false} when the fetched region was the last one
         * @throws IOException if the slice cannot be opened or read
         */
        private boolean fetchData(int startPos) throws IOException {
            int sizeSlot = (startPos == half0StartPos) ? readerId : concurrentStreams + readerId;
            splitContentSize[sizeSlot] = 0;

            int halfSize = bufferSize / 2;
            // Offset of the region being fetched, relative to instreamStart.
            long regionOffset = preRead ? 0L : (long) halfFetched * halfSize;
            long remaining = lengthToFetch - regionOffset;

            boolean moreToFetch = true;
            long newPos;
            int fetchLength;
            if (remaining <= halfSize) {
                // Last region: split what is left into ceil-sized shares so no
                // share exceeds this reader's slot (share <= halfSize / n).
                moreToFetch = false;
                int total = (int) Math.max(0L, remaining);
                int share = (total + concurrentStreams - 1) / concurrentStreams;
                long sliceOffset = (long) share * readerId;
                fetchLength = (int) Math.max(0L, Math.min((long) share, total - sliceOffset));
                newPos = instreamStart + regionOffset + sliceOffset;
            } else {
                fetchLength = bufferSize / (2 * concurrentStreams);
                newPos = instreamStart + regionOffset + (long) readerId * fetchLength;
            }

            if (fetchLength == 0) {
                // Nothing for this reader in the final region; skip the request.
                return moreToFetch;
            }

            InputStream stream = openSlice(newPos, fetchLength);
            int off = startPos;
            int tries = 10;
            boolean retry = true;
            int hasRead = 0;
            boolean completed = false;
            try {
                do {
                    try {
                        int result = stream.read(buffer, off, fetchLength - hasRead);
                        if (result > 0) {
                            off += result;
                            hasRead += result;
                        } else if (result == -1) {
                            break;
                        }
                        retry = hasRead < fetchLength;
                    } catch (EOFException e0) {
                        LOG.warn(e0.getMessage(), e0);
                        throw e0;
                    } catch (Exception e1) {
                        tries--;
                        if (tries == 0) {
                            throw new IOException(e1);
                        }

                        try {
                            Thread.sleep(100);
                        } catch (InterruptedException e2) {
                            LOG.warn(e2.getMessage());
                        }
                        // Restart the slice from scratch on a fresh stream.
                        closeQuietly(stream);
                        stream = null;
                        stream = openSlice(newPos, fetchLength);
                        off = startPos;
                        hasRead = 0;
                    }
                } while (tries > 0 && retry);
                completed = true;
            } finally {
                if (!completed) {
                    // Do not leak the stream when the fetch is abandoned.
                    closeQuietly(stream);
                }
            }
            stream.close();
            splitContentSize[sizeSlot] = hasRead;

            return moreToFetch;
        }
    }
}