ColumnStorage.ColumnProject.java Source code

Java tutorial

Introduction

Here is the source code for ColumnStorage.ColumnProject.java

Source

/**
* Tencent is pleased to support the open source community by making TDW available.
* Copyright (C) 2014 THL A29 Limited, a Tencent company. All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use 
* this file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed 
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
* OF ANY KIND, either express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ColumnStorage;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import Comm.ConstVar;
import Comm.SEException;
import FormatStorage.FormatDataFile;

/**
 * Keeps, for a set of column-storage files, which field indexes each file
 * holds, and answers "which files do I need to read for these indexes?".
 *
 * <p>The per-file information is either collected from the head of every
 * file found under a directory ({@link #loadColmnInfoFromHeadInfo}) or read
 * from / written to a compact "navigator" file
 * ({@link #loadColmnInfoFromNavigator}, {@link #saveNavigator}).
 *
 * <p>Navigator file layout, as produced by {@link #saveNavigator}:
 * <pre>
 *   int   magic (ConstVar.NaviMagic)
 *   short number of records
 *   per record:
 *     short  name length in bytes
 *     byte[] name (UTF-8)
 *     short  number of indexes
 *     short[] indexes
 *     int beginKey, int endKey, int beginLine, int endLine
 * </pre>
 */
public class ColumnProject {
    Log LOG = LogFactory.getLog("ColumnProject");

    /**
     * Charset used for file names inside the navigator file. Pinned so that a
     * file written on one machine can be read on another regardless of the
     * platform default; ASCII names are encoded exactly as before.
     */
    private static final String NAME_CHARSET = "UTF-8";

    /** Description of one column file: its name and the field indexes it stores. */
    static class ColumnInfo {
        /** File name (the full path string when loaded from file heads). */
        String name;
        /** Field indexes stored in the file. */
        Set<Short> idxs = new HashSet<Short>();
        // The four range fields below are persisted in the navigator file but
        // are not interpreted anywhere in this class.
        int beginLine = 0;
        int endLine = 0;
        int beginKey = 0;
        int endKey = 0;
    }

    ArrayList<ColumnInfo> infos = new ArrayList<ColumnInfo>();
    Configuration conf = null;

    /**
     * Creates an empty projection; callers fill {@link #infos} themselves or
     * through one of the load methods.
     *
     * @param conf Hadoop configuration used when opening column files
     */
    ColumnProject(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Creates a projection by scanning the head of every file under
     * {@code path}.
     *
     * @param path directory containing the column files
     * @param conf Hadoop configuration used to obtain the file system
     * @throws Exception if the directory cannot be listed or a file fails to
     *         load for a reason other than an unrecognised file format
     */
    public ColumnProject(Path path, Configuration conf) throws Exception {
        this.conf = conf;
        FileSystem fs = FileSystem.get(conf);

        loadColmnInfoFromHeadInfo(fs, path);
    }

    /**
     * Writes all known column infos to a navigator file. Does nothing when no
     * infos are loaded.
     *
     * <p>I/O failures are logged and not propagated (best-effort save), which
     * matches the historical behaviour of this method.
     *
     * @param fs       file system to create the file on
     * @param naviPath destination of the navigator file
     * @throws IOException declared for compatibility; failures are logged
     */
    void saveNavigator(FileSystem fs, Path naviPath) throws IOException {
        if (infos.isEmpty()) {
            return;
        }

        // saveColumnInfo skips records without a name or without indexes, so
        // the header must announce only the records that will really follow;
        // otherwise the reader runs past the end of the file.
        int writable = 0;
        for (ColumnInfo info : infos) {
            if (isWritable(info)) {
                writable++;
            }
        }

        try {
            if (writable > Short.MAX_VALUE) {
                throw new IOException("too many column infos for navigator file:" + writable);
            }

            FSDataOutputStream out = fs.create(naviPath);
            try {
                out.writeInt(ConstVar.NaviMagic);
                out.writeShort((short) writable);

                for (ColumnInfo info : infos) {
                    saveColumnInfo(out, info);
                }
            } finally {
                // Always release the stream, also when a write failed.
                out.close();
            }
        } catch (IOException e) {
            LOG.error("save Column info fail:" + e.getMessage(), e);
        }
    }

    /**
     * Appends one record to the navigator stream. Records with a missing or
     * empty name, or without any index, are silently skipped.
     *
     * @param out  stream positioned where the record should be written
     * @param info record to write
     * @throws IOException if writing fails or a length does not fit the
     *         16-bit fields of the file format
     */
    void saveColumnInfo(FSDataOutputStream out, ColumnInfo info) throws IOException {
        if (!isWritable(info)) {
            return;
        }

        // The length prefix must be the encoded byte count, not the char
        // count, or names with multi-byte characters cannot be read back.
        byte[] nameBytes = info.name.getBytes(NAME_CHARSET);
        if (nameBytes.length > Short.MAX_VALUE) {
            throw new IOException("column file name too long:" + nameBytes.length);
        }

        int idxCount = info.idxs.size();
        if (idxCount > Short.MAX_VALUE) {
            throw new IOException("too many indexes for column file:" + info.name);
        }

        out.writeShort((short) nameBytes.length);
        out.write(nameBytes);

        out.writeShort((short) idxCount);
        for (Short idx : info.idxs) {
            out.writeShort(idx);
        }

        out.writeInt(info.beginKey);
        out.writeInt(info.endKey);
        out.writeInt(info.beginLine);
        out.writeInt(info.endLine);
    }

    /** Tells whether {@link #saveColumnInfo} would emit a record for {@code info}. */
    private static boolean isWritable(ColumnInfo info) {
        return info != null
                && info.name != null && info.name.length() > 0
                && info.idxs != null && !info.idxs.isEmpty();
    }

    /**
     * Opens every file directly under {@code path}, reads the field indexes
     * from its head and records them in {@link #infos}. Files that report an
     * {@link SEException.ErrorFileFormat} are logged and skipped.
     *
     * @param fs   file system used to list the directory
     * @param path directory to scan
     * @throws Exception any other failure while opening or reading a file
     */
    void loadColmnInfoFromHeadInfo(FileSystem fs, Path path) throws Exception {
        FileStatus[] status = fs.listStatus(path);
        if (status == null || status.length == 0) {
            return;
        }

        for (int i = 0; i < status.length; i++) {
            String fileName = status[i].getPath().toString();
            FormatDataFile fd = null;
            boolean closeAttempted = false;
            try {
                fd = new FormatDataFile(conf);
                fd.open(fileName);

                ColumnInfo columnInfo = new ColumnInfo();
                columnInfo.idxs = fd.head.fieldMap().idxs();
                columnInfo.name = fileName;

                infos.add(columnInfo);

                // Mark before closing so a failing close is not retried below.
                closeAttempted = true;
                fd.close();
            } catch (SEException.ErrorFileFormat e) {
                LOG.info("get error file format exception:" + e.getMessage() + ", file:" + fileName);
            } catch (Exception e) {
                LOG.error("load column info fail:" + e.getMessage(), e);
                throw e;
            } finally {
                // Failure path: release whatever open() managed to acquire.
                if (fd != null && !closeAttempted) {
                    closeQuietly(fd, fileName);
                }
            }
        }
    }

    /**
     * Closes {@code fd} after a failed load without disturbing the exception
     * that is already being handled.
     * NOTE(review): assumes FormatDataFile.close() is harmless after a failed
     * open(); any exception it raises is only logged.
     */
    private void closeQuietly(FormatDataFile fd, String fileName) {
        try {
            fd.close();
        } catch (Exception e) {
            LOG.warn("close fail after load error, file:" + fileName + ", " + e.getMessage());
        }
    }

    /** Dumps the loaded infos: the count goes to the log, details to stdout. */
    void showInfos() {
        LOG.info("infos.size:" + infos.size());
        for (ColumnInfo info : infos) {
            System.out.println("name:" + info.name);

            for (Short idx : info.idxs) {
                System.out.println(idx);
            }
        }
    }

    /**
     * Reads a navigator file and appends its records to {@link #infos}.
     *
     * @param fs       file system holding the file
     * @param naviPath navigator file to read
     * @throws SEException.ErrorFileFormat if the magic number does not match
     *         or the record count is negative
     * @throws Exception on any I/O failure
     */
    void loadColmnInfoFromNavigator(FileSystem fs, Path naviPath) throws Exception {
        FSDataInputStream in = fs.open(naviPath);
        try {
            int magic = in.readInt();
            if (magic != ConstVar.NaviMagic) {
                throw new SEException.ErrorFileFormat("invalid navi magic:" + magic + ",file:" + naviPath.toString());
            }

            short infoNum = in.readShort();
            if (infoNum < 0) {
                throw new SEException.ErrorFileFormat("invalid navi info num:" + infoNum + ",file:" + naviPath.toString());
            }

            for (int i = 0; i < infoNum; i++) {
                infos.add(loadColumnInfo(in));
            }
        } finally {
            // The stream was never closed before; release it on every path.
            in.close();
        }
    }

    /**
     * Reads one record written by {@link #saveColumnInfo}.
     *
     * @param in stream positioned at the start of a record
     * @return the decoded record
     * @throws IOException on read failure or when a length field is negative
     */
    ColumnInfo loadColumnInfo(FSDataInputStream in) throws IOException {
        ColumnInfo info = new ColumnInfo();

        short nameLen = in.readShort();
        if (nameLen < 0) {
            // Corrupt input: report it as an I/O problem instead of letting
            // the array allocation throw NegativeArraySizeException.
            throw new IOException("invalid column name length:" + nameLen);
        }
        byte[] nameBuf = new byte[nameLen];
        in.readFully(nameBuf);
        info.name = new String(nameBuf, NAME_CHARSET);

        short idxLen = in.readShort();
        if (idxLen < 0) {
            throw new IOException("invalid column index count:" + idxLen);
        }
        for (int i = 0; i < idxLen; i++) {
            info.idxs.add(in.readShort());
        }

        info.beginKey = in.readInt();
        info.endKey = in.readInt();
        info.beginLine = in.readInt();
        info.endLine = in.readInt();

        return info;
    }

    /**
     * @return the live list of loaded column infos (not a copy)
     */
    public ArrayList<ColumnInfo> infos() {
        return infos;
    }

    /**
     * Finds the files needed to read the given field indexes. For each index
     * the first loaded file containing it is chosen.
     *
     * @param idx field indexes to look up
     * @return the distinct file names in the order they were first needed, or
     *         {@code null} when {@code idx} is null or empty, no infos are
     *         loaded, or at least one index is not stored in any file
     */
    public ArrayList<String> getFileNameByIndex(ArrayList<Short> idx) {
        if (idx == null || idx.isEmpty() || infos.isEmpty()) {
            return null;
        }

        // LinkedHashSet: de-duplicates names while keeping first-use order.
        LinkedHashSet<String> result = new LinkedHashSet<String>();
        for (Short wanted : idx) {
            boolean found = false;
            for (ColumnInfo info : infos) {
                if (info.idxs.contains(wanted)) {
                    result.add(info.name);
                    found = true;
                    break;
                }
            }

            if (!found) {
                // One unresolved index makes the whole request unanswerable.
                return null;
            }
        }

        return new ArrayList<String>(result);
    }
}