Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.carbondata.processing.globalsurrogategenerator; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.carbondata.common.logging.LogService; import org.carbondata.common.logging.LogServiceFactory; import org.carbondata.core.carbon.CarbonDef.CubeDimension; import org.carbondata.core.carbon.CarbonDef.Hierarchy; import org.carbondata.core.carbon.CarbonDef.Level; import org.carbondata.core.carbon.CarbonDef.RelationOrJoin; import org.carbondata.core.carbon.CarbonDef.Schema; import org.carbondata.core.carbon.CarbonDef.Table; import org.carbondata.core.constants.CarbonCommonConstants; import org.carbondata.core.datastorage.store.filesystem.CarbonFile; import org.carbondata.core.datastorage.store.impl.FileFactory; import org.carbondata.core.datastorage.store.impl.FileFactory.FileType; import org.carbondata.core.util.CarbonProperties; import org.carbondata.core.util.CarbonUtil; import org.carbondata.processing.util.CarbonSchemaParser; import org.apache.commons.codec.binary.Base64; public class LevelGlobalSurrogateGeneratorThread implements Callable<Void> { /** * LOGGER */ private static final LogService LOGGER = LogServiceFactory .getLogService(LevelGlobalSurrogateGeneratorThread.class.getName()); private String[][] partitionLocation; private CubeDimension dimension; private Schema schema; private String tableName; private String partitionColumn; public LevelGlobalSurrogateGeneratorThread(final String[][] partitionLocation, final CubeDimension dimension, final Schema schema, final String tableName, final String partitionColumn) { this.partitionLocation = partitionLocation; this.dimension = dimension; this.schema = schema; this.tableName = tableName; this.partitionColumn = partitionColumn; } public static Map<String, Integer> readLevelFileAndUpdateCache(CarbonFile memberFile) throws IOException { DataInputStream inputStream = null; Map<String, Integer> localMemberMap = new HashMap<String, Integer>( CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); try { inputStream = FileFactory.getDataInputStream(memberFile.getPath(), FileFactory.getFileType(memberFile.getPath())); long currentPosition = 4; long size = memberFile.getSize() - 4; boolean enableEncoding = Boolean.valueOf( CarbonProperties.getInstance().getProperty(CarbonCommonConstants.ENABLE_BASE64_ENCODING, CarbonCommonConstants.ENABLE_BASE64_ENCODING_DEFAULT)); int surrogateValue = inputStream.readInt(); while (currentPosition < size) { int len = inputStream.readInt(); currentPosition += 4; byte[] rowBytes = new byte[len]; inputStream.readFully(rowBytes); currentPosition += len; String decodedValue = null; if (enableEncoding) { decodedValue = new String(Base64.decodeBase64(rowBytes), Charset.defaultCharset()); } else { decodedValue = new String(rowBytes, Charset.defaultCharset()); } localMemberMap.put(decodedValue, surrogateValue); surrogateValue++; } } catch (Exception e) { LOGGER.error(e, e.getMessage()); CarbonUtil.closeStreams(inputStream); } finally { CarbonUtil.closeStreams(inputStream); } return localMemberMap; } @Override public Void call() throws Exception { long currentTimeMillis = System.currentTimeMillis(); long currentTimeMillis1 = System.currentTimeMillis(); Hierarchy[] extractHierarchies = CarbonSchemaParser.extractHierarchies(schema, dimension); Level cubeLevel = extractHierarchies[0].levels[0]; boolean isPartitionColumn = partitionColumn.equals(cubeLevel.name); if (partitionColumn.equals(cubeLevel.name)) { isPartitionColumn = true; } RelationOrJoin relation = extractHierarchies[0].relation; String hierarchyTable = relation == null ? tableName : ((Table) extractHierarchies[0].relation).name; String levelFileName = hierarchyTable + '_' + cubeLevel.name; List<PartitionMemberVo> partitionMemberVoList = new ArrayList<PartitionMemberVo>( CarbonCommonConstants.CONSTANT_SIZE_TEN); ExecutorService ex = Executors.newFixedThreadPool(10); PartitionMemberVo memberVo = null; List<Future<Map<String, Integer>>> submitList = new ArrayList<Future<Map<String, Integer>>>( CarbonCommonConstants.CONSTANT_SIZE_TEN); for (int i = 0; i < partitionLocation.length; i++) { int partitionLength = partitionLocation[i].length; if (partitionLength == 0) { LOGGER.info("partition length is 0"); continue; } String path = partitionLocation[i][partitionLength - 1] + '/' + levelFileName + ".level"; FileType fileType = FileFactory.getFileType(path); if (!FileFactory.isFileExist(path, fileType)) { LOGGER.info("File does not exist at path :: " + path); continue; } CarbonFile carbonFile = FileFactory.getCarbonFile(path, fileType); memberVo = new PartitionMemberVo(); memberVo.setPath(partitionLocation[i][partitionLength - 1]); partitionMemberVoList.add(memberVo); Future<Map<String, Integer>> submit = ex.submit(new ReaderThread(carbonFile)); submitList.add(submit); } ex.shutdown(); ex.awaitTermination(1, TimeUnit.DAYS); if (partitionMemberVoList.size() < 1) { return null; } int maxSeqenceKey = getMaxSequenceKeyAssigned(levelFileName + ".globallevel"); int index = 0; for (Future<Map<String, Integer>> future : submitList) { partitionMemberVoList.get(index).setMembersMap(future.get()); index++; } LOGGER.info("Time Taken to read surrogate for Level: " + levelFileName + " : " + (System.currentTimeMillis() - currentTimeMillis)); currentTimeMillis = System.currentTimeMillis(); ex = Executors.newFixedThreadPool(5); createGlobalSurrogateKey(currentTimeMillis, currentTimeMillis1, isPartitionColumn, levelFileName, partitionMemberVoList, ex, maxSeqenceKey); return null; } private void createGlobalSurrogateKey(long currentTimeMillis, long currentTimeMillis1, boolean isPartitionColumn, String levelFileName, List<PartitionMemberVo> partitionMemberVoList, ExecutorService ex, int maxSeqenceKey) throws InterruptedException { int[] key = new int[partitionMemberVoList.get(0).getMembersMap().size()]; int[] value = new int[partitionMemberVoList.get(0).getMembersMap().size()]; int[] localKey = null; int[] localValue = null; int countVal = 0; int minSeqenceKey = Integer.MAX_VALUE; if (!isPartitionColumn) { for (Entry<String, Integer> entryInfo : partitionMemberVoList.get(0).getMembersMap().entrySet()) { if (minSeqenceKey > entryInfo.getValue()) { minSeqenceKey = entryInfo.getValue(); } key[countVal] = entryInfo.getValue(); value[countVal] = ++maxSeqenceKey; countVal++; } localKey = key; localValue = value; LOGGER.info("Time Taken to generate global surrogate for Level: " + levelFileName + " : " + (System.currentTimeMillis() - currentTimeMillis)); currentTimeMillis = System.currentTimeMillis(); ex.submit(new WriterThread(localKey, localValue, partitionMemberVoList.get(0).getPath(), levelFileName + ".globallevel", maxSeqenceKey, minSeqenceKey)); processNonPartitionedColumn(currentTimeMillis, levelFileName, partitionMemberVoList, ex, maxSeqenceKey); } else { for (int i = 0; i < partitionMemberVoList.size(); i++) { countVal = 0; minSeqenceKey = Integer.MAX_VALUE; key = new int[partitionMemberVoList.get(i).getMembersMap().size()]; value = new int[partitionMemberVoList.get(i).getMembersMap().size()]; for (Entry<String, Integer> entry : partitionMemberVoList.get(i).getMembersMap().entrySet()) { if (minSeqenceKey > entry.getValue()) { minSeqenceKey = entry.getValue(); } key[countVal] = entry.getValue(); value[countVal] = ++maxSeqenceKey; countVal++; } localKey = key; localValue = value; LOGGER.info("Time Taken to generate global surrogate for Level: " + levelFileName + " : " + (System.currentTimeMillis() - currentTimeMillis)); currentTimeMillis = System.currentTimeMillis(); ex.submit(new WriterThread(localKey, localValue, partitionMemberVoList.get(i).getPath(), levelFileName + ".globallevel", maxSeqenceKey, minSeqenceKey)); } } LOGGER.info("Time Taken to write global surrogate for Level: " + levelFileName + " : " + (System.currentTimeMillis() - currentTimeMillis1)); ex.shutdown(); ex.awaitTermination(1, TimeUnit.DAYS); } private void processNonPartitionedColumn(long currentTimeMillis, String levelFileName, List<PartitionMemberVo> partitionMemberVoList, ExecutorService ex, int maxSeqenceKey) { int[] key; int[] value; int[] localKey; int[] localValue; int counter; int minSeqenceKey; Integer surrogateKey = null; boolean isFound = false; List<Integer> notFoundSurrogateKeys = null; for (int i = 1; i < partitionMemberVoList.size(); i++) { counter = 0; minSeqenceKey = Integer.MAX_VALUE; key = new int[partitionMemberVoList.get(i).getMembersMap().size()]; value = new int[partitionMemberVoList.get(i).getMembersMap().size()]; notFoundSurrogateKeys = new ArrayList<Integer>(); for (Entry<String, Integer> entry : partitionMemberVoList.get(i).getMembersMap().entrySet()) { surrogateKey = null; isFound = false; for (int j = 0; j < i; j++) { surrogateKey = partitionMemberVoList.get(j).getMembersMap().get(entry.getKey()); if (null != surrogateKey) { isFound = true; if (minSeqenceKey > entry.getValue()) { minSeqenceKey = entry.getValue(); } key[counter] = entry.getValue(); value[counter] = surrogateKey; counter++; break; } } if (!isFound) { notFoundSurrogateKeys.add(entry.getValue()); } } for (Integer notFoundSurrgates : notFoundSurrogateKeys) { key[counter] = notFoundSurrgates; value[counter] = ++maxSeqenceKey; counter++; } localKey = key; localValue = value; ex.submit(new WriterThread(localKey, localValue, partitionMemberVoList.get(i).getPath(), levelFileName + ".globallevel", maxSeqenceKey, minSeqenceKey)); LOGGER.info("Time Taken to generate global surrogate for Level: " + levelFileName + " : " + (System.currentTimeMillis() - currentTimeMillis)); currentTimeMillis = System.currentTimeMillis(); } } private int getMaxSequenceKeyAssigned(String levelFileName) { int maxKey = 0; if (partitionLocation[0].length < 2) { return maxKey; } int maxKeyAssigned = 0; for (int i = 0; i < partitionLocation.length; i++) { for (int j = 0; j < partitionLocation[i].length - 1; j++) { CarbonFile carbonFile = FileFactory.getCarbonFile(partitionLocation[i][j] + '/' + levelFileName, FileFactory.getFileType(partitionLocation[i][j] + '/' + levelFileName)); if (carbonFile.exists()) { maxKeyAssigned = getMaxKeyAssigned(carbonFile); if (maxKey < maxKeyAssigned) { maxKey = maxKeyAssigned; } } } } return maxKey; } private int getMaxKeyAssigned(CarbonFile memberFile) { DataInputStream inputStream = null; try { inputStream = FileFactory.getDataInputStream(memberFile.getPath(), FileFactory.getFileType(memberFile.getPath())); return inputStream.readInt(); } catch (FileNotFoundException e) { LOGGER.error(e, e.getMessage()); } catch (IOException e) { LOGGER.error(e, e.getMessage()); } finally { CarbonUtil.closeStreams(inputStream); } return -1; } private void writeGlobalSurrogateKeyFile(String string, int[] key, int[] value, String fileName, int currentMaxKey, int minValue) { DataOutputStream stream = null; try { stream = FileFactory.getDataOutputStream(string + '/' + fileName, FileFactory.getFileType(string + '/' + fileName), 10240); int size = key.length; stream.writeInt(currentMaxKey); stream.writeInt(minValue); stream.writeInt(size); for (int i = 0; i < size; i++) { stream.writeInt(key[i]); stream.writeInt(value[i]); } } catch (FileNotFoundException e) { LOGGER.error(e, e.getMessage()); } catch (IOException e) { LOGGER.error(e, e.getMessage()); } finally { CarbonUtil.closeStreams(stream); } } private final class WriterThread implements Callable<Void> { int currentMaxKey; int minValue; private int[] key; private int[] value; private String path; private String fileName; private WriterThread(int[] key, int[] value, String path, String fileName, int currentMaxKey, int minValue) { this.key = key; this.value = value; this.path = path; this.fileName = fileName; this.currentMaxKey = currentMaxKey; this.minValue = minValue; } @Override public Void call() throws Exception { writeGlobalSurrogateKeyFile(path, key, value, fileName, currentMaxKey, minValue); return null; } } }