Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.kylin.dict.lookup; import java.io.DataInput; import java.io.DataOutput; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import com.google.common.base.Strings; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.ArrayUtils; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.dict.StringBytesConverter; import org.apache.kylin.dict.TrieDictionary; import org.apache.kylin.dict.TrieDictionaryBuilder; import org.apache.kylin.metadata.model.ColumnDesc; import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.source.IReadableTable; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonProperty; /** * @author yangli9 */ @SuppressWarnings("serial") @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) public class SnapshotTable extends RootPersistentEntity implements IReadableTable { @JsonProperty("tableName") private String tableName; @JsonProperty("signature") private TableSignature signature; @JsonProperty("useDictionary") private boolean useDictionary; private ArrayList<int[]> rowIndices; private Dictionary<String> dict; // default constructor for JSON serialization public SnapshotTable() { } SnapshotTable(IReadableTable table, String tableName) throws IOException { this.tableName = tableName; this.signature = table.getSignature(); this.useDictionary = true; } public void takeSnapshot(IReadableTable table, TableDesc tableDesc) throws IOException { this.signature = table.getSignature(); int maxIndex = tableDesc.getMaxColumnIndex(); TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); TableReader reader = table.getReader(); try { while (reader.next()) { String[] row = reader.getRow(); if (row.length <= maxIndex) { throw new IllegalStateException("Bad hive table row, " + tableDesc + " expect " + (maxIndex + 1) + " columns, but got " + Arrays.toString(row)); } for (ColumnDesc column : tableDesc.getColumns()) { String cell = row[column.getZeroBasedIndex()]; if (cell != null) b.addValue(cell); } } } finally { IOUtils.closeQuietly(reader); } this.dict = b.build(0); ArrayList<int[]> allRowIndices = new ArrayList<int[]>(); reader = table.getReader(); try { while (reader.next()) { String[] row = reader.getRow(); int[] rowIndex = new int[tableDesc.getColumnCount()]; for (ColumnDesc column : tableDesc.getColumns()) { rowIndex[column.getZeroBasedIndex()] = dict.getIdFromValue(row[column.getZeroBasedIndex()]); } allRowIndices.add(rowIndex); } } finally { IOUtils.closeQuietly(reader); } this.rowIndices = allRowIndices; } public String getResourcePath() { return getResourceDir() + "/" + uuid + ".snapshot"; } public String getResourceDir() { if (Strings.isNullOrEmpty(tableName)) { return getOldResourceDir(); } else { return ResourceStore.SNAPSHOT_RESOURCE_ROOT + "/" + tableName; } } private String getOldResourceDir() { return ResourceStore.SNAPSHOT_RESOURCE_ROOT + "/" + new File(signature.getPath()).getName(); } @Override public TableReader getReader() throws IOException { return new TableReader() { int i = -1; @Override public boolean next() throws IOException { i++; return i < rowIndices.size(); } @Override public String[] getRow() { int[] rowIndex = rowIndices.get(i); String[] row = new String[rowIndex.length]; for (int x = 0; x < row.length; x++) { row[x] = dict.getValueFromId(rowIndex[x]); } return row; } @Override public void close() throws IOException { } }; } @Override public TableSignature getSignature() throws IOException { return signature; } @Override public boolean exists() throws IOException { return true; } /** * a naive implementation * * @return */ @Override public int hashCode() { int[] parts = new int[this.rowIndices.size()]; for (int i = 0; i < parts.length; ++i) parts[i] = Arrays.hashCode(this.rowIndices.get(i)); return Arrays.hashCode(parts); } @Override public boolean equals(Object o) { if ((o instanceof SnapshotTable) == false) return false; SnapshotTable that = (SnapshotTable) o; if (this.dict.equals(that.dict) == false) return false; //compare row by row if (this.rowIndices.size() != that.rowIndices.size()) return false; for (int i = 0; i < this.rowIndices.size(); ++i) { if (!ArrayUtils.isEquals(this.rowIndices.get(i), that.rowIndices.get(i))) return false; } return true; } private static String NULL_STR; { try { // a special placeholder to indicate a NULL; 0, 9, 127, 255 are a few invisible ASCII characters NULL_STR = new String(new byte[] { 0, 9, 127, (byte) 255 }, "ISO-8859-1"); } catch (UnsupportedEncodingException e) { // does not happen } } void writeData(DataOutput out) throws IOException { out.writeInt(rowIndices.size()); if (rowIndices.size() > 0) { int n = rowIndices.get(0).length; out.writeInt(n); if (this.useDictionary == true) { dict.write(out); for (int i = 0; i < rowIndices.size(); i++) { int[] row = rowIndices.get(i); for (int j = 0; j < n; j++) { out.writeInt(row[j]); } } } else { for (int i = 0; i < rowIndices.size(); i++) { int[] row = rowIndices.get(i); for (int j = 0; j < n; j++) { // NULL_STR is tricky, but we don't want to break the current snapshots out.writeUTF(dict.getValueFromId(row[j]) == null ? NULL_STR : dict.getValueFromId(row[j])); } } } } } void readData(DataInput in) throws IOException { int rowNum = in.readInt(); if (rowNum > 0) { int n = in.readInt(); rowIndices = new ArrayList<int[]>(rowNum); if (this.useDictionary == true) { this.dict = new TrieDictionary<String>(); dict.readFields(in); for (int i = 0; i < rowNum; i++) { int[] row = new int[n]; this.rowIndices.add(row); for (int j = 0; j < n; j++) { row[j] = in.readInt(); } } } else { List<String[]> rows = new ArrayList<String[]>(rowNum); TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); for (int i = 0; i < rowNum; i++) { String[] row = new String[n]; rows.add(row); for (int j = 0; j < n; j++) { row[j] = in.readUTF(); // NULL_STR is tricky, but we don't want to break the current snapshots if (row[j].equals(NULL_STR)) row[j] = null; b.addValue(row[j]); } } this.dict = b.build(0); for (String[] row : rows) { int[] rowIndex = new int[n]; for (int i = 0; i < n; i++) { rowIndex[i] = dict.getIdFromValue(row[i]); } this.rowIndices.add(rowIndex); } } } else { rowIndices = new ArrayList<int[]>(); dict = new TrieDictionary<String>(); } } }