Java tutorial: Hive Streaming's AbstractRecordWriter
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.streaming;

import java.io.IOException;
import java.io.InputStream;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.MemoryUsage;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Scanner;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.HeapMemoryMonitor;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.LlapUtil;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class AbstractRecordWriter implements RecordWriter {
  private static final Logger LOG = LoggerFactory.getLogger(AbstractRecordWriter.class.getName());

  private static final String DEFAULT_LINE_DELIMITER_PATTERN = "[\r\n]";
  private Integer statementId;
  protected HiveConf conf;
  protected StreamingConnection conn;
  protected Table table;
  protected List<String> inputColumns;
  protected List<String> inputTypes;
  protected String fullyQualifiedTableName;
  protected Map<String, List<RecordUpdater>> updaters = new HashMap<>();
  protected Map<String, Path> partitionPaths = new HashMap<>();
  protected Set<String> addedPartitions = new HashSet<>();
  // input OI includes table columns + partition columns
  protected StructObjectInspector inputRowObjectInspector;
  // output OI strips off the partition columns and retains other columns
  protected ObjectInspector outputRowObjectInspector;
  protected List<String> partitionColumns = new ArrayList<>();
  protected ObjectInspector[] partitionObjInspectors = null;
  protected StructField[] partitionStructFields = null;
  protected Object[] partitionFieldData;
  protected ObjectInspector[] bucketObjInspectors = null;
  protected StructField[] bucketStructFields = null;
  protected Object[] bucketFieldData;
  protected List<Integer> bucketIds = new ArrayList<>();
  protected int totalBuckets;
  protected String defaultPartitionName;
  protected boolean isBucketed;
  protected AcidOutputFormat<?, ?> acidOutputFormat;
  protected Long curBatchMinWriteId;
  protected Long curBatchMaxWriteId;
  protected final String lineDelimiter;
  protected HeapMemoryMonitor heapMemoryMonitor;
  // If the low memory canary is set and the bytes ingested after the canary was set exceed the threshold,
  // trigger a flush. This avoids getting notified of low memory too often and flushing too often.
  protected AtomicBoolean lowMemoryCanary;
  protected long ingestSizeBytes = 0;
  protected boolean autoFlush;
  protected float memoryUsageThreshold;
  protected long ingestSizeThreshold;
  protected FileSystem fs;

  public AbstractRecordWriter(final String lineDelimiter) {
    this.lineDelimiter = lineDelimiter == null || lineDelimiter.isEmpty()
        ? DEFAULT_LINE_DELIMITER_PATTERN : lineDelimiter;
  }

  protected static class OrcMemoryPressureMonitor implements HeapMemoryMonitor.Listener {
    private static final Logger LOG = LoggerFactory.getLogger(OrcMemoryPressureMonitor.class.getName());
    private final AtomicBoolean lowMemoryCanary;

    OrcMemoryPressureMonitor(final AtomicBoolean lowMemoryCanary) {
      this.lowMemoryCanary = lowMemoryCanary;
    }

    @Override
    public void memoryUsageAboveThreshold(final long usedMemory, final long maxMemory) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Orc memory pressure notified! usedMemory: {} maxMemory: {}.",
            LlapUtil.humanReadableByteCount(usedMemory), LlapUtil.humanReadableByteCount(maxMemory));
      }
      lowMemoryCanary.set(true);
    }
  }

  @Override
  public void init(StreamingConnection conn, long minWriteId, long maxWriteId) throws StreamingException {
    init(conn, minWriteId, maxWriteId, -1);
  }

  @Override
  public void init(StreamingConnection conn, long minWriteId, long maxWriteId, int statementId)
      throws StreamingException {
    if (conn == null) {
      throw new StreamingException("Streaming connection cannot be null during record writer initialization");
    }
    this.conn = conn;
    this.curBatchMinWriteId = minWriteId;
    this.curBatchMaxWriteId = maxWriteId;
    this.statementId = statementId;
    this.conf = conn.getHiveConf();
    this.defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    this.table = conn.getTable();
    String location = table.getSd().getLocation();
    try {
      URI uri = new URI(location);
      this.fs = FileSystem.newInstance(uri, conf);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Created new filesystem instance: {}", System.identityHashCode(this.fs));
      }
    } catch (URISyntaxException e) {
      throw new StreamingException("Unable to create URI from location: " + location, e);
    } catch (IOException e) {
      throw new StreamingException("Unable to get filesystem for location: " + location, e);
    }
    this.inputColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
    this.inputTypes = table.getSd().getCols().stream().map(FieldSchema::getType).collect(Collectors.toList());
    if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) {
      this.partitionColumns = table.getPartitionKeys().stream().map(FieldSchema::getName)
          .collect(Collectors.toList());
      this.inputColumns.addAll(partitionColumns);
      this.inputTypes.addAll(
          table.getPartitionKeys().stream().map(FieldSchema::getType).collect(Collectors.toList()));
    }
    this.fullyQualifiedTableName = Warehouse.getQualifiedName(table.getDbName(), table.getTableName());
    String outFormatName = this.table.getSd().getOutputFormat();
    try {
      this.acidOutputFormat = (AcidOutputFormat<?, ?>) ReflectionUtils
          .newInstance(JavaUtils.loadClass(outFormatName), conf);
    } catch (Exception e) {
      String shadePrefix = conf.getVar(HiveConf.ConfVars.HIVE_CLASSLOADER_SHADE_PREFIX);
      if (shadePrefix != null && !shadePrefix.trim().isEmpty()) {
        try {
          LOG.info("Shade prefix: {} specified. Using it as a fallback to load {}", shadePrefix, outFormatName);
          this.acidOutputFormat = (AcidOutputFormat<?, ?>) ReflectionUtils
              .newInstance(JavaUtils.loadClass(shadePrefix, outFormatName), conf);
        } catch (ClassNotFoundException e1) {
          throw new StreamingException(e.getMessage(), e);
        }
      } else {
        throw new StreamingException(e.getMessage(), e);
      }
    }
    setupMemoryMonitoring();
    try {
      final AbstractSerDe serDe = createSerde();
      this.inputRowObjectInspector = (StructObjectInspector) serDe.getObjectInspector();
      if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) {
        preparePartitioningFields();
        int dpStartCol = inputRowObjectInspector.getAllStructFieldRefs().size() - table.getPartitionKeys().size();
        this.outputRowObjectInspector = new SubStructObjectInspector(inputRowObjectInspector, 0, dpStartCol);
      } else {
        this.outputRowObjectInspector = inputRowObjectInspector;
      }
      prepareBucketingFields();
    } catch (SerDeException e) {
      throw new StreamingException("Unable to create SerDe", e);
    }
  }

  protected void setupMemoryMonitoring() {
    this.autoFlush = conf.getBoolVar(HiveConf.ConfVars.HIVE_STREAMING_AUTO_FLUSH_ENABLED);
    this.memoryUsageThreshold = conf.getFloatVar(HiveConf.ConfVars.HIVE_HEAP_MEMORY_MONITOR_USAGE_THRESHOLD);
    this.ingestSizeThreshold = conf.getSizeVar(HiveConf.ConfVars.HIVE_STREAMING_AUTO_FLUSH_CHECK_INTERVAL_SIZE);
    LOG.info("Memory monitoring settings - autoFlush: {} memoryUsageThreshold: {} ingestSizeThreshold: {}",
        autoFlush, memoryUsageThreshold, ingestSizeThreshold);
    this.heapMemoryMonitor = new HeapMemoryMonitor(memoryUsageThreshold);
    MemoryUsage tenuredMemUsage = heapMemoryMonitor.getTenuredGenMemoryUsage();
    if (tenuredMemUsage != null) {
      lowMemoryCanary = new AtomicBoolean(false);
      heapMemoryMonitor.registerListener(new OrcMemoryPressureMonitor(lowMemoryCanary));
      heapMemoryMonitor.start();
      // alert if we are already running low on memory (starting with low memory will lead to frequent auto flush)
      float currentUsage = (float) tenuredMemUsage.getUsed() / (float) tenuredMemUsage.getMax();
      if (currentUsage > memoryUsageThreshold) {
        LOG.warn("LOW MEMORY ALERT! Tenured gen memory is already low. Increase memory to improve performance."
                + " Used: {} Max: {}", LlapUtil.humanReadableByteCount(tenuredMemUsage.getUsed()),
            LlapUtil.humanReadableByteCount(tenuredMemUsage.getMax()));
      }
    }
  }

  protected void prepareBucketingFields() {
    this.isBucketed = table.getSd().getNumBuckets() > 0;
    // For unbucketed tables we have exactly 1 RecordUpdater (until HIVE-19208) for each AbstractRecordWriter,
    // which ends up writing to a file bucket_000000.
    // See also {@link #getBucket(Object)}
    this.totalBuckets = isBucketed ? table.getSd().getNumBuckets() : 1;
    if (isBucketed) {
      this.bucketIds = getBucketColIDs(table.getSd().getBucketCols(), table.getSd().getCols());
      this.bucketFieldData = new Object[bucketIds.size()];
      this.bucketObjInspectors = getObjectInspectorsForBucketedCols(bucketIds, inputRowObjectInspector);
      this.bucketStructFields = new StructField[bucketIds.size()];
      List<? extends StructField> allFields = inputRowObjectInspector.getAllStructFieldRefs();
      for (int i = 0; i < bucketIds.size(); i++) {
        bucketStructFields[i] = allFields.get(bucketIds.get(i));
      }
    }
  }

  protected void preparePartitioningFields() {
    final int numPartitions = table.getPartitionKeys().size();
    this.partitionFieldData = new Object[numPartitions];
    this.partitionObjInspectors = new ObjectInspector[numPartitions];
    int startIdx = inputRowObjectInspector.getAllStructFieldRefs().size() - numPartitions;
    int endIdx = inputRowObjectInspector.getAllStructFieldRefs().size();
    int j = 0;
    for (int i = startIdx; i < endIdx; i++) {
      StructField structField = inputRowObjectInspector.getAllStructFieldRefs().get(i);
      partitionObjInspectors[j++] = structField.getFieldObjectInspector();
    }
    this.partitionStructFields = new StructField[partitionColumns.size()];
    for (int i = 0; i < partitionColumns.size(); i++) {
      String partCol = partitionColumns.get(i);
      partitionStructFields[i] = inputRowObjectInspector.getStructFieldRef(partCol);
    }
  }

  /**
   * Used to tag error messages with some breadcrumbs.
   */
  protected String getWatermark(String partition) {
    return partition + " writeIds[" + curBatchMinWriteId + "," + curBatchMaxWriteId + "]";
  }

  // return the column numbers of the bucketed columns
  protected List<Integer> getBucketColIDs(List<String> bucketCols, List<FieldSchema> cols) {
    ArrayList<Integer> result = new ArrayList<>(bucketCols.size());
    HashSet<String> bucketSet = new HashSet<>(bucketCols);
    for (int i = 0; i < cols.size(); i++) {
      if (bucketSet.contains(cols.get(i).getName())) {
        result.add(i);
      }
    }
    return result;
  }

  /**
   * Create SerDe for the record writer.
   *
   * @return - serde
   * @throws SerializationError - if serde cannot be created.
   */
  public abstract AbstractSerDe createSerde() throws SerializationError;

  /**
   * Encode a record as an Object that Hive can read with the ObjectInspector associated with the
   * serde returned by {@link #createSerde}.  This is public so that test frameworks can use it.
   *
   * @param record record to be deserialized
   * @return deserialized record as an Object
   * @throws SerializationError - any error during serialization or deserialization of record
   */
  public abstract Object encode(byte[] record) throws SerializationError;

  // returns the bucket number to which the record belongs
  protected int getBucket(Object row) {
    if (!isBucketed) {
      return 0;
    }
    Object[] bucketFields = getBucketFields(row);
    int bucketingVersion = Utilities
        .getBucketingVersion(table.getParameters().get(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
    return bucketingVersion == 2
        ? ObjectInspectorUtils.getBucketNumber(bucketFields, bucketObjInspectors, totalBuckets)
        : ObjectInspectorUtils.getBucketNumberOld(bucketFields, bucketObjInspectors, totalBuckets);
  }

  protected List<String> getPartitionValues(final Object row) {
    if (!conn.isPartitionedTable()) {
      return null;
    }
    List<String> partitionValues = new ArrayList<>();
    if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) {
      Object[] partitionFields = getPartitionFields(row);
      for (int i = 0; i < partitionObjInspectors.length; i++) {
        ObjectInspector oi = partitionObjInspectors[i];
        Object field = partitionFields[i];
        Object partitionValue = ObjectInspectorUtils.copyToStandardObject(field, oi,
            ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE);
        if (partitionValue == null || partitionValue.toString().length() == 0) {
          partitionValues.add(defaultPartitionName);
        } else {
          partitionValues.add(partitionValue.toString());
        }
      }
    } else {
      partitionValues = conn.getStaticPartitionValues();
    }
    return partitionValues;
  }

  @Override
  public void flush() throws StreamingIOFailure {
    try {
      if (LOG.isDebugEnabled()) {
        logStats("Stats before flush:");
      }
      for (Map.Entry<String, List<RecordUpdater>> entry : updaters.entrySet()) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Flushing record updater for partitions: {}", entry.getKey());
        }
        for (RecordUpdater updater : entry.getValue()) {
          if (updater != null) {
            updater.flush();
          }
        }
      }
      ingestSizeBytes = 0;
      if (LOG.isDebugEnabled()) {
        logStats("Stats after flush:");
      }
    } catch (IOException e) {
      throw new StreamingIOFailure("Unable to flush recordUpdater", e);
    }
  }

  @Override
  public void close() throws StreamingIOFailure {
    heapMemoryMonitor.close();
    boolean haveError = false;
    String partition = null;
    if (LOG.isDebugEnabled()) {
      logStats("Stats before close:");
    }
    for (Map.Entry<String, List<RecordUpdater>> entry : updaters.entrySet()) {
      partition = entry.getKey();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Closing updater for partitions: {}", partition);
      }
      for (RecordUpdater updater : entry.getValue()) {
        if (updater != null) {
          try {
            // try not to leave any files open
            updater.close(false);
          } catch (Exception ex) {
            haveError = true;
            LOG.error("Unable to close " + updater + " due to: " + ex.getMessage(), ex);
          }
        }
      }
      entry.getValue().clear();
    }
    updaters.clear();
    if (LOG.isDebugEnabled()) {
      logStats("Stats after close:");
    }
    try {
      this.fs.close();
    } catch (IOException e) {
      throw new StreamingIOFailure("Error while closing FileSystem", e);
    }
    if (haveError) {
      throw new StreamingIOFailure("Encountered errors while closing (see logs) " + getWatermark(partition));
    }
  }

  protected static ObjectInspector[] getObjectInspectorsForBucketedCols(List<Integer> bucketIds,
      StructObjectInspector recordObjInspector) {
    ObjectInspector[] result = new ObjectInspector[bucketIds.size()];
    for (int i = 0; i < bucketIds.size(); i++) {
      int bucketId = bucketIds.get(i);
      result[i] = recordObjInspector.getAllStructFieldRefs().get(bucketId).getFieldObjectInspector();
    }
    return result;
  }

  protected Object[] getBucketFields(Object row) {
    for (int i = 0; i < bucketIds.size(); i++) {
      bucketFieldData[i] = inputRowObjectInspector.getStructFieldData(row, bucketStructFields[i]);
    }
    return bucketFieldData;
  }

  protected Object[] getPartitionFields(Object row) {
    for (int i = 0; i < partitionFieldData.length; i++) {
      partitionFieldData[i] = inputRowObjectInspector.getStructFieldData(row, partitionStructFields[i]);
    }
    return partitionFieldData;
  }

  @Override
  public void write(final long writeId, final InputStream inputStream) throws StreamingException {
    try (Scanner scanner = new Scanner(inputStream).useDelimiter(lineDelimiter)) {
      while (scanner.hasNext()) {
        write(writeId, scanner.next().getBytes());
      }
    }
  }

  @Override
  public void write(final long writeId, final byte[] record) throws StreamingException {
    checkAutoFlush();
    ingestSizeBytes += record.length;
    try {
      Object encodedRow = encode(record);
      int bucket = getBucket(encodedRow);
      List<String> partitionValues = getPartitionValues(encodedRow);
      getRecordUpdater(partitionValues, bucket).insert(writeId, encodedRow);
      // ingest size bytes gets reset on flush() whereas connection stats are not
      conn.getConnectionStats().incrementRecordsWritten();
      conn.getConnectionStats().incrementRecordsSize(record.length);
    } catch (IOException e) {
      throw new StreamingIOFailure("Error writing record in transaction write id (" + writeId + ")", e);
    }
  }

  protected void checkAutoFlush() throws StreamingIOFailure {
    if (!autoFlush) {
      return;
    }
    if (lowMemoryCanary != null) {
      if (lowMemoryCanary.get() && ingestSizeBytes > ingestSizeThreshold) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Low memory canary is set and ingestion size (buffered) threshold '{}' exceeded. "
              + "Flushing all record updaters..", LlapUtil.humanReadableByteCount(ingestSizeThreshold));
        }
        flush();
        conn.getConnectionStats().incrementAutoFlushCount();
        lowMemoryCanary.set(false);
      }
    } else {
      if (ingestSizeBytes > ingestSizeThreshold) {
        MemoryMXBean mxBean = ManagementFactory.getMemoryMXBean();
        MemoryUsage heapUsage = mxBean.getHeapMemoryUsage();
        float memUsedFraction = ((float) heapUsage.getUsed() / (float) heapUsage.getMax());
        if (memUsedFraction > memoryUsageThreshold) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Memory usage threshold '{}' and ingestion size (buffered) threshold '{}' exceeded. "
                + "Flushing all record updaters..", memUsedFraction,
                LlapUtil.humanReadableByteCount(ingestSizeThreshold));
          }
          flush();
          conn.getConnectionStats().incrementAutoFlushCount();
        }
      }
    }
  }

  @Override
  public Set<String> getPartitions() {
    return addedPartitions;
  }

  protected RecordUpdater createRecordUpdater(final Path partitionPath, int bucketId, Long minWriteId,
      Long maxWriteID) throws IOException {
    // Initialize table properties from the table parameters. This is required because the table
    // may define certain table parameters that may be required while writing. The table parameter
    // 'transactional_properties' is one such example.
    Properties tblProperties = new Properties();
    tblProperties.putAll(table.getParameters());
    return acidOutputFormat.getRecordUpdater(partitionPath,
        new AcidOutputFormat.Options(conf).filesystem(fs).inspector(outputRowObjectInspector)
            .bucket(bucketId).tableProperties(tblProperties).minimumWriteId(minWriteId)
            .maximumWriteId(maxWriteID).statementId(statementId).finalDestination(partitionPath));
  }

  /**
   * Returns the file that would be used to store rows under these parameters.
   *
   * @param partitionValues partition values
   * @param bucketId bucket id
   * @param minWriteId min write Id
   * @param maxWriteId max write Id
   * @param statementId statement Id
   * @param table table
   * @return the location of the file.
   * @throws StreamingException when the path is not found
   */
  @Override
  public Path getDeltaFileLocation(List<String> partitionValues, Integer bucketId, Long minWriteId,
      Long maxWriteId, Integer statementId, Table table) throws StreamingException {
    Path destLocation;
    if (partitionValues == null) {
      destLocation = new Path(table.getSd().getLocation());
    } else {
      Map<String, String> partSpec = Warehouse.makeSpecFromValues(table.getPartitionKeys(), partitionValues);
      try {
        destLocation = new Path(table.getDataLocation(), Warehouse.makePartPath(partSpec));
      } catch (MetaException e) {
        throw new StreamingException("Unable to retrieve the delta file location"
            + " for values: " + partitionValues
            + ", minWriteId: " + minWriteId
            + ", maxWriteId: " + maxWriteId
            + ", statementId: " + statementId, e);
      }
    }
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).filesystem(fs)
        .inspector(outputRowObjectInspector).bucket(bucketId).minimumWriteId(minWriteId)
        .maximumWriteId(maxWriteId).statementId(statementId).finalDestination(destLocation);
    return AcidUtils.createFilename(destLocation, options);
  }

  protected RecordUpdater getRecordUpdater(List<String> partitionValues, int bucketId) throws StreamingIOFailure {
    RecordUpdater recordUpdater;
    String key;
    Path destLocation;
    try {
      key = partitionValues == null ? fullyQualifiedTableName : partitionValues.toString();
      // Add partition in metastore for dynamic partitioning. We make a metastore call for every new partition value
      // that we encounter, even if the partition already exists (the exists check requires a metastore call anyway).
      if (partitionPaths.containsKey(key)) {
        destLocation = partitionPaths.get(key);
      } else {
        // un-partitioned table
        if (partitionValues == null) {
          destLocation = new Path(table.getSd().getLocation());
        } else {
          PartitionInfo partitionInfo = conn.createPartitionIfNotExists(partitionValues);
          // collect the newly added partitions. connection.commitTransaction() will report the dynamically added
          // partitions to TxnHandler
          if (!partitionInfo.isExists()) {
            addedPartitions.add(partitionInfo.getName());
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Partition {} already exists for table {}", partitionInfo.getName(),
                  fullyQualifiedTableName);
            }
          }
          destLocation = new Path(partitionInfo.getPartitionLocation());
        }
        partitionPaths.put(key, destLocation);
      }
      updaters.computeIfAbsent(key, k -> initializeBuckets());
      recordUpdater = updaters.get(key).get(bucketId);
    } catch (StreamingException e) {
      throw new StreamingIOFailure("Unable to create partition: " + partitionValues + " for " + conn, e);
    }
    if (recordUpdater == null) {
      try {
        recordUpdater = createRecordUpdater(destLocation, bucketId, curBatchMinWriteId, curBatchMaxWriteId);
      } catch (IOException e) {
        String errMsg = "Failed creating RecordUpdater for " + getWatermark(destLocation.toString());
        LOG.error(errMsg, e);
        throw new StreamingIOFailure(errMsg, e);
      }
      List<RecordUpdater> partitionUpdaters = updaters.get(key);
      partitionUpdaters.set(bucketId, recordUpdater);
    }
    return recordUpdater;
  }

  protected List<RecordUpdater> initializeBuckets() {
    List<RecordUpdater> result = new ArrayList<>(totalBuckets);
    for (int bucket = 0; bucket < totalBuckets; bucket++) {
      result.add(bucket, null); // so that get(i) returns null rather than throwing IndexOutOfBoundsException
    }
    return result;
  }

  protected void logStats(final String prefix) {
    int openRecordUpdaters = updaters.values().stream().mapToInt(List::size).sum();
    long bufferedRecords = updaters.values().stream().flatMap(List::stream).filter(Objects::nonNull)
        .mapToLong(RecordUpdater::getBufferedRowCount).sum();
    MemoryUsage memoryUsage = heapMemoryMonitor.getTenuredGenMemoryUsage();
    String oldGenUsage = "NA";
    if (memoryUsage != null) {
      oldGenUsage = "used/max => " + LlapUtil.humanReadableByteCount(memoryUsage.getUsed()) + "/"
          + LlapUtil.humanReadableByteCount(memoryUsage.getMax());
    }
    LOG.debug("{} [record-updaters: {}, partitions: {}, buffered-records: {} total-records: {} "
            + "buffered-ingest-size: {}, total-ingest-size: {} tenured-memory-usage: {}]", prefix,
        openRecordUpdaters, partitionPaths.size(), bufferedRecords,
        conn.getConnectionStats().getRecordsWritten(), LlapUtil.humanReadableByteCount(ingestSizeBytes),
        LlapUtil.humanReadableByteCount(conn.getConnectionStats().getRecordsSize()), oldGenUsage);
  }
}
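
To see where AbstractRecordWriter fits, here is a minimal usage sketch of the Hive Streaming Data Ingest V2 API that drives it. StrictDelimitedInputWriter is one of the concrete subclasses shipped with Hive streaming; the database name, table name, agent string, and sample records below are placeholder values, and the sketch assumes a transactional, comma-delimited target table already exists.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingException;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

public class StreamingWriteExample {
  public static void main(String[] args) throws StreamingException {
    HiveConf hiveConf = new HiveConf(); // picks up hive-site.xml from the classpath

    // The record writer: AbstractRecordWriter handles buckets, partitions and auto-flush;
    // this subclass only supplies the SerDe that decodes delimited byte[] records.
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();

    // The connection calls writer.init(...) with the write-id range of each transaction batch.
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase("default")          // example database
        .withTable("alerts")              // example transactional table
        .withAgentInfo("example-agent-1") // free-form identifier used for monitoring
        .withRecordWriter(writer)
        .withHiveConf(hiveConf)
        .connect();
    try {
      connection.beginTransaction();
      // Each write() flows through AbstractRecordWriter.write(writeId, byte[]):
      // encode -> pick bucket -> resolve partition -> RecordUpdater.insert
      connection.write("1,engine overheating".getBytes());
      connection.write("2,disk full".getBytes());
      connection.commitTransaction();
    } finally {
      connection.close(); // closes all RecordUpdaters and the FileSystem instance
    }
  }
}

Because flush() and auto-flush are handled inside the writer, the caller only manages transactions; tuning knobs such as hive.streaming.auto.flush.enabled and the heap usage threshold referenced in setupMemoryMonitoring() come from the HiveConf passed to the builder.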