org.apache.beam.sdk.extensions.sql.impl.udf.BuiltinStringFunctions.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.sdk.extensions.sql.impl.udf.BuiltinStringFunctions.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.sdk.extensions.sql.impl.udf;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.auto.service.AutoService;
import java.util.Arrays;
import org.apache.beam.sdk.schemas.Schema.TypeName;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;

/**
 * Built-in string and byte-array functions, each published under the SQL function name given in
 * its {@code @UDF} annotation.
 *
 * <p>Unless a method states otherwise, a {@code null} argument produces a {@code null} result.
 */
@AutoService(BeamBuiltinFunctionProvider.class)
public class BuiltinStringFunctions extends BeamBuiltinFunctionProvider {

    /** Message used when the arguments of an LPAD/RPAD call are rejected. */
    private static final String INVALID_PAD_ARGUMENTS =
            "returnLength cannot be negative and pattern cannot be empty.";

    /**
     * ENDS_WITH: tests whether {@code str1} ends with {@code str2}.
     *
     * <p>Null input is not handled yet: a {@code null} argument results in a
     * {@link NullPointerException}. Returning an explicit {@code null} from a {@code Boolean}
     * method triggers the NP_BOOLEAN_RETURN_NULL static-analysis warning, which is why null
     * propagation has not been added here.
     *
     * @param str1 the value to inspect
     * @param str2 the suffix to look for
     * @return {@code true} if {@code str1} ends with {@code str2}
     */
    // TODO: handle null input.
    // NOTE(review): the annotation metadata below was aligned with the Java signature (two STRING
    // parameters, BOOLEAN result); confirm nothing depended on the previous values.
    @UDF(funcName = "ENDS_WITH", parameterArray = { TypeName.STRING,
            TypeName.STRING }, returnType = TypeName.BOOLEAN)
    public Boolean endsWith(String str1, String str2) {
        return str1.endsWith(str2);
    }

    /**
     * STARTS_WITH: tests whether {@code str1} starts with {@code str2}.
     *
     * <p>Null input is not handled yet: a {@code null} argument results in a
     * {@link NullPointerException}. Returning an explicit {@code null} from a {@code Boolean}
     * method triggers the NP_BOOLEAN_RETURN_NULL static-analysis warning, which is why null
     * propagation has not been added here.
     *
     * @param str1 the value to inspect
     * @param str2 the prefix to look for
     * @return {@code true} if {@code str1} starts with {@code str2}
     */
    // TODO: handle null input.
    // NOTE(review): the annotation metadata below was aligned with the Java signature (two STRING
    // parameters, BOOLEAN result); confirm nothing depended on the previous values.
    @UDF(funcName = "STARTS_WITH", parameterArray = { TypeName.STRING,
            TypeName.STRING }, returnType = TypeName.BOOLEAN)
    public Boolean startsWith(String str1, String str2) {
        return str1.startsWith(str2);
    }

    /**
     * LENGTH for strings.
     *
     * @param str the string to measure, may be {@code null}
     * @return the number of UTF-16 code units in {@code str}, or {@code null} for {@code null} input
     */
    @UDF(funcName = "LENGTH", parameterArray = { TypeName.STRING }, returnType = TypeName.INT64)
    public Long lengthString(String str) {
        if (str == null) {
            return null;
        }
        return (long) str.length();
    }

    /**
     * LENGTH for byte arrays.
     *
     * @param bytes the array to measure, may be {@code null}
     * @return the number of bytes, or {@code null} for {@code null} input
     */
    @UDF(funcName = "LENGTH", parameterArray = { TypeName.BYTES }, returnType = TypeName.INT64)
    public Long lengthBytes(byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        return (long) bytes.length;
    }

    /**
     * REVERSE for strings.
     *
     * @param str the string to reverse, may be {@code null}
     * @return the reversed string, or {@code null} for {@code null} input
     */
    @UDF(funcName = "REVERSE", parameterArray = { TypeName.STRING }, returnType = TypeName.STRING)
    public String reverseString(String str) {
        if (str == null) {
            return null;
        }
        return new StringBuilder(str).reverse().toString();
    }

    /**
     * REVERSE for byte arrays. The input array is left untouched; a reversed copy is returned.
     *
     * @param bytes the array to reverse, may be {@code null}
     * @return a new array holding the bytes in reverse order, or {@code null} for {@code null} input
     */
    @UDF(funcName = "REVERSE", parameterArray = { TypeName.BYTES }, returnType = TypeName.BYTES)
    public byte[] reverseBytes(byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        byte[] ret = Arrays.copyOf(bytes, bytes.length);
        ArrayUtils.reverse(ret);
        return ret;
    }

    /**
     * FROM_HEX: decodes a hexadecimal string into bytes.
     *
     * @param str the hexadecimal text, may be {@code null}
     * @return the decoded bytes, or {@code null} for {@code null} input
     * @throws RuntimeException wrapping the {@link DecoderException} raised when {@code str} cannot
     *     be decoded
     */
    @UDF(funcName = "FROM_HEX", parameterArray = { TypeName.STRING }, returnType = TypeName.BYTES)
    public byte[] fromHex(String str) {
        if (str == null) {
            return null;
        }

        try {
            return Hex.decodeHex(str.toCharArray());
        } catch (DecoderException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * TO_HEX: encodes bytes as a hexadecimal string.
     *
     * @param bytes the bytes to encode, may be {@code null}
     * @return the hexadecimal text, or {@code null} for {@code null} input
     */
    @UDF(funcName = "TO_HEX", parameterArray = { TypeName.BYTES }, returnType = TypeName.STRING)
    public String toHex(byte[] bytes) {
        if (bytes == null) {
            return null;
        }

        return Hex.encodeHexString(bytes);
    }

    /**
     * LPAD with a single space as the padding pattern.
     *
     * @see #lpad(String, Long, String)
     */
    @UDF(funcName = "LPAD", parameterArray = { TypeName.STRING, TypeName.INT64 }, returnType = TypeName.STRING)
    public String lpad(String originalValue, Long returnLength) {
        return lpad(originalValue, returnLength, " ");
    }

    /**
     * LPAD: produces a string of exactly {@code returnLength} characters by prepending repetitions
     * of {@code pattern} to {@code originalValue}, or by truncating {@code originalValue} when it is
     * already longer.
     *
     * @param originalValue the value to pad or truncate
     * @param returnLength the length of the result; must not be negative
     * @param pattern the padding to repeat; must not be empty
     * @return the padded or truncated string, or {@code null} if any argument is {@code null}
     * @throws IllegalArgumentException if {@code returnLength} is negative or {@code pattern} is
     *     empty
     * @throws ArithmeticException if {@code returnLength} does not fit in an {@code int}
     */
    @UDF(funcName = "LPAD", parameterArray = { TypeName.STRING, TypeName.INT64,
            TypeName.STRING }, returnType = TypeName.STRING)
    public String lpad(String originalValue, Long returnLength, String pattern) {
        if (originalValue == null || returnLength == null || pattern == null) {
            return null;
        }

        int targetLength = checkedPadLength(returnLength, pattern.isEmpty());

        if (originalValue.length() == targetLength) {
            return originalValue;
        }
        if (originalValue.length() > targetLength) {
            // Longer than requested: keep only the leading characters.
            return originalValue.substring(0, targetLength);
        }
        return StringUtils.leftPad(originalValue, targetLength, pattern);
    }

    /**
     * LPAD for bytes with a single space (UTF-8) as the padding pattern.
     *
     * @see #lpad(byte[], Long, byte[])
     */
    @UDF(funcName = "LPAD", parameterArray = { TypeName.BYTES, TypeName.INT64 }, returnType = TypeName.BYTES)
    public byte[] lpad(byte[] originalValue, Long returnLength) {
        return lpad(originalValue, returnLength, " ".getBytes(UTF_8));
    }

    /**
     * LPAD for bytes: produces an array of exactly {@code returnLength} bytes by prepending
     * repetitions of {@code pattern} to {@code originalValue}, or by truncating
     * {@code originalValue} when it is already longer.
     *
     * <p>When {@code originalValue} already has the requested length the same array instance is
     * returned, not a copy.
     *
     * @param originalValue the value to pad or truncate
     * @param returnLength the length of the result; must not be negative
     * @param pattern the padding to repeat; must not be empty
     * @return the padded or truncated bytes, or {@code null} if any argument is {@code null}
     * @throws IllegalArgumentException if {@code returnLength} is negative or {@code pattern} is
     *     empty
     * @throws ArithmeticException if {@code returnLength} does not fit in an {@code int}
     */
    @UDF(funcName = "LPAD", parameterArray = { TypeName.BYTES, TypeName.INT64,
            TypeName.BYTES }, returnType = TypeName.BYTES)
    public byte[] lpad(byte[] originalValue, Long returnLength, byte[] pattern) {
        if (originalValue == null || returnLength == null || pattern == null) {
            return null;
        }

        int targetLength = checkedPadLength(returnLength, pattern.length == 0);

        if (originalValue.length == targetLength) {
            return originalValue;
        }
        if (originalValue.length > targetLength) {
            // Longer than requested: keep only the leading bytes.
            return Arrays.copyOf(originalValue, targetLength);
        }

        byte[] padded = new byte[targetLength];
        int padLength = targetLength - originalValue.length;
        // Step one: fill the first padLength bytes with the repeated pattern.
        byteArrayPadding(padded, pattern, 0, padLength);
        // Step two: place the original value right after the padding.
        System.arraycopy(originalValue, 0, padded, padLength, originalValue.length);
        return padded;
    }

    /**
     * RPAD with a single space as the padding pattern.
     *
     * @see #rpad(String, Long, String)
     */
    @UDF(funcName = "RPAD", parameterArray = { TypeName.STRING, TypeName.INT64 }, returnType = TypeName.STRING)
    public String rpad(String originalValue, Long returnLength) {
        // Must delegate to rpad (not lpad) so the padding ends up on the right-hand side.
        return rpad(originalValue, returnLength, " ");
    }

    /**
     * RPAD: produces a string of exactly {@code returnLength} characters by appending repetitions
     * of {@code pattern} to {@code originalValue}, or by truncating {@code originalValue} when it is
     * already longer.
     *
     * @param originalValue the value to pad or truncate
     * @param returnLength the length of the result; must not be negative
     * @param pattern the padding to repeat; must not be empty
     * @return the padded or truncated string, or {@code null} if any argument is {@code null}
     * @throws IllegalArgumentException if {@code returnLength} is negative or {@code pattern} is
     *     empty
     * @throws ArithmeticException if {@code returnLength} does not fit in an {@code int}
     */
    @UDF(funcName = "RPAD", parameterArray = { TypeName.STRING, TypeName.INT64,
            TypeName.STRING }, returnType = TypeName.STRING)
    public String rpad(String originalValue, Long returnLength, String pattern) {
        if (originalValue == null || returnLength == null || pattern == null) {
            return null;
        }

        int targetLength = checkedPadLength(returnLength, pattern.isEmpty());

        if (originalValue.length() == targetLength) {
            return originalValue;
        }
        if (originalValue.length() > targetLength) {
            // Longer than requested: keep only the leading characters.
            return originalValue.substring(0, targetLength);
        }
        return StringUtils.rightPad(originalValue, targetLength, pattern);
    }

    /**
     * RPAD for bytes with a single space (UTF-8) as the padding pattern.
     *
     * @see #rpad(byte[], Long, byte[])
     */
    @UDF(funcName = "RPAD", parameterArray = { TypeName.BYTES, TypeName.INT64 }, returnType = TypeName.BYTES)
    public byte[] rpad(byte[] originalValue, Long returnLength) {
        // Must delegate to rpad (not lpad) so the padding ends up on the right-hand side.
        return rpad(originalValue, returnLength, " ".getBytes(UTF_8));
    }

    /**
     * RPAD for bytes: produces an array of exactly {@code returnLength} bytes by appending
     * repetitions of {@code pattern} to {@code originalValue}, or by truncating
     * {@code originalValue} when it is already longer.
     *
     * <p>When {@code originalValue} already has the requested length the same array instance is
     * returned, not a copy.
     *
     * @param originalValue the value to pad or truncate
     * @param returnLength the length of the result; must not be negative
     * @param pattern the padding to repeat; must not be empty
     * @return the padded or truncated bytes, or {@code null} if any argument is {@code null}
     * @throws IllegalArgumentException if {@code returnLength} is negative or {@code pattern} is
     *     empty
     * @throws ArithmeticException if {@code returnLength} does not fit in an {@code int}
     */
    @UDF(funcName = "RPAD", parameterArray = { TypeName.BYTES, TypeName.INT64,
            TypeName.BYTES }, returnType = TypeName.BYTES)
    public byte[] rpad(byte[] originalValue, Long returnLength, byte[] pattern) {
        if (originalValue == null || returnLength == null || pattern == null) {
            return null;
        }

        int targetLength = checkedPadLength(returnLength, pattern.length == 0);

        if (originalValue.length == targetLength) {
            return originalValue;
        }
        if (originalValue.length > targetLength) {
            // Longer than requested: keep only the leading bytes.
            return Arrays.copyOf(originalValue, targetLength);
        }

        // Step one: copy the original value into the front of a targetLength-sized array.
        byte[] padded = Arrays.copyOf(originalValue, targetLength);
        // Step two: fill the remaining bytes on the right with the repeated pattern.
        byteArrayPadding(padded, pattern, originalValue.length, targetLength - originalValue.length);
        return padded;
    }

    /**
     * Validates the arguments shared by every LPAD/RPAD overload and narrows the requested length
     * to an {@code int}.
     *
     * @param returnLength the requested result length
     * @param patternIsEmpty whether the caller's padding pattern has no content
     * @return {@code returnLength} as an {@code int}
     * @throws IllegalArgumentException if {@code returnLength} is negative or the pattern is empty
     * @throws ArithmeticException if {@code returnLength} exceeds {@link Integer#MAX_VALUE}
     */
    private static int checkedPadLength(long returnLength, boolean patternIsEmpty) {
        if (returnLength < 0 || patternIsEmpty) {
            throw new IllegalArgumentException(INVALID_PAD_ARGUMENTS);
        }
        // Java strings and arrays cannot be longer than Integer.MAX_VALUE.
        return Math.toIntExact(returnLength);
    }

    /**
     * Fills {@code paddingLeftBytes} bytes of {@code dest}, starting at {@code paddingOff}, with
     * repetitions of {@code pattern}; the final repetition is cut short if it does not fit.
     *
     * <p>Callers must pass a non-empty {@code pattern}, otherwise the loop cannot make progress.
     */
    private static void byteArrayPadding(byte[] dest, byte[] pattern, int paddingOff, int paddingLeftBytes) {
        while (paddingLeftBytes > 0) {
            // Copy the whole pattern, or only the part that still fits.
            int chunk = Math.min(paddingLeftBytes, pattern.length);
            System.arraycopy(pattern, 0, dest, paddingOff, chunk);
            paddingOff += chunk;
            paddingLeftBytes -= chunk;
        }
    }
}