com.teradata.tempto.internal.hadoop.hdfs.WebHDFSClient.java Source code

Java tutorial

Introduction

Here is the source code for com.teradata.tempto.internal.hadoop.hdfs.WebHDFSClient.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.teradata.tempto.internal.hadoop.hdfs;

import com.google.common.net.HostAndPort;
import com.jayway.jsonpath.JsonPath;
import com.teradata.tempto.hadoop.hdfs.HdfsClient;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.http.HttpEntity;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.entity.ContentProducer;
import org.apache.http.entity.EntityTemplate;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;

import javax.inject.Inject;
import javax.inject.Named;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Optional;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.net.HostAndPort.fromParts;
import static org.apache.commons.io.FileUtils.byteCountToDisplaySize;
import static org.apache.commons.io.IOUtils.copyLarge;
import static org.apache.http.HttpStatus.SC_CREATED;
import static org.apache.http.HttpStatus.SC_NOT_FOUND;
import static org.apache.http.HttpStatus.SC_OK;
import static org.apache.http.HttpStatus.SC_TEMPORARY_REDIRECT;
import static org.slf4j.LoggerFactory.getLogger;

/**
 * HDFS client based on WebHDFS REST API.
 *
 * <p>All requests are sent over plain HTTP to the configured name node under the
 * {@code /webhdfs/v1} prefix, with the acting user passed as the {@code user.name}
 * query parameter. The underlying HTTP client is configured with
 * {@link DefaultHttpRequestRetryHandler} using {@link #NUMBER_OF_RETRIES} retries.
 *
 * <p>Every failure (unexpected HTTP status or I/O error) is reported as an unchecked
 * {@link RuntimeException}.
 */
public class WebHDFSClient implements HdfsClient {

    private static final Logger logger = getLogger(WebHDFSClient.class);

    private static final JsonPath GET_FILESTATUS_LENGTH_JSON_PATH = JsonPath.compile("$.FileStatus.length");
    private static final JsonPath GET_XATTR_JSON_PATH = JsonPath.compile("$.XAttrs");
    private static final JsonPath GET_XATTR_VALUE_JSON_PATH = JsonPath.compile("$.XAttrs.[0].value");

    private static final int NUMBER_OF_RETRIES = 3;

    /** Charset used to decode textual (JSON) response bodies instead of the platform default. */
    private static final String RESPONSE_CHARSET = "UTF-8";

    private final HostAndPort nameNode;

    private final CloseableHttpClient httpClient;

    /**
     * Creates a client talking to the given WebHDFS name node.
     *
     * @param webHdfsNameNodeHost name node host, must not be null
     * @param webHdfsNameNodePort name node WebHDFS port, must be positive
     * @throws NullPointerException if the host is null
     * @throws IllegalArgumentException if the port is not a valid positive port number
     */
    @Inject
    public WebHDFSClient(@Named("hdfs.webhdfs.host") String webHdfsNameNodeHost,
            @Named("hdfs.webhdfs.port") int webHdfsNameNodePort) {
        // Validate before building the HostAndPort so that a bad port is reported
        // with the descriptive message below rather than by fromParts().
        checkNotNull(webHdfsNameNodeHost, "WebHDFS name node host is null");
        checkArgument(webHdfsNameNodePort > 0, "Invalid name node WebHDFS port number: %s", webHdfsNameNodePort);
        this.nameNode = fromParts(webHdfsNameNodeHost, webHdfsNameNodePort);

        this.httpClient = HttpClientBuilder.create()
                .setRetryHandler(new DefaultHttpRequestRetryHandler(NUMBER_OF_RETRIES, true)).build();
    }

    /**
     * Creates a directory (MKDIRS) with permission 777.
     *
     * @throws RuntimeException if the server does not answer 200 or the request fails
     */
    @Override
    public void createDirectory(String path, String username) {
        // TODO: reconsider permission=777
        HttpPut mkdirRequest = new HttpPut(buildUri(path, username, "MKDIRS", Pair.of("permission", "777")));
        try (CloseableHttpResponse response = httpClient.execute(mkdirRequest)) {
            if (response.getStatusLine().getStatusCode() != SC_OK) {
                throw invalidStatusException("MKDIRS", path, username, mkdirRequest, response);
            }
            logger.debug("Created directory {} - username: {}", path, username);
        } catch (IOException e) {
            throw new RuntimeException("Could not create directory " + path + " in hdfs, user: " + username, e);
        }
    }

    /**
     * Deletes a file or directory; the request always carries {@code recursive=true}.
     *
     * @throws RuntimeException if the server does not answer 200 or the request fails
     */
    @Override
    public void delete(String path, String username) {
        HttpDelete removeFileOrDirectoryRequest = new HttpDelete(
                buildUri(path, username, "DELETE", Pair.of("recursive", "true")));
        try (CloseableHttpResponse response = httpClient.execute(removeFileOrDirectoryRequest)) {
            if (response.getStatusLine().getStatusCode() != SC_OK) {
                throw invalidStatusException("DELETE", path, username, removeFileOrDirectoryRequest, response);
            }
            logger.debug("Removed file or directory {} - username: {}", path, username);
        } catch (IOException e) {
            throw new RuntimeException("Could not remove file or directory " + path + " in hdfs, user: " + username,
                    e);
        }
    }

    /**
     * Saves the content of {@code input} under {@code path}, overwriting any existing file.
     * The stream is wrapped in a {@link BufferedHttpEntity} before being sent.
     *
     * @throws RuntimeException if the stream cannot be buffered or the upload fails
     */
    @Override
    public void saveFile(String path, String username, InputStream input) {
        try {
            saveFile(path, username, new BufferedHttpEntity(new InputStreamEntity(input)));
        } catch (IOException e) {
            throw new RuntimeException("Could not create buffered http entity", e);
        }
    }

    /**
     * Saves the content supplied by {@code repeatableContentProducer} under {@code path},
     * overwriting any existing file.
     *
     * @throws RuntimeException if the upload fails
     */
    @Override
    public void saveFile(String path, String username, RepeatableContentProducer repeatableContentProducer) {
        saveFile(path, username, new EntityTemplate(toApacheContentProducer(repeatableContentProducer)));
    }

    /**
     * Adapts a {@link RepeatableContentProducer} to HttpClient's {@link ContentProducer}:
     * each write opens a fresh input stream, copies it to the output and closes it.
     */
    private ContentProducer toApacheContentProducer(RepeatableContentProducer repeatableContentProducer) {
        return (OutputStream outputStream) -> {
            try (InputStream inputStream = repeatableContentProducer.getInputStream()) {
                copyLarge(inputStream, outputStream);
            }
        };
    }

    /**
     * Performs the two-step WebHDFS CREATE: the first PUT (without body) must answer with a
     * redirect, the second PUT sends {@code entity} to the redirect location and must answer 201.
     */
    private void saveFile(String path, String username, HttpEntity entity) {
        String writeRedirectUri = executeAndGetRedirectUri(
                new HttpPut(buildUri(path, username, "CREATE", Pair.of("overwrite", "true"))));
        HttpPut writeRequest = new HttpPut(writeRedirectUri);
        writeRequest.setEntity(entity);

        try (CloseableHttpResponse response = httpClient.execute(writeRequest)) {
            if (response.getStatusLine().getStatusCode() != SC_CREATED) {
                throw invalidStatusException("CREATE", path, username, writeRequest, response);
            }
            long length = waitForFileSavedAndReturnLength(path, username);
            logger.debug("Saved file {} - username: {}, size: {}", path, username, byteCountToDisplaySize(length));
        } catch (IOException e) {
            throw new RuntimeException("Could not save file " + path + " in hdfs, user: " + username, e);
        }
    }

    /**
     * Reads the file at {@code path} (OPEN) and copies its content to {@code outputStream}.
     * The output stream is not closed by this method.
     *
     * @throws RuntimeException if the server does not answer 200 or the transfer fails
     */
    @Override
    public void loadFile(String path, String username, OutputStream outputStream) {
        HttpGet readRequest = new HttpGet(buildUri(path, username, "OPEN"));
        try (CloseableHttpResponse response = httpClient.execute(readRequest)) {
            if (response.getStatusLine().getStatusCode() != SC_OK) {
                throw invalidStatusException("OPEN", path, username, readRequest, response);
            }

            IOUtils.copy(response.getEntity().getContent(), outputStream);

            logger.debug("Loaded file {} - username: {}", path, username);
        } catch (IOException e) {
            throw new RuntimeException("Could not read file " + path + " in hdfs, user: " + username, e);
        }
    }

    /**
     * Returns the {@code FileStatus.length} value reported by GETFILESTATUS for {@code path}.
     *
     * @throws RuntimeException if the server does not answer 200 or the request fails
     */
    @Override
    public long getLength(String path, String username) {
        HttpGet statusRequest = new HttpGet(buildUri(path, username, "GETFILESTATUS"));
        try (CloseableHttpResponse response = httpClient.execute(statusRequest)) {
            if (response.getStatusLine().getStatusCode() != SC_OK) {
                throw invalidStatusException("GETFILESTATUS", path, username, statusRequest, response);
            }
            return JsonPath.parse(response.getEntity().getContent()).read(GET_FILESTATUS_LENGTH_JSON_PATH,
                    Long.class);
        } catch (IOException e) {
            throw new RuntimeException("Could not get file status: " + path + " , user: " + username, e);
        }
    }

    /**
     * Checks whether GETFILESTATUS for {@code path} answers 200.
     *
     * <p>Note: any other status (not only 404) yields {@code false}; only I/O failures throw.
     *
     * @throws RuntimeException if the request fails with an I/O error
     */
    @Override
    public boolean exist(String path, String username) {
        HttpGet statusRequest = new HttpGet(buildUri(path, username, "GETFILESTATUS"));
        try (CloseableHttpResponse response = httpClient.execute(statusRequest)) {
            return response.getStatusLine().getStatusCode() == SC_OK;
        } catch (IOException e) {
            throw new RuntimeException("Could not get file status: " + path + " , user: " + username, e);
        }
    }

    /**
     * Sets an extended attribute on {@code path} (SETXATTR with {@code flag=CREATE}).
     *
     * @throws RuntimeException if the server does not answer 200 or the request fails
     */
    @Override
    public void setXAttr(String path, String username, String key, String value) {
        HttpPut setXAttrRequest = new HttpPut(buildUri(path, username, "SETXATTR",
                Pair.of("xattr.name", key), Pair.of("xattr.value", value), Pair.of("flag", "CREATE")));
        try (CloseableHttpResponse response = httpClient.execute(setXAttrRequest)) {
            if (response.getStatusLine().getStatusCode() != SC_OK) {
                throw invalidStatusException("SETXATTR", path, username, setXAttrRequest, response);
            }
            logger.debug("Set xAttr {} = {} for {}, username: {}", key, value, path, username);
        } catch (IOException e) {
            throw new RuntimeException("Could not set xAttr for path: " + path + " in hdfs, user: " + username, e);
        }
    }

    /**
     * Removes an extended attribute from {@code path} (REMOVEXATTR).
     *
     * @throws RuntimeException if the server does not answer 200 or the request fails
     */
    @Override
    public void removeXAttr(String path, String username, String key) {
        HttpPut removeXAttrRequest = new HttpPut(
                buildUri(path, username, "REMOVEXATTR", Pair.of("xattr.name", key)));
        try (CloseableHttpResponse response = httpClient.execute(removeXAttrRequest)) {
            if (response.getStatusLine().getStatusCode() != SC_OK) {
                // Report the operation that was actually executed (was "SETXATTR").
                throw invalidStatusException("REMOVEXATTR", path, username, removeXAttrRequest, response);
            }
            logger.debug("Remove xAttr {} for {}, username: {}", key, path, username);
        } catch (IOException e) {
            throw new RuntimeException("Could not remove xAttr for path: " + path + " in hdfs, user: " + username,
                    e);
        }
    }

    /**
     * Reads an extended attribute of {@code path} (GETXATTRS).
     *
     * @return empty when the server answers 404 or the response carries no {@code XAttrs};
     *         otherwise the value of the first returned attribute with surrounding double
     *         quotes stripped
     * @throws RuntimeException on any other non-200 status or if the request fails
     */
    @Override
    public Optional<String> getXAttr(String path, String username, String key) {
        HttpGet getXAttrRequest = new HttpGet(buildUri(path, username, "GETXATTRS", Pair.of("xattr.name", key)));
        try (CloseableHttpResponse response = httpClient.execute(getXAttrRequest)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode == SC_NOT_FOUND) {
                return Optional.empty();
            }
            if (statusCode != SC_OK) {
                throw invalidStatusException("GETXATTRS", path, username, getXAttrRequest, response);
            }

            String responseContent = readContent(response);
            if (GET_XATTR_JSON_PATH.read(responseContent) == null) {
                return Optional.empty();
            }

            String xArgValue = StringUtils.strip(GET_XATTR_VALUE_JSON_PATH.read(responseContent).toString(), "\"");
            return Optional.of(xArgValue);
        } catch (IOException e) {
            throw new RuntimeException("Could not get xAttr for path: " + path + " in hdfs, user: " + username, e);
        }
    }

    /**
     * Executes {@code request} and returns the value of the {@code Location} header of the
     * expected 307 (temporary redirect) response.
     *
     * @throws RuntimeException if the status is not 307, the {@code Location} header is
     *         missing, or the request fails
     */
    private String executeAndGetRedirectUri(HttpUriRequest request) {
        try (CloseableHttpResponse response = httpClient.execute(request)) {
            if (response.getStatusLine().getStatusCode() != SC_TEMPORARY_REDIRECT) {
                throw new RuntimeException("Expected redirect for request: " + request
                        + ", but got: " + response.getStatusLine());
            }
            // Guard against a redirect without a target instead of failing with a bare NPE.
            if (!response.containsHeader("Location")) {
                throw new RuntimeException("Redirect response without Location header for request: " + request);
            }
            return response.getFirstHeader("Location").getValue();
        } catch (IOException e) {
            throw new RuntimeException("Could not execute request " + request, e);
        }
    }

    /**
     * There is some weird bug in WebHDFS, which happens for big files. Just after saving such file
     * it is not possible to immediately set xAttr. Calling GETFILESTATUS seems to introduce
     * some synchronization point, so it should be used just after saving file.
     */
    private long waitForFileSavedAndReturnLength(String path, String username) {
        return getLength(path, username);
    }

    /**
     * Builds {@code http://<nameNode>/webhdfs/v1<path>?op=<operation>&user.name=<username>}
     * followed by the given extra query parameters. A leading {@code /} is added to
     * {@code path} when missing.
     *
     * <p>Declared {@code final} only so that {@code @SafeVarargs} is legal on this instance
     * method for pre-Java-9 compilers; the varargs array is only read.
     *
     * @throws NullPointerException if path, operation or username is null
     * @throws RuntimeException if the resulting URI is syntactically invalid
     */
    @SafeVarargs
    private final URI buildUri(String path, String username, String operation,
            Pair<String, String>... parameters) {
        // Check for null before the first dereference of path.
        checkNotNull(path, "path is null");
        checkNotNull(operation, "operation is null");
        checkNotNull(username, "username is null");

        String absolutePath = path.startsWith("/") ? path : "/" + path;
        try {
            URIBuilder uriBuilder = new URIBuilder().setScheme("http").setHost(nameNode.getHostText())
                    .setPort(nameNode.getPort()).setPath("/webhdfs/v1" + absolutePath)
                    .setParameter("op", operation).setParameter("user.name", username);

            for (Pair<String, String> parameter : parameters) {
                uriBuilder.setParameter(parameter.getKey(), parameter.getValue());
            }

            return uriBuilder.build();
        } catch (URISyntaxException e) {
            // Keep the cause so the offending part of the URI is visible in the stack trace.
            throw new RuntimeException("Could not create save file URI" + ", nameNode: " + nameNode + ", path: "
                    + absolutePath + ", username: " + username, e);
        }
    }

    /**
     * Builds (does not throw) the exception describing an unexpected HTTP status, including
     * the status line, the response body (if any) and the request method and URI.
     *
     * @throws IOException if the response body cannot be read
     */
    private RuntimeException invalidStatusException(String operation, String path, String username,
            HttpRequest request, HttpResponse response) throws IOException {
        return new RuntimeException("Operation " + operation + " on file " + path + " failed, user: " + username
                + ", status: " + response.getStatusLine().getStatusCode() + " "
                + response.getStatusLine().getReasonPhrase() + ", content: "
                + readContent(response) + ", request: "
                + request.getRequestLine().getMethod() + " " + request.getRequestLine().getUri());
    }

    /**
     * Reads the whole response body as text, or returns an empty string when the response
     * carries no entity.
     */
    private static String readContent(HttpResponse response) throws IOException {
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return "";
        }
        return IOUtils.toString(entity.getContent(), RESPONSE_CHARSET);
    }
}