com.wandisco.s3hdfs.rewrite.redirect.MultiPartFileRedirect.java Source code

Introduction

Here is the source code for com.wandisco.s3hdfs.rewrite.redirect.MultiPartFileRedirect.java.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.wandisco.s3hdfs.rewrite.redirect;

import com.wandisco.s3hdfs.path.S3HdfsPath;
import com.wandisco.s3hdfs.rewrite.redirect.comparator.PartComparator;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.methods.*;
import org.apache.hadoop.util.StringUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.*;

import static com.wandisco.s3hdfs.conf.S3HdfsConstants.HTTP_METHOD.*;
import static com.wandisco.s3hdfs.conf.S3HdfsConstants.*;
import static com.wandisco.s3hdfs.rewrite.filter.S3HdfsFilter.ADD_WEBHDFS;

/**
 * This class is used by S3HdfsFilter to handle multi-part upload
 * initialization and completion.
 */
public class MultiPartFileRedirect extends Redirect {
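    // request, response, path, httpClient, LOG, getHttpMethod() and
    // readInputStream() are inherited from the Redirect superclass.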

    public MultiPartFileRedirect(HttpServletRequest request, HttpServletResponse response, S3HdfsPath path) {
        super(request, response, path);
        LOG.debug("Created " + getClass().getSimpleName() + ".");
    }

    /**
     * Initiates a multi-part upload: creates the upload directory inside HDFS
     * and writes the .meta file holding the object's metadata.
     * It uses the URL from the original request to do so.
     *
     * @throws IOException
     * @throws ServletException
     */
    public void sendInitiate() throws IOException, ServletException {
        PutMethod httpPut = (PutMethod) getHttpMethod(request.getScheme(), request.getServerName(),
                request.getServerPort(), "MKDIRS", path.getUserName(), path.getHdfsRootUploadPath(), PUT);

        //Make /root/user/bucket/object/version/upload directory
        httpClient.executeMethod(httpPut);
        httpPut.releaseConnection();
        assert httpPut.getStatusCode() == 200;

        response.setHeader("Set-Cookie", httpPut.getResponseHeader("Set-Cookie").getValue());

        // Make /root/user/bucket/object/version/.meta file
        // Set up HttpPut
        httpPut = (PutMethod) getHttpMethod(request.getScheme(), request.getServerName(), request.getServerPort(),
                "CREATE&overwrite=true", path.getUserName(), path.getFullHdfsMetaPath(), PUT);

        // Set custom metadata headers
        Enumeration headers = request.getHeaderNames();
        Properties metadata = new Properties();
        while (headers.hasMoreElements()) {
            String key = (String) headers.nextElement();
            if (key.startsWith("x-amz-meta-")) {
                metadata.setProperty(key, request.getHeader(key));
            }
        }

        // Include lastModified header
        SimpleDateFormat rfc2822 = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss Z", Locale.ENGLISH);
        String modTime = rfc2822.format(Calendar.getInstance().getTime());
        metadata.setProperty("Last-Modified", modTime);

        // Serialize the metadata Properties into the HttpPut request entity.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        metadata.store(baos, null);

        httpPut.setRequestEntity(new ByteArrayRequestEntity(baos.toByteArray()));
        httpPut.setRequestHeader(S3_HEADER_NAME, S3_HEADER_VALUE);
        httpClient.executeMethod(httpPut);

        LOG.debug("1st response: " + httpPut.getStatusLine().toString());

        boolean containsRedirect = (httpPut.getResponseHeader("Location") != null);
        LOG.debug("Contains redirect? " + containsRedirect);

        if (!containsRedirect) {
            LOG.error("1st response did not contain redirect; no metadata will be created.");
            return;
        }

        // Handle redirect header transition
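        // WebHDFS CREATE is a two-step operation: the NameNode answers the first
        // PUT with a 307 whose Location header points at a DataNode, and the
        // entity is only written when the PUT is re-issued against that URI.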
        assert httpPut.getStatusCode() == 307;
        Header locationHeader = httpPut.getResponseHeader("Location");
        httpPut.setURI(new URI(locationHeader.getValue(), true));

        // Consume response and re-allocate connection for redirect
        httpPut.releaseConnection();
        httpClient.executeMethod(httpPut);

        LOG.debug("2nd response: " + httpPut.getStatusLine().toString());

        if (httpPut.getStatusCode() != 200) {
            LOG.debug("Response content: " + httpPut.getResponseBodyAsString());
        }

        // Consume the 2nd response and assert it was a 200
        httpPut.releaseConnection();
        assert httpPut.getStatusCode() == 200;
    }

    public void sendComplete() throws IOException, ServletException {
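        // Completes the multi-part upload: list the uploaded .part files,
        // concatenate them into 1.part, rename the result to the object file,
        // and finally remove the temporary upload directory.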
        // STEP 1. Get a listing of the uploaded .part files.
        GetMethod httpGet = (GetMethod) getHttpMethod(request.getScheme(), request.getServerName(),
                request.getServerPort(), "LISTSTATUS", path.getUserName(),
                ADD_WEBHDFS(path.getHdfsRootUploadPath()), GET);
        httpClient.executeMethod(httpGet);

        // STEP 2. Parse sources from listing.
        String sourceStr = readInputStream(httpGet.getResponseBodyAsStream());

        List<String> sources = parseSources(path.getHdfsRootUploadPath(), sourceStr);
        httpGet.releaseConnection();
        assert httpGet.getStatusCode() == 200;

        // STEP 3. Concatenate the remaining .part files into 1.part.
        if (sources.size() > 1) {
            Collections.sort(sources, new PartComparator(path.getHdfsRootUploadPath()));
            if (!partsAreInOrder(sources)) {
                response.setStatus(400);
                return;
            }
            doIncrementalConcat(sources);
        }

        // STEP 4. Rename the concatenated 1.part to the .obj file.
        PutMethod httpPut = (PutMethod) getHttpMethod(request.getScheme(), request.getServerName(),
                request.getServerPort(), "RENAME&destination=" + path.getFullHdfsObjPath(), path.getUserName(),
                ADD_WEBHDFS(path.getHdfsRootUploadPath() + "1" + PART_FILE_NAME), PUT);
        httpClient.executeMethod(httpPut);
        httpPut.releaseConnection();
        assert httpPut.getStatusCode() == 200;

        // STEP 5. Delete the upload directory.
        DeleteMethod httpDelete = (DeleteMethod) getHttpMethod(request.getScheme(), request.getServerName(),
                request.getServerPort(), "DELETE", path.getUserName(), path.getHdfsRootUploadPath(), DELETE);
        httpClient.executeMethod(httpDelete);
        httpDelete.releaseConnection();
        assert httpDelete.getStatusCode() == 200;
    }

    private void doIncrementalConcat(List<String> sources) throws IOException {
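        // sources is sorted and sources.get(0) is 1.part, the concat target.
        // The remaining parts are appended onto it in slices of at most 500
        // paths per CONCAT request.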
        int sourcesToConcat = sources.size();
        int increments = sourcesToConcat / 500;

        int i = 0;
        do {
            int startIndex = (i * 500 == 0) ? 1 : i * 500; //1, 500, 1000, 1500...
            int endIndex = ((i + 1) * 500); //500, 1000, 1500...
            if (endIndex >= sourcesToConcat)
                endIndex = sourcesToConcat;
            if (startIndex >= endIndex)
                break; // nothing left to concat (part count was an exact multiple of 500)
            List<String> toConcat = sources.subList(startIndex, endIndex);
            LOG.debug("CONCAT SRCS[" + i + "]: " + toConcat);
            String conCatSrcs = StringUtils.join(",", toConcat);

            PostMethod httpPost = (PostMethod) getHttpMethod(request.getScheme(), request.getServerName(),
                    request.getServerPort(), "CONCAT&sources=" + conCatSrcs, path.getUserName(),
                    ADD_WEBHDFS(path.getHdfsRootUploadPath() + "1" + PART_FILE_NAME), POST);

            httpClient.executeMethod(httpPost);
            httpPost.releaseConnection();
            assert httpPost.getStatusCode() == 200;

            i++;
        } while (i <= increments);
    }

    private List<String> parseSources(String hdfsRootVersionPath, String sources) throws IOException {
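        // Parse the JSON returned by WebHDFS LISTSTATUS and collect the full
        // paths of every entry whose name matches "<n>.part".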
        ObjectMapper mapper = new ObjectMapper();
        JsonNode jsonRoot = mapper.readTree(sources);
        JsonNode array = jsonRoot.get("FileStatuses").get("FileStatus");

        ArrayList<String> retVal = new ArrayList<String>();
        for (int i = 0; i < array.size(); i++) {
            String name;
            JsonNode element = array.get(i);
            name = element.get("pathSuffix").getTextValue();
            if (name.matches("[1-9]+[0-9]*" + PART_FILE_NAME)) {
                retVal.add(hdfsRootVersionPath + name);
            }
        }

        return retVal;
    }

    private boolean partsAreInOrder(List<String> sources) {
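        // Verify the part numbers form the contiguous sequence 1, 2, 3, ...
        // so that no part of the upload is missing.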
        String pathStr = path.getHdfsRootUploadPath();
        int index = 0;

        for (String source : sources) {
            int nextIndex = Integer.decode(source.replace(pathStr, "").replace(PART_FILE_NAME, ""));
            if (nextIndex == (index + 1)) {
                index = nextIndex;
            } else {
                return false;
            }
        }
        return true;
    }
}
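
Usage

The listing above only defines the redirect handler; the snippet below is a minimal sketch (not taken from the project) of how a caller such as S3HdfsFilter might drive it. The MultiPartUsageSketch class name, the handleMultiPart() method, and the query-string checks for the S3 "uploads" / "uploadId" parameters are illustrative assumptions; only the constructor, sendInitiate() and sendComplete() come from the source above.

import java.io.IOException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.wandisco.s3hdfs.path.S3HdfsPath;
import com.wandisco.s3hdfs.rewrite.redirect.MultiPartFileRedirect;

public class MultiPartUsageSketch {

    /**
     * Dispatches an S3 multi-part request to MultiPartFileRedirect.
     * The query-string inspection below is an assumption for illustration.
     */
    public void handleMultiPart(HttpServletRequest request,
                                HttpServletResponse response,
                                S3HdfsPath s3HdfsPath)
            throws IOException, ServletException {
        MultiPartFileRedirect redirect =
                new MultiPartFileRedirect(request, response, s3HdfsPath);

        String query = request.getQueryString();
        if (query != null && query.contains("uploadId")) {
            // Complete: concatenate the uploaded parts and publish the object.
            redirect.sendComplete();
        } else if (query != null && query.contains("uploads")) {
            // Initiate: create the upload directory and the .meta file.
            redirect.sendInitiate();
        }
    }
}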