// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.ship.download; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.commons.cli.ParseException; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.StringUtils; import com.aliyun.odps.Odps; import com.aliyun.odps.OdpsException; import com.aliyun.odps.PartitionSpec; import com.aliyun.odps.ship.common.Constants; import com.aliyun.odps.ship.common.DshipContext; import com.aliyun.odps.ship.common.PartitionHelper; import com.aliyun.odps.ship.common.SessionStatus; import com.aliyun.odps.ship.common.Util; import com.aliyun.odps.ship.history.SessionHistory; import com.aliyun.odps.ship.history.SessionHistoryManager; import com.aliyun.odps.tunnel.TunnelException; import com.aliyun.openservices.odps.console.ExecutionContext; import com.aliyun.openservices.odps.console.ODPSConsole; import 
com.aliyun.openservices.odps.console.ODPSConsoleException; import com.aliyun.openservices.odps.console.utils.OdpsConnectionFactory; import com.google.common.io.Files; import jline.console.UserInterruptException; /** * Created by nizheming on 15/5/27. */ public class DshipDownload { private ArrayList<FileDownloader> workItems = new ArrayList<FileDownloader>(); private int threads; private String path; private long writtenBytes = 0L; private Long limit; private ExecutionContext context; private String projectName; private String tableName; private String partitonSpecLiteral; private String ext; private String filename; private String parentDir; private long totalLines; private long slices; SimpleDateFormat sim = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); public DshipDownload() { threads = Integer.parseInt(DshipContext.INSTANCE.get(Constants.THREADS)); if (DshipContext.INSTANCE.get(Constants.LIMIT) != null) { limit = Long.parseLong(DshipContext.INSTANCE.get(Constants.LIMIT)); } else { limit = null; } path = DshipContext.INSTANCE.get(Constants.RESUME_PATH); projectName = DshipContext.INSTANCE.get(Constants.TABLE_PROJECT); tableName = DshipContext.INSTANCE.get(Constants.TABLE); partitonSpecLiteral = DshipContext.INSTANCE.get(Constants.PARTITION_SPEC); ext = Files.getFileExtension(path); filename = Files.getNameWithoutExtension(path); parentDir = FilenameUtils.removeExtension(path) + File.separator; context = DshipContext.INSTANCE.getExecutionContext(); } public void download() throws IOException, ParseException, ODPSConsoleException, OdpsException { Odps odps = OdpsConnectionFactory.createOdps(context); if (projectName == null) { projectName = odps.getDefaultProject(); } PartitionHelper helper = new PartitionHelper(odps, projectName, tableName); if (!helper.isPartitioned()) { if (partitonSpecLiteral != null) { throw new OdpsException( Constants.ERROR_INDICATOR + "can not specify partition for an unpartitioned table"); } splitTableByThreads(null); } else { 
List<PartitionSpec> parSpecs = helper.inferPartitionSpecs(partitonSpecLiteral); if (parSpecs.size() == 0) { throw new OdpsException( Constants.ERROR_INDICATOR + "can not infer any partitions from: " + partitonSpecLiteral); } else if (parSpecs.size() == 1) { // 1 ?? PartitionSpec ps = parSpecs.get(0); splitTableByThreads(ps); } else { // 2 ?? slices = parSpecs.size(); long sliceId = 0; long start = 0; for (PartitionSpec ps : parSpecs) { if (limit != null && start == limit) { break; } TunnelDownloadSession tds = new TunnelDownloadSession(ps); SessionHistory sh = SessionHistoryManager.createSessionHistory(tds.getDownloadId()); String msg = sim.format(new Date()) + " - " + ps.toString() + "\tnew session: " + tds.getDownloadId() + "\ttotal lines: " + Util.toReadableNumber(tds.getTotalLines()); System.err.println(msg); sh.log(msg); long step = (limit == null) ? tds.getTotalLines() : Math.min(tds.getTotalLines(), limit - start); String sliceFileName = filename + PartitionHelper.buildSuffix(ps); if (StringUtils.isNotEmpty(ext)) { sliceFileName = sliceFileName + "." 
+ ext; } path = parentDir + sliceFileName; FileDownloader sd = new FileDownloader(path, sliceId, 0L, step, tds, sh); workItems.add(sd); sliceId++; start += step; } totalLines = start; } } long startTime = System.currentTimeMillis(); for (final FileDownloader sd : workItems) { DshipContext.INSTANCE.put(Constants.STATUS, SessionStatus.running.toString()); sd.sh.saveContext(); } if (threads == 1) { System.err.printf("downloading %s records into %s\n", Util.toReadableNumber(totalLines), Util.pluralize("file", slices)); for (final FileDownloader sd : workItems) { sd.download(); writtenBytes += sd.getWrittenBytes(); } } else { System.err.printf("downloading %s records into %s using %s\n", Util.toReadableNumber(totalLines), Util.pluralize("file", slices), Util.pluralize("thread", threads)); multiThreadDownload(); } for (final FileDownloader sd : workItems) { DshipContext.INSTANCE.put(Constants.STATUS, SessionStatus.success.toString()); sd.sh.saveContext(); } long gap = (System.currentTimeMillis() - startTime) / 1000; if (gap > 0) { long avgSpeed = writtenBytes / gap; System.err.printf("total: %s, time: %s, average speed: %s/s\n", Util.toReadableBytes(writtenBytes), Util.toReadableSeconds(gap), Util.toReadableBytes(avgSpeed)); } System.err.println("download OK"); } private void splitTableByThreads(PartitionSpec ps) throws FileNotFoundException, ODPSConsoleException, IOException, TunnelException { TunnelDownloadSession tds = new TunnelDownloadSession(ps); SessionHistory sh = SessionHistoryManager.createSessionHistory(tds.getDownloadId()); String msg = sim.format(new Date()) + " - new session: " + tds.getDownloadId() + "\ttotal lines: " + Util.toReadableNumber(tds.getTotalLines()); System.err.println(msg); sh.log(msg); // ?? slices = threads; long start = 0; totalLines = (limit == null) ? 
tds.getTotalLines() : Math.min(limit, tds.getTotalLines()); long step = (totalLines + slices - 1) / slices; for (long i = 0; i < slices; i++) { long end = Math.min(start + step, totalLines); if (slices != 1) { //? String sliceFileName = filename + "_" + i; if (StringUtils.isNotEmpty(ext)) { sliceFileName = sliceFileName + "." + ext; } path = parentDir + sliceFileName; } FileDownloader sd = new FileDownloader(path, i, start, end, tds, sh); workItems.add(sd); start = end; } } private void multiThreadDownload() throws TunnelException { ArrayList<Callable<Long>> callList = new ArrayList<Callable<Long>>(); for (final FileDownloader downloader : workItems) { Callable<Long> call = new Callable<Long>() { @Override public Long call() throws Exception { downloader.download(); return downloader.getWrittenBytes(); } }; callList.add(call); } ExecutorService executors = Executors.newFixedThreadPool(threads); try { List<Future<Long>> futures = executors.invokeAll(callList); ArrayList<String> failedThread = new ArrayList<String>(); for (int i = 0; i < futures.size(); ++i) { try { writtenBytes += futures.get(i).get(); } catch (ExecutionException e) { e.printStackTrace(); failedThread.add(String.valueOf(i)); } } if (!failedThread.isEmpty()) { throw new TunnelException("Slice ID:" + StringUtils.join(failedThread, ",") + " Failed."); } } catch (InterruptedException e) { throw new UserInterruptException(e.getMessage()); } } }