com.google.cloud.dataflow.sdk.transforms.windowing.CalendarWindows.java Source code

Java tutorial

Introduction

Here is the source code for com.google.cloud.dataflow.sdk.transforms.windowing.CalendarWindows.java

Source

/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.transforms.windowing;

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Days;
import org.joda.time.Instant;
import org.joda.time.Months;
import org.joda.time.Years;

import java.util.Objects;

/**
 * A collection of {@link WindowFn}s that windows values into calendar-based
 * windows such as spans of days, months, or years.
 *
 * <p>For example, to group data into quarters that change on the 15th, use
 * {@code CalendarWindows.months(3).withStartingMonth(2014, 1).beginningOnDay(15)}.
 */
public class CalendarWindows {

    /**
     * Start date handed to every factory method in this class: the instant with
     * millisecond value 0 (the epoch), expressed in UTC.
     */
    private static final DateTime DEFAULT_START_DATE = new DateTime(0, DateTimeZone.UTC);

    /**
     * Creates a {@link WindowFn} that partitions elements into consecutive
     * periods of {@code number} days, counted from the epoch in UTC.
     *
     * <p>{@code CalendarWindows.days(1)}, for instance, gives every calendar day
     * a window of its own.
     *
     * @param number length of each window, in days
     */
    public static DaysWindows days(int number) {
        final DateTimeZone utc = DateTimeZone.UTC;
        return new DaysWindows(number, DEFAULT_START_DATE, utc);
    }

    /**
     * Creates a {@link WindowFn} that partitions elements into periods of
     * {@code number} weeks.
     *
     * <p>{@code CalendarWindows.weeks(1, DateTimeConstants.TUESDAY)}, for instance,
     * produces week-long windows, each of which begins on a Tuesday.
     *
     * @param number length of each window, in weeks
     * @param startDayOfWeek day of the week on which windows begin, as accepted by
     *     {@link DateTime#withDayOfWeek}
     */
    public static DaysWindows weeks(int number, int startDayOfWeek) {
        // A week-based window is a day-based window that is seven times as long and
        // anchored on the requested weekday of the epoch's week.
        final int daysPerWindow = 7 * number;
        final DateTime firstWindowStart = DEFAULT_START_DATE.withDayOfWeek(startDayOfWeek);
        return new DaysWindows(daysPerWindow, firstWindowStart, DateTimeZone.UTC);
    }

    /**
     * Creates a {@link WindowFn} that partitions elements into periods of
     * {@code number} months, counted from the epoch in UTC and beginning on the
     * first day of the month.
     *
     * <p>For instance,
     * {@code CalendarWindows.months(8).withStartingMonth(2014, 1).beginningOnDay(10)}
     * produces 8-month windows that start on the 10th day of the month, with the
     * first window beginning in January 2014.
     *
     * @param number length of each window, in months
     */
    public static MonthsWindows months(int number) {
        final int firstDayOfMonth = 1;
        return new MonthsWindows(number, firstDayOfMonth, DEFAULT_START_DATE, DateTimeZone.UTC);
    }

    /**
     * Creates a {@link WindowFn} that partitions elements into periods of
     * {@code number} years, counted from the epoch in UTC and beginning on
     * January 1.
     *
     * <p>For instance,
     * {@code CalendarWindows.years(1).withTimeZone(DateTimeZone.forID("America/Los_Angeles"))}
     * produces year-long windows that start at midnight on Jan 1 in the
     * America/Los_Angeles time zone.
     *
     * @param number length of each window, in years
     */
    public static YearsWindows years(int number) {
        final int january = 1;
        final int firstDayOfMonth = 1;
        return new YearsWindows(number, january, firstDayOfMonth, DEFAULT_START_DATE, DateTimeZone.UTC);
    }

    /**
     * A {@link WindowFn} that windows elements into periods measured by days.
     *
     * <p>By default, periods of multiple days are measured starting at the
     * epoch.  This can be overridden with {@link #withStartingDay}.
     *
     * <p>The time zone used to determine calendar boundaries is UTC, unless this
     * is overridden with the {@link #withTimeZone} method.
     *
     * <p>Instances are immutable; the {@code with*} methods return modified copies.
     */
    public static class DaysWindows extends PartitioningWindowFn<Object, IntervalWindow> {
        /**
         * Returns a copy of this {@code WindowFn} whose periods are counted from
         * midnight on the given date, interpreted in this instance's time zone.
         */
        public DaysWindows withStartingDay(int year, int month, int day) {
            return new DaysWindows(number, new DateTime(year, month, day, 0, 0, timeZone), timeZone);
        }

        /**
         * Returns a copy of this {@code WindowFn} that uses the given time zone.
         * The start date keeps its calendar field values and is re-interpreted in
         * the new zone.
         */
        public DaysWindows withTimeZone(DateTimeZone timeZone) {
            return new DaysWindows(number, startDate.withZoneRetainFields(timeZone), timeZone);
        }

        ////////////////////////////////////////////////////////////////////////////

        private final int number;
        private final DateTime startDate;
        private final DateTimeZone timeZone;

        /**
         * @param number length of each window, in days; must be positive
         * @param startDate instant from which windows are counted
         * @param timeZone zone in which element timestamps are interpreted
         * @throws IllegalArgumentException if {@code number} is not positive
         */
        private DaysWindows(int number, DateTime startDate, DateTimeZone timeZone) {
            // Fail fast: a zero length would otherwise surface later as a division
            // by zero in assignWindow, and a negative one as an inverted window.
            if (number <= 0) {
                throw new IllegalArgumentException("Window length in days must be positive, got " + number);
            }
            this.number = number;
            this.startDate = startDate;
            this.timeZone = timeZone;
        }

        /**
         * Assigns the timestamp to the {@code number}-day window, aligned to the
         * start date, that contains it. Timestamps earlier than the start date are
         * assigned to the aligned window preceding it.
         */
        @Override
        public IntervalWindow assignWindow(Instant timestamp) {
            DateTime datetime = new DateTime(timestamp, timeZone);

            // Whole days since the start date, snapped to a multiple of the window length.
            int dayOffset = Days.daysBetween(startDate, datetime).getDays() / number * number;
            DateTime begin = startDate.plusDays(dayOffset);

            // Both the whole-day count and the integer division truncate toward zero,
            // so for a timestamp before the start date the candidate window can begin
            // after the timestamp. Step back one full period in that case.
            if (datetime.isBefore(begin)) {
                begin = startDate.plusDays(dayOffset - number);
            }
            DateTime end = begin.plusDays(number);

            return new IntervalWindow(begin.toInstant(), end.toInstant());
        }

        @Override
        public Coder<IntervalWindow> windowCoder() {
            return IntervalWindow.getCoder();
        }

        /**
         * Returns true if {@code other} is a {@code DaysWindows} with the same window
         * length, start date and time zone.
         */
        @Override
        public boolean isCompatible(WindowFn<?, ?> other) {
            if (!(other instanceof DaysWindows)) {
                return false;
            }
            DaysWindows that = (DaysWindows) other;
            // Compare by value: separately constructed or deserialized instances hold
            // distinct but equal DateTime / DateTimeZone objects.
            return number == that.number
                    && Objects.equals(startDate, that.startDate)
                    && Objects.equals(timeZone, that.timeZone);
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            super.populateDisplayData(builder);

            // The start date is only reported when it differs from the default start date.
            builder.add(DisplayData.item("numDays", number).withLabel("Windows Days"))
                    .addIfNotDefault(
                            DisplayData.item("startDate", startDate.toInstant())
                                    .withLabel("Window Start Date"),
                            DEFAULT_START_DATE.toInstant());
        }

        /** Returns the length of each window, in days. */
        public int getNumber() {
            return number;
        }

        /** Returns the date from which windows are counted. */
        public DateTime getStartDate() {
            return startDate;
        }

        /** Returns the time zone in which element timestamps are interpreted. */
        public DateTimeZone getTimeZone() {
            return timeZone;
        }

    }

    /**
     * A {@link WindowFn} that windows elements into periods measured by months.
     *
     * <p>By default, periods of multiple months are measured starting at the
     * epoch.  This can be overridden with {@link #withStartingMonth}.
     *
     * <p>Months start on the first day of each calendar month, unless overridden by
     * {@link #beginningOnDay}.
     *
     * <p>The time zone used to determine calendar boundaries is UTC, unless this
     * is overridden with the {@link #withTimeZone} method.
     *
     * <p>Instances are immutable; the configuration methods return modified copies.
     */
    public static class MonthsWindows extends PartitioningWindowFn<Object, IntervalWindow> {
        /** Returns a copy of this {@code WindowFn} whose windows begin on the given day of the month. */
        public MonthsWindows beginningOnDay(int dayOfMonth) {
            return new MonthsWindows(number, dayOfMonth, startDate, timeZone);
        }

        /**
         * Returns a copy of this {@code WindowFn} whose periods are counted from the
         * given month, interpreted in this instance's time zone.
         */
        public MonthsWindows withStartingMonth(int year, int month) {
            return new MonthsWindows(number, dayOfMonth, new DateTime(year, month, 1, 0, 0, timeZone), timeZone);
        }

        /**
         * Returns a copy of this {@code WindowFn} that uses the given time zone.
         * The start date keeps its calendar field values and is re-interpreted in
         * the new zone.
         */
        public MonthsWindows withTimeZone(DateTimeZone timeZone) {
            return new MonthsWindows(number, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone);
        }

        ////////////////////////////////////////////////////////////////////////////

        private final int number;
        private final int dayOfMonth;
        private final DateTime startDate;
        private final DateTimeZone timeZone;

        /**
         * @param number length of each window, in months; must be positive
         * @param dayOfMonth day of the month on which windows begin
         * @param startDate date whose month anchors the first window
         * @param timeZone zone in which element timestamps are interpreted
         * @throws IllegalArgumentException if {@code number} is not positive
         */
        private MonthsWindows(int number, int dayOfMonth, DateTime startDate, DateTimeZone timeZone) {
            // Fail fast: a zero length would otherwise surface later as a division
            // by zero in assignWindow, and a negative one as an inverted window.
            if (number <= 0) {
                throw new IllegalArgumentException("Window length in months must be positive, got " + number);
            }
            this.number = number;
            this.dayOfMonth = dayOfMonth;
            this.startDate = startDate;
            this.timeZone = timeZone;
        }

        /**
         * Assigns the timestamp to the {@code number}-month window, aligned to the
         * start month and beginning on the configured day of the month, that contains
         * it. Timestamps earlier than the anchor date are assigned to the aligned
         * window preceding it.
         */
        @Override
        public IntervalWindow assignWindow(Instant timestamp) {
            DateTime datetime = new DateTime(timestamp, timeZone);

            // First window boundary: the start month, on the configured day of the month.
            DateTime anchor = startDate.withDayOfMonth(dayOfMonth);

            // Whole months since the anchor, snapped to a multiple of the window length.
            int monthOffset = Months.monthsBetween(anchor, datetime).getMonths() / number * number;
            DateTime begin = anchor.plusMonths(monthOffset);

            // Both the whole-month count and the integer division truncate toward zero,
            // so for a timestamp before the anchor the candidate window can begin after
            // the timestamp. Step back one full period, recomputing from the anchor.
            if (datetime.isBefore(begin)) {
                begin = anchor.plusMonths(monthOffset - number);
            }
            DateTime end = begin.plusMonths(number);

            return new IntervalWindow(begin.toInstant(), end.toInstant());
        }

        @Override
        public Coder<IntervalWindow> windowCoder() {
            return IntervalWindow.getCoder();
        }

        /**
         * Returns true if {@code other} is a {@code MonthsWindows} with the same
         * window length, day of month, start date and time zone.
         */
        @Override
        public boolean isCompatible(WindowFn<?, ?> other) {
            if (!(other instanceof MonthsWindows)) {
                return false;
            }
            MonthsWindows that = (MonthsWindows) other;
            // Compare by value: separately constructed or deserialized instances hold
            // distinct but equal DateTime / DateTimeZone objects.
            return number == that.number
                    && dayOfMonth == that.dayOfMonth
                    && Objects.equals(startDate, that.startDate)
                    && Objects.equals(timeZone, that.timeZone);
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            super.populateDisplayData(builder);

            // The start date is only reported when it differs from the default start date.
            builder.add(DisplayData.item("numMonths", number).withLabel("Window Months"))
                    .addIfNotDefault(
                            DisplayData.item("startDate", startDate.toInstant())
                                    .withLabel("Window Start Date"),
                            DEFAULT_START_DATE.toInstant());
        }

        /** Returns the length of each window, in months. */
        public int getNumber() {
            return number;
        }

        /** Returns the day of the month on which windows begin. */
        public int getDayOfMonth() {
            return dayOfMonth;
        }

        /** Returns the date whose month anchors the first window. */
        public DateTime getStartDate() {
            return startDate;
        }

        /** Returns the time zone in which element timestamps are interpreted. */
        public DateTimeZone getTimeZone() {
            return timeZone;
        }

    }

    /**
     * A {@link WindowFn} that windows elements into periods measured by years.
     *
     * <p>By default, periods of multiple years are measured starting at the
     * epoch.  This can be overridden with {@link #withStartingYear}.
     *
     * <p>Years start on the first day of each calendar year, unless overridden by
     * {@link #beginningOnDay}.
     *
     * <p>The time zone used to determine calendar boundaries is UTC, unless this
     * is overridden with the {@link #withTimeZone} method.
     *
     * <p>Instances are immutable; the configuration methods return modified copies.
     */
    public static class YearsWindows extends PartitioningWindowFn<Object, IntervalWindow> {
        /**
         * Returns a copy of this {@code WindowFn} whose windows begin on the given
         * month and day of the year.
         */
        public YearsWindows beginningOnDay(int monthOfYear, int dayOfMonth) {
            return new YearsWindows(number, monthOfYear, dayOfMonth, startDate, timeZone);
        }

        /**
         * Returns a copy of this {@code WindowFn} whose periods are counted from the
         * given year, interpreted in this instance's time zone.
         */
        public YearsWindows withStartingYear(int year) {
            return new YearsWindows(number, monthOfYear, dayOfMonth, new DateTime(year, 1, 1, 0, 0, timeZone),
                    timeZone);
        }

        /**
         * Returns a copy of this {@code WindowFn} that uses the given time zone.
         * The start date keeps its calendar field values and is re-interpreted in
         * the new zone.
         */
        public YearsWindows withTimeZone(DateTimeZone timeZone) {
            return new YearsWindows(number, monthOfYear, dayOfMonth, startDate.withZoneRetainFields(timeZone),
                    timeZone);
        }

        ////////////////////////////////////////////////////////////////////////////

        private final int number;
        private final int monthOfYear;
        private final int dayOfMonth;
        private final DateTime startDate;
        private final DateTimeZone timeZone;

        /**
         * @param number length of each window, in years; must be positive
         * @param monthOfYear month of the year in which windows begin
         * @param dayOfMonth day of that month on which windows begin
         * @param startDate date whose year anchors the first window
         * @param timeZone zone in which element timestamps are interpreted
         * @throws IllegalArgumentException if {@code number} is not positive
         */
        private YearsWindows(int number, int monthOfYear, int dayOfMonth, DateTime startDate,
                DateTimeZone timeZone) {
            // Fail fast: a zero length would otherwise surface later as a division
            // by zero in assignWindow, and a negative one as an inverted window.
            if (number <= 0) {
                throw new IllegalArgumentException("Window length in years must be positive, got " + number);
            }
            this.number = number;
            this.monthOfYear = monthOfYear;
            this.dayOfMonth = dayOfMonth;
            this.startDate = startDate;
            this.timeZone = timeZone;
        }

        /**
         * Assigns the timestamp to the {@code number}-year window, aligned to the
         * start year and beginning on the configured month and day, that contains it.
         * Timestamps earlier than the anchor date are assigned to the aligned window
         * preceding it.
         */
        @Override
        public IntervalWindow assignWindow(Instant timestamp) {
            DateTime datetime = new DateTime(timestamp, timeZone);

            // First window boundary: the start year, on the configured month and day.
            DateTime offsetStart = startDate.withMonthOfYear(monthOfYear).withDayOfMonth(dayOfMonth);

            // Whole years since the anchor, snapped to a multiple of the window length.
            int yearOffset = Years.yearsBetween(offsetStart, datetime).getYears() / number * number;
            DateTime begin = offsetStart.plusYears(yearOffset);

            // Both the whole-year count and the integer division truncate toward zero,
            // so for a timestamp before the anchor the candidate window can begin after
            // the timestamp. Step back one full period, recomputing from the anchor.
            if (datetime.isBefore(begin)) {
                begin = offsetStart.plusYears(yearOffset - number);
            }
            DateTime end = begin.plusYears(number);

            return new IntervalWindow(begin.toInstant(), end.toInstant());
        }

        @Override
        public Coder<IntervalWindow> windowCoder() {
            return IntervalWindow.getCoder();
        }

        /**
         * Returns true if {@code other} is a {@code YearsWindows} with the same window
         * length, month of year, day of month, start date and time zone.
         */
        @Override
        public boolean isCompatible(WindowFn<?, ?> other) {
            if (!(other instanceof YearsWindows)) {
                return false;
            }
            YearsWindows that = (YearsWindows) other;
            // Compare by value: separately constructed or deserialized instances hold
            // distinct but equal DateTime / DateTimeZone objects.
            return number == that.number
                    && monthOfYear == that.monthOfYear
                    && dayOfMonth == that.dayOfMonth
                    && Objects.equals(startDate, that.startDate)
                    && Objects.equals(timeZone, that.timeZone);
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            super.populateDisplayData(builder);

            // The start date is only reported when it differs from the default start date.
            builder.add(DisplayData.item("numYears", number).withLabel("Window Years"))
                    .addIfNotDefault(
                            DisplayData.item("startDate", startDate.toInstant())
                                    .withLabel("Window Start Date"),
                            DEFAULT_START_DATE.toInstant());
        }

        /** Returns the time zone in which element timestamps are interpreted. */
        public DateTimeZone getTimeZone() {
            return timeZone;
        }

        /** Returns the date whose year anchors the first window. */
        public DateTime getStartDate() {
            return startDate;
        }

        /** Returns the day of the month on which windows begin. */
        public int getDayOfMonth() {
            return dayOfMonth;
        }

        /** Returns the month of the year in which windows begin. */
        public int getMonthOfYear() {
            return monthOfYear;
        }

        /** Returns the length of each window, in years. */
        public int getNumber() {
            return number;
        }

    }
}