Java tutorial
/* * Copyright (C) 2015 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.google.cloud.dataflow.sdk.transforms.windowing; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.display.DisplayData; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.Days; import org.joda.time.Instant; import org.joda.time.Months; import org.joda.time.Years; /** * A collection of {@link WindowFn}s that windows values into calendar-based * windows such as spans of days, months, or years. * * <p>For example, to group data into quarters that change on the 15th, use * {@code CalendarWindows.months(3).withStartingMonth(2014, 1).beginningOnDay(15)}. */ public class CalendarWindows { private static final DateTime DEFAULT_START_DATE = new DateTime(0, DateTimeZone.UTC); /** * Returns a {@link WindowFn} that windows elements into periods measured by days. * * <p>For example, {@code CalendarWindows.days(1)} will window elements into * separate windows for each day. */ public static DaysWindows days(int number) { return new DaysWindows(number, DEFAULT_START_DATE, DateTimeZone.UTC); } /** * Returns a {@link WindowFn} that windows elements into periods measured by weeks. * * <p>For example, {@code CalendarWindows.weeks(1, DateTimeConstants.TUESDAY)} will * window elements into week-long windows starting on Tuesdays. */ public static DaysWindows weeks(int number, int startDayOfWeek) { return new DaysWindows(7 * number, DEFAULT_START_DATE.withDayOfWeek(startDayOfWeek), DateTimeZone.UTC); } /** * Returns a {@link WindowFn} that windows elements into periods measured by months. * * <p>For example, * {@code CalendarWindows.months(8).withStartingMonth(2014, 1).beginningOnDay(10)} * will window elements into 8 month windows where that start on the 10th day of month, * and the first window begins in January 2014. */ public static MonthsWindows months(int number) { return new MonthsWindows(number, 1, DEFAULT_START_DATE, DateTimeZone.UTC); } /** * Returns a {@link WindowFn} that windows elements into periods measured by years. * * <p>For example, * {@code CalendarWindows.years(1).withTimeZone(DateTimeZone.forId("America/Los_Angeles"))} * will window elements into year-long windows that start at midnight on Jan 1, in the * America/Los_Angeles time zone. */ public static YearsWindows years(int number) { return new YearsWindows(number, 1, 1, DEFAULT_START_DATE, DateTimeZone.UTC); } /** * A {@link WindowFn} that windows elements into periods measured by days. * * <p>By default, periods of multiple days are measured starting at the * epoch. This can be overridden with {@link #withStartingDay}. * * <p>The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ public static class DaysWindows extends PartitioningWindowFn<Object, IntervalWindow> { public DaysWindows withStartingDay(int year, int month, int day) { return new DaysWindows(number, new DateTime(year, month, day, 0, 0, timeZone), timeZone); } public DaysWindows withTimeZone(DateTimeZone timeZone) { return new DaysWindows(number, startDate.withZoneRetainFields(timeZone), timeZone); } //////////////////////////////////////////////////////////////////////////// private int number; private DateTime startDate; private DateTimeZone timeZone; private DaysWindows(int number, DateTime startDate, DateTimeZone timeZone) { this.number = number; this.startDate = startDate; this.timeZone = timeZone; } @Override public IntervalWindow assignWindow(Instant timestamp) { DateTime datetime = new DateTime(timestamp, timeZone); int dayOffset = Days.daysBetween(startDate, datetime).getDays() / number * number; DateTime begin = startDate.plusDays(dayOffset); DateTime end = begin.plusDays(number); return new IntervalWindow(begin.toInstant(), end.toInstant()); } @Override public Coder<IntervalWindow> windowCoder() { return IntervalWindow.getCoder(); } @Override public boolean isCompatible(WindowFn<?, ?> other) { if (!(other instanceof DaysWindows)) { return false; } DaysWindows that = (DaysWindows) other; return number == that.number && startDate == that.startDate && timeZone == that.timeZone; } @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); builder.add(DisplayData.item("numDays", number).withLabel("Windows Days")) .addIfNotDefault( DisplayData.item("startDate", new DateTime(startDate, timeZone).toInstant()) .withLabel("Window Start Date"), new DateTime(DEFAULT_START_DATE, DateTimeZone.UTC).toInstant()); } public int getNumber() { return number; } public DateTime getStartDate() { return startDate; } public DateTimeZone getTimeZone() { return timeZone; } } /** * A {@link WindowFn} that windows elements into periods measured by months. * * <p>By default, periods of multiple months are measured starting at the * epoch. This can be overridden with {@link #withStartingMonth}. * * <p>Months start on the first day of each calendar month, unless overridden by * {@link #beginningOnDay}. * * <p>The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ public static class MonthsWindows extends PartitioningWindowFn<Object, IntervalWindow> { public MonthsWindows beginningOnDay(int dayOfMonth) { return new MonthsWindows(number, dayOfMonth, startDate, timeZone); } public MonthsWindows withStartingMonth(int year, int month) { return new MonthsWindows(number, dayOfMonth, new DateTime(year, month, 1, 0, 0, timeZone), timeZone); } public MonthsWindows withTimeZone(DateTimeZone timeZone) { return new MonthsWindows(number, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone); } //////////////////////////////////////////////////////////////////////////// private int number; private int dayOfMonth; private DateTime startDate; private DateTimeZone timeZone; private MonthsWindows(int number, int dayOfMonth, DateTime startDate, DateTimeZone timeZone) { this.number = number; this.dayOfMonth = dayOfMonth; this.startDate = startDate; this.timeZone = timeZone; } @Override public IntervalWindow assignWindow(Instant timestamp) { DateTime datetime = new DateTime(timestamp, timeZone); int monthOffset = Months.monthsBetween(startDate.withDayOfMonth(dayOfMonth), datetime).getMonths() / number * number; DateTime begin = startDate.withDayOfMonth(dayOfMonth).plusMonths(monthOffset); DateTime end = begin.plusMonths(number); return new IntervalWindow(begin.toInstant(), end.toInstant()); } @Override public Coder<IntervalWindow> windowCoder() { return IntervalWindow.getCoder(); } @Override public boolean isCompatible(WindowFn<?, ?> other) { if (!(other instanceof MonthsWindows)) { return false; } MonthsWindows that = (MonthsWindows) other; return number == that.number && dayOfMonth == that.dayOfMonth && startDate == that.startDate && timeZone == that.timeZone; } @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); builder.add(DisplayData.item("numMonths", number).withLabel("Window Months")) .addIfNotDefault( DisplayData.item("startDate", new DateTime(startDate, timeZone).toInstant()) .withLabel("Window Start Date"), new DateTime(DEFAULT_START_DATE, DateTimeZone.UTC).toInstant()); } public int getNumber() { return number; } public int getDayOfMonth() { return dayOfMonth; } public DateTime getStartDate() { return startDate; } public DateTimeZone getTimeZone() { return timeZone; } } /** * A {@link WindowFn} that windows elements into periods measured by years. * * <p>By default, periods of multiple years are measured starting at the * epoch. This can be overridden with {@link #withStartingYear}. * * <p>Years start on the first day of each calendar year, unless overridden by * {@link #beginningOnDay}. * * <p>The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ public static class YearsWindows extends PartitioningWindowFn<Object, IntervalWindow> { public YearsWindows beginningOnDay(int monthOfYear, int dayOfMonth) { return new YearsWindows(number, monthOfYear, dayOfMonth, startDate, timeZone); } public YearsWindows withStartingYear(int year) { return new YearsWindows(number, monthOfYear, dayOfMonth, new DateTime(year, 1, 1, 0, 0, timeZone), timeZone); } public YearsWindows withTimeZone(DateTimeZone timeZone) { return new YearsWindows(number, monthOfYear, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone); } //////////////////////////////////////////////////////////////////////////// private int number; private int monthOfYear; private int dayOfMonth; private DateTime startDate; private DateTimeZone timeZone; private YearsWindows(int number, int monthOfYear, int dayOfMonth, DateTime startDate, DateTimeZone timeZone) { this.number = number; this.monthOfYear = monthOfYear; this.dayOfMonth = dayOfMonth; this.startDate = startDate; this.timeZone = timeZone; } @Override public IntervalWindow assignWindow(Instant timestamp) { DateTime datetime = new DateTime(timestamp, timeZone); DateTime offsetStart = startDate.withMonthOfYear(monthOfYear).withDayOfMonth(dayOfMonth); int yearOffset = Years.yearsBetween(offsetStart, datetime).getYears() / number * number; DateTime begin = offsetStart.plusYears(yearOffset); DateTime end = begin.plusYears(number); return new IntervalWindow(begin.toInstant(), end.toInstant()); } @Override public Coder<IntervalWindow> windowCoder() { return IntervalWindow.getCoder(); } @Override public boolean isCompatible(WindowFn<?, ?> other) { if (!(other instanceof YearsWindows)) { return false; } YearsWindows that = (YearsWindows) other; return number == that.number && monthOfYear == that.monthOfYear && dayOfMonth == that.dayOfMonth && startDate == that.startDate && timeZone == that.timeZone; } @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); builder.add(DisplayData.item("numYears", number).withLabel("Window Years")) .addIfNotDefault( DisplayData.item("startDate", new DateTime(startDate, timeZone).toInstant()) .withLabel("Window Start Date"), new DateTime(DEFAULT_START_DATE, DateTimeZone.UTC).toInstant()); } public DateTimeZone getTimeZone() { return timeZone; } public DateTime getStartDate() { return startDate; } public int getDayOfMonth() { return dayOfMonth; } public int getMonthOfYear() { return monthOfYear; } public int getNumber() { return number; } } }