Java tutorial
/* * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at * the Lawrence Livermore National Laboratory. Written by Keith Stevens, * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. * * This file is part of the C-Cat package and is covered under the terms and * conditions therein. * * The C-Cat package is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as published * by the Free Software Foundation and distributed hereunder to you. * * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER * RIGHTS. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * * Copyright 2008 The New York Times Company * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. 
*/ package gov.llnl.ontology.text.corpora; import gov.llnl.ontology.text.Document; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringEscapeUtils; import java.io.File; import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Set; /** * NYTimesLDCDocument <BR> * Created: Jun 17, 2008 <BR> * Author: Evan Sandhaus (sandhes@nytimes.com)<BR> * <P> * This class represents a New York Times Corpus Document. See field comments * for individual field description. * <P> * * @author Evan Sandhaus * */ public class NYTCorpusDocument implements Document { /** * The corpus name for any {@link Document} returned by this class. */ public static final String CORPUS_NAME = "NYT"; /** * This field specifies the location on nytimes.com of the article. When * present, this URL is preferred to the URL field on articles published on * or after April 02, 2006, as the linked page will have richer content. */ protected URL alternateURL; /** * This field is a summary of the article written by the New York Times * Indexing Service. */ protected String articleAbstract; /** * This field specifies the biography of the author of the article. * Generally, this field is specified for guest authors not for New York * Times reporters. When this field is specified for Times reporters, it is * usually used to provide the author's email address. */ protected String authorBiography; /** * The banner field is used to indicate if there has been additional * information appended to the articles since its publication. Examples of * banners include ('Correction Appended' and 'Editor's Note Appended'). */ protected String banner; /** * When present, the biographical category field generally indicates that a * document focuses on a particular individual. The value of the field * indicates the area or category in which this individual is best known. * This field is most often defined for Obituaries and Book Reviews. 
* These tags are hand-assigned by a team of library scientists working for
     * the New York Times Indexing service. Initialized to an empty list.
     * <p>
     * Sample values:
     * <ol>
     * <li>Politics and Government (U.S.)
     * <li>Books and Magazines
     * <li>Royalty
     * </ol>
     */
    protected List<String> biographicalCategories = new ArrayList<String>();

    /**
     * The body field is the text content of the article. Please note that this
     * value includes the lead paragraph. The setter stores the text after
     * unescaping its XML entities.
     */
    protected String body;

    /**
     * This field specifies the byline of the article as it appeared in the
     * print edition of the New York Times. Please note that not every article
     * in this collection has a byline, as editorials and other types of
     * articles are generally unsigned.
     * <p>
     * Sample bylines:
     * <ul>
     * <li>By James Reston
     * <li>By JAMES GLANZ; William J. Broad contributed reporting for this
     * article.
     * <li>By ADAM NAGOURNEY and JEFF ZELENY
     * </ul>
     */
    protected String byline;

    /**
     * If the article is part of a regular column, this field specifies the name
     * of that column.
     * <p>
     * Sample column names:
     * <ol>
     * <li>World News Briefs
     * <li>WEDDINGS
     * <li>The Accessories Channel
     * </ol>
     */
    protected String columnName;

    /**
     * This field specifies the column in which the article starts in the print
     * paper. A typical printed page in the paper has six columns numbered from
     * right to left. As a consequence most, but not all, of the values for this
     * field fall in the range 1-6.
     */
    protected Integer columnNumber;

    /**
     * This field specifies the date on which a correction was made to the
     * article. Generally, if the correction date is specified, the correction
     * text will also be specified (and vice versa).
     */
    protected Date correctionDate;

    /**
     * For articles corrected following publication, this field specifies the
     * correction. Generally, if the correction text is specified, the
     * correction date will also be specified (and vice versa).
     */
    protected String correctionText;

    /**
     * This field indicates the entity that produced the editorial content of
     * this document.
* For this collection, the credit will always be set to 'The New York
     * Times'.
     */
    protected String credit;

    /**
     * The dateline field is the dateline of the article. Generally a dateline
     * is the name of the geographic location from which the article was filed
     * followed by a comma and the month and day of the filing.
     * <p>
     * Sample datelines:
     * <ul>
     * <li>WASHINGTON, April 30
     * <li>RIYADH, Saudi Arabia, March 29
     * <li>ONTARIO, N.Y., Jan. 26
     * </ul>
     * Please note:
     * <ol>
     * <li>The dateline location is the location from which the article was
     * filed. Oftentimes this location is related to the content of the
     * article, but this is not guaranteed.
     * <li>The date specified for the dateline is often but not always the day
     * previous to the publication date.
     * <li>The date is usually but not always specified.
     * </ol>
     */
    protected String dateline;

    /**
     * This field specifies the day of week on which the article was published.
     * <ul>
     * <li>Monday
     * <li>Tuesday
     * <li>Wednesday
     * <li>Thursday
     * <li>Friday
     * <li>Saturday
     * <li>Sunday
     * </ul>
     */
    protected String dayOfWeek;

    /**
     * The descriptors field specifies a list of descriptive terms drawn from
     * a normalized controlled vocabulary corresponding to subjects mentioned in
     * the article. These tags are hand-assigned by a team of library scientists
     * working in the New York Times Indexing service. Initialized to an empty
     * list.
     * <p>
     * Examples include:
     * <ol>
     * <li>ECONOMIC CONDITIONS AND TRENDS
     * <li>AIRPLANES
     * <li>VIOLINS
     * </ol>
     */
    protected List<String> descriptors = new ArrayList<String>();

    /**
     * The feature page value recorded for the article.
     * <p>
     * NOTE(review): the original description of this field was left unfinished
     * (it read only "The"). Presumably this names the feature page on which
     * the article appeared; confirm against the corpus documentation.
     */
    protected String featurePage;

    /**
     * The general online descriptors field specifies a list of descriptors
     * that are at a higher level of generality than the other tags associated
     * with the article. These tags are algorithmically assigned and manually
     * verified by nytimes.com production staff.
* <p>
     * Examples include:
     * <ol>
     * <li>Surfing
     * <li>Venice Biennale
     * <li>Ranches
     * </ol>
     */
    protected List<String> generalOnlineDescriptors = new ArrayList<String>();

    /**
     * The GUID field specifies an integer that is guaranteed to be unique for
     * every document in the corpus.
     */
    protected int guid;

    /**
     * This field specifies the headline of the article as it appeared in the
     * print edition of the New York Times.
     */
    protected String headline;

    /**
     * The kicker is an additional piece of information printed as an
     * accompaniment to a news headline.
     */
    protected String kicker;

    /**
     * The lead paragraph field is the lead paragraph of the article.
     * Generally this field is populated with the first two paragraphs from the
     * article.
     */
    protected String leadParagraph;

    /**
     * The locations field specifies a list of geographic descriptors drawn
     * from a normalized controlled vocabulary that correspond to places
     * mentioned in the article. These tags are hand-assigned by a team of
     * library scientists working for the New York Times Indexing service.
     * Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Wellsboro (Pa)
     * <li>Kansas City (Kan)
     * <li>Park Slope (NYC)
     * </ol>
     */
    protected List<String> locations = new ArrayList<String>();

    /**
     * The names field specifies a list of names mentioned in the article.
     * These tags are hand-assigned by a team of library scientists working for
     * the New York Times Indexing service. Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Azza Fahmy
     * <li>George C. Izenour
     * <li>Chris Schenkel
     * </ol>
     */
    protected List<String> names = new ArrayList<String>();

    /**
     * This field specifies the desk in the New York Times newsroom that
     * produced the article. The desk is related to, but is not the same as, the
     * section in which the article appears.
     */
    protected String newsDesk;

    /**
     * The Normalized Byline field is the byline normalized to the form (last
     * name, first name).
*/
    protected String normalizedByline;

    /**
     * This field specifies a list of descriptors from a normalized controlled
     * vocabulary that correspond to topics mentioned in the article. These tags
     * are algorithmically assigned and manually verified by nytimes.com
     * production staff. Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Marriages
     * <li>Parks and Other Recreation Areas
     * <li>Cooking and Cookbooks
     * </ol>
     */
    protected List<String> onlineDescriptors = new ArrayList<String>();

    /**
     * This field specifies the headline displayed with the article on
     * nytimes.com. Often this differs from the headline used in print.
     */
    protected String onlineHeadline;

    /**
     * This field specifies the lead paragraph as defined by the producers at
     * nytimes.com.
     */
    protected String onlineLeadParagraph;

    /**
     * This field specifies a list of place names that correspond to geographic
     * locations mentioned in the article. These tags are algorithmically
     * assigned and manually verified by nytimes.com production staff.
     * Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Hollywood
     * <li>Los Angeles
     * <li>Arcadia
     * </ol>
     */
    protected List<String> onlineLocations = new ArrayList<String>();

    /**
     * This field specifies a list of organizations that correspond to
     * organizations mentioned in the article. These tags are algorithmically
     * assigned and manually verified by nytimes.com production staff.
     * Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Nintendo Company Limited
     * <li>Yeshiva University
     * <li>Rose Center
     * </ol>
     */
    protected List<String> onlineOrganizations = new ArrayList<String>();

    /**
     * This field specifies a list of people that correspond to individuals
     * mentioned in the article. These tags are algorithmically assigned and
     * manually verified by nytimes.com production staff.
* <p>
     * Examples include:
     * <ol>
     * <li>Lopez, Jennifer
     * <li>Joyce, James
     * <li>Robinson, Jackie
     * </ol>
     */
    protected List<String> onlinePeople = new ArrayList<String>();

    /**
     * This field specifies the section(s) on nytimes.com in which the article
     * is placed. If the article is placed in multiple sections, this field will
     * be specified as a ';'-delimited list.
     */
    protected String onlineSection;

    /**
     * This field specifies a list of authored works mentioned in the article.
     * These tags are algorithmically assigned and manually verified by
     * nytimes.com production staff. Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Matchstick Men (Movie)
     * <li>Blades of Glory (Movie)
     * <li>Bridge &amp; Tunnel (Play)
     * </ol>
     */
    protected List<String> onlineTitles = new ArrayList<String>();

    /**
     * This field specifies a list of organization names drawn from a normalized
     * controlled vocabulary that correspond to organizations mentioned in the
     * article. These tags are hand-assigned by a team of library scientists
     * working in the New York Times Indexing service. Initialized to an empty
     * list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Circuit City Stores Inc
     * <li>Delaware County Community College (Pa)
     * <li>CONNECTICUT GRAND OPERA
     * </ol>
     */
    protected List<String> organizations = new ArrayList<String>();

    /**
     * This field specifies the page of the section in the paper in which the
     * article appears. This is not an absolute pagination. An article that
     * appears on page 3 in section A occurs in the physical paper before an
     * article that occurs on page 1 of section F.
     */
    protected Integer page;

    /**
     * This field specifies a list of people from a normalized controlled
     * vocabulary that correspond to individuals mentioned in the article. These
     * tags are hand-assigned by a team of library scientists working in the New
     * York Times Indexing service.
* <p>
     * Examples include:
     * <ol>
     * <li>REAGAN, RONALD WILSON (PRES)
     * <li>BEGIN, MENACHEM (PRIME MIN)
     * <li>COLLINS, GLENN
     * </ol>
     */
    protected List<String> people = new ArrayList<String>();

    /**
     * This field specifies the date of the article's publication.
     */
    protected Date publicationDate;

    /**
     * This field specifies the day of the month on which the article was
     * published, always in the range 1-31.
     */
    protected Integer publicationDayOfMonth;

    /**
     * This field specifies the month in which the article was published, in
     * the range 1-12 where 1 is January, 2 is February, etc.
     */
    protected Integer publicationMonth;

    /**
     * This field specifies the year in which the article was published. This
     * value is in the range 1987-2007 for this collection.
     */
    protected Integer publicationYear;

    /**
     * This field specifies the section of the paper in which the article
     * appears. This is not the name of the section, but rather a letter or
     * number that indicates the section.
     */
    protected String section;

    /**
     * If the article is part of a regular series, this field specifies the name
     * of that series.
     */
    protected String seriesName;

    /**
     * The slug is a short string that uniquely identifies an article from all
     * other articles published on the same day. Please note, however, that
     * different articles on different days may have the same slug.
     * <p>
     * Sample slugs:
     * <ul>
     * <li>30other
     * <li>12reunion
     * </ul>
     */
    protected String slug;

    /** The file from which this object was read. */
    protected File sourceFile;

    /**
     * This field specifies a list of taxonomic classifiers that place this
     * article into a hierarchy of articles. The individual terms of each
     * taxonomic classifier are separated with the '/' character. These tags are
     * algorithmically assigned and manually verified by nytimes.com production
     * staff.
* <p>
     * Examples include:
     * <ol>
     * <li>Top/Features/Travel/Guides/Destinations/North America/United
     * States/Arizona
     * <li>Top/News/U.S./Rockies
     * <li>Top/Opinion
     * </ol>
     */
    protected List<String> taxonomicClassifiers = new ArrayList<String>();

    /**
     * This field specifies a list of authored works that correspond to works
     * mentioned in the article. These tags are hand-assigned by a team of
     * library scientists working in the New York Times Indexing service.
     * Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>Greystoke: The Legend of Tarzan, Lord of the Apes (Movie)
     * <li>Law &amp; Order (TV Program)
     * <li>BATTLEFIELD EARTH (BOOK)
     * </ol>
     */
    protected List<String> titles = new ArrayList<String>();

    /**
     * This field specifies a normalized list of terms describing the general
     * editorial category of the article. These tags are algorithmically
     * assigned and manually verified by nytimes.com production staff.
     * Initialized to an empty list.
     * <p>
     * Examples include:
     * <ol>
     * <li>REVIEW
     * <li>OBITUARY
     * <li>ANALYSIS
     * </ol>
     */
    protected List<String> typesOfMaterial = new ArrayList<String>();

    /**
     * This field specifies the location on nytimes.com of the article. The
     * alternate URL field is preferred to this field on articles published
     * on or after April 02, 2006, as the linked page will have richer content.
     */
    protected URL url;

    /**
     * This field specifies the number of words in the body of the article,
     * including the lead paragraph.
     */
    protected Integer wordCount;

    /**
     * The original XML text for this {@link Document}.
     */
    protected String originalText;

    /**
     * The corpus name for this {@link Document}, originally
     * {@link #CORPUS_NAME}. It can be replaced through the package-private
     * {@code setSourceCorpus} method.
     */
    private String corpusName = CORPUS_NAME;

    /**
     * {@inheritDoc}
     * <p>
     * Returns the corpus name currently stored on this document.
     */
    public String sourceCorpus() {
        return corpusName;
    }

    /**
     * Sets the source corpus name.
*/ void setSourceCorpus(String corpusName) { this.corpusName = corpusName; } /** * {@inheritDoc} */ public String rawText() { return getBody(); } /** * {@inheritDoc} */ public String originalText() { return originalText; } /** * {@inheritDoc} */ public String key() { return (alternateURL != null) ? alternateURL.toString() : url.toString(); } /** * {@inheritDoc} */ public long id() { return guid; } /** * {@inheritDoc}. */ public String title() { return headline; } /** * {@inheritDoc} */ public Set<String> categories() { return Sets.newHashSet(taxonomicClassifiers.iterator()); } /** * Accessor for the alternateURL property. * * @return the alternateURL */ public URL getAlternateURL() { return alternateURL; } /** * Accessor for the articleAbstract property. * * @return the articleAbstract */ public String getArticleAbstract() { return articleAbstract; } /** * Accessor for the authorBiography property. * * @return the authorBiography */ public String getAuthorBiography() { return authorBiography; } /** * Accessor for the banner property. * * @return the banner */ public String getBanner() { return banner; } /** * Accessor for the biographicalCategories property. * * @return the biographicalCategories */ public List<String> getBiographicalCategories() { return biographicalCategories; } /** * Accessor for the body property. * * @return the body */ public String getBody() { return body; } /** * Accessor for the byline property. * * @return the byline */ public String getByline() { return byline; } /** * Accessor for the columnName property. * * @return the columnName */ public String getColumnName() { return columnName; } /** * Accessor for the columnNumber property. * * @return the columnNumber */ public Integer getColumnNumber() { return columnNumber; } /** * Accessor for the correctionDate property. * * @return the correctionDate */ public Date getCorrectionDate() { return correctionDate; } /** * Accessor for the correctionText property. 
* * @return the correctionText */ public String getCorrectionText() { return correctionText; } /** * Accessor for the credit property. * * @return the credit */ public String getCredit() { return credit; } /** * Accessor for the dateline property. * * @return the dateline */ public String getDateline() { return dateline; } /** * Accessor for the dayOfWeek property. * * @return the dayOfWeek */ public String getDayOfWeek() { return dayOfWeek; } /** * Accessor for the descriptors property. * * @return the descriptors */ public List<String> getDescriptors() { return descriptors; } /** * Accessor for the featurePage property. * * @return the featurePage */ public String getFeaturePage() { return featurePage; } /** * Accessor for the generalOnlineDescriptors property. * * @return the generalOnlineDescriptors */ public List<String> getGeneralOnlineDescriptors() { return generalOnlineDescriptors; } /** * Accessor for the guid property. * * @return the guid */ public int getGuid() { return guid; } /** * Accessor for the headline property. * * @return the headline */ public String getHeadline() { return headline; } /** * Accessor for the kicker property. * * @return the kicker */ public String getKicker() { return kicker; } /** * Accessor for the leadParagraph property. * * @return the leadParagraph */ public String getLeadParagraph() { return leadParagraph; } /** * Accessor for the locations property. * * @return the locations */ public List<String> getLocations() { return locations; } /** * Accessor for the names property. * * @return the names */ public List<String> getNames() { return names; } /** * Accessor for the newsDesk property. * * @return the newsDesk */ public String getNewsDesk() { return newsDesk; } /** * Accessor for the normalizedByline property. * * @return the normalizedByline */ public String getNormalizedByline() { return normalizedByline; } /** * Accessor for the onlineDescriptors property. 
* * @return the onlineDescriptors */ public List<String> getOnlineDescriptors() { return onlineDescriptors; } /** * Accessor for the onlineHeadline property. * * @return the onlineHeadline */ public String getOnlineHeadline() { return onlineHeadline; } /** * Accessor for the onlineLeadParagraph property. * * @return the onlineLeadParagraph */ public String getOnlineLeadParagraph() { return onlineLeadParagraph; } /** * Accessor for the onlineLocations property. * * @return the onlineLocations */ public List<String> getOnlineLocations() { return onlineLocations; } /** * Accessor for the onlineOrganizations property. * * @return the onlineOrganizations */ public List<String> getOnlineOrganizations() { return onlineOrganizations; } /** * Accessor for the onlinePeople property. * * @return the onlinePeople */ public List<String> getOnlinePeople() { return onlinePeople; } /** * Accessor for the onlineSection property. * * @return the onlineSection */ public String getOnlineSection() { return onlineSection; } /** * Accessor for the onlineTitles property. * * @return the onlineTitles */ public List<String> getOnlineTitles() { return onlineTitles; } /** * Accessor for the organizations property. * * @return the organizations */ public List<String> getOrganizations() { return organizations; } /** * Accessor for the page property. * * @return the page */ public Integer getPage() { return page; } /** * Accessor for the people property. * * @return the people */ public List<String> getPeople() { return people; } /** * Accessor for the publicationDate property. * * @return the publicationDate */ public Date getPublicationDate() { return publicationDate; } /** * Accessor for the publicationDayOfMonth property. * * @return the publicationDayOfMonth */ public Integer getPublicationDayOfMonth() { return publicationDayOfMonth; } /** * Accessor for the publicationMonth property. 
* * @return the publicationMonth */ public Integer getPublicationMonth() { return publicationMonth; } /** * Accessor for the publicationYear property. * * @return the publicationYear */ public Integer getPublicationYear() { return publicationYear; } /** * Accessor for the section property. * * @return the section */ public String getSection() { return section; } /** * Accessor for the seriesName property. * * @return the seriesName */ public String getSeriesName() { return seriesName; } /** * Accessor for the slug property. * * @return the slug */ public String getSlug() { return slug; } /** * Accessor for the sourceFile property. * * @return the sourceFile */ public File getSourceFile() { return sourceFile; } /** * Accessor for the taxonomicClassifiers property. * * @return the taxonomicClassifiers */ public List<String> getTaxonomicClassifiers() { return taxonomicClassifiers; } /** * Accessor for the titles property. * * @return the titles */ public List<String> getTitles() { return titles; } /** * Accessor for the typesOfMaterial property. * * @return the typesOfMaterial */ public List<String> getTypesOfMaterial() { return typesOfMaterial; } /** * Accessor for the url property. * * @return the url */ public URL getUrl() { return url; } /** * Accessor for the wordCount property. * * @return the wordCount */ public Integer getWordCount() { return wordCount; } /** * Left justify a string by forcing it to be the specified length. This is * done by concatonating space characters to the end of the string until the * string is of the specified length. If, however, the string is initially * longer than the specified length then the original string is returned. * * @param s * A string. * @param length * The target length for the string. * @return A left-justified string. 
*/ private String ljust(String s, Integer length) { if (s.length() >= length) { return s; } length -= s.length(); StringBuffer sb = new StringBuffer(); for (Integer i = 0; i < length; i++) { sb.append(" "); } return s + sb.toString(); } /** * Setter for the alternateURL property. * * @param alternateURL * the alternativeURL to set */ public void setAlternateURL(URL alternateURL) { this.alternateURL = alternateURL; } /** * Setter for the articleAbstract property. * * @param articleAbstract * the articleAbstract to set */ public void setArticleAbstract(String articleAbstract) { this.articleAbstract = articleAbstract; } /** * Setter for the authorBiography property. * * @param authorBiography * the authorBiography to set */ public void setAuthorBiography(String authorBiography) { this.authorBiography = authorBiography; } /** * Setter for the banner property. * * @param banner * the banner to set */ public void setBanner(String banner) { this.banner = banner; } /** * Setter for the biographicalCategories property. * * @param biographicalCategories * the biographicalCategories to set */ public void setBiographicalCategories(List<String> biographicalCategories) { this.biographicalCategories = biographicalCategories; } /** * Setter for the original text. * * @param text The original text to set. */ public void setOriginalText(String text) { this.originalText = text; } /** * Setter for the body property. * * @param body * the body to set */ public void setBody(String body) { this.body = StringEscapeUtils.unescapeXml(body); } /** * Setter for the byline property. * * @param byline * the byline to set */ public void setByline(String byline) { this.byline = byline; } /** * Setter for the columnName property. * * @param columnName * the columnName to set */ public void setColumnName(String columnName) { this.columnName = columnName; } /** * Setter for the columnNumber property. 
* * @param columnNumber * the columnNumber to set */ public void setColumnNumber(Integer columnNumber) { this.columnNumber = columnNumber; } /** * Setter for the correctionDate property. * * @param correctionDate * the correctionDate to set */ public void setCorrectionDate(Date correctionDate) { this.correctionDate = correctionDate; } /** * Setter for the correctionText property. * * @param correctionText * the correctionText to set */ public void setCorrectionText(String correctionText) { this.correctionText = correctionText; } /** * Setter for the credit property. * * @param credit * the credit to set */ public void setCredit(String credit) { this.credit = credit; } /** * Setter for the dateline property. * * @param dateline * the dateline to set */ public void setDateline(String dateline) { this.dateline = dateline; } /** * Setter for the dayOfWeek property. * * @param dayOfWeek * the dayOfWeek to set */ public void setDayOfWeek(String dayOfWeek) { this.dayOfWeek = dayOfWeek; } /** * Setter for the descriptors property. * * @param descriptors * the descriptors to set */ public void setDescriptors(List<String> descriptors) { this.descriptors = descriptors; } /** * Setter for the featurePage property. * * @param featurePage * the featurePage to set */ public void setFeaturePage(String featurePage) { this.featurePage = featurePage; } /** * Setter for the generalOnlineDescriptors property. * * @param generalOnlineDescriptors * the generalOnlineDescriptors to set */ public void setGeneralOnlineDescriptors(List<String> generalOnlineDescriptors) { this.generalOnlineDescriptors = generalOnlineDescriptors; } /** * Setter for the guid property. * * @param guid * the guid to set */ public void setGuid(int guid) { this.guid = guid; } /** * Setter for the headline property. * * @param headline * the headline to set */ public void setHeadline(String headline) { this.headline = headline; } /** * Setter for the kicker property. 
* * @param kicker * the kicker to set */ public void setKicker(String kicker) { this.kicker = kicker; } /** * Setter for the leadParagraph property. * * @param leadParagraph * the leadParagraph to set */ public void setLeadParagraph(String leadParagraph) { this.leadParagraph = leadParagraph; } /** * Setter for the locations property. * * @param locations * the locations to set */ public void setLocations(List<String> locations) { this.locations = locations; } /** * Setter for the names property. * * @param names * the names to set */ public void setNames(List<String> names) { this.names = names; } /** * Setter for the newsDesk property. * * @param newsDesk * the newsDesk to set */ public void setNewsDesk(String newsDesk) { this.newsDesk = newsDesk; } /** * Setter for the normalizedByline property. * * @param normalizedByline * the normalizedByline to set */ public void setNormalizedByline(String normalizedByline) { this.normalizedByline = normalizedByline; } /** * Setter for the onlineDescriptors property. * * @param onlineDescriptors * the onlineDescriptors to set */ public void setOnlineDescriptors(List<String> onlineDescriptors) { this.onlineDescriptors = onlineDescriptors; } /** * Setter for the onlineHeadline property. * * @param onlineHeadline * the onlineHeadline to set */ public void setOnlineHeadline(String onlineHeadline) { this.onlineHeadline = onlineHeadline; } /** * Setter for the onlineLeadParagraph property. * * @param onlineLeadParagraph * the onlineLeadParagraph to set */ public void setOnlineLeadParagraph(String onlineLeadParagraph) { this.onlineLeadParagraph = onlineLeadParagraph; } /** * Setter for the onlineLocations property. * * @param onlineLocations * the onlineLocations to set */ public void setOnlineLocations(List<String> onlineLocations) { this.onlineLocations = onlineLocations; } /** * Setter for the onlineOrganizations property. 
* * @param onlineOrganizations * the onlineOrganizations to set */ public void setOnlineOrganizations(List<String> onlineOrganizations) { this.onlineOrganizations = onlineOrganizations; } /** * Setter for the onlinePeople property. * * @param onlinePeople * the onlinePeople to set */ public void setOnlinePeople(List<String> onlinePeople) { this.onlinePeople = onlinePeople; } /** * Setter for the onlineSection property. * * @param onlineSection * the onlineSection to set */ public void setOnlineSection(String onlineSection) { this.onlineSection = onlineSection; } /** * Setter for the onlineTitles property. * * @param onlineTitles * the onlineTitles to set */ public void setOnlineTitles(List<String> onlineTitles) { this.onlineTitles = onlineTitles; } /** * Setter for the organizations property. * * @param organizations * the organizations to set */ public void setOrganizations(List<String> organizations) { this.organizations = organizations; } /** * Setter for the page property. * * @param page * the page to set */ public void setPage(Integer page) { this.page = page; } /** * Setter for the people property. * * @param people * the people to set */ public void setPeople(List<String> people) { this.people = people; } /** * Setter for the publicationDate property. * * @param publicationDate * the publicationDate to set */ public void setPublicationDate(Date publicationDate) { this.publicationDate = publicationDate; } /** * Setter for the publicationDayOfMonth property. * * @param publicationDayOfMonth * the publicationDayOfMonth to set */ public void setPublicationDayOfMonth(Integer publicationDayOfMonth) { this.publicationDayOfMonth = publicationDayOfMonth; } /** * Setter for the publicationMonth property. * * @param publicationMonth * the publicationMonth to set */ public void setPublicationMonth(Integer publicationMonth) { this.publicationMonth = publicationMonth; } /** * Setter for the publicationYear property. 
* * @param publicationYear * the publicationYear to set */ public void setPublicationYear(Integer publicationYear) { this.publicationYear = publicationYear; } /** * Setter for the section property. * * @param section * the section to set */ public void setSection(String section) { this.section = section; } /** * Setter for the seriesName property. * * @param seriesName * the seriesName to set */ public void setSeriesName(String seriesName) { this.seriesName = seriesName; } /** * Setter for the slug property. * * @param slug * the slug to set */ public void setSlug(String slug) { this.slug = slug; } /** * Setter for the sourceFile property. * * @param sourceFile * the sourceFile to set */ public void setSourceFile(File sourceFile) { this.sourceFile = sourceFile; } /** * Setter for the taxonomicClassifiers property. * * @param taxonomicClassifiers * the taxonomicClassifiers to set */ public void setTaxonomicClassifiers(List<String> taxonomicClassifiers) { this.taxonomicClassifiers = taxonomicClassifiers; } /** * Setter for the titles property. * * @param titles * the titles to set */ public void setTitles(List<String> titles) { this.titles = titles; } /** * Setter for the typesOfMaterial property. * * @param typesOfMaterial * the typesOfMaterial to set */ public void setTypesOfMaterial(List<String> typesOfMaterial) { this.typesOfMaterial = typesOfMaterial; } /** * Setter for the url property. * * @param url * the url to set */ public void setUrl(URL url) { this.url = url; } /** * Setter for the wordCount property. 
* * @param wordCount * the wordCount to set */
    public void setWordCount(Integer wordCount) {
        this.wordCount = wordCount;
    }

    /**
     * Renders this document as a multi-line listing with one
     * {@code label: value} line per property, in a fixed order.
     * <p>
     * Each line is produced by
     * {@link #appendProperty(StringBuilder, String, Object)}, so values have
     * their whitespace collapsed and unset ({@code null}) properties show up
     * as the text {@code null}.
     *
     * @return the property listing; every line ends with {@code '\n'}
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        // The buffer never escapes this method, so the unsynchronized
        // StringBuilder is sufficient (StringBuffer locks on every append).
        StringBuilder sb = new StringBuilder();
        // NOTE: this label reads "alternativeURL" although the field is named
        // alternateURL; it is kept unchanged so the output stays the same.
        appendProperty(sb, "alternativeURL", alternateURL);
        appendProperty(sb, "articleAbstract", articleAbstract);
        appendProperty(sb, "authorBiography", authorBiography);
        appendProperty(sb, "banner", banner);
        appendProperty(sb, "biographicalCategories", biographicalCategories);
        appendProperty(sb, "body", body);
        appendProperty(sb, "byline", byline);
        appendProperty(sb, "columnName", columnName);
        appendProperty(sb, "columnNumber", columnNumber);
        appendProperty(sb, "correctionDate", correctionDate);
        appendProperty(sb, "correctionText", correctionText);
        appendProperty(sb, "credit", credit);
        appendProperty(sb, "dateline", dateline);
        appendProperty(sb, "dayOfWeek", dayOfWeek);
        appendProperty(sb, "descriptors", descriptors);
        appendProperty(sb, "featurePage", featurePage);
        appendProperty(sb, "generalOnlineDescriptors", generalOnlineDescriptors);
        appendProperty(sb, "guid", guid);
        appendProperty(sb, "headline", headline);
        appendProperty(sb, "kicker", kicker);
        appendProperty(sb, "leadParagraph", leadParagraph);
        appendProperty(sb, "locations", locations);
        appendProperty(sb, "names", names);
        appendProperty(sb, "newsDesk", newsDesk);
        appendProperty(sb, "normalizedByline", normalizedByline);
        appendProperty(sb, "onlineDescriptors", onlineDescriptors);
        appendProperty(sb, "onlineHeadline", onlineHeadline);
        appendProperty(sb, "onlineLeadParagraph", onlineLeadParagraph);
        appendProperty(sb, "onlineLocations", onlineLocations);
        appendProperty(sb, "onlineOrganizations", onlineOrganizations);
        appendProperty(sb, "onlinePeople", onlinePeople);
        appendProperty(sb, "onlineSection", onlineSection);
        appendProperty(sb, "onlineTitles", onlineTitles);
        appendProperty(sb, "organizations", organizations);
        appendProperty(sb, "page", page);
        appendProperty(sb, "people", people);
        appendProperty(sb, "publicationDate", publicationDate);
        appendProperty(sb, "publicationDayOfMonth", publicationDayOfMonth);
        appendProperty(sb, "publicationMonth", publicationMonth);
        appendProperty(sb, "publicationYear", publicationYear);
        appendProperty(sb, "section", section);
        appendProperty(sb, "seriesName", seriesName);
        appendProperty(sb, "slug", slug);
        appendProperty(sb, "sourceFile", sourceFile);
        appendProperty(sb, "taxonomicClassifiers", taxonomicClassifiers);
        appendProperty(sb, "titles", titles);
        appendProperty(sb, "typesOfMaterial", typesOfMaterial);
        appendProperty(sb, "url", url);
        appendProperty(sb, "wordCount", wordCount);
        return sb.toString();
    }

    /**
     * Appends one {@code label: value} line for a property to the builder.
     * <p>
     * The label is {@code propertyName + ":"} passed through
     * {@code ljust(..., 45)} (a helper defined elsewhere in this class;
     * presumably it pads the label to 45 characters -- confirm against its
     * definition). A non-null value is converted with {@code toString()},
     * every run of whitespace in it is replaced by a single space, and the
     * result is trimmed. A {@code null} value is written as the text
     * {@code null}. The line is terminated with {@code '\n'}.
     *
     * @param sb
     *            the builder that receives the line
     * @param propertyName
     *            the label to print before the value
     * @param propertyValue
     *            the value to print; may be {@code null}
     */
    private void appendProperty(StringBuilder sb, String propertyName, Object propertyValue) {
        // Use a local rather than reassigning the parameter.
        String renderedValue = null;
        if (propertyValue != null) {
            renderedValue = propertyValue.toString().replaceAll("\\s+", " ").trim();
        }
        // Chained appends avoid building a throwaway concatenated String;
        // append((String) null) writes "null", matching string concatenation.
        sb.append(ljust(propertyName + ":", 45)).append(renderedValue).append('\n');
    }
}