gov.wa.wsdot.cms.App.java Source code

Java tutorial

Introduction

Here is the source code for gov.wa.wsdot.cms.App.java

Source

/*
 * Copyright (c) 2016 Washington State Department of Transportation
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
 *
 */

package gov.wa.wsdot.cms;

import gov.wa.wsdot.cms.shared.ChannelsAndPostingsBase;
import gov.wa.wsdot.cms.shared.ResourceItem;
import gov.wa.wsdot.cms.utils.Migration;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import com.google.gson.Gson;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

/**
 * Dump all CMS channels with parent references.
 */
public class App {
    // XML parser factory and builder; both are created in main() and the builder is reused for every XML file.
    static DocumentBuilderFactory factory;
    static DocumentBuilder builder;
    static HashMap<String, ResourceItem> nodeGuidHashMap = new HashMap<String, ResourceItem>();
    static String sitePath = "/sites/default/files/"; // Location of Drupal files.
    static String archiveFolder = ""; // Where the files are currently stored. Set from the command line in main().
    static String outputFile = ""; // This should be dynamically generated based on the templates being parsed.
    static String locationUri = "public://import"; // Default. Location of files to be imported. Either local or on the test web server. Overridden by -uri.
    // Postings keyed by their "Shortcut" attribute, which is matched against the page GUID in BuildPostings.
    static HashMap<String, ChannelsAndPostingsBase> postingsHashMap = new HashMap<String, ChannelsAndPostingsBase>();
    static HashMap<String, Integer> templatesCountMap = new HashMap<String, Integer>(); // Template GUID -> number of postings that will be processed.
    static HashMap<String, Integer> templatesExcludedCountMap = new HashMap<String, Integer>(); // Template GUID -> number of postings skipped because the template is excluded.
    static HashMap<String, Integer> templatesExpiredCountMap = new HashMap<String, Integer>(); // Template GUID -> number of postings skipped because they are expired.
    static HashMap<String, Integer> resourcesCountMap = new HashMap<String, Integer>(); // Keep track of the number of each resource.
    static HashMap<String, String> templatesMap = new HashMap<String, String>(); // Template GUID -> template name; filled by buildTemplatesMap().
    // MongoDB client, database and collections; all are initialised in main().
    static MongoClient mongo;
    static DB db;
    static DBCollection channelCollection;
    static DBCollection postingCollection;
    static DBCollection resourcesCollection;
    static Set<String> highways = new HashSet<String>();
    static HashMap<String, String> redirectsMap = new HashMap<String, String>(); // legacyURL -> redirectURL
    static boolean reportOnly; // True when the 'r' flag is given: print the report without parsing page content or saving postings.

    /**
     * Program entry point: parses the command line, loads {@code Cha.xml} from the
     * archive folder and builds the channel and posting structures from it.
     *
     * <p>Recognised arguments, in the order they must appear:
     * <ul>
     *   <li>{@code -verbose} - echo the options as they are parsed;</li>
     *   <li>{@code -uri <location>} - override the default import location;</li>
     *   <li>{@code -r} - report only;</li>
     *   <li>{@code filename} - the archive folder (required).</li>
     * </ul>
     * A missing archive folder prints the usage text and terminates the JVM with a
     * non-zero exit status.
     *
     * @param args  command-line arguments.
     * @throws ParserConfigurationException if the XML document builder cannot be created
     * @throws SAXException if {@code Cha.xml} (or a file parsed later) is not well-formed
     * @throws IOException if an XML file cannot be read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        int i = 0;
        boolean vflag = false;
        reportOnly = false;

        while (i < args.length && args[i].startsWith("-")) {
            String arg = args[i++];

            // Use this for more detailed output.
            if (arg.equals("-verbose")) {
                System.out.println("Verbose mode on");
                vflag = true;
            }

            // Use this to check for arguments that require more arguments.
            else if (arg.equals("-uri")) {
                if (i < args.length) {
                    locationUri = args[i++];
                } else {
                    // e.g. http://test.wsdot.wa.gov/wsdot/migration
                    System.err.println("-uri requires a local or network location");
                }
                if (vflag) {
                    System.out.println("Uri = " + locationUri);
                }
            }

            // Use this to check for a series of flag arguments.
            else {
                for (int j = 1; j < arg.length(); j++) {
                    char flag = arg.charAt(j);
                    switch (flag) {
                    case 'r':
                        if (vflag) {
                            System.out.println("Report only"); // Does not parse nodes or build content
                        }
                        reportOnly = true;
                        break;
                    default:
                        System.err.println("App: illegal option " + flag);
                        break;
                    }
                }
            }
        }

        // filename is the only required parameter. Validate it before doing any
        // other work and signal the failure to the caller with a non-zero status.
        if (i >= args.length) {
            System.err.println("Usage: App [-verbose] [-r] [-uri aUri] filename");
            System.exit(1);
            return; // Not reached; keeps the control flow explicit for readers.
        }

        archiveFolder = args[i];
        if (vflag) {
            System.out.println("Archive folder: " + archiveFolder);
        }

        buildTemplatesMap();

        // Factory API which allows us to get a parser which produces DOM object
        // trees from XML documents.
        factory = DocumentBuilderFactory.newInstance();
        builder = factory.newDocumentBuilder();

        mongo = new MongoClient("localhost");
        try {
            db = mongo.getDB("public");
            channelCollection = db.getCollection("channels");
            postingCollection = db.getCollection("postings");
            resourcesCollection = db.getCollection("resources");

            // TODO automate this selection.
            Document document = builder.parse(new File(archiveFolder + File.separator + "Cha.xml"));
            document.getDocumentElement().normalize();

            BuildChannels(document); // CMS containers only import from the Cha.xml file
            BuildPostings(document); // CMS postings / pages from Cha.xml and Fol.xml files for content.
        } finally {
            // Release the client's connections even when parsing or saving fails.
            mongo.close();
        }
    }

    /**
     * Register every known public template in {@code templatesMap}
     * (template GUID -> template name).
     */
    private static void buildTemplatesMap() {
        // One row per public template: { GUID, name }. The trailing comment is the
        // template's location in the CMS.
        final String[][] publicTemplates = {
            { "{DFF8BE08-33C2-4C8B-912F-75DA9E798298}", "Default" }, // /Templates/Standard/
            { "{461E556C-E713-48BB-8B56-B9C9C6B8529D}", "DefaultwNoLeft" }, // /Templates/Standard/
            { "{78FF8841-6880-444C-9BFA-56732F591D17}", "NewsItem" }, // /Templates/News/
            { "{00857788-CFB2-4AB6-A616-44650EAED222}", "ProjectSpecifics" }, // /Templates/Projects/
            { "{CAC413A0-56C7-4EB8-83C9-D2C9E704800A}", "HomePage" }, // /Templates/Home/
            { "{06550835-2580-403E-8D28-64A7866E5EDD}", "NewsRoom" }, // /Templates/News/
            { "{2ADE8FE3-7F45-4CED-BE89-A5BD1DC4E0E7}", "NewsArchive" }, // /Templates/News/
            { "{ED4E5A97-4608-4D7F-8ECA-CC5CE6C3B0D3}", "NewsHome" }, // /Templates/News/
            { "{302FD56C-9930-4A45-969C-29901D086242}", "CompletedProject" }, // /Templates/Projects/
            { "{5E703BE3-B25C-4841-95AE-C96B695BC513}", "ProjectFeedback" }, // /Templates/Projects/
            { "{60C905CE-E8F3-46BE-A40E-C31CA064C999}", "oldcompleted" }, // /Templates/Projects/
            { "{66EA195A-46C1-4072-86AE-0480A7693696}", "ProjectTemplate" }, // /Templates/Projects/
            { "{882B84F7-EE64-478A-9A2F-2EAF6BF4AFAD}", "ProjectHome" }, // /Templates/Projects/
            { "{AB5D347F-4C85-4648-8204-F37C465CC240}", "ProjectHomeFacetmap" }, // /Templates/Projects/
            { "{B5034FF2-6088-4D13-B2D5-0D33FBED6A12}", "QuarterlyReport" }, // /Templates/Projects/
            { "{8FE13678-A62C-4B62-9E32-7E09625CE2D3}", "Monthly Reports Home" }, // /Templates/Public Transportation/
            { "{D5A341D3-4CAC-40D9-BB3D-E581A0D9F93F}", "Monthly Report" }, // /Templates/Public Transportation/
            { "{3850D8CD-9459-4BF9-BA82-42C46448BC51}", "TrafficContent" }, // /Templates/Traffic/
            { "{3A0C5395-E681-48AF-AE0E-B926B531B1BA}", "TextOnly" }, // /Templates/Traffic/
            { "{070C8446-4AC2-4084-A9A2-C3B42C686C3A}", "PropertyListFacetmap" }, // /Templates/RealEstate/
            { "{67E38140-7C4A-4984-A6B1-ECC3C698CB92}", "ScraperTest" }, // /Templates/Administrative/
            { "{688DA8B1-D561-4DE2-9E35-4268570EFBC8}", "CrawlPage" }, // /Templates/Administrative/
            { "{CCCC8256-DA97-4659-A06D-9AD1BACBD6BC}", "PostingStatus" }, // /Templates/Administrative/
            { "{F4279E09-2113-4DF5-B98F-D49B756CE7D9}", "PlaceholderResources" }, // /Templates/Administrative/
            { "{3C41E93A-6917-49A4-AAE5-52EEB96D4453}", "Certificate of Status" }, // /Templates/Aviation/
            { "{8D71BB62-9FA1-42B4-B255-3E6D9E577F97}", "HOV Dispute" }, // /Templates/Feedback/
            { "{A79169D4-BA75-40AB-A315-0C4B464A65FE}", "HOV Report" }, // /Templates/Feedback/
            { "{B6854085-DB6C-4950-A771-C343BF38A7EA}", "CVISN Form" }, // /Templates/Feedback/
            { "{E572F52B-B545-45E9-9EC1-BA5A09DAA823}", "Feedback" }, // /Templates/Feedback/
            { "{F381A6CC-D14A-491B-BE41-FCA474C65D74}", "Web Feedback" }, // /Templates/Feedback/
            { "{FF56C752-58E5-4692-BD9A-3A0538C560AD}", "Graffiti Feedback" }, // /Templates/Feedback/
            { "{1493A417-5C0C-457B-B5F1-5FC70EB05C13}", "Redirect" }, // /Templates/Standard/
            { "{89BC0F62-6974-49ED-850C-BA6BB9839370}", "Training" }, // /Templates/Standard/
            { "{A114DAFA-DFB0-48CC-B133-E8C29251E901}", "DefaultwUpdateBox" }, // /Templates/Standard/
            { "{A3FD3A59-1EF6-49B8-8357-4728B9FFA36C}", "ResourceList" }, // /Templates/Standard/
            { "{320598FA-2DBE-4DF8-A6C7-E0BDAD394A3C}", "Abstract" }, // /Templates/Publications/
            { "{C7DD44E7-16E5-4EDD-A630-E5B4EC316A60}", "AbstractsList" }, // /Templates/Publications/
            { "{46978165-408D-4F68-AEC2-1FB11FC79D59}", "Abstract" }, // /Templates/Research/
            { "{74A5FFA4-F529-4A22-839F-494C22634805}", "AbstractsList" }, // /Templates/Research/
            { "{2343FC1F-F44D-4EF0-AB19-88D2DF435D4D}", "FeaturedEmployeeListing" }, // /Templates/Employment/
            { "{94967861-7B8F-4634-B72D-D6EDFA6D6905}", "HomePage" }, // /Templates/Employment/
            { "{9F061DB7-C5C7-4615-B280-17520EAB5B5A}", "FeaturedEmployee" }, // /Templates/Employment/
            { "{D9BC634C-2BF1-4831-9908-4BEACB2C3505}", "JobsListing" }, // /Templates/Employment/
            { "{DB90941A-0FF2-40D7-8C99-F38F811E6B37}", "Redirect" }, // /Templates/Standard/Redirect/
            { "{2EA65D29-2828-468C-AC7E-2DC7BDF077B3}", "CompletedProjectTemplate" }, // /Templates/Projects/oldcompleted/
            { "{D8B8A0B1-F728-42BF-A796-78A8AD076816}", "DefaultTraining" }, // /Templates/Standard/Training/
        };

        for (String[] template : publicTemplates) {
            templatesMap.put(template[0], template[1]);
        }
    }

    /**
     * Build a list of all channels and their parent nodes, print each channel's
     * path to standard output and save the list to the channel collection.
     *
     * <p>For every {@code Channel} element the attributes of its first
     * {@code Version} child are read. The {@code Location} attribute is stripped
     * of its first and last character (the leading and trailing slash), the
     * {@code Channels} prefix is removed and the remaining path segments are
     * joined with {@code ;}. A channel whose {@code Location} is shorter than two
     * characters is skipped.
     *
     * @param document parsed DOM containing the {@code Channel} elements
     */
    private static void BuildChannels(Document document) {
        List<ChannelsAndPostingsBase> parentReferences = new ArrayList<ChannelsAndPostingsBase>(); // Name, Parent

        NodeList channelNodeList = document.getElementsByTagName("Channel");
        int channelNodeListLength = channelNodeList.getLength();

        if (reportOnly) {
            System.out.println();
            System.out.println("=== REPORT ONLY ===");
        }

        System.out.println();
        System.out.println("Number of Channel nodes: " + channelNodeListLength);
        System.out.println();

        // Build a list of channel names and their parent nodes.
        for (int i = 0; i < channelNodeListLength; i++) {
            Element element = (Element) channelNodeList.item(i);

            // All channel metadata lives on the first Version child; look it up once.
            NamedNodeMap version = element.getElementsByTagName("Version").item(0).getAttributes();

            String guid = element.getAttribute("GUID");
            int internalId = Integer.parseInt(version.getNamedItem("InternalID").getNodeValue());
            String name = version.getNamedItem("Name").getNodeValue();
            String parent = version.getNamedItem("Parent").getNodeValue();
            String rawLocation = version.getNamedItem("Location").getNodeValue();

            // Root channel doesn't have leading and trailing slashes. Ignore it.
            if (rawLocation.length() < 2) {
                continue;
            }

            // Remove leading and trailing slashes.
            String location = rawLocation.substring(1, rawLocation.length() - 1);

            location = location.replaceAll("Channels/?", "");
            String[] locationArray = location.split("/");
            location = Arrays.toString(locationArray).replace("[", "").replace("]", "").replace(", ", ";");
            int level;

            if (location.isEmpty()) {
                level = 0;
                System.out.println(name); // Root channel so just print the channel name
            } else {
                level = locationArray.length;
                System.out.println(location + ";" + name);
            }

            // The trailing 1 presumably marks the entry as a channel (isChannel); confirm
            // against the ChannelsAndPostingsBase constructor.
            parentReferences.add(new ChannelsAndPostingsBase(internalId, guid, name, parent, level, location, 1));
        }

        saveChannels(parentReferences, channelCollection);
    }

    /**
     * Build a list of postings, their parent nodes, content and associated resources.
     *
     * <p>The method works in three steps:
     * <ol>
     *   <li>Every {@code Posting} element of {@code document} is classified as
     *       excluded (its template is on the exclusion list), expired (its
     *       {@code Expiredate} is not the "never expires" sentinel) or valid.
     *       Valid postings are registered in {@code postingsHashMap} under their
     *       {@code Shortcut} attribute.</li>
     *   <li>Unless running in report-only mode, {@code Fol.xml} pages are matched
     *       to those postings by page GUID, and their name, legacy URL, creation
     *       date and content are filled in.</li>
     *   <li>A summary report is printed and, unless running in report-only mode,
     *       the postings are saved.</li>
     * </ol>
     *
     * @param document parsed DOM containing the {@code Posting} elements
     * @throws ParserConfigurationException declared for callers; not thrown by the code visible here
     * @throws SAXException if {@code Fol.xml} is not well-formed
     * @throws IOException if {@code Fol.xml} cannot be read
     */
    private static void BuildPostings(Document document)
            throws ParserConfigurationException, SAXException, IOException {

        // Shortcuts (page GUIDs) of postings that are skipped. Kept as a list so the
        // "valid pages" figure below counts every skipped posting.
        List<String> pagesToExclude = new ArrayList<String>();

        // Public templates whose postings are ignored. These are summary or rollup
        // types that contain little or no content and will be generated separately in Drupal.
        Set<String> templatesToExclude = new HashSet<String>(Arrays.asList(
                "{CAC413A0-56C7-4EB8-83C9-D2C9E704800A}", // /Templates/Home/HomePage
                "{06550835-2580-403E-8D28-64A7866E5EDD}", // /Templates/News/NewsRoom
                "{2ADE8FE3-7F45-4CED-BE89-A5BD1DC4E0E7}", // /Templates/News/NewsArchive
                "{ED4E5A97-4608-4D7F-8ECA-CC5CE6C3B0D3}", // /Templates/News/NewsHome
                "{302FD56C-9930-4A45-969C-29901D086242}", // /Templates/Projects/CompletedProject
                "{5E703BE3-B25C-4841-95AE-C96B695BC513}", // /Templates/Projects/ProjectFeedback
                "{60C905CE-E8F3-46BE-A40E-C31CA064C999}", // /Templates/Projects/oldcompleted
                "{882B84F7-EE64-478A-9A2F-2EAF6BF4AFAD}", // /Templates/Projects/ProjectHome
                "{AB5D347F-4C85-4648-8204-F37C465CC240}", // /Templates/Projects/ProjectHomeFacetmap
                "{B5034FF2-6088-4D13-B2D5-0D33FBED6A12}", // /Templates/Projects/QuarterlyReport
                "{8FE13678-A62C-4B62-9E32-7E09625CE2D3}", // /Templates/Public Transportation/Monthly Reports Home
                "{D5A341D3-4CAC-40D9-BB3D-E581A0D9F93F}", // /Templates/Public Transportation/Monthly Report
                "{3850D8CD-9459-4BF9-BA82-42C46448BC51}", // /Templates/Traffic/TrafficContent
                "{3A0C5395-E681-48AF-AE0E-B926B531B1BA}", // /Templates/Traffic/TextOnly
                "{070C8446-4AC2-4084-A9A2-C3B42C686C3A}", // /Templates/RealEstate/PropertyListFacetmap
                "{67E38140-7C4A-4984-A6B1-ECC3C698CB92}", // /Templates/Administrative/ScraperTest
                "{688DA8B1-D561-4DE2-9E35-4268570EFBC8}", // /Templates/Administrative/CrawlPage
                "{CCCC8256-DA97-4659-A06D-9AD1BACBD6BC}", // /Templates/Administrative/PostingStatus
                "{F4279E09-2113-4DF5-B98F-D49B756CE7D9}", // /Templates/Administrative/PlaceholderResources
                "{3C41E93A-6917-49A4-AAE5-52EEB96D4453}", // /Templates/Aviation/Certificate of Status
                "{8D71BB62-9FA1-42B4-B255-3E6D9E577F97}", // /Templates/Feedback/HOV Dispute
                "{A79169D4-BA75-40AB-A315-0C4B464A65FE}", // /Templates/Feedback/HOV Report
                "{B6854085-DB6C-4950-A771-C343BF38A7EA}", // /Templates/Feedback/CVISN Form
                "{E572F52B-B545-45E9-9EC1-BA5A09DAA823}", // /Templates/Feedback/Feedback
                "{F381A6CC-D14A-491B-BE41-FCA474C65D74}", // /Templates/Feedback/Web Feedback
                "{FF56C752-58E5-4692-BD9A-3A0538C560AD}", // /Templates/Feedback/Graffiti Feedback
                "{89BC0F62-6974-49ED-850C-BA6BB9839370}", // /Templates/Standard/Training
                "{A114DAFA-DFB0-48CC-B133-E8C29251E901}", // /Templates/Standard/DefaultwUpdateBox
                "{A3FD3A59-1EF6-49B8-8357-4728B9FFA36C}", // /Templates/Standard/ResourceList
                "{320598FA-2DBE-4DF8-A6C7-E0BDAD394A3C}", // /Templates/Publications/Abstract
                "{C7DD44E7-16E5-4EDD-A630-E5B4EC316A60}", // /Templates/Publications/AbstractsList
                "{74A5FFA4-F529-4A22-839F-494C22634805}", // /Templates/Research/AbstractsList
                "{2343FC1F-F44D-4EF0-AB19-88D2DF435D4D}", // /Templates/Employment/FeaturedEmployeeListing
                "{94967861-7B8F-4634-B72D-D6EDFA6D6905}", // /Templates/Employment/HomePage
                "{9F061DB7-C5C7-4615-B280-17520EAB5B5A}", // /Templates/Employment/FeaturedEmployee
                "{D9BC634C-2BF1-4831-9908-4BEACB2C3505}", // /Templates/Employment/JobsListing
                "{2EA65D29-2828-468C-AC7E-2DC7BDF077B3}", // /Templates/Projects/oldcompleted/CompletedProjectTemplate
                "{D8B8A0B1-F728-42BF-A796-78A8AD076816}" // /Templates/Standard/Training/DefaultTraining
        ));

        NodeList postingNodeList = document.getElementsByTagName("Posting");
        int postingNodeListLength = postingNodeList.getLength();
        System.out.println("Number of Posting nodes: " + postingNodeListLength);

        // Build a list of posting names and their parent nodes.
        for (int i = 0; i < postingNodeListLength; i++) {
            Element element = (Element) postingNodeList.item(i);

            // All posting metadata lives on the first Version child; look it up once.
            NamedNodeMap version = element.getElementsByTagName("Version").item(0).getAttributes();

            String guid = element.getAttribute("GUID");
            int internalId = Integer.parseInt(version.getNamedItem("InternalID").getNodeValue());
            String name = version.getNamedItem("Name").getNodeValue();
            String parent = version.getNamedItem("Parent").getNodeValue();
            String shortcut = version.getNamedItem("Shortcut").getNodeValue();
            String template = version.getNamedItem("TemplateGuid").getNodeValue();
            String expireDate = version.getNamedItem("Expiredate").getNodeValue();

            // Skip templates which are summary or rollup types and contain little or no content.
            // These will be generated separately in Drupal.
            if (templatesToExclude.contains(template)) {
                pagesToExclude.add(shortcut);
                bumpTemplateCount(templatesExcludedCountMap, template);
                continue;
            }

            // Year 3000. Any other date, the page is expired.
            if (!expireDate.equalsIgnoreCase("401769")) {
                pagesToExclude.add(shortcut);
                bumpTemplateCount(templatesExpiredCountMap, template);
                continue;
            }

            bumpTemplateCount(templatesCountMap, template);

            // A location without leading and trailing slashes cannot be trimmed below,
            // so the posting is not registered.
            String rawLocation = version.getNamedItem("Location").getNodeValue();
            if (rawLocation.length() < 2) {
                continue;
            }

            // Remove leading and trailing slashes.
            String location = rawLocation.substring(1, rawLocation.length() - 1);

            location = location.replaceAll("Channels/?", "");
            String[] locationArray = location.split("/");
            location = Arrays.toString(locationArray).replace("[", "").replace("]", "").replace(", ", ">>");
            int level = location.isEmpty() ? 0 : locationArray.length;

            ChannelsAndPostingsBase posting = new ChannelsAndPostingsBase();
            posting.setUid(internalId);
            posting.setGuid(guid);
            posting.setName(name);
            posting.setParent(parent);
            posting.setLevel(level);
            posting.setLocation(location);
            posting.setIsChannel(0);
            posting.setTemplate(template);

            postingsHashMap.put(shortcut, posting);
        }

        // Load and parse the Folders and Pages XML document which contains content for the pages.
        // TODO automate this selection.
        Document folDocument = builder.parse(new File(archiveFolder + File.separator + "Fol.xml"));
        folDocument.getDocumentElement().normalize();
        NodeList pageNodeList = folDocument.getElementsByTagName("Page");
        int pageNodeListLength = pageNodeList.getLength();
        System.out.println("Number of Page nodes: " + pageNodeListLength);
        System.out.println("Number of valid pages: " + (pageNodeListLength - pagesToExclude.size()));

        System.out.println();

        if (!reportOnly) {
            // Hash-based copy so the per-page membership test does not scan the list.
            Set<String> excludedPageGuids = new HashSet<String>(pagesToExclude);

            for (int j = 0; j < pageNodeListLength; j++) {
                Node pageNode = pageNodeList.item(j);
                Element pageNodeElement = (Element) pageNode;
                String pageGuid = pageNodeElement.getAttribute("GUID");

                if (excludedPageGuids.contains(pageGuid)) {
                    continue;
                }

                NamedNodeMap pageVersion = pageNodeElement.getElementsByTagName("Version").item(0).getAttributes();
                String pageName = pageVersion.getNamedItem("Name").getNodeValue();

                // A page may have no registered posting (for example when no posting
                // refers to it). Skip it instead of failing with a NullPointerException.
                ChannelsAndPostingsBase posting = postingsHashMap.get(pageGuid);
                if (posting == null) {
                    System.err.println("No posting found for page " + pageGuid + " (" + pageName + "); skipping.");
                    continue;
                }

                posting.setPageName(pageName);
                String location = posting.getLocation().replace(">>", "/") + "/";
                posting.setLegacyURL(location + pageName + ".htm");

                String createdWhen = pageVersion.getNamedItem("CreatedWhen").getNodeValue();
                posting.setCreatedWhen(Migration.convertDays(Double.parseDouble(createdWhen)));

                // Fresh per-page collectors that traverseNodes fills and buildContent consumes.
                HashMap<String, HashMap<String, String>> controlsHashMap = new HashMap<String, HashMap<String, String>>();
                HashMap<String, ResourceItem> resourcesHashMap = new HashMap<String, ResourceItem>();
                HashMap<String, String> genericPropertiesHashMap = new HashMap<String, String>();

                traverseNodes(pageNode, pageGuid, controlsHashMap, resourcesHashMap,
                        genericPropertiesHashMap); // Crawl Page node.

                buildContent(pageGuid, controlsHashMap, resourcesHashMap, genericPropertiesHashMap);
            }
        }

        // Print out the template use count.
        System.out.println("Pages to process (Based on template)");
        System.out.println("------------------------------------");
        for (Map.Entry<String, Integer> entry : templatesCountMap.entrySet()) {
            String templateName = templatesMap.get(entry.getKey());
            System.out.println(templateName + ": " + entry.getValue());
            // Constant-first comparison: templateName is null for an unknown template GUID.
            if ("Redirect".equalsIgnoreCase(templateName)) {
                for (Map.Entry<String, String> redirect : redirectsMap.entrySet()) {
                    System.out.println("  " + redirect.getKey() + " -> " + redirect.getValue());
                }
            }
        }

        System.out.println();

        System.out.println("Pages to ignore (Based on template)");
        System.out.println("-----------------------------------");
        for (Map.Entry<String, Integer> entry : templatesExcludedCountMap.entrySet()) {
            System.out.println(templatesMap.get(entry.getKey()) + ": " + entry.getValue());
        }

        System.out.println();

        System.out.println("Pages to ignore (Expired)");
        System.out.println("-------------------------");
        for (Map.Entry<String, Integer> entry : templatesExpiredCountMap.entrySet()) {
            System.out.println(templatesMap.get(entry.getKey()) + ": " + entry.getValue());
        }

        System.out.println();

        System.out.println("Page resources and counts");
        System.out.println("-------------------------");

        Map<String, Integer> sortedResources = sortByComparator(resourcesCountMap, false);

        for (Map.Entry<String, Integer> entry : sortedResources.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }

        System.out.println();

        if (!reportOnly) {
            savePostings(postingsHashMap, channelCollection); // Should be changed to a taskCollection
            savePostingsAsJson(postingsHashMap);
        }
    }

    /**
     * Increment the counter stored for {@code template}, starting at 1 when the
     * template has not been counted yet.
     *
     * @param counts   counter map to update (template GUID -> count)
     * @param template template GUID whose counter is incremented
     */
    private static void bumpTemplateCount(Map<String, Integer> counts, String template) {
        Integer current = counts.get(template);
        counts.put(template, current == null ? 1 : current + 1);
    }

    /**
     * Return the entries of a count map ordered by their values.
     *
     * @param unsortedResources map of names to counts; it is not modified
     * @param order {@code true} to sort by ascending value, {@code false} for descending
     * @return a new map whose iteration order follows the sorted values
     * @see <a href="http://stackoverflow.com/a/13913206">http://stackoverflow.com/a/13913206</a>
     */
    private static Map<String, Integer> sortByComparator(HashMap<String, Integer> unsortedResources,
            final boolean order) {

        Comparator<Entry<String, Integer>> byValue = new Comparator<Entry<String, Integer>>() {

            @Override
            public int compare(Entry<String, Integer> left, Entry<String, Integer> right) {
                return left.getValue().compareTo(right.getValue());
            }
        };

        if (!order) {
            // Descending order is the ascending comparison with its operands swapped.
            byValue = Collections.reverseOrder(byValue);
        }

        List<Entry<String, Integer>> entries = new ArrayList<Entry<String, Integer>>(unsortedResources.entrySet());
        Collections.sort(entries, byValue);

        // A LinkedHashMap keeps the sorted order when the result is iterated.
        Map<String, Integer> sortedMap = new LinkedHashMap<String, Integer>();
        for (Entry<String, Integer> sortedEntry : entries) {
            sortedMap.put(sortedEntry.getKey(), sortedEntry.getValue());
        }

        return sortedMap;
    }

    /**
     * Writes every channel to the given MongoDB collection, one document per channel.
     * <p>
     * Each document is matched on its "uid" field and written with the driver's upsert flag
     * set, so re-running the export replaces the stored document rather than adding a second one.
     * 
     * @param parentReferences channels to persist
     * @param collection collection that receives the channel documents
     */
    private static void saveChannels(List<ChannelsAndPostingsBase> parentReferences, DBCollection collection) {

        for (ChannelsAndPostingsBase channel : parentReferences) {
            Object uid = channel.getUid();

            // Field order is kept stable: uid, guid, name, parent, level, location, isChannel, created.
            BasicDBObject channelDoc = new BasicDBObject("uid", uid);
            channelDoc.put("guid", channel.getGuid());
            channelDoc.put("name", channel.getName());
            channelDoc.put("parent", channel.getParent());
            channelDoc.put("level", channel.getLevel());
            channelDoc.put("location", channel.getLocation());
            channelDoc.put("isChannel", channel.getIsChannel());
            channelDoc.put("created", new Date());

            // update(query, document, upsert, multi)
            collection.update(new BasicDBObject("uid", uid), channelDoc, true, false);
        }

        System.out.println();
        System.out.println("All done saving channels.");
        System.out.println();
    }

    /**
     * Writes every posting to the given MongoDB collection, one document per posting.
     * <p>
     * Documents are matched on "uid" and written with the driver's upsert flag set. The
     * "pageName" field is only stored when the posting has one.
     * 
     * @param postingsHashMap postings to persist; only the map values are used
     * @param collection collection that receives the posting documents
     */
    private static void savePostings(HashMap<String, ChannelsAndPostingsBase> postingsHashMap,
            DBCollection collection) {

        for (ChannelsAndPostingsBase posting : postingsHashMap.values()) {
            BasicDBObject postingDoc = new BasicDBObject();
            postingDoc.put("uid", posting.getUid());
            postingDoc.put("guid", posting.getGuid());
            postingDoc.put("name", posting.getName());
            postingDoc.put("parent", posting.getParent());
            postingDoc.put("level", posting.getLevel());
            postingDoc.put("location", posting.getLocation());
            postingDoc.put("isChannel", posting.getIsChannel());
            postingDoc.put("created", new Date());

            if (posting.getPageName() != null) {
                postingDoc.put("pageName", posting.getPageName());
            }

            // update(query, document, upsert, multi)
            BasicDBObject matchByUid = new BasicDBObject("uid", posting.getUid());
            collection.update(matchByUid, postingDoc, true, false);
        }

        System.out.println("All done saving postings.");
    }

    /**
     * Writes the postings to disk as JSON, one file per template.
     * <p>
     * For every template key in {@code templatesCountMap}, the postings whose template equals
     * that key are collected and serialized with Gson into a file named after the template
     * inside {@code archiveFolder}, e.g. Default.json, ProjectSpecifics.json.
     * <p>
     * Files are written as UTF-8 and the writer is always closed, even when writing fails.
     * An {@link IOException} for one template is printed and the remaining templates are
     * still processed.
     * 
     * @param postingsHashMap postings to export; a posting without a template matches no file
     */
    private static void savePostingsAsJson(HashMap<String, ChannelsAndPostingsBase> postingsHashMap) {

        // A Gson instance is reusable, so one serializer serves every template.
        Gson gson = new Gson();

        for (Map.Entry<String, Integer> template : templatesCountMap.entrySet()) {
            String templateName = templatesMap.get(template.getKey());
            HashMap<String, ChannelsAndPostingsBase> postingsByTemplate = new HashMap<String, ChannelsAndPostingsBase>();

            for (Map.Entry<String, ChannelsAndPostingsBase> posting : postingsHashMap.entrySet()) {
                // Compare from the template key so a posting with a null template is skipped
                // instead of throwing a NullPointerException.
                if (template.getKey().equals(posting.getValue().getTemplate())) {
                    postingsByTemplate.put(posting.getKey(), posting.getValue());
                }
            }

            String json = gson.toJson(postingsByTemplate);
            File jsonFile = new File(archiveFolder + File.separator + templateName + ".json");
            BufferedWriter bw = null;

            try {
                // FileOutputStream creates the file when it is missing. The charset is pinned to
                // UTF-8 because FileWriter would use the platform default encoding.
                bw = new BufferedWriter(
                        new java.io.OutputStreamWriter(new java.io.FileOutputStream(jsonFile), "UTF-8"));
                bw.write(json);
                bw.flush();

                System.out.println("Wrote " + templateName + " postings data to JSON file.");
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Release the file handle whether or not the write succeeded.
                if (bw != null) {
                    try {
                        bw.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

        }

    }

    /**
     * Recursively walks the element children of {@code node} and collects page data from three
     * kinds of elements: Control, Resources and GenericProperties (names matched ignoring case).
     * <p>
     * Besides the maps passed in, this method uses state held in fields declared outside this
     * method: {@code postingCollection} and {@code channelCollection} are queried to resolve
     * internal links, {@code nodeGuidHashMap} remembers file resources so that later duplicate
     * references can be resolved, and the posting stored under {@code pageGuid} in
     * {@code postingsHashMap} receives its description.
     * 
     * @param node  node whose children are processed
     * @param pageGuid  unique id of the page; used as the key into {@code postingsHashMap}
     * @param controlsHashMap  filled with the Control "Name" mapped to that control's properties
     *                         ("InternalIDResource" mapped to "Value", or to "Name" for
     *                         properties named ControlProp followed by digits)
     * @param resourcesHashMap  filled with the Resource "InternalID" mapped to its {@link ResourceItem}
     * @param genericPropertiesHashMap  filled with the property "Name" mapped to its "Value",
     *                                  or to an empty string when there is no "Value"
     */
    private static void traverseNodes(Node node, String pageGuid,
            HashMap<String, HashMap<String, String>> controlsHashMap,
            HashMap<String, ResourceItem> resourcesHashMap, HashMap<String, String> genericPropertiesHashMap) {

        NamedNodeMap attributes;
        NamedNodeMap blobAttributes;
        String placeholderDefinition;
        ResourceItem resourceItem;
        String nodeGuid;
        String resourceBlobId;

        // Now traverse the rest of the tree in depth-first order.
        if (node.hasChildNodes()) {
            // Get the children in a list.
            NodeList nl = node.getChildNodes();
            // How many nodes?
            int size = nl.getLength();

            for (int i = 0; i < size; i++) {
                // Only element nodes are inspected and recursed into; text, comments etc. are skipped.
                if (nl.item(i).getNodeType() == Node.ELEMENT_NODE) {
                    /**
                     * Controls
                     */
                    if (nl.item(i).getNodeName().equalsIgnoreCase("Control")) {
                        placeholderDefinition = nl.item(i).getAttributes().getNamedItem("Name").getNodeValue();

                        // NOTE(review): getElementsByTagName is a descendant search in the DOM API, and every
                        // element child is also recursed into at the end of this loop - confirm that nested
                        // elements being visited more than once is harmless for the export format.
                        Element propertyElement = (Element) nl.item(i);
                        int numProperties = propertyElement.getElementsByTagName("Property").getLength();

                        // Properties of this control, keyed by their InternalIDResource attribute.
                        HashMap<String, String> property = new HashMap<String, String>();

                        for (int j = 0; j < numProperties; j++) {
                            attributes = propertyElement.getElementsByTagName("Property").item(j).getAttributes();

                            if (attributes.getNamedItem("Value") != null) {
                                // Property carries a value: store the value itself.
                                property.put(attributes.getNamedItem("InternalIDResource").getNodeValue(),
                                        attributes.getNamedItem("Value").getNodeValue());

                            } else {
                                // No value: only ControlProp<digits> properties are kept, stored by name.
                                // buildContent uses that name to find the matching resource marker.
                                String nameAttribute = attributes.getNamedItem("Name").getNodeValue();
                                if (nameAttribute.matches("ControlProp(\\d+)")) {
                                    property.put(attributes.getNamedItem("InternalIDResource").getNodeValue(),
                                            attributes.getNamedItem("Name").getNodeValue());
                                }
                            }
                        }

                        // Controls without any usable property are not recorded.
                        if (!property.isEmpty()) {
                            controlsHashMap.put(placeholderDefinition, property);
                        }
                        /**
                         * Resources    
                         */
                    } else if (nl.item(i).getNodeName().equalsIgnoreCase("Resources")) {
                        Element propertyElement = (Element) nl.item(i);
                        int numResources = propertyElement.getElementsByTagName("Resource").getLength();

                        for (int j = 0; j < numResources; j++) {
                            resourceItem = new ResourceItem();
                            attributes = propertyElement.getElementsByTagName("Resource").item(j).getAttributes();

                            // NOTE(review): assumes every Resource has an IsLink attribute; a missing one
                            // would throw a NullPointerException here.
                            if (attributes.getNamedItem("IsLink").getNodeValue().equals("1")) {
                                if (attributes.getNamedItem("URL") != null) {
                                    //
                                    // <Resource URL="http://www..." ResourceBlobId="0" IsLink="1" Name="NewResource2" InternalID="2"/>
                                    //
                                    resourceItem.setIsLink(attributes.getNamedItem("IsLink").getNodeValue());
                                    resourceItem.setUrl(attributes.getNamedItem("URL").getNodeValue());
                                } else {
                                    //
                                    // <Resource ResourceBlobId="0" NodeGuid="{1027105F-...}" IsLink="1" Name="NewResource3" InternalID="3"/>
                                    //
                                    // NodeGuid references Posting GUID attribute in Cha.xml file which references the Page GUID in the Fol.xml file
                                    // via the Shortcut attribute. How do I resolve this with an SDO export? HashMap? MongoDB?
                                    //
                                    // NodeGUID references the CMS database Node table and NodeGUID column. In that table there
                                    // is a FollowGUID column which points to the Node row which has the Name of the page.
                                    // Build the channel guids from channel containers already in the database.
                                    String nodeGUID = attributes.getNamedItem("NodeGuid").getNodeValue();
                                    BasicDBObject query = new BasicDBObject("NodeGUID", nodeGUID);
                                    DBCursor postingCursor = postingCollection.find(query);

                                    String guid = "";
                                    String postingName = "default";
                                    String channelName = "";
                                    String location = "";
                                    String rootRelativeURL = "";

                                    // If several postings match, the values of the last one win.
                                    while (postingCursor.hasNext()) {
                                        DBObject dbObject = postingCursor.next();
                                        guid = dbObject.get("ParentGUID").toString();
                                        postingName = dbObject.get("Name").toString();
                                    }

                                    // No posting found: treat the NodeGuid itself as the channel guid and keep
                                    // the "default" posting name.
                                    if (postingCursor.size() == 0) {
                                        guid = nodeGUID;
                                    }

                                    query = new BasicDBObject("guid", guid);
                                    DBCursor channelCursor = channelCollection.find(query);

                                    // If several channels match, the values of the last one win.
                                    while (channelCursor.hasNext()) {
                                        DBObject dbObject = channelCursor.next();
                                        channelName = dbObject.get("name").toString();
                                        location = dbObject.get("location").toString();
                                    }

                                    // Build /<location with ';' turned into '/'>/<channel>/<posting>.htm,
                                    // or leave the URL empty when no channel was found.
                                    if (channelName.isEmpty() && location.isEmpty()) {
                                        rootRelativeURL = "";
                                    } else if (location.isEmpty()) {
                                        rootRelativeURL = "/" + channelName + "/" + postingName + ".htm";
                                    } else {
                                        rootRelativeURL = "/" + location.replace(";", "/") + "/" + channelName + "/"
                                                + postingName + ".htm";
                                    }

                                    resourceItem.setIsLink(attributes.getNamedItem("IsLink").getNodeValue());
                                    resourceItem.setUrl(rootRelativeURL);
                                }

                                resourcesHashMap.put(attributes.getNamedItem("InternalID").getNodeValue(),
                                        resourceItem);

                            } else {
                                // Not a link: a file resource, either described by a BlobInfo child or a
                                // duplicate reference to a resource seen earlier.
                                Node blobInfo = propertyElement.getElementsByTagName("Resource").item(j);
                                Element blobInfoElement = (Element) blobInfo;

                                if (blobInfoElement.hasChildNodes()) {
                                    //
                                    // <Resource ResourceBlobId="24041" NodeGuid="{7CE36259-...}" IsLink="0" Name="btn_video_smaller1" InternalID="1146">
                                    //   <BlobInfo FileName="resF7F9C8ACD9... .jpg" Size="1578" Guid="{F7F9C8AC-...}" FileExtension="jpg" InternalID="24041"/>
                                    // </Resource>
                                    //
                                    // NOTE(review): assumes the child nodes include a BlobInfo element; if they
                                    // do not, item(0) is null and getAttributes() throws.
                                    blobAttributes = blobInfoElement.getElementsByTagName("BlobInfo").item(0)
                                            .getAttributes();
                                    resourceItem.setIsLink(attributes.getNamedItem("IsLink").getNodeValue());
                                    resourceItem.setName(attributes.getNamedItem("Name").getNodeValue());
                                    resourceItem
                                            .setFilename(blobAttributes.getNamedItem("FileName").getNodeValue());
                                    resourceItem.setFileExtension(
                                            blobAttributes.getNamedItem("FileExtension").getNodeValue());

                                    // Store unique reference to resource item keyed by NodeGuid.
                                    // If NodeGuid is not already in the HashMap then store a reference to it.
                                    if (attributes.getNamedItem("NodeGuid") != null) {
                                        nodeGuid = attributes.getNamedItem("NodeGuid").getNodeValue();
                                        if (nodeGuidHashMap.get(nodeGuid) == null) {
                                            nodeGuidHashMap.put(nodeGuid, resourceItem);
                                        }
                                        //
                                        // <Resource ResourceBlobId="104084" IsLink="0" Name="SR510toSR512WEB" InternalID="3">
                                        //   <BlobInfo FileName="resDD0CDE054B....pdf" Size="257303" Guid="{DD0CDE05-...}" FileExtension="pdf" InternalID="104084" />
                                        // </Resource>
                                        //
                                        // If the resource is stored locally in the page there is no NodeGuid to reference.
                                        // Use the ResourceBlobId as a fallback.
                                    } else {
                                        resourceBlobId = attributes.getNamedItem("ResourceBlobId").getNodeValue();
                                        if (nodeGuidHashMap.get(resourceBlobId) == null) {
                                            nodeGuidHashMap.put(resourceBlobId, resourceItem);
                                        }
                                    }
                                } else {
                                    //
                                    // <Resource ResourceBlobId="24041" NodeGuid="{7CE36259-...}" IsLink="0" Name="btn_video_smaller94" InternalID="1147"/>
                                    //
                                    // Duplicate ResourceBlobId referencing existing link to resource on the page.
                                    // Store unique reference to resource item keyed on NodeGuid.
                                    //
                                    // NOTE(review): both branches below dereference nodeGuidHashMap.get(...)
                                    // without a null check, so a duplicate reference that appears before (or
                                    // without) its BlobInfo-carrying original throws a NullPointerException.
                                    if (attributes.getNamedItem("NodeGuid") != null) {
                                        nodeGuid = attributes.getNamedItem("NodeGuid").getNodeValue();
                                        resourceItem.setIsLink(nodeGuidHashMap.get(nodeGuid).getIsLink());
                                        resourceItem.setName(nodeGuidHashMap.get(nodeGuid).getName());
                                        resourceItem.setFilename(nodeGuidHashMap.get(nodeGuid).getFilename());
                                        resourceItem
                                                .setFileExtension(nodeGuidHashMap.get(nodeGuid).getFileExtension());
                                        //
                                        // <Resource ResourceBlobId="104084" IsLink="0" Name="SR510toSR512WEB1" InternalID="6" />
                                        //
                                        // No NodeGuid. Use ResourceBlobId.
                                    } else {
                                        resourceBlobId = attributes.getNamedItem("ResourceBlobId").getNodeValue();
                                        resourceItem.setIsLink(nodeGuidHashMap.get(resourceBlobId).getIsLink());
                                        resourceItem.setName(nodeGuidHashMap.get(resourceBlobId).getName());
                                        resourceItem.setFilename(nodeGuidHashMap.get(resourceBlobId).getFilename());
                                        resourceItem.setFileExtension(
                                                nodeGuidHashMap.get(resourceBlobId).getFileExtension());
                                    }
                                }

                                resourcesHashMap.put(attributes.getNamedItem("InternalID").getNodeValue(),
                                        resourceItem);
                            }
                        }
                        /**
                         * GenericProperties
                         */
                    } else if (nl.item(i).getNodeName().equalsIgnoreCase("GenericProperties")) {
                        Element propertyElement = (Element) nl.item(i);
                        int numProperties = propertyElement.getElementsByTagName("Property").getLength();

                        for (int j = 0; j < numProperties; j++) {
                            attributes = propertyElement.getElementsByTagName("Property").item(j).getAttributes();

                            // Posting summary. Used in News Items and various RSS feeds for mobile apps and GovDelivery content.
                            // "ValueLong" is preferred over "Value"; with neither, the description is set to "".
                            if (attributes.getNamedItem("Name").getNodeValue().equalsIgnoreCase("_Description")) {
                                if (attributes.getNamedItem("ValueLong") != null) {
                                    postingsHashMap.get(pageGuid)
                                            .setDescription(attributes.getNamedItem("ValueLong").getNodeValue());
                                } else if (attributes.getNamedItem("Value") != null) {
                                    postingsHashMap.get(pageGuid)
                                            .setDescription(attributes.getNamedItem("Value").getNodeValue());
                                    ;
                                } else {
                                    postingsHashMap.get(pageGuid).setDescription("");
                                }
                                // Otherwise, store everything else for processing in the individual template model.
                            } else {
                                if (attributes.getNamedItem("Value") != null) {
                                    genericPropertiesHashMap.put(attributes.getNamedItem("Name").getNodeValue(),
                                            attributes.getNamedItem("Value").getNodeValue());
                                } else {
                                    genericPropertiesHashMap.put(attributes.getNamedItem("Name").getNodeValue(),
                                            "");

                                }
                            }
                        }
                    }

                    // Recursive call to traverse nodes.
                    traverseNodes(nl.item(i), pageGuid, controlsHashMap, resourcesHashMap,
                            genericPropertiesHashMap);
                }
            }
        }
    }

    /**
     * Reassembles a posting's content from the data structures filled while traversing its XML.
     * <p>
     * For every control, the content stored under property key "0" is sanitized and each resource
     * marker in it is replaced: link resources by their URL, file resources by the site path of
     * the file. File resources are counted in {@code resourcesCountMap}, registered in
     * {@code resourcesCollection} the first time they are seen, copied with
     * {@link Migration#resourceCopy}, and new images/documents are attached to the posting as
     * semicolon-separated lists. Finally the template registered for the posting builds the
     * finished posting, which replaces the entry in {@code postingsHashMap}.
     * <p>
     * If the template is missing or building fails with a {@link NullPointerException}, the
     * problem is printed and the program exits.
     * 
     * @param pageGuid  key of the posting in {@code postingsHashMap}
     * @param pageControls  control name mapped to its properties (resource id mapped to content or marker name)
     * @param pageResources  resource id mapped to the resource it describes
     * @param genericPropertiesMap  generic properties handed through to the template
     */
    private static void buildContent(String pageGuid, HashMap<String, HashMap<String, String>> pageControls,
            HashMap<String, ResourceItem> pageResources, HashMap<String, String> genericPropertiesMap) {

        TemplateFactory templateFactory = new TemplateFactory();

        // Kept at method scope on purpose: when a lookup below finds no record, the path of the
        // previously processed resource is reused, exactly as before.
        // NOTE(review): confirm whether a stale path is really wanted in that case.
        String newFilenamePath = null;

        List<String> imageExtensions = Arrays.asList("bmp", "eps", "gif", "jpeg", "jpg", "png", "tif");
        List<String> documentExtensions = Arrays.asList("doc", "docm", "docx", "fp5", "fp7", "pdf", "pps", "ppsx",
                "ppt", "pptx", "psd", "txt", "xls", "xlsx");
        List<String> projectPhaseImages = Arrays.asList("Status_Planning.gif", "Status_PlanningDesign.gif",
                "Status_Design.gif", "Status_DesignConstruct.gif", "Status_Construction.gif",
                "Status_Complete.gif");

        // Use the posting createdWhen date to build the file directory path of the images and documents.
        String createdWhen[] = postingsHashMap.get(pageGuid).getCreatedWhen().split("\\s+"); // 2011-03-24 17:00:50
        String date[] = createdWhen[0].split("-"); // 2011-03-24
        String year = date[0]; // 2011
        String month = date[1]; // 03
        String day = date[2]; // 24

        List<String> files = new ArrayList<String>();
        List<String> images = new ArrayList<String>();

        String templateName = templatesMap.get(postingsHashMap.get(pageGuid).getTemplate());

        // Hold a reference of the Control Name and its content
        HashMap<String, String> controlsMap = new HashMap<String, String>();

        for (Map.Entry<String, HashMap<String, String>> control : pageControls.entrySet()) {
            String placeholderDefinition = control.getKey();

            // Key "0" holds the control's content; every other key refers to a resource used in it.
            String propertyValue = Migration.sanitizeContent(control.getValue().get("0"));

            for (Map.Entry<String, String> property : control.getValue().entrySet()) {
                if (property.getKey().equals("0")) {
                    continue;
                }

                String regexPattern = "<!--\\* Resource = \"" + property.getValue() + "\" -->";
                ResourceItem resource = pageResources.get(property.getKey());

                if (resource.getIsLink().equals("1")) {
                    /**
                     * <a href="<!--* Resource = "ControlProp0" -->">http://wwwi.wsdot.wa.gov/IT/Help/Lync2013.htm </a>
                     * 
                     * If Resource is an internal link then I could match the link text and use that for the href value.
                     * What if the link text is not a URL but text? Buzzard.
                     * 
                     * Currently the href will be replaced with the internal GUID e.g. "{84C6C09A-5072-441E-AE40-100A64F9A1B7}"
                     */
                    propertyValue = replaceResourceMarker(propertyValue, regexPattern, resource.getUrl());

                } else {
                    String oldFilename = resource.getFilename();
                    String fileExtension = resource.getFileExtension().toLowerCase();
                    String newFilename = resource.getName() + "." + fileExtension;

                    BasicDBObject query = new BasicDBObject("filename", newFilename);

                    // Count individual references to resources. We don't want duplicates.
                    if (resourcesCountMap.containsKey(newFilename)) {
                        resourcesCountMap.put(newFilename, resourcesCountMap.get(newFilename) + 1);

                        // Use existing resources in MongoDB if they are there.
                        DBCursor filenameCursor = resourcesCollection.find(query);
                        while (filenameCursor.hasNext()) {
                            DBObject dbObject = filenameCursor.next();
                            newFilenamePath = dbObject.get("path").toString();
                        }

                    } else {
                        resourcesCountMap.put(newFilename, 1);

                        if (resourcesCollection.find(query).count() > 0) {
                            // Use existing resources in MongoDB if they are there.
                            DBCursor filenameCursor = resourcesCollection.find(query);
                            while (filenameCursor.hasNext()) {
                                DBObject dbObject = filenameCursor.next();
                                newFilenamePath = dbObject.get("path").toString();
                            }
                        } else {
                            newFilenamePath = sitePath + year + "/" + month + "/" + day + "/" + newFilename;
                            // New resource so store reference to it in MongoDB
                            BasicDBObject doc = new BasicDBObject();
                            doc.put("filename", newFilename);
                            doc.put("path", newFilenamePath);
                            doc.put("created", new Date());
                            BasicDBObject updateQuery = new BasicDBObject("filename", newFilename);
                            resourcesCollection.update(updateQuery, doc, true, false);

                            // Since it's a new resource let's be sure to import it in Drupal
                            if (imageExtensions.contains(fileExtension)) {
                                // If the image is a project phase, skip it; these are handled by a taxonomy
                                if (!projectPhaseImages.contains(newFilename)) {
                                    images.add(locationUri + File.separator + archiveFolder + File.separator
                                            + newFilename);
                                }
                            } else if (documentExtensions.contains(fileExtension)) {
                                files.add(locationUri + File.separator + archiveFolder + File.separator
                                        + newFilename);
                            }
                        }
                    }

                    propertyValue = replaceResourceMarker(propertyValue, regexPattern, newFilenamePath);
                    Migration.resourceCopy(archiveFolder, oldFilename, newFilename);
                }
            }

            controlsMap.put(placeholderDefinition, propertyValue);

        }

        if (files.size() > 0) {
            postingsHashMap.get(pageGuid).setDocuments(joinWithSemicolon(files));
        }

        if (images.size() > 0) {
            postingsHashMap.get(pageGuid).setImages(joinWithSemicolon(images));
        }

        /** 
         * PageContent needs to be assembled differently depending on the template the content is coming from.
         * Using a Factory pattern to create the object so we don't expose the creation logic. Creating the
         * objects will be fussy because each template can be slightly different including how the common
         * placeholders are named.
         * 
         * In some cases we will just be copying from the MainContentPlaceHolderDefinition and in others we will
         * need to concatenate multiple placeholders into one or into different ones depending on the Drupal content
         * type.
         */
        Template template = templateFactory.getTemplate(templateName);
        try {
            postingsHashMap.put(pageGuid,
                    template.build(postingsHashMap.get(pageGuid), controlsMap, genericPropertiesMap));

            if (templateName.equalsIgnoreCase("Redirect")) {
                redirectsMap.put(postingsHashMap.get(pageGuid).getLegacyURL(),
                        postingsHashMap.get(pageGuid).getRedirectURL());
            }
        } catch (NullPointerException e) {
            // Covers both a missing template (template is null) and a failure inside build().
            // NOTE(review): the process exits with status 0 although this is an error path.
            System.out.println("Don't have a template class for: " + templateName);
            System.out.println("Or there was a problem with this page: " + pageGuid);
            System.out.println();
            e.printStackTrace();
            System.exit(0);
        }

    }

    /**
     * Replaces every match of {@code regexPattern} in {@code content} with {@code replacement}
     * taken literally, so '$' and '\' in URLs or file paths are not read as group references.
     * A null replacement is passed through untouched to keep the previous behavior for that case.
     */
    private static String replaceResourceMarker(String content, String regexPattern, String replacement) {
        String literalReplacement = replacement == null ? null
                : java.util.regex.Matcher.quoteReplacement(replacement);
        return content.replaceAll(regexPattern, literalReplacement);
    }

    /**
     * Joins the values with ';' without altering the values themselves (the former
     * toString()-and-strip approach mangled names containing '[', ']' or ", ").
     */
    private static String joinWithSemicolon(List<String> values) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < values.size(); i++) {
            if (i > 0) {
                joined.append(';');
            }
            joined.append(values.get(i));
        }
        return joined.toString();
    }

}