org.onexus.ui.workspace.internal.wizards.data.CreateCollectionWizard.java Source code

Java tutorial

Introduction

Here is the source code for org.onexus.ui.workspace.internal.wizards.data.CreateCollectionWizard.java

Source

/**
 *  Copyright 2012 Universitat Pompeu Fabra.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *
 */
package org.onexus.ui.workspace.internal.wizards.data;

import org.apache.commons.lang3.StringUtils;
import org.apache.wicket.extensions.wizard.WizardModel;
import org.apache.wicket.extensions.wizard.WizardStep;
import org.apache.wicket.markup.html.form.ListMultipleChoice;
import org.apache.wicket.markup.html.form.RadioChoice;
import org.apache.wicket.model.IModel;
import org.apache.wicket.model.PropertyModel;
import org.apache.wicket.request.mapper.parameter.PageParameters;
import org.onexus.collection.api.Collection;
import org.onexus.collection.api.Field;
import org.onexus.collection.api.Link;
import org.onexus.collection.api.types.Text;
import org.onexus.collection.api.utils.LinkUtils;
import org.onexus.data.api.IDataManager;
import org.onexus.resource.api.Folder;
import org.onexus.resource.api.IResourceManager;
import org.onexus.resource.api.Loader;
import org.onexus.resource.api.ORI;
import org.onexus.resource.api.Parameter;
import org.onexus.resource.api.Property;
import org.onexus.resource.api.Resource;
import org.onexus.ui.api.pages.resource.ResourcesPage;
import org.onexus.ui.api.wizards.AbstractWizard;

import javax.inject.Inject;
import javax.inject.Named;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Two-step wizard that creates a {@link Collection} resource describing a
 * delimited text file.
 * <p>
 * Step one ({@code ChooseFormat}) lets the user pick the file format and, when
 * the step is left, samples the file to learn its column headers, a bounded
 * set of values per column and how often each null marker occurs. Step two
 * ({@code PrimaryKeys}) offers the headers as primary-key candidates. On
 * finish the wizard builds the collection (fields, links and loader), saves
 * it through the {@link IResourceManager} and redirects to
 * {@link ResourcesPage}.
 */
public class CreateCollectionWizard extends AbstractWizard {

    @Inject
    private IDataManager dataManager;

    @Inject
    private IResourceManager resourceManager;

    // Formats
    private static final String CSV = "Comma separated values";
    private static final String TSV = "Tab separated values";
    private static final List<String> FORMATS = Arrays.asList(TSV, CSV);

    // Maximum lines to load to deduce the datatype
    private static final int MAXIMUM_LINES = 10000;

    // Maximum distinct values kept per column while sampling
    private static final int MAXIMUM_UNIQUE_VALUES = 1500;

    private String selected = TSV;
    private List<String> primaryKeys = new ArrayList<String>();
    private ORI sourceURI;

    // Data information. Both start empty (never null) so that the later
    // wizard steps stay usable even when the file could not be read.
    private String[] headers = new String[0];
    private Map<String, Set<String>> sampleData = new HashMap<String, Set<String>>();
    private int nullEmpty = 0;
    private int nullDash = 0;
    private int nullString = 0;
    private int nullNA = 0;

    /**
     * Creates the wizard for the resource held by {@code model}.
     *
     * @param id    the component id
     * @param model model whose resource ORI identifies the data file to describe
     */
    public CreateCollectionWizard(String id, IModel<? extends Resource> model) {
        super(id);

        sourceURI = model.getObject().getORI();

        WizardModel wizardModel = new WizardModel();
        wizardModel.add(new ChooseFormat());
        wizardModel.add(new PrimaryKeys());
        init(wizardModel);
    }

    /**
     * Samples the source file: reads the header line and up to
     * {@link #MAXIMUM_LINES} data lines, keeping at most
     * {@link #MAXIMUM_UNIQUE_VALUES} distinct values per column and counting
     * the occurrences of each null marker (empty, "-", "NULL", "NA").
     * <p>
     * Only the first stream returned by the data manager is read. If there is
     * no stream, or the file is empty, the headers and sample data are left
     * empty.
     *
     * @throws IOException if the file cannot be read
     */
    private void readData() throws IOException {

        // This method runs every time the first step is left, so any state
        // from a previous pass must be discarded; otherwise the null counters
        // would accumulate across passes.
        headers = new String[0];
        sampleData = new HashMap<String, Set<String>>();
        nullEmpty = 0;
        nullDash = 0;
        nullString = 0;
        nullNA = 0;

        // Constant-first comparison: tolerate a null selection.
        String separator = " ";
        if (CSV.equals(selected)) {
            separator = ",";
        } else if (TSV.equals(selected)) {
            separator = "\t";
        }

        Iterator<InputStream> streams = dataManager.load(sourceURI).iterator();

        if (!streams.hasNext()) {
            return;
        }

        // NOTE(review): the reader uses the platform default charset; confirm
        // whether the data files have a fixed encoding.
        BufferedReader reader = new BufferedReader(new InputStreamReader(streams.next()));
        try {

            // Get headers (an empty file has no header line at all)
            String headerLine = reader.readLine();
            if (headerLine == null) {
                return;
            }
            headers = headerLine.split(separator);

            // Build values map
            for (String header : headers) {
                sampleData.put(header, new HashSet<String>());
            }

            String line = reader.readLine();
            for (int i = 0; i < MAXIMUM_LINES && line != null; i++) {

                // Limit -1 keeps trailing empty cells, which would otherwise
                // be dropped and never counted as empty nulls.
                String[] values = line.split(separator, -1);
                int columns = Math.min(headers.length, values.length);

                for (int h = 0; h < columns; h++) {
                    String value = values[h];

                    if (value.isEmpty()) {
                        nullEmpty++;
                    } else if (value.equals("-")) {
                        nullDash++;
                    } else if (value.equalsIgnoreCase("NULL")) {
                        nullString++;
                    } else if (value.equalsIgnoreCase("NA")) {
                        nullNA++;
                    }

                    Set<String> columnValues = sampleData.get(headers[h]);
                    if (columnValues.size() < MAXIMUM_UNIQUE_VALUES) {
                        columnValues.add(value);
                    }
                }
                line = reader.readLine();
            }
        } finally {
            // Close the reader even when reading fails half way
            reader.close();
        }

    }

    /**
     * Builds the collection from the sampled data, saves it and redirects to
     * the resources page showing the new collection.
     */
    @Override
    public void onFinish() {
        super.onFinish();

        Collection collection = newCollection();
        collection.setFields(buildFields());
        collection.setLinks(buildLinks());
        collection.setLoader(buildLoader());

        resourceManager.save(collection);

        PageParameters params = new PageParameters().add(ResourcesPage.PARAMETER_RESOURCE, collection.getORI());
        setResponsePage(ResourcesPage.class, params);

    }

    /**
     * Creates one field per header. Label and title are copied from a field
     * with the same id found in the collections under the source's parent
     * folder; otherwise they are derived from the header. The data type is
     * deduced from the sampled values.
     */
    private List<Field> buildFields() {

        // Collect fields from other collections in the same folder
        Map<String, Field> otherFields = collectFields();

        List<Field> fields = new ArrayList<Field>();
        for (String header : headers) {
            String shortName;
            String title;
            if (otherFields.containsKey(header)) {
                Field known = otherFields.get(header);
                shortName = known.getLabel();
                title = known.getTitle();
            } else {
                String lower = StringUtils.lowerCase(header);
                shortName = StringUtils.abbreviate(lower, 20);
                title = StringUtils.capitalize(lower);
            }

            Field field = new Field(header, shortName, title, deduceClass(sampleData.get(header)));

            // Case-insensitive match that does not depend on the default locale
            if (StringUtils.containsIgnoreCase(header, "pvalue")
                    || StringUtils.containsIgnoreCase(header, "qvalue")) {
                field.setProperties(Arrays.asList(new Property[] { new Property("BROWSER_DECORATOR", "PVALUE2") }));
            }

            if (primaryKeys.contains(header)) {
                field.setPrimaryKey(Boolean.TRUE);
            }

            fields.add(field);
        }

        return fields;
    }

    /**
     * Deduces the links of the new collection. A header that is already used
     * as a simple link by a collection under the source's parent folder gets
     * a copy of that link. Otherwise, a header ending in "id" or "key" is
     * linked to every collection of the project that declares a field with
     * the same id and has no links of its own.
     */
    private List<Link> buildLinks() {

        // Deduce links from other collections in the same folder
        Map<String, Link> otherLinks = collectLinks();
        List<Link> links = new ArrayList<Link>();

        List<Collection> allProjectCollections = new ArrayList<Collection>();
        addAllCollections(allProjectCollections, resourceManager.getProject(sourceURI.getProjectUrl()).getORI());

        for (String header : headers) {

            if (otherLinks.containsKey(header)) {
                Link otherLink = otherLinks.get(header);
                Link link = new Link();
                link.setCollection(otherLink.getCollection());
                link.getFields().add(otherLink.getFields().get(0));
                links.add(link);
                continue;
            }

            // The suffix test only depends on the header, so evaluate it once
            // instead of once per project collection.
            boolean looksLikeKey = StringUtils.endsWithIgnoreCase(header, "id")
                    || StringUtils.endsWithIgnoreCase(header, "key");
            if (!looksLikeKey) {
                continue;
            }

            for (Collection col : allProjectCollections) {
                if (col.getField(header) == null) {
                    continue;
                }

                // Only link to collections without any link
                if (col.getLinks() == null || col.getLinks().isEmpty()) {
                    Link link = new Link();
                    link.setCollection(new ORI((String) null, col.getORI().getPath()));
                    link.getFields().add(header);
                    links.add(link);
                }
            }
        }

        return links;
    }

    /**
     * Creates the loader definition pointing at the source file. A
     * {@code NULL_VALUE} parameter is added when the empty string, "NULL" or
     * "NA" is strictly the most frequent null marker in the sample.
     */
    private Loader buildLoader() {

        Loader loader = new Loader();
        // NOTE(review): the plugin is "tsv-loader" even when the CSV format
        // was selected, and no separator is passed on - confirm how the
        // loader handles comma separated files.
        loader.setPlugin("tsv-loader");

        List<Parameter> parameters = new ArrayList<Parameter>();
        parameters.add(new Parameter("data", sourceURI.getPath()));

        // No parameter is added when "-" dominates or on ties; presumably the
        // loader then applies its own default - verify against the loader.
        if (nullEmpty > nullDash && nullEmpty > nullString && nullEmpty > nullNA) {
            parameters.add(new Parameter("NULL_VALUE", ""));
        }

        if (nullString > nullDash && nullString > nullEmpty && nullString > nullNA) {
            parameters.add(new Parameter("NULL_VALUE", "NULL"));
        }

        if (nullNA > nullDash && nullNA > nullString && nullNA > nullEmpty) {
            parameters.add(new Parameter("NULL_VALUE", "NA"));
        }

        loader.setParameters(parameters);
        return loader;
    }

    /**
     * Collects the simple (single-field) links declared by the collections
     * found under the parent of the source resource, keyed by the name that
     * {@link LinkUtils#getFromFieldName(String)} extracts from the link field.
     */
    private Map<String, Link> collectLinks() {
        Map<String, Link> links = new HashMap<String, Link>();

        List<Collection> collections = new ArrayList<Collection>();
        addAllCollections(collections, sourceURI.getParent());

        for (Collection collection : collections) {
            if (collection.getLinks() == null) {
                continue;
            }

            for (Link link : collection.getLinks()) {

                // Only simple links (not composed)
                if (link.getFields().size() == 1) {
                    String field = LinkUtils.getFromFieldName(link.getFields().get(0));
                    links.put(field, link);
                }
            }
        }

        return links;
    }

    /**
     * Collects the fields declared by the collections found under the parent
     * of the source resource, keyed by field id. When several collections
     * declare the same id the last one visited wins.
     */
    private Map<String, Field> collectFields() {
        Map<String, Field> fields = new HashMap<String, Field>();

        List<Collection> collections = new ArrayList<Collection>();
        addAllCollections(collections, sourceURI.getParent());

        for (Collection collection : collections) {

            // Same null guard collectLinks() applies to getLinks()
            if (collection.getFields() == null) {
                continue;
            }

            for (Field field : collection.getFields()) {
                fields.put(field.getId(), field);
            }
        }

        return fields;
    }

    /**
     * Creates an empty collection whose path is derived from the source path:
     * the file extension is removed when there is one, otherwise ".col" is
     * appended. The same derived path is used as the collection title.
     */
    private Collection newCollection() {

        String sourceName = sourceURI.getPath();
        String collectionName;

        // Only a dot inside the last path segment, and not at its very start,
        // marks an extension; a dot in a folder name must not truncate the
        // path and a name such as ".tsv" must not become empty.
        int punt = sourceName.lastIndexOf('.');
        int lastSlash = sourceName.lastIndexOf('/');
        if (punt > lastSlash + 1) {
            collectionName = sourceName.substring(0, punt);
        } else {
            collectionName = sourceName + ".col";
        }

        ORI collectionURI = new ORI(sourceURI.getProjectUrl(), collectionName);

        Collection collection = new Collection();
        collection.setORI(collectionURI);
        collection.setTitle(collectionName);

        return collection;
    }

    /**
     * Adds to {@code collections} every collection that is a child of
     * {@code parentUri}, then recurses into each child folder.
     */
    private void addAllCollections(List<Collection> collections, ORI parentUri) {
        collections.addAll(resourceManager.loadChildren(Collection.class, parentUri));
        List<Folder> folders = resourceManager.loadChildren(Folder.class, parentUri);
        for (Folder folder : folders) {
            addAllCollections(collections, folder.getORI());
        }
    }

    /**
     * @return the selected file format label (bound to the "formats" radio
     *         choice through the "selected" property)
     */
    public String getSelected() {
        return selected;
    }

    public void setSelected(String selected) {
        this.selected = selected;
    }

    /**
     * Deduces the narrowest type able to hold all the sampled values of a
     * column: {@code Integer}, then {@code Double}, then {@code Text} when
     * some value is longer than 127 characters, otherwise {@code String}.
     * Null, empty and "-" values are ignored. A column without any usable
     * value is reported as {@code String}, since nothing in the sample
     * supports a numeric type.
     *
     * @param values distinct sampled values of the column, may be null
     * @return the deduced field type
     */
    private static Class<?> deduceClass(Set<String> values) {

        if (values == null) {
            return String.class;
        }

        boolean anyValue = false;
        boolean longString = false;
        boolean integerType = true;
        boolean doubleType = false;

        for (String value : values) {

            // Skip null and empty values
            if (value == null || value.isEmpty() || value.equals("-")) {
                continue;
            }
            anyValue = true;

            if (integerType) {
                try {
                    Integer.valueOf(value);
                } catch (NumberFormatException e) {
                    // Not an integer: fall back to double, checked just below
                    integerType = false;
                    doubleType = true;
                }
            }

            if (doubleType) {
                try {
                    Double.valueOf(value);
                } catch (NumberFormatException e) {
                    doubleType = false;
                }
            }

            if (value.length() > 127) {
                longString = true;
            }
        }

        // integerType starts as true, so without this guard a column with no
        // usable value would be reported as Integer.
        if (!anyValue) {
            return String.class;
        }

        if (integerType) {
            return Integer.class;
        }

        if (doubleType) {
            return Double.class;
        }

        return longString ? Text.class : String.class;
    }

    /**
     * @return the headers chosen as primary keys
     */
    public List<String> getPrimaryKeys() {
        return primaryKeys;
    }

    public void setPrimaryKeys(List<String> primaryKeys) {
        this.primaryKeys = primaryKeys;
    }

    /**
     * First step: choose the file format. Leaving the step samples the file.
     */
    private final class ChooseFormat extends WizardStep {

        public ChooseFormat() {
            super("Create collection", "Choose one file format");

            RadioChoice<String> commandOptions = new RadioChoice<String>("formats",
                    new PropertyModel<String>(CreateCollectionWizard.this, "selected"), FORMATS);
            add(commandOptions);

        }

        /**
         * Samples the source file with the chosen format and reports a read
         * failure as a feedback error.
         */
        @Override
        public void applyState() {
            try {
                readData();
            } catch (IOException e) {
                // An IOException may carry no message; never report null
                String message = e.getMessage();
                error(message != null ? message : e.toString());
            }
        }
    }

    /**
     * Second step: choose which headers are primary keys.
     */
    private final class PrimaryKeys extends WizardStep {

        public PrimaryKeys() {
            super("Create collection", "Select primary keys");
        }

        /**
         * Rebuilds the choice list on every render because the headers are
         * only known after the previous step has been applied.
         */
        @Override
        protected void onBeforeRender() {
            // No explicit model is given; presumably the selection is bound
            // to the "primaryKeys" property through an inherited model -
            // confirm in AbstractWizard.
            addOrReplace(new ListMultipleChoice<String>("primaryKeys", Arrays.asList(headers)));
            super.onBeforeRender();
        }
    }

}