org.lenskit.data.store.PackedEntityCollectionBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.lenskit.data.store.PackedEntityCollectionBuilder.java

Source

/*
 * LensKit, an open-source toolkit for recommender systems.
 * Copyright 2014-2017 LensKit contributors (see CONTRIBUTORS.md)
 * Copyright 2010-2014 Regents of the University of Minnesota
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package org.lenskit.data.store;

import com.google.common.base.Preconditions;
import com.google.common.primitives.Longs;
import com.google.common.reflect.TypeToken;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Swapper;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import org.lenskit.data.entities.*;
import org.lenskit.util.BinarySearch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Entity collection builder packing data into shards.
 */
class PackedEntityCollectionBuilder extends EntityCollectionBuilder {
    private static final Logger logger = LoggerFactory.getLogger(PackedEntityCollectionBuilder.class);
    private final EntityType entityType;
    private final AttributeSet attributes;
    private final LongAttrStoreBuilder idStore;
    private final AttrStoreBuilder[] storeBuilders;
    private final Class<? extends EntityBuilder> entityBuilderClass;
    private boolean needIndex[];
    private LongSet ids = null;
    private boolean isSorted = true;
    private int size = 0;
    private long lastEntityId = Long.MIN_VALUE;

    PackedEntityCollectionBuilder(EntityType et, AttributeSet attrs, Class<? extends EntityBuilder> ebc) {
        Preconditions.checkArgument(attrs.size() > 0, "attribute set is emtpy");
        Preconditions.checkArgument(attrs.size() < 32, "cannot have more than 31 attributes");
        Preconditions.checkArgument(attrs.getAttribute(0) == CommonAttributes.ENTITY_ID,
                "attribute set does not contain entity ID attribute");
        entityType = et;
        attributes = attrs;
        int n = attrs.size();
        storeBuilders = new AttrStoreBuilder[n];
        needIndex = new boolean[n];
        idStore = new LongAttrStoreBuilder();
        storeBuilders[0] = idStore;
        for (int i = 1; i < n; i++) {
            TypedName<?> attr = attrs.getAttribute(i);
            AttrStoreBuilder asb;
            if (attr.getType().equals(TypeToken.of(Long.class))) {
                logger.debug("{}: storing  long column {}", et, attr.getName());
                asb = new LongAttrStoreBuilder();
            } else if (attr.getType().equals(TypeToken.of(Integer.class))) {
                logger.debug("{}: storing int column {}", et, attr.getName());
                asb = new AttrStoreBuilder(IntShard::create);
            } else if (attr.getType().equals(TypeToken.of(Double.class))) {
                logger.debug("{}: storing double column {}", et, attr.getName());
                asb = new DoubleAttrStoreBuilder();
            } else {
                logger.debug("{}: storing object column {}", et, attr);
                asb = new AttrStoreBuilder(ObjectShard::new);
            }
            storeBuilders[i] = asb;
        }

        entityBuilderClass = ebc;

    }

    @Override
    public <T> EntityCollectionBuilder addIndex(TypedName<T> attribute) {
        int pos = attributes.lookup(attribute);
        if (pos >= 0) {
            needIndex[pos] = true;
        }
        return this;
    }

    @Override
    public EntityCollectionBuilder addIndex(String attrName) {
        int pos = attributes.lookup(attrName);
        if (pos >= 0) {
            needIndex[pos] = true;
        }
        return this;
    }

    private PackIndex buildIndex(int aidx) {
        TypedName<?> tn = attributes.getAttribute(aidx);
        logger.debug("indexing column {} of {}", tn, entityType);
        PackIndex.Builder builder;
        if (tn.getRawType().equals(Long.class)) {
            builder = new PackIndex.LongBuilder();
        } else {
            builder = new PackIndex.GenericBuilder();
        }
        for (int i = 0; i < size; i++) {
            builder.add(storeBuilders[aidx].get(i), i);
        }
        return builder.build();
    }

    @Override
    public EntityCollectionBuilder add(Entity e, boolean replace) {
        long id = e.getId();
        isSorted &= id > lastEntityId;

        if (!isSorted) {
            if (ids == null) {
                ids = new LongOpenHashSet();
                for (int i = 0; i < size; i++) {
                    ids.add((long) storeBuilders[0].get(i));
                }
            }

            if (ids.contains(id)) {
                if (replace) {
                    throw new UnsupportedOperationException("packed builder cannot replace entities");
                } else {
                    return this; // don't replace existing id
                }
            }
        } else if (!replace) {
            BinarySearch search = new IdSearch(id);
            int res = search.search(0, size);
            if (res <= 0) {
                return this;
            }
        }

        for (Attribute<?> a : e.getAttributes()) {
            int ap = attributes.lookup(a.getTypedName());
            if (ap >= 0) {
                storeBuilders[ap].add(a.getValue());
            }
        }
        size += 1;
        lastEntityId = id;
        if (ids != null) {
            ids.add(id);
        }

        for (AttrStoreBuilder storeBuilder : storeBuilders) {
            if (storeBuilder.size() < size) {
                assert storeBuilder.size() == size - 1;
                storeBuilder.skip();
            }
        }

        return this;
    }

    @Override
    public Iterable<Entity> entities() {
        AttrStore[] stores = new AttrStore[storeBuilders.length];
        for (int i = 0; i < stores.length; i++) {
            stores[i] = storeBuilders[i].tempBuild();
        }
        // the packed collection is not fully functional! But it will be iterable.
        return new PackedEntityCollection(entityType, attributes, stores, new PackIndex[attributes.size()],
                entityBuilderClass);
    }

    @Override
    public EntityCollection build() {
        if (!isSorted) {
            Arrays.quickSort(0, size, this::compareIds, new SortSwap());
        }
        AttrStore[] stores = new AttrStore[storeBuilders.length];
        PackIndex[] indexes = new PackIndex[needIndex.length];
        for (int i = 0; i < stores.length; i++) {
            stores[i] = storeBuilders[i].build();
            if (needIndex[i]) {
                indexes[i] = buildIndex(i);
            }
        }
        return new PackedEntityCollection(entityType, attributes, stores, indexes, entityBuilderClass);
    }

    private class IdSearch extends BinarySearch {
        private final long targetId;

        IdSearch(long id) {
            targetId = id;
        }

        @Override
        protected int test(int pos) {
            return Longs.compare(targetId, idStore.getLong(pos));
        }
    }

    private int compareIds(int k1, int k2) {
        return Longs.compare(idStore.getLong(k1), idStore.getLong(k2));
    }

    private class SortSwap implements Swapper {
        @Override
        public void swap(int a, int b) {
            for (AttrStoreBuilder asb : storeBuilders) {
                asb.swap(a, b);
            }
        }
    }
}