org.apache.nutch.crawl.MapWritable.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nutch.crawl.MapWritable.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nutch.crawl;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;

// Commons Logging imports
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.protocol.ProtocolStatus;

/**
 * A writable map that behaves similarly to <code>java.util.HashMap</code>.
 * In addition to the serialized key and value, two extra bytes are stored
 * for every entry to identify the Writable classes of the key and the value.
 * This means that a maximum of 255 different class types can be used for
 * key and value objects.
 * A binary-id to class mapping is defined in a static block of this class.
 * However, it is also possible to use custom implementations of Writable.
 * For these custom Writables the (byte id, UTF class name) tuples are written
 * into the header of each MapWritable that uses these types.
 *
 * @author Stefan Groschupf
 * @deprecated Use org.apache.hadoop.io.MapWritable instead.
 */
public class MapWritable implements Writable {

    /** Logger shared by all instances of this class. */
    public static final Log LOG = LogFactory.getLog(MapWritable.class);

    /** Head of the singly linked chain of live entries; <code>null</code> when the map is empty. */
    private KeyValueEntry fFirst;

    /** Tail of the live entry chain; new entries are appended behind it. */
    private KeyValueEntry fLast;

    /**
     * Chain that was live before the last <code>clear()</code>. It is searched
     * by <code>getKeyValueEntry</code> so that entry objects can be recycled
     * during deserialization.
     */
    private KeyValueEntry fOld;

    /** Number of live entries. */
    private int fSize = 0;

    /**
     * Number of per-instance (non built-in) class ids. It is written as the
     * header count by <code>write</code> and used to derive the next free id.
     */
    private int fIdCount = 0;

    /** Tail of the per-instance class-id chain. */
    private ClassIdEntry fIdLast;

    /** Head of the per-instance class-id chain; <code>null</code> when no custom class is registered. */
    private ClassIdEntry fIdFirst;

    /**
     * Built-in class to id table. It is only modified through
     * <code>addToMap</code>, which is called from the static initializer, so
     * the reference itself never needs to change.
     */
    private static final Map<Class, Byte> CLASS_ID_MAP = new HashMap<Class, Byte>();

    /** Built-in id to class table; the inverse of <code>CLASS_ID_MAP</code>. */
    private static final Map<Byte, Class> ID_CLASS_MAP = new HashMap<Byte, Class>();

    // Built-in class ids. The value -128 is deliberately left unassigned:
    // getClassId returns it to signal "no id known for this class".
    // Byte.valueOf is used instead of the deprecated boxing constructor.
    static {

        addToMap(NullWritable.class, Byte.valueOf((byte) -127));
        addToMap(LongWritable.class, Byte.valueOf((byte) -126));
        addToMap(Text.class, Byte.valueOf((byte) -125));
        addToMap(MD5Hash.class, Byte.valueOf((byte) -124));
        addToMap(org.apache.nutch.fetcher.FetcherOutput.class, Byte.valueOf((byte) -123));
        addToMap(org.apache.nutch.protocol.Content.class, Byte.valueOf((byte) -122));
        addToMap(org.apache.nutch.parse.ParseText.class, Byte.valueOf((byte) -121));
        addToMap(org.apache.nutch.parse.ParseData.class, Byte.valueOf((byte) -120));
        addToMap(MapWritable.class, Byte.valueOf((byte) -119));
        addToMap(BytesWritable.class, Byte.valueOf((byte) -118));
        addToMap(FloatWritable.class, Byte.valueOf((byte) -117));
        addToMap(IntWritable.class, Byte.valueOf((byte) -116));
        addToMap(ObjectWritable.class, Byte.valueOf((byte) -115));
        addToMap(ProtocolStatus.class, Byte.valueOf((byte) -114));

    }

    /**
     * Registers a built-in class under the given id in both static lookup
     * tables, so the pair can be resolved in either direction.
     *
     * @param clazz  the class to register
     * @param byteId the id to associate with <code>clazz</code>
     */
    private static void addToMap(Class clazz, Byte byteId) {
        ID_CLASS_MAP.put(byteId, clazz);
        CLASS_ID_MAP.put(clazz, byteId);
    }

    /** Creates an empty map. */
    public MapWritable() {
        super();
    }

    /**
     * Copy constructor. The source map is serialized into an in-memory buffer
     * and this instance is populated by deserializing that buffer, so the copy
     * shares no key or value objects with the source.
     *
     * @param map map to copy from; <code>null</code> yields an empty map
     * @throws IllegalArgumentException if serializing or deserializing the
     *         source map fails with an <code>IOException</code>
     */
    public MapWritable(MapWritable map) {
        if (map == null) {
            return;
        }
        try {
            DataOutputBuffer serialized = new DataOutputBuffer();
            map.write(serialized);

            DataInputBuffer source = new DataInputBuffer();
            source.reset(serialized.getData(), serialized.getLength());
            readFields(source);
        } catch (IOException e) {
            String detail = StringUtils.stringifyException(e);
            throw new IllegalArgumentException("this map cannot be copied: " + detail);
        }
    }

    /**
     * Removes all entries. The previously live chain is kept in
     * <code>fOld</code> (replacing whatever was parked there before) so that
     * its entry objects can be recycled by <code>getKeyValueEntry</code>.
     */
    public void clear() {
        fOld = fFirst;
        fFirst = null;
        fLast = null;
        fSize = 0;
    }

    /**
     * Tells whether an entry with the given key exists.
     *
     * @param key the key to look for
     * @return <code>true</code> if some stored key equals <code>key</code>
     */
    public boolean containsKey(Writable key) {
        KeyValueEntry match = findEntryByKey(key);
        return match != null;
    }

    /**
     * Tells whether at least one entry holds the given value. The entry chain
     * is scanned from head to tail and each stored value is compared with
     * <code>equals</code>.
     *
     * @param value the value to look for
     * @return <code>true</code> if some stored value equals <code>value</code>
     */
    public boolean containsValue(Writable value) {
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            if (cursor.fValue.equals(value)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks up the value stored under the given key.
     *
     * @param key the key to look for
     * @return the associated value, or <code>null</code> if the key is absent
     */
    public Writable get(Writable key) {
        KeyValueEntry match = findEntryByKey(key);
        return (match == null) ? null : match.fValue;
    }

    /**
     * Computes a hash from the hash codes of all keys and values. The
     * contributions are summed, so the result does not depend on the order of
     * the entries.
     *
     * @return the hash code; <code>0</code> for an empty map
     */
    @Override
    public int hashCode() {
        final int seed = 23;
        int result = 0;
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            result += seed * cursor.fKey.hashCode();
            result += seed * cursor.fValue.hashCode();
        }
        return result;
    }

    /**
     * Tells whether the map holds no entries. This is decided by the absence
     * of a head entry, not by the size counter.
     *
     * @return <code>true</code> if there is no entry
     */
    public boolean isEmpty() {
        return null == fFirst;
    }

    /**
     * Collects all keys into a newly created <code>HashSet</code>. The set is
     * a snapshot: changing it does not affect this map.
     *
     * @return a new set with every key; empty if the map is empty
     */
    public Set<Writable> keySet() {
        Set<Writable> keys = new HashSet<Writable>();
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            keys.add(cursor.fKey);
        }
        return keys;
    }

    /**
     * Associates the value with the key. If an entry with an equal key already
     * exists its value is replaced in place; otherwise a new entry is appended
     * to the end of the chain.
     *
     * @param key   the key
     * @param value the value to store under <code>key</code>
     * @return the value previously stored under the key, or <code>null</code>
     *         if a new entry was created
     */
    public Writable put(Writable key, Writable value) {
        KeyValueEntry existing = findEntryByKey(key);
        if (existing != null) {
            Writable previous = existing.fValue;
            existing.fValue = value;
            return previous;
        }

        KeyValueEntry appended = new KeyValueEntry(key, value);
        if (fLast == null) {
            // no tail yet: the new entry also becomes the head
            fFirst = appended;
        } else {
            fLast.fNextEntry = appended;
        }
        fLast = appended;
        fSize++;
        return null;
    }

    /**
     * Copies every mapping of the given map into this map, replacing the value
     * of keys that are already present. Key and value objects are shared with
     * the source map, not cloned.
     *
     * <p>The source chain is walked directly, so mappings are added in the
     * source map's own order and no temporary key set or per-key lookup is
     * needed.
     *
     * @param map the map to copy from; <code>null</code> or an empty map is a no-op
     */
    public void putAll(MapWritable map) {
        if (map == null || map.size() == 0) {
            return;
        }
        // Safe even for putAll(this): every key already exists, so put() only
        // rewrites values and never relinks the chain being walked.
        for (KeyValueEntry source = map.fFirst; source != null; source = source.fNextEntry) {
            put(source.fKey, source.fValue);
        }
    }

    /**
     * Removes the first entry whose key equals the given key.
     *
     * @param key the key of the entry to remove
     * @return the value of the removed entry, or <code>null</code> if no entry
     *         with that key exists
     */
    public Writable remove(Writable key) {
        KeyValueEntry predecessor = null;
        for (KeyValueEntry entry = fFirst; entry != null; entry = entry.fNextEntry) {
            if (entry.fKey.equals(key)) {
                if (predecessor == null) {
                    // removing the head
                    fFirst = entry.fNextEntry;
                } else {
                    predecessor.fNextEntry = entry.fNextEntry;
                }
                // Identity comparison on purpose: KeyValueEntry.equals compares
                // key and value, which could match a different node and move
                // the tail pointer to the wrong place.
                if (fLast == entry) {
                    fLast = predecessor;
                }
                fSize--;
                return entry.fValue;
            }
            predecessor = entry;
        }
        return null;
    }

    /**
     * Returns the value of the entry counter.
     *
     * @return the number of entries tracked by this map
     */
    public int size() {
        return this.fSize;
    }

    /**
     * Collects all values, in chain order, into a newly created list. The
     * returned collection is a snapshot: changing it does not affect this map.
     *
     * @return a new collection with every value; empty if the map is empty
     */
    public Collection<Writable> values() {
        LinkedList<Writable> collected = new LinkedList<Writable>();
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            collected.add(cursor.fValue);
        }
        return collected;
    }

    /**
     * Compares this map with another object. Two maps are equal when their
     * size counters match and the sets built from their entries are equal, so
     * the order of the entries is irrelevant.
     *
     * @param obj the object to compare with
     * @return <code>true</code> if <code>obj</code> is a
     *         <code>MapWritable</code> with an equal set of entries
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof MapWritable)) {
            return false;
        }
        MapWritable other = (MapWritable) obj;
        if (fSize != other.fSize) {
            return false;
        }

        Set<KeyValueEntry> mine = new HashSet<KeyValueEntry>();
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            mine.add(cursor);
        }

        Set<KeyValueEntry> theirs = new HashSet<KeyValueEntry>();
        for (KeyValueEntry cursor = other.fFirst; cursor != null; cursor = cursor.fNextEntry) {
            theirs.add(cursor);
        }

        return mine.equals(theirs);
    }

    /**
     * Renders all entries in chain order, each as <code>key:value</code>
     * followed by a single space.
     *
     * <p>An empty map yields the empty string. <code>null</code> is never
     * returned, so the result can be used without a null check.
     *
     * @return the textual form of this map, never <code>null</code>
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        for (KeyValueEntry entry = fFirst; entry != null; entry = entry.fNextEntry) {
            text.append(entry.toString());
            text.append(" ");
        }
        return text.toString();
    }

    /**
     * Scans the entry chain from the head for the first entry whose key equals
     * the given key.
     *
     * @param key the key to look for
     * @return the matching entry, or <code>null</code> if there is none
     */
    private KeyValueEntry findEntryByKey(final Writable key) {
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            if (cursor.fKey.equals(key)) {
                return cursor;
            }
        }
        return null;
    }

    // serialization methods

    /**
     * Serializes this map. Layout: the entry count as an int; then, only if
     * the count is positive, one byte with the number of per-instance class
     * ids, the (id byte, class name) pairs of the per-instance id chain, and
     * finally for every entry the key class id, the value class id, the key
     * and the value.
     *
     * @param out the sink to write to
     * @throws IOException if writing to <code>out</code> fails
     */
    public void write(DataOutput out) throws IOException {
        final int entryCount = size();
        out.writeInt(entryCount);
        if (entryCount <= 0) {
            return;
        }

        // assign ids to classes that are neither built in nor registered yet
        createInternalIdClassEntries();

        // header: per-instance id table
        out.writeByte(fIdCount);
        if (fIdCount > 0) {
            for (ClassIdEntry idEntry = fIdFirst; idEntry != null; idEntry = idEntry.fNextIdEntry) {
                out.writeByte(idEntry.fId);
                Text.writeString(out, idEntry.fclazz.getName());
            }
        }

        // body: class ids followed by the serialized key and value
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            out.writeByte(cursor.fKeyClassId);
            out.writeByte(cursor.fValueClassId);

            cursor.fKey.write(out);
            cursor.fValue.write(out);
        }
    }

    /**
     * Replaces the content of this map with the data read from the stream,
     * using the layout produced by <code>write</code>. Entries that were live
     * before the call are recycled where their key and value classes match.
     *
     * <p>Fixes compared with the former implementation:
     * <ul>
     * <li>The per-instance class-id table is discarded first. On a reused
     * instance it used to keep growing with stale and duplicate ids, so an id
     * could resolve to a class from an earlier record and <code>write</code>
     * could emit a header count that disagreed with the table it wrote.</li>
     * <li>Both loops run over the counts declared in the stream. The loop
     * bounds are no longer decremented on failure, which silently skipped
     * records that were present in the stream.</li>
     * <li>The id count is read as an unsigned byte, matching
     * <code>writeByte</code>, which stores the low eight bits.</li>
     * <li>The size counter only counts entries that were actually linked, so
     * it always agrees with the entry chain (and is never negative).</li>
     * </ul>
     *
     * <p>Loading stays best effort: a class-id record that cannot be loaded is
     * logged and skipped, and the first entry that cannot be read is logged
     * and ends the entry loop, because the stream position can no longer be
     * trusted after a failed entry.
     *
     * @param in the source to read from
     * @throws IOException if reading the entry count or the id count fails
     */
    public void readFields(DataInput in) throws IOException {
        clear();
        // forget the class ids of whatever this instance held before
        fIdFirst = fIdLast = null;
        fIdCount = 0;

        final int declaredSize = in.readInt();
        if (declaredSize <= 0) {
            return;
        }

        // read class-id map
        final int declaredIdCount = in.readByte() & 0xff;
        for (int i = 0; i < declaredIdCount; i++) {
            try {
                byte id = in.readByte();
                Class clazz = Class.forName(Text.readString(in));
                addIdEntry(id, clazz);
                fIdCount++;
            } catch (Exception e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Unable to load internal map entry" + e.toString());
                }
            }
        }

        // read entries
        for (int i = 0; i < declaredSize; i++) {
            try {
                KeyValueEntry entry = getKeyValueEntry(in.readByte(), in.readByte());
                entry.fKey.readFields(in);
                entry.fValue.readFields(in);
                if (fFirst == null) {
                    fFirst = fLast = entry;
                } else {
                    fLast.fNextEntry = entry;
                    fLast = entry;
                }
                fSize++;
            } catch (IOException e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Unable to load meta data entry, ignoring.. : " + e.toString());
                }
                // The failed entry was not consumed completely; anything read
                // after this point would be decoded from the wrong offset.
                break;
            }
        }
    }

    /**
     * Stores the class id of the key and of the value in every live entry.
     * A class that has no id yet (<code>getClassId</code> answers -128) is
     * appended to the per-instance id chain with the next free id, computed as
     * -128 plus the number of built-in ids plus the incremented per-instance
     * counter.
     */
    private void createInternalIdClassEntries() {
        for (KeyValueEntry cursor = fFirst; cursor != null; cursor = cursor.fNextEntry) {
            Class keyClass = cursor.fKey.getClass();
            byte keyId = getClassId(keyClass);
            if (keyId == -128) {
                fIdCount++;
                keyId = addIdEntry((byte) (-128 + CLASS_ID_MAP.size() + fIdCount), keyClass);
            }
            cursor.fKeyClassId = keyId;

            // Looked up only after the key class was registered, so a value of
            // the same class reuses the id that was just assigned.
            Class valueClass = cursor.fValue.getClass();
            byte valueId = getClassId(valueClass);
            if (valueId == -128) {
                fIdCount++;
                valueId = addIdEntry((byte) (-128 + CLASS_ID_MAP.size() + fIdCount), valueClass);
            }
            cursor.fValueClassId = valueId;
        }
    }

    /**
     * Appends an (id, class) pair to the end of the per-instance id chain.
     *
     * @param id    the id to register
     * @param clazz the class identified by <code>id</code>
     * @return <code>id</code>, unchanged
     */
    private byte addIdEntry(byte id, Class clazz) {
        ClassIdEntry appended = new ClassIdEntry(id, clazz);
        if (fIdFirst == null) {
            fIdFirst = appended;
        } else {
            fIdLast.fNextIdEntry = appended;
        }
        fIdLast = appended;
        return id;
    }

    /**
     * Resolves the id of a class. The static built-in table is consulted
     * first, then the per-instance id chain.
     *
     * @param clazz the class to resolve
     * @return the id of the class, or -128 if the class has no id
     */
    private byte getClassId(Class clazz) {
        Byte builtIn = CLASS_ID_MAP.get(clazz);
        if (builtIn != null) {
            return builtIn.byteValue();
        }
        for (ClassIdEntry idEntry = fIdFirst; idEntry != null; idEntry = idEntry.fNextIdEntry) {
            if (idEntry.fclazz.equals(clazz)) {
                return idEntry.fId;
            }
        }
        return -128;
    }

    /**
     * Provides an entry whose key and value objects have the classes
     * identified by the given ids. The chain of old entries is searched first;
     * a match is unlinked from that chain and handed out for reuse. If nothing
     * matches, a new entry is built from fresh key and value instances.
     *
     * @param keyId   class id of the key
     * @param valueId class id of the value
     * @return a recycled or newly created entry that is not linked to any chain
     * @throws IOException if an id cannot be resolved to a class or one of the
     *         classes cannot be instantiated
     */
    private KeyValueEntry getKeyValueEntry(final byte keyId, final byte valueId) throws IOException {
        KeyValueEntry previous = null;
        for (KeyValueEntry candidate = fOld; candidate != null; candidate = candidate.fNextEntry) {
            boolean keyMatches = getClassId(candidate.fKey.getClass()) == keyId;
            boolean valueMatches = getClassId(candidate.fValue.getClass()) == valueId;
            if (keyMatches && valueMatches) {
                // unlink the candidate from the chain of old entries
                if (previous == null) {
                    fOld = candidate.fNextEntry;
                } else {
                    previous.fNextEntry = candidate.fNextEntry;
                }
                candidate.fNextEntry = null; // reset next entry
                return candidate;
            }
            previous = candidate;
        }

        // nothing to recycle: instantiate key and value reflectively
        Class keyClass = getClass(keyId);
        Class valueClass = getClass(valueId);
        try {
            Writable key = (Writable) keyClass.newInstance();
            Writable value = (Writable) valueClass.newInstance();
            return new KeyValueEntry(key, value);
        } catch (Exception e) {
            throw new IOException("unable to instantiate class: " + e.toString());
        }
    }

    /**
     * Resolves a class id to its class. The static built-in table is consulted
     * first, then the per-instance id chain.
     *
     * @param id the class id to resolve
     * @return the class registered under <code>id</code>
     * @throws IOException if no class is registered under <code>id</code>
     */
    private Class getClass(final byte id) throws IOException {
        Class builtIn = ID_CLASS_MAP.get(Byte.valueOf(id));
        if (builtIn != null) {
            return builtIn;
        }
        for (ClassIdEntry idEntry = fIdFirst; idEntry != null; idEntry = idEntry.fNextIdEntry) {
            if (idEntry.fId == id) {
                return idEntry.fclazz;
            }
        }
        throw new IOException("unable to load class for id: " + id);
    }

    /** an entry holds writable key and value */
    private class KeyValueEntry {
        private byte fKeyClassId;

        private byte fValueClassId;

        private Writable fKey;

        private Writable fValue;

        private KeyValueEntry fNextEntry;

        public KeyValueEntry(Writable key, Writable value) {
            this.fKey = key;
            this.fValue = value;
        }

        public String toString() {
            return fKey.toString() + ":" + fValue.toString();
        }

        public boolean equals(Object obj) {
            if (obj instanceof KeyValueEntry) {
                KeyValueEntry entry = (KeyValueEntry) obj;
                return entry.fKey.equals(fKey) && entry.fValue.equals(fValue);
            }
            return false;
        }

        public int hashCode() {
            return toString().hashCode();
        }
    }

    /** container for Id class tuples */
    private class ClassIdEntry {
        public ClassIdEntry(byte id, Class clazz) {
            fId = id;
            fclazz = clazz;
        }

        private byte fId;

        private Class fclazz;

        private ClassIdEntry fNextIdEntry;
    }

}