TObjectHash.java

//CHECKSTYLE:OFF
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2001, Eric D. Friedman All Rights Reserved.
// Copyright (c) 2009, Rob Eden All Rights Reserved.
// Copyright (c) 2009, Jeff Randall All Rights Reserved.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
///////////////////////////////////////////////////////////////////////////////

package org.spf4j.stackmonitor;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import gnu.trove.procedure.TObjectProcedure;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;


/**
 * An open addressed hashing implementation for Object types.
 * <p/>
 * Created: Sun Nov  4 08:56:06 2001
 *
 * @author Eric D. Friedman
 * @author Rob Eden
 * @author Jeff Randall
 * @version $Id: TObjectHash.java,v 1.1.2.6 2009/11/07 03:36:44 robeden Exp $
 */
@SuppressFBWarnings
abstract public class TObjectHash<T> extends THash {

    @SuppressWarnings({"UnusedDeclaration"})
    public static final long serialVersionUID = 1L;


    /**
     * the set of Objects
     */
    protected transient Object[] _set;

    public static final Object REMOVED = new Object(), FREE = new Object();


    /**
     * Creates a new <code>TObjectHash</code> instance with the
     * default capacity and load factor.
     */
    public TObjectHash() {
        super();
    }


    /**
     * Creates a new <code>TObjectHash</code> instance whose capacity
     * is the next highest prime above <tt>initialCapacity + 1</tt>
     * unless that value is already prime.
     *
     * @param initialCapacity an <code>int</code> value
     */
    public TObjectHash(int initialCapacity) {
        super(initialCapacity);
    }


    public int capacity() {
        return _set.length;
    }


    protected void removeAt(int index) {
        _set[index] = REMOVED;
        super.removeAt(index);
    }


    /**
     * initializes the Object set of this hash table.
     *
     * @param initialCapacity an <code>int</code> value
     * @return an <code>int</code> value
     */
    public int setUp(int initialCapacity) {
        int capacity;
        capacity = super.setUp(initialCapacity);
        if (capacity == 0) {
          _set = org.spf4j.base.Arrays.EMPTY_OBJ_ARRAY;
          return 0;
        } else {
          _set = new Object[capacity];
        }
        Arrays.fill(_set, FREE);
        return capacity;
    }


    /**
     * Executes <tt>procedure</tt> for each element in the set.
     *
     * @param procedure a <code>TObjectProcedure</code> value
     * @return false if the loop over the set terminated because
     *         the procedure returned false for some value.
     */
    @SuppressWarnings({"unchecked"})
    public boolean forEach(TObjectProcedure<? super T> procedure) {
        Object[] set = _set;
        for (int i = set.length; i-- > 0;) {
            if (set[i] != FREE
                    && set[i] != REMOVED
                    && !procedure.execute((T) set[i])) {
                return false;
            }
        }
        return true;
    }


    /**
     * Searches the set for <tt>obj</tt>
     *
     * @param obj an <code>Object</code> value
     * @return a <code>boolean</code> value
     */
    @SuppressWarnings({"unchecked"})
    public boolean contains(Object obj) {
        return index(obj) >= 0;
    }


    /**
     * Locates the index of <tt>obj</tt>.
     *
     * @param obj an <code>Object</code> value
     * @return the index of <tt>obj</tt> or -1 if it isn't in the set.
     */
    protected int index(Object obj) {
        if (obj == null)
            return indexForNull();

        if (_set.length == 0) {
          return -1;
        }

        // From here on we know obj to be non-null
        final int hash = hash(obj) & 0x7fffffff;
        int index = hash % _set.length;
        Object cur = _set[index];


        if (cur == FREE) {
            return -1;
        }

        if (cur == obj || equals(obj, cur)) {
            return index;
        }

        return indexRehashed(obj, index, hash, cur);
    }

    /**
     * Locates the index of non-null <tt>obj</tt>.
     *
     * @param obj   target key, know to be non-null
     * @param index we start from
     * @param hash
     * @param cur
     * @return
     */
    private int indexRehashed(Object obj, int index, int hash, Object cur) {
        final Object[] set = _set;
        final int length = set.length;

        // NOTE: here it has to be REMOVED or FULL (some user-given value)
        // see Knuth, p. 529
        int probe = 1 + (hash % (length - 2));

        final int loopIndex = index;

        do {
            index -= probe;
            if (index < 0) {
                index += length;
            }
            cur = set[index];
            //
            if (cur == FREE)
                return -1;

            //
            if ((cur == obj || equals(obj, cur)))
                return index;
        } while (index != loopIndex);

        return -1;
    }

    /**
     * Locates the index <tt>null</tt>.
     * <p/>
     * null specific loop exploiting several properties to simplify the iteration logic
     * - the null value hashes to 0 we so we can iterate from the beginning.
     * - the probe value is 1 for this case
     * - object identity can be used to match this case
     * <p/>
     * --> this result a simpler loop
     *
     * @return
     */
    private int indexForNull() {
        int index = 0;
        for (Object o : _set) {
            if (o == null)
                return index;

            if (o == FREE)
                return -1;

            index++;
        }

        return -1;
    }

    /**
     * Alias introduced to avoid breaking the API. The new method name insertKey() reflects the
     * changes made to the logic.
     *
     * @param obj
     * @return
     * @deprecated use {@link #insertKey} instead
     */
    @Deprecated
    protected int insertionIndex(T obj) {
        return insertKey(obj);
    }

    /**
     * Locates the index at which <tt>key</tt> can be inserted.  if
     * there is already a value equal()ing <tt>key</tt> in the set,
     * returns that value's index as <tt>-index - 1</tt>.
     * <p/>
     * If a slot is found the value is inserted. When a FREE slot is used the consumeFreeSlot field is
     * set to true. This field should be used in the method invoking insertKey() to pass to postInsertHook()
     *
     * @param key an <code>Object</code> value
     * @return the index of a FREE slot at which key can be inserted
     *         or, if key is already stored in the hash, the negative value of
     *         that index, minus 1: -index -1.
     */
    protected int insertKey(T key) {
        if (key == null)
            return insertKeyForNull();

        final int hash = hash(key) & 0x7fffffff;
        int index = hash % _set.length;
        Object cur = _set[index];

        if (cur == FREE) {
            _set[index] = key;  // insert value
            return index;       // empty, all done
        }

        if (cur == key || equals(key, cur)) {
            return -index - 1;   // already stored
        }

        return insertKeyRehash(key, index, hash, cur);
    }

    /**
     * Looks for a slot using double hashing for a non-null key values and inserts the value
     * in the slot
     *
     * @param key   non-null key value
     * @param index natural index
     * @param hash
     * @param cur   value of first matched slot
     * @return
     */
    private int insertKeyRehash(T key, int index, int hash, Object cur) {
        final Object[] set = _set;
        final int length = set.length;
        // already FULL or REMOVED, must probe
        // compute the double hash
        final int probe = 1 + (hash % (length - 2));

        final int loopIndex = index;
        int firstRemoved = -1;

        /**
         * Look until FREE slot or we start to loop
         */
        do {
            // Identify first removed slot
            if (cur == REMOVED && firstRemoved == -1)
                firstRemoved = index;

            index -= probe;
            if (index < 0) {
                index += length;
            }
            cur = set[index];

            // A FREE slot stops the search
            if (cur == FREE) {
                if (firstRemoved != -1) {
                    _set[firstRemoved] = key;
                    return firstRemoved;
                } else {
                    _set[index] = key;  // insert value
                    return index;
                }
            }

            if (cur == key || equals(key, cur)) {
                return -index - 1;
            }

            // Detect loop
        } while (index != loopIndex);

        // We inspected all reachable slots and did not find a FREE one
        // If we found a REMOVED slot we return the first one found
        if (firstRemoved != -1) {
            _set[firstRemoved] = key;
            return firstRemoved;
        }

        // Can a resizing strategy be found that resizes the set?
        throw new IllegalStateException("No free or removed slots available. Key set full?!!");
    }

    /**
     * Looks for a slot using double hashing for a null key value and inserts the value.
     * <p/>
     * null specific loop exploiting several properties to simplify the iteration logic
     * - the null value hashes to 0 we so we can iterate from the beginning.
     * - the probe value is 1 for this case
     * - object identity can be used to match this case
     *
     * @return
     */
    private int insertKeyForNull() {
        int index = 0;
        int firstRemoved = -1;

        // Look for a slot containing the 'null' value as key
        for (Object o : _set) {
            // Locate first removed
            if (o == REMOVED && firstRemoved == -1)
                firstRemoved = index;

            if (o == FREE) {
                if (firstRemoved != -1) {
                    _set[firstRemoved] = null;
                    return firstRemoved;
                } else {
                    _set[index] = null;  // insert value
                    return index;
                }
            }

            if (o == null) {
                return -index - 1;
            }

            index++;
        }

        // We inspected all reachable slots and did not find a FREE one
        // If we found a REMOVED slot we return the first one found
        if (firstRemoved != -1) {
            _set[firstRemoved] = null;
            return firstRemoved;
        }

        // We scanned the entire key set and found nothing, is set full?
        // Can a resizing strategy be found that resizes the set?
        throw new IllegalStateException("Could not find insertion index for null key. Key set full!?!!");
    }


    /**
     * Convenience methods for subclasses to use in throwing exceptions about
     * badly behaved user objects employed as keys.  We have to throw an
     * IllegalArgumentException with a rather verbose message telling the
     * user that they need to fix their object implementation to conform
     * to the general contract for java.lang.Object.
     *
     *
     * @param o1 the first of the equal elements with unequal hash codes.
     * @param o2 the second of the equal elements with unequal hash codes.
     * @throws IllegalArgumentException the whole point of this method.
     */
    protected final void throwObjectContractViolation(Object o1, Object o2)
            throws IllegalArgumentException {
        throw buildObjectContractViolation(o1, o2, "");
    }

    /**
     * Convenience methods for subclasses to use in throwing exceptions about
     * badly behaved user objects employed as keys.  We have to throw an
     * IllegalArgumentException with a rather verbose message telling the
     * user that they need to fix their object implementation to conform
     * to the general contract for java.lang.Object.
     *
     *
     * @param o1 the first of the equal elements with unequal hash codes.
     * @param o2 the second of the equal elements with unequal hash codes.
     * @param size
     *@param oldSize
     * @param oldKeys @throws IllegalArgumentException the whole point of this method.
     */
    protected final void throwObjectContractViolation(Object o1, Object o2, int size, int oldSize, Object[] oldKeys)
            throws IllegalArgumentException {
        String extra = dumpExtraInfo(o1, o2, size(), oldSize, oldKeys);


        throw buildObjectContractViolation(o1, o2, extra);
    }

    /**
     * Convenience methods for subclasses to use in throwing exceptions about
     * badly behaved user objects employed as keys.  We have to throw an
     * IllegalArgumentException with a rather verbose message telling the
     * user that they need to fix their object implementation to conform
     * to the general contract for java.lang.Object.
     *
     *
     * @param o1 the first of the equal elements with unequal hash codes.
     * @param o2 the second of the equal elements with unequal hash codes.
     * @throws IllegalArgumentException the whole point of this method.
     */
    protected final IllegalArgumentException buildObjectContractViolation(Object o1, Object o2, String extra ) {
        return new IllegalArgumentException("Equal objects must have equal hashcodes. " +
                "During rehashing, Trove discovered that the following two objects claim " +
                "to be equal (as in java.lang.Object.equals()) but their hashCodes (or " +
                "those calculated by your TObjectHashingStrategy) are not equal." +
                "This violates the general contract of java.lang.Object.hashCode().  See " +
                "bullet point two in that method's documentation. object #1 =" + objectInfo(o1) +
                "; object #2 =" + objectInfo(o2) + "\n" + extra);
    }


    protected boolean equals(Object notnull, Object two) {
        if (two == null || two == REMOVED)
            return false;

        return notnull.equals(two);
    }

    protected int hash(Object notnull) {
        return notnull.hashCode();
    }

    protected static String reportPotentialConcurrentMod(int newSize, int oldSize) {
        // Note that we would not be able to detect concurrent paired of put()-remove()
        // operations with this simple check
        if (newSize != oldSize)
            return "[Warning] apparent concurrent modification of the key set. " +
                    "Size before and after rehash() do not match " + oldSize + " vs " + newSize;

        return "";
    }

    /**
     *
     * @param newVal the key being inserted
     * @param oldVal the key already stored at that position
     * @param currentSize size of the key set during rehashing
     * @param oldSize size of the key set before rehashing
     * @param oldKeys the old key set
     */
    protected String dumpExtraInfo(Object newVal, Object oldVal, int currentSize, int oldSize, Object[] oldKeys) {
        StringBuilder b = new StringBuilder();
        //
        b.append(dumpKeyTypes(newVal, oldVal));

        b.append(reportPotentialConcurrentMod(currentSize, oldSize));
        b.append(detectKeyLoss(oldKeys, oldSize));

        // Is de same object already present? Double insert?
        if (newVal == oldVal) {
            b.append("Inserting same object twice, rehashing bug. Object= ").append(oldVal);
        }

        return b.toString();
    }

    /**
     * Detect inconsistent hashCode() and/or equals() methods
     *
     * @param keys
     * @param oldSize
     * @return
     */
    private static String detectKeyLoss(Object[] keys, int oldSize) {
        StringBuilder buf = new StringBuilder();
        Set<Object> k = makeKeySet(keys);
        if (k.size() != oldSize) {
            buf.append("\nhashCode() and/or equals() have inconsistent implementation");
            buf.append("\nKey set lost entries, now got ").append(k.size()).append(" instead of ").append(oldSize);
            buf.append(". This can manifest itself as an apparent duplicate key.");
        }

        return buf.toString();
    }

    private static Set<Object> makeKeySet(Object[] keys) {
        Set<Object> types = new HashSet<Object>();
        for (Object o : keys) {
            if (o != FREE && o != REMOVED) {
                    types.add(o);
            }
        }

        return types;
    }

    private static String equalsSymmetryInfo(Object a, Object b) {
        StringBuilder buf = new StringBuilder();
        if (a == b) {
            return  "a == b";
        }

        if (a.getClass() != b.getClass()) {
            buf.append("Class of objects differ a=").append(a.getClass()).append(" vs b=").append(b.getClass());

            boolean aEb = a.equals(b);
            boolean bEa = b.equals(a);
            if (aEb != bEa) {
                buf.append("\nequals() of a or b object are asymmetric");
                buf.append("\na.equals(b) =").append(aEb);
                buf.append("\nb.equals(a) =").append(bEa);
            }
        }

        return buf.toString();
    }

    protected static String objectInfo(Object o) {
        return (o == null ? "class null" : o.getClass()) + " id= " + System.identityHashCode(o)
                + " hashCode= " + (o == null ? 0 : o.hashCode()) + " toString= " + String.valueOf(o);
    }

    private String dumpKeyTypes(Object newVal, Object oldVal) {
        StringBuilder buf = new StringBuilder();
        Set<Class<?>> types = new HashSet<Class<?>>();
        for (Object o : _set) {
            if (o != FREE && o != REMOVED) {
                if (o != null)
                    types.add(o.getClass());
                else
                    types.add(null);
            }
        }

        if (types.size() > 1) {
            buf.append("\nMore than one type used for keys. Watch out for asymmetric equals(). " +
                    "Read about the 'Liskov substitution principle' and the implications for equals() in java.");

            buf.append("\nKey types: ").append(types);
            buf.append(equalsSymmetryInfo(newVal, oldVal));
        }

        return buf.toString();
    }


    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        // VERSION
        out.writeByte(0);

        // SUPER
        super.writeExternal(out);
    }


    @Override
    public void readExternal(ObjectInput in)
            throws IOException, ClassNotFoundException {

        // VERSION
        in.readByte();

        // SUPER
        super.readExternal(in);
    }
} // TObjectHash