view libphobos/libdruntime/rt/aaA.d @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
line wrap: on
line source

/**
 * Implementation of associative arrays.
 *
 * Copyright: Copyright Digital Mars 2000 - 2015.
 * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Martin Nowak
 */
module rt.aaA;

/// AA version for debuggers, bump whenever changing the layout
extern (C) immutable int _aaVersion = 1;

import core.memory : GC;

// grow threshold
private enum GROW_NUM = 4;
private enum GROW_DEN = 5;
// shrink threshold
private enum SHRINK_NUM = 1;
private enum SHRINK_DEN = 8;
// grow factor
private enum GROW_FAC = 4;
// growing the AA doubles it's size, so the shrink threshold must be
// smaller than half the grow threshold to have a hysteresis
static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
// initial load factor (for literals), mean of both thresholds
private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
private enum INIT_DEN = SHRINK_DEN * GROW_DEN;

private enum INIT_NUM_BUCKETS = 8;
// magic hash constants to distinguish empty, deleted, and filled buckets
private enum HASH_EMPTY = 0;
private enum HASH_DELETED = 0x1;
private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;

/// Opaque AA wrapper
struct AA
{
    Impl* impl;
    alias impl this;

    private @property bool empty() const pure nothrow @nogc
    {
        return impl is null || !impl.length;
    }
}

private struct Impl
{
private:
    this(in TypeInfo_AssociativeArray ti, size_t sz = INIT_NUM_BUCKETS)
    {
        keysz = cast(uint) ti.key.tsize;
        valsz = cast(uint) ti.value.tsize;
        buckets = allocBuckets(sz);
        firstUsed = cast(uint) buckets.length;
        entryTI = fakeEntryTI(ti.key, ti.value);
        valoff = cast(uint) talign(keysz, ti.value.talign);

        import rt.lifetime : hasPostblit, unqualify;

        if (hasPostblit(unqualify(ti.key)))
            flags |= Flags.keyHasPostblit;
        if ((ti.key.flags | ti.value.flags) & 1)
            flags |= Flags.hasPointers;
    }

    Bucket[] buckets;
    uint used;
    uint deleted;
    TypeInfo_Struct entryTI;
    uint firstUsed;
    immutable uint keysz;
    immutable uint valsz;
    immutable uint valoff;
    Flags flags;

    enum Flags : ubyte
    {
        none = 0x0,
        keyHasPostblit = 0x1,
        hasPointers = 0x2,
    }

    @property size_t length() const pure nothrow @nogc
    {
        assert(used >= deleted);
        return used - deleted;
    }

    @property size_t dim() const pure nothrow @nogc @safe
    {
        return buckets.length;
    }

    @property size_t mask() const pure nothrow @nogc
    {
        return dim - 1;
    }

    // find the first slot to insert a value with hash
    inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc
    {
        for (size_t i = hash & mask, j = 1;; ++j)
        {
            if (!buckets[i].filled)
                return &buckets[i];
            i = (i + j) & mask;
        }
    }

    // lookup a key
    inout(Bucket)* findSlotLookup(size_t hash, in void* pkey, in TypeInfo keyti) inout
    {
        for (size_t i = hash & mask, j = 1;; ++j)
        {
            if (buckets[i].hash == hash && keyti.equals(pkey, buckets[i].entry))
                return &buckets[i];
            else if (buckets[i].empty)
                return null;
            i = (i + j) & mask;
        }
    }

    void grow(in TypeInfo keyti)
    {
        // If there are so many deleted entries, that growing would push us
        // below the shrink threshold, we just purge deleted entries instead.
        if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
            resize(dim);
        else
            resize(GROW_FAC * dim);
    }

    void shrink(in TypeInfo keyti)
    {
        if (dim > INIT_NUM_BUCKETS)
            resize(dim / GROW_FAC);
    }

    void resize(size_t ndim) pure nothrow
    {
        auto obuckets = buckets;
        buckets = allocBuckets(ndim);

        foreach (ref b; obuckets[firstUsed .. $])
            if (b.filled)
                *findSlotInsert(b.hash) = b;

        firstUsed = 0;
        used -= deleted;
        deleted = 0;
        GC.free(obuckets.ptr); // safe to free b/c impossible to reference
    }

    void clear() pure nothrow
    {
        import core.stdc.string : memset;
        // clear all data, but don't change bucket array length
        memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
        deleted = used = 0;
        firstUsed = cast(uint) dim;
    }
}

//==============================================================================
// Bucket
//------------------------------------------------------------------------------

private struct Bucket
{
private pure nothrow @nogc:
    size_t hash;
    void* entry;

    @property bool empty() const
    {
        return hash == HASH_EMPTY;
    }

    @property bool deleted() const
    {
        return hash == HASH_DELETED;
    }

    @property bool filled() const @safe
    {
        return cast(ptrdiff_t) hash < 0;
    }
}

Bucket[] allocBuckets(size_t dim) @trusted pure nothrow
{
    enum attr = GC.BlkAttr.NO_INTERIOR;
    immutable sz = dim * Bucket.sizeof;
    return (cast(Bucket*) GC.calloc(sz, attr))[0 .. dim];
}

//==============================================================================
// Entry
//------------------------------------------------------------------------------

private void* allocEntry(in Impl* aa, in void* pkey)
{
    import rt.lifetime : _d_newitemU;
    import core.stdc.string : memcpy, memset;

    immutable akeysz = aa.valoff;
    void* res = void;
    if (aa.entryTI)
        res = _d_newitemU(aa.entryTI);
    else
    {
        auto flags = (aa.flags & Impl.Flags.hasPointers) ? 0 : GC.BlkAttr.NO_SCAN;
        res = GC.malloc(akeysz + aa.valsz, flags);
    }

    memcpy(res, pkey, aa.keysz); // copy key
    memset(res + akeysz, 0, aa.valsz); // zero value

    return res;
}

package void entryDtor(void* p, const TypeInfo_Struct sti)
{
    // key and value type info stored after the TypeInfo_Struct by tiEntry()
    auto sizeti = __traits(classInstanceSize, TypeInfo_Struct);
    auto extra = cast(const(TypeInfo)*)(cast(void*) sti + sizeti);
    extra[0].destroy(p);
    extra[1].destroy(p + talign(extra[0].tsize, extra[1].talign));
}

private bool hasDtor(const TypeInfo ti)
{
    import rt.lifetime : unqualify;

    if (typeid(ti) is typeid(TypeInfo_Struct))
        if ((cast(TypeInfo_Struct) cast(void*) ti).xdtor)
            return true;
    if (typeid(ti) is typeid(TypeInfo_StaticArray))
        return hasDtor(unqualify(ti.next));

    return false;
}

// build type info for Entry with additional key and value fields
TypeInfo_Struct fakeEntryTI(const TypeInfo keyti, const TypeInfo valti)
{
    import rt.lifetime : unqualify;

    auto kti = unqualify(keyti);
    auto vti = unqualify(valti);
    if (!hasDtor(kti) && !hasDtor(vti))
        return null;

    // save kti and vti after type info for struct
    enum sizeti = __traits(classInstanceSize, TypeInfo_Struct);
    void* p = GC.malloc(sizeti + 2 * (void*).sizeof);
    import core.stdc.string : memcpy;

    memcpy(p, typeid(TypeInfo_Struct).initializer().ptr, sizeti);

    auto ti = cast(TypeInfo_Struct) p;
    auto extra = cast(TypeInfo*)(p + sizeti);
    extra[0] = cast() kti;
    extra[1] = cast() vti;

    static immutable tiName = __MODULE__ ~ ".Entry!(...)";
    ti.name = tiName;

    // we don't expect the Entry objects to be used outside of this module, so we have control
    // over the non-usage of the callback methods and other entries and can keep these null
    // xtoHash, xopEquals, xopCmp, xtoString and xpostblit
    ti.m_RTInfo = null;
    immutable entrySize = talign(kti.tsize, vti.talign) + vti.tsize;
    ti.m_init = (cast(ubyte*) null)[0 .. entrySize]; // init length, but not ptr

    // xdtor needs to be built from the dtors of key and value for the GC
    ti.xdtorti = &entryDtor;

    ti.m_flags = TypeInfo_Struct.StructFlags.isDynamicType;
    ti.m_flags |= (keyti.flags | valti.flags) & TypeInfo_Struct.StructFlags.hasPointers;
    ti.m_align = cast(uint) max(kti.talign, vti.talign);

    return ti;
}

//==============================================================================
// Helper functions
//------------------------------------------------------------------------------

private size_t talign(size_t tsize, size_t algn) @safe pure nothrow @nogc
{
    immutable mask = algn - 1;
    assert(!(mask & algn));
    return (tsize + mask) & ~mask;
}

// mix hash to "fix" bad hash functions
private size_t mix(size_t h) @safe pure nothrow @nogc
{
    // final mix function of MurmurHash2
    enum m = 0x5bd1e995;
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;
    return h;
}

private size_t calcHash(in void* pkey, in TypeInfo keyti)
{
    immutable hash = keyti.getHash(pkey);
    // highest bit is set to distinguish empty/deleted from filled buckets
    return mix(hash) | HASH_FILLED_MARK;
}

private size_t nextpow2(in size_t n) pure nothrow @nogc
{
    import core.bitop : bsr;

    if (!n)
        return 1;

    const isPowerOf2 = !((n - 1) & n);
    return 1 << (bsr(n) + !isPowerOf2);
}

pure nothrow @nogc unittest
{
    //                            0, 1, 2, 3, 4, 5, 6, 7, 8,  9
    foreach (const n, const pow2; [1, 1, 2, 4, 4, 8, 8, 8, 8, 16])
        assert(nextpow2(n) == pow2);
}

private T min(T)(T a, T b) pure nothrow @nogc
{
    return a < b ? a : b;
}

private T max(T)(T a, T b) pure nothrow @nogc
{
    return b < a ? a : b;
}

//==============================================================================
// API Implementation
//------------------------------------------------------------------------------

/// Determine number of entries in associative array.
extern (C) size_t _aaLen(in AA aa) pure nothrow @nogc
{
    return aa ? aa.length : 0;
}

/******************************
 * Lookup *pkey in aa.
 * Called only from implementation of (aa[key]) expressions when value is mutable.
 * Params:
 *      aa = associative array opaque pointer
 *      ti = TypeInfo for the associative array
 *      valsz = ignored
 *      pkey = pointer to the key value
 * Returns:
 *      if key was in the aa, a mutable pointer to the existing value.
 *      If key was not in the aa, a mutable pointer to newly inserted value which
 *      is set to all zeros
 */
extern (C) void* _aaGetY(AA* aa, const TypeInfo_AssociativeArray ti,
    in size_t valsz, in void* pkey)
{
    bool found;
    return _aaGetX(aa, ti, valsz, pkey, found);
}

/******************************
 * Lookup *pkey in aa.
 * Called only from implementation of require
 * Params:
 *      aa = associative array opaque pointer
 *      ti = TypeInfo for the associative array
 *      valsz = ignored
 *      pkey = pointer to the key value
 *      found = true if the value was found
 * Returns:
 *      if key was in the aa, a mutable pointer to the existing value.
 *      If key was not in the aa, a mutable pointer to newly inserted value which
 *      is set to all zeros
 */
extern (C) void* _aaGetX(AA* aa, const TypeInfo_AssociativeArray ti,
    in size_t valsz, in void* pkey, out bool found)
{
    // lazily alloc implementation
    if (aa.impl is null)
        aa.impl = new Impl(ti);

    // get hash and bucket for key
    immutable hash = calcHash(pkey, ti.key);

    // found a value => return it
    if (auto p = aa.findSlotLookup(hash, pkey, ti.key))
    {
        found = true;
        return p.entry + aa.valoff;
    }

    auto p = aa.findSlotInsert(hash);
    if (p.deleted)
        --aa.deleted;
    // check load factor and possibly grow
    else if (++aa.used * GROW_DEN > aa.dim * GROW_NUM)
    {
        aa.grow(ti.key);
        p = aa.findSlotInsert(hash);
        assert(p.empty);
    }

    // update search cache and allocate entry
    aa.firstUsed = min(aa.firstUsed, cast(uint)(p - aa.buckets.ptr));
    p.hash = hash;
    p.entry = allocEntry(aa.impl, pkey);
    // postblit for key
    if (aa.flags & Impl.Flags.keyHasPostblit)
    {
        import rt.lifetime : __doPostblit, unqualify;

        __doPostblit(p.entry, aa.keysz, unqualify(ti.key));
    }
    // return pointer to value
    return p.entry + aa.valoff;
}

/******************************
 * Lookup *pkey in aa.
 * Called only from implementation of (aa[key]) expressions when value is not mutable.
 * Params:
 *      aa = associative array opaque pointer
 *      keyti = TypeInfo for the key
 *      valsz = ignored
 *      pkey = pointer to the key value
 * Returns:
 *      pointer to value if present, null otherwise
 */
extern (C) inout(void)* _aaGetRvalueX(inout AA aa, in TypeInfo keyti, in size_t valsz,
    in void* pkey)
{
    return _aaInX(aa, keyti, pkey);
}

/******************************
 * Lookup *pkey in aa.
 * Called only from implementation of (key in aa) expressions.
 * Params:
 *      aa = associative array opaque pointer
 *      keyti = TypeInfo for the key
 *      pkey = pointer to the key value
 * Returns:
 *      pointer to value if present, null otherwise
 */
extern (C) inout(void)* _aaInX(inout AA aa, in TypeInfo keyti, in void* pkey)
{
    if (aa.empty)
        return null;

    immutable hash = calcHash(pkey, keyti);
    if (auto p = aa.findSlotLookup(hash, pkey, keyti))
        return p.entry + aa.valoff;
    return null;
}

/// Delete entry in AA, return true if it was present
extern (C) bool _aaDelX(AA aa, in TypeInfo keyti, in void* pkey)
{
    if (aa.empty)
        return false;

    immutable hash = calcHash(pkey, keyti);
    if (auto p = aa.findSlotLookup(hash, pkey, keyti))
    {
        // clear entry
        p.hash = HASH_DELETED;
        p.entry = null;

        ++aa.deleted;
        if (aa.length * SHRINK_DEN < aa.dim * SHRINK_NUM)
            aa.shrink(keyti);

        return true;
    }
    return false;
}

/// Remove all elements from AA.
extern (C) void _aaClear(AA aa) pure nothrow
{
    if (!aa.empty)
    {
        aa.impl.clear();
    }
}

/// Rehash AA
extern (C) void* _aaRehash(AA* paa, in TypeInfo keyti) pure nothrow
{
    if (!paa.empty)
        paa.resize(nextpow2(INIT_DEN * paa.length / INIT_NUM));
    return *paa;
}

/// Return a GC allocated array of all values
extern (C) inout(void[]) _aaValues(inout AA aa, in size_t keysz, in size_t valsz,
    const TypeInfo tiValueArray) pure nothrow
{
    if (aa.empty)
        return null;

    import rt.lifetime : _d_newarrayU;

    auto res = _d_newarrayU(tiValueArray, aa.length).ptr;
    auto pval = res;

    immutable off = aa.valoff;
    foreach (b; aa.buckets[aa.firstUsed .. $])
    {
        if (!b.filled)
            continue;
        pval[0 .. valsz] = b.entry[off .. valsz + off];
        pval += valsz;
    }
    // postblit is done in object.values
    return (cast(inout(void)*) res)[0 .. aa.length]; // fake length, return number of elements
}

/// Return a GC allocated array of all keys
extern (C) inout(void[]) _aaKeys(inout AA aa, in size_t keysz, const TypeInfo tiKeyArray) pure nothrow
{
    if (aa.empty)
        return null;

    import rt.lifetime : _d_newarrayU;

    auto res = _d_newarrayU(tiKeyArray, aa.length).ptr;
    auto pkey = res;

    foreach (b; aa.buckets[aa.firstUsed .. $])
    {
        if (!b.filled)
            continue;
        pkey[0 .. keysz] = b.entry[0 .. keysz];
        pkey += keysz;
    }
    // postblit is done in object.keys
    return (cast(inout(void)*) res)[0 .. aa.length]; // fake length, return number of elements
}

// opApply callbacks are extern(D)
extern (D) alias dg_t = int delegate(void*);
extern (D) alias dg2_t = int delegate(void*, void*);

/// foreach opApply over all values
extern (C) int _aaApply(AA aa, in size_t keysz, dg_t dg)
{
    if (aa.empty)
        return 0;

    immutable off = aa.valoff;
    foreach (b; aa.buckets)
    {
        if (!b.filled)
            continue;
        if (auto res = dg(b.entry + off))
            return res;
    }
    return 0;
}

/// foreach opApply over all key/value pairs
extern (C) int _aaApply2(AA aa, in size_t keysz, dg2_t dg)
{
    if (aa.empty)
        return 0;

    immutable off = aa.valoff;
    foreach (b; aa.buckets)
    {
        if (!b.filled)
            continue;
        if (auto res = dg(b.entry, b.entry + off))
            return res;
    }
    return 0;
}

/// Construct an associative array of type ti from keys and value
extern (C) Impl* _d_assocarrayliteralTX(const TypeInfo_AssociativeArray ti, void[] keys,
    void[] vals)
{
    assert(keys.length == vals.length);

    immutable keysz = ti.key.tsize;
    immutable valsz = ti.value.tsize;
    immutable length = keys.length;

    if (!length)
        return null;

    auto aa = new Impl(ti, nextpow2(INIT_DEN * length / INIT_NUM));

    void* pkey = keys.ptr;
    void* pval = vals.ptr;
    immutable off = aa.valoff;
    uint actualLength = 0;
    foreach (_; 0 .. length)
    {
        immutable hash = calcHash(pkey, ti.key);

        auto p = aa.findSlotLookup(hash, pkey, ti.key);
        if (p is null)
        {
            p = aa.findSlotInsert(hash);
            p.hash = hash;
            p.entry = allocEntry(aa, pkey); // move key, no postblit
            aa.firstUsed = min(aa.firstUsed, cast(uint)(p - aa.buckets.ptr));
            actualLength++;
        }
        else if (aa.entryTI && hasDtor(ti.value))
        {
            // destroy existing value before overwriting it
            ti.value.destroy(p.entry + off);
        }
        // set hash and blit value
        auto pdst = p.entry + off;
        pdst[0 .. valsz] = pval[0 .. valsz]; // move value, no postblit

        pkey += keysz;
        pval += valsz;
    }
    aa.used = actualLength;
    return aa;
}

/// compares 2 AAs for equality
extern (C) int _aaEqual(in TypeInfo tiRaw, in AA aa1, in AA aa2)
{
    if (aa1.impl is aa2.impl)
        return true;

    immutable len = _aaLen(aa1);
    if (len != _aaLen(aa2))
        return false;

    if (!len) // both empty
        return true;

    import rt.lifetime : unqualify;

    auto uti = unqualify(tiRaw);
    auto ti = *cast(TypeInfo_AssociativeArray*)&uti;
    // compare the entries
    immutable off = aa1.valoff;
    foreach (b1; aa1.buckets)
    {
        if (!b1.filled)
            continue;
        auto pb2 = aa2.findSlotLookup(b1.hash, b1.entry, ti.key);
        if (pb2 is null || !ti.value.equals(b1.entry + off, pb2.entry + off))
            return false;
    }
    return true;
}

/// compute a hash
extern (C) hash_t _aaGetHash(in AA* aa, in TypeInfo tiRaw) nothrow
{
    if (aa.empty)
        return 0;

    import rt.lifetime : unqualify;

    auto uti = unqualify(tiRaw);
    auto ti = *cast(TypeInfo_AssociativeArray*)&uti;
    immutable off = aa.valoff;
    auto keyHash = &ti.key.getHash;
    auto valHash = &ti.value.getHash;

    size_t h;
    foreach (b; aa.buckets)
    {
        if (!b.filled)
            continue;
        size_t[2] h2 = [keyHash(b.entry), valHash(b.entry + off)];
        // use addition here, so that hash is independent of element order
        h += hashOf(h2);
    }

    return h;
}

/**
 * _aaRange implements a ForwardRange
 */
struct Range
{
    Impl* impl;
    size_t idx;
    alias impl this;
}

extern (C) pure nothrow @nogc @safe
{
    Range _aaRange(AA aa)
    {
        if (!aa)
            return Range();

        foreach (i; aa.firstUsed .. aa.dim)
        {
            if (aa.buckets[i].filled)
                return Range(aa.impl, i);
        }
        return Range(aa, aa.dim);
    }

    bool _aaRangeEmpty(Range r)
    {
        return r.impl is null || r.idx >= r.dim;
    }

    void* _aaRangeFrontKey(Range r)
    {
        assert(!_aaRangeEmpty(r));
        if (r.idx >= r.dim)
            return null;
        return r.buckets[r.idx].entry;
    }

    void* _aaRangeFrontValue(Range r)
    {
        assert(!_aaRangeEmpty(r));
        if (r.idx >= r.dim)
            return null;

        auto entry = r.buckets[r.idx].entry;
        return entry is null ?
            null :
            (() @trusted { return entry + r.valoff; } ());
    }

    void _aaRangePopFront(ref Range r)
    {
        if (r.idx >= r.dim) return;
        for (++r.idx; r.idx < r.dim; ++r.idx)
        {
            if (r.buckets[r.idx].filled)
                break;
        }
    }
}

// Most tests are now in in test_aa.d

// test postblit for AA literals
unittest
{
    static struct T
    {
        ubyte field;
        static size_t postblit, dtor;
        this(this)
        {
            ++postblit;
        }

        ~this()
        {
            ++dtor;
        }
    }

    T t;
    auto aa1 = [0 : t, 1 : t];
    assert(T.dtor == 0 && T.postblit == 2);
    aa1[0] = t;
    assert(T.dtor == 1 && T.postblit == 3);

    T.dtor = 0;
    T.postblit = 0;

    auto aa2 = [0 : t, 1 : t, 0 : t]; // literal with duplicate key => value overwritten
    assert(T.dtor == 1 && T.postblit == 3);

    T.dtor = 0;
    T.postblit = 0;

    auto aa3 = [t : 0];
    assert(T.dtor == 0 && T.postblit == 1);
    aa3[t] = 1;
    assert(T.dtor == 0 && T.postblit == 1);
    aa3.remove(t);
    assert(T.dtor == 0 && T.postblit == 1);
    aa3[t] = 2;
    assert(T.dtor == 0 && T.postblit == 2);

    // dtor will be called by GC finalizers
    aa1 = null;
    aa2 = null;
    aa3 = null;
    GC.runFinalizers((cast(char*)(&entryDtor))[0 .. 1]);
    assert(T.dtor == 6 && T.postblit == 2);
}