diff --git a/src/cuckoocache.h b/src/cuckoocache.h
index 0fe1326af..ab2f70faf 100644
--- a/src/cuckoocache.h
+++ b/src/cuckoocache.h
@@ -1,521 +1,534 @@
 // Copyright (c) 2016 Jeremy Rubin
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #ifndef BITCOIN_CUCKOOCACHE_H
 #define BITCOIN_CUCKOOCACHE_H
 
 #include <algorithm>
 #include <array>
 #include <atomic>
 #include <cmath>
 #include <cstring>
 #include <memory>
 #include <vector>
 
 /**
  * High-performance cache primitives.
  *
  * Summary:
  *
  * 1. @ref bit_packed_atomic_flags is bit-packed atomic flags for garbage
  * collection
  *
  * 2. @ref cache is a cache which is performant in memory usage and lookup
  * speed. It is lockfree for erase operations. Elements are lazily erased on the
  * next insert.
  */
 namespace CuckooCache {
 /**
  * @ref bit_packed_atomic_flags implements a container for garbage collection
  * flags that is only thread unsafe on calls to setup. This class bit-packs
  * collection flags for memory efficiency.
  *
  * All operations are `std::memory_order_relaxed` so external mechanisms must
  * ensure that writes and reads are properly synchronized.
  *
  * On setup(n), all bits up to `n` are marked as collected.
  *
  * Under the hood, because it is an 8-bit type, it makes sense to use a multiple
  * of 8 for setup, but it will be safe if that is not the case as well.
  */
 class bit_packed_atomic_flags {
     std::unique_ptr<std::atomic<uint8_t>[]> mem;
 
 public:
     /** No default constructor, as there must be some size. */
     bit_packed_atomic_flags() = delete;
 
     /**
      * bit_packed_atomic_flags constructor creates memory to sufficiently
      * keep track of garbage collection information for `size` entries.
      *
      * @param size the number of elements to allocate space for
      *
      * @post bit_set, bit_unset, and bit_is_set function properly forall x. x <
      * size
      * @post All calls to bit_is_set (without subsequent bit_unset) will return
      * true.
      */
     explicit bit_packed_atomic_flags(uint32_t size) {
         // pad out the size if needed
         size = (size + 7) / 8;
         mem.reset(new std::atomic<uint8_t>[size]);
         for (uint32_t i = 0; i < size; ++i) {
             mem[i].store(0xFF);
         }
     };
 
     /**
      * setup marks all entries and ensures that bit_packed_atomic_flags can
      * store at least `b` entries.
      *
      * @param b the number of elements to allocate space for
      * @post bit_set, bit_unset, and bit_is_set function properly forall x. x <
      * b
      * @post All calls to bit_is_set (without subsequent bit_unset) will return
      * true.
      */
     inline void setup(uint32_t b) {
         bit_packed_atomic_flags d(b);
         std::swap(mem, d.mem);
     }
 
     /**
      * bit_set sets an entry as discardable.
      *
      * @param s the index of the entry to bit_set
      * @post immediately subsequent call (assuming proper external memory
      * ordering) to bit_is_set(s) == true.
      */
     inline void bit_set(uint32_t s) {
         mem[s >> 3].fetch_or(1 << (s & 7), std::memory_order_relaxed);
     }
 
     /**
      * bit_unset marks an entry as something that should not be overwritten.
      *
      * @param s the index of the entry to bit_unset
      * @post immediately subsequent call (assuming proper external memory
      * ordering) to bit_is_set(s) == false.
      */
     inline void bit_unset(uint32_t s) {
         mem[s >> 3].fetch_and(~(1 << (s & 7)), std::memory_order_relaxed);
     }
 
     /**
      * bit_is_set queries the table for discardability at `s`.
      *
      * @param s the index of the entry to read
      * @returns true if the bit at index `s` was set, false otherwise
      * */
     inline bool bit_is_set(uint32_t s) const {
         return (1 << (s & 7)) & mem[s >> 3].load(std::memory_order_relaxed);
     }
 };
 
 /**
  * @ref cache implements a cache with properties similar to a cuckoo-set.
  *
  *  The cache is able to hold up to `(~(uint32_t)0) - 1` elements.
  *
  *  Read Operations:
  *      - contains() for `erase=false`
  *
  *  Read+Erase Operations:
  *      - contains() for `erase=true`
  *
  *  Erase Operations:
  *      - allow_erase()
  *
  *  Write Operations:
  *      - setup()
  *      - setup_bytes()
  *      - insert()
  *      - please_keep()
  *
  *  Synchronization Free Operations:
  *      - invalid()
  *      - compute_hashes()
  *
  * User Must Guarantee:
  *
  * 1. Write requires synchronized access (e.g. a lock)
  * 2. Read requires no concurrent Write, synchronized with last insert.
  * 3. Erase requires no concurrent Write, synchronized with last insert.
  * 4. An Erase caller must release all memory before allowing a new Writer.
  *
  *
  * Note on function names:
  *   - The name "allow_erase" is used because the real discard happens later.
  *   - The name "please_keep" is used because elements may be erased anyways on
  * insert.
  *
  * @tparam Element should be a movable and copyable type
  * @tparam Hash should be a function/callable which takes a template parameter
  * hash_select and an Element and extracts a hash from it. Should return
  * high-entropy uint32_t hashes for `Hash h; h<0>(e) ... h<7>(e)`.
  */
 template <typename Element, typename Hash> class cache {
 private:
     /** table stores all the elements */
     std::vector<Element> table;
 
     /** size stores the total available slots in the hash table */
     uint32_t size;
 
     /**
      * The bit_packed_atomic_flags array is marked mutable because we want
      * garbage collection to be allowed to occur from const methods.
      */
     mutable bit_packed_atomic_flags collection_flags;
 
     /**
      * epoch_flags tracks how recently an element was inserted into the cache.
      * true denotes recent, false denotes not-recent. See insert() method for
      * full semantics.
      */
     mutable std::vector<bool> epoch_flags;
 
     /**
      * epoch_heuristic_counter is used to determine when an epoch might be aged
      * & an expensive scan should be done. epoch_heuristic_counter is
      * decremented on insert and reset to the new number of inserts which would
      * cause the epoch to reach epoch_size when it reaches zero.
      */
     uint32_t epoch_heuristic_counter;
 
     /**
      * epoch_size is set to be the number of elements supposed to be in a epoch.
      * When the number of non-erased elements in an epoch exceeds epoch_size, a
      * new epoch should be started and all current entries demoted. epoch_size
      * is set to be 45% of size because we want to keep load around 90%, and we
      * support 3 epochs at once -- one "dead" which has been erased, one "dying"
      * which has been marked to be erased next, and one "living" which new
      * inserts add to.
      */
     uint32_t epoch_size;
 
     /**
      * depth_limit determines how many elements insert should try to replace.
      * Should be set to log2(n).
      */
     uint8_t depth_limit;
 
     /**
      * hash_function is a const instance of the hash function. It cannot be
      * static or initialized at call time as it may have internal state (such as
      * a nonce).
      */
     const Hash hash_function;
 
     /**
      * compute_hashes is convenience for not having to write out this expression
      * everywhere we use the hash values of an Element.
      *
      * We need to map the 32-bit input hash onto a hash bucket in a range [0,
      * size) in a manner which preserves as much of the hash's uniformity as
      * possible. Ideally this would be done by bitmasking but the size is
      * usually not a power of two.
      *
      * The naive approach would be to use a mod -- which isn't perfectly uniform
      * but so long as the hash is much larger than size it is not that bad.
      * Unfortunately, mod/division is fairly slow on ordinary microprocessors
      * (e.g. 90-ish cycles on haswell, ARM doesn't even have an instruction for
      * it.); when the divisor is a constant the compiler will do clever tricks
      * to turn it into a multiply+add+shift, but size is a run-time value so the
      * compiler can't do that here.
      *
      * One option would be to implement the same trick the compiler uses and
      * compute the constants for exact division based on the size, as described
      * in "{N}-bit Unsigned Division via {N}-bit Multiply-Add" by Arch D.
      * Robison in 2005. But that code is somewhat complicated and the result is
      * still slower than other options:
      *
      * Instead we treat the 32-bit random number as a Q32 fixed-point number in
      * the range [0, 1) and simply multiply it by the size. Then we just shift
      * the result down by 32-bits to get our bucket number. The result has
      * non-uniformity the same as a mod, but it is much faster to compute. More
      * about this technique can be found at
      *  http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
      * .
      *
      * The resulting non-uniformity is also more equally distributed which would
      * be advantageous for something like linear probing, though it shouldn't
      * matter one way or the other for a cuckoo table.
      *
      * The primary disadvantage of this approach is increased intermediate
      * precision is required but for a 32-bit random number we only need the
      * high 32 bits of a 32*32->64 multiply, which means the operation is
      * reasonably fast even on a typical 32-bit processor.
      *
      * @param e The element whose hashes will be returned
      * @returns Deterministic hashes derived from `e` uniformly mapped onto the
      * range [0, size)
      */
     inline std::array<uint32_t, 8> compute_hashes(const Element &e) const {
         return {{uint32_t(uint64_t(hash_function.template operator()<0>(e)) *
                               uint64_t(size) >>
                           32),
                  uint32_t(uint64_t(hash_function.template operator()<1>(e)) *
                               uint64_t(size) >>
                           32),
                  uint32_t(uint64_t(hash_function.template operator()<2>(e)) *
                               uint64_t(size) >>
                           32),
                  uint32_t(uint64_t(hash_function.template operator()<3>(e)) *
                               uint64_t(size) >>
                           32),
                  uint32_t(uint64_t(hash_function.template operator()<4>(e)) *
                               uint64_t(size) >>
                           32),
                  uint32_t(uint64_t(hash_function.template operator()<5>(e)) *
                               uint64_t(size) >>
                           32),
                  uint32_t(uint64_t(hash_function.template operator()<6>(e)) *
                               uint64_t(size) >>
                           32),
                  uint32_t(uint64_t(hash_function.template operator()<7>(e)) *
                               uint64_t(size) >>
                           32)}};
     }
 
     /**
      * invalid returns a special index that can never be inserted to
      * @returns the special constexpr index that can never be inserted to
      */
     constexpr uint32_t invalid() const { return ~uint32_t(0); }
 
     /**
      * allow_erase marks the element at index `n` as discardable. Threadsafe
      * without any concurrent insert.
      * @param n the index to allow erasure of
      */
     inline void allow_erase(uint32_t n) const { collection_flags.bit_set(n); }
 
     /**
      * please_keep marks the element at index `n` as an entry that should be
      * kept. Threadsafe without any concurrent insert.
      * @param n the index to prioritize keeping
      */
     inline void please_keep(uint32_t n) const { collection_flags.bit_unset(n); }
 
     /**
      * epoch_check handles the changing of epochs for elements stored in the
      * cache. epoch_check should be run before every insert.
      *
      * First, epoch_check decrements and checks the cheap heuristic, and then
      * does a more expensive scan if the cheap heuristic runs out. If the
      * expensive scan succeeds, the epochs are aged and old elements are
      * allow_erased. The cheap heuristic is reset to retrigger after the worst
      * case growth of the current epoch's elements would exceed the epoch_size.
      */
     void epoch_check() {
         if (epoch_heuristic_counter != 0) {
             --epoch_heuristic_counter;
             return;
         }
         // count the number of elements from the latest epoch which have not
         // been erased.
         uint32_t epoch_unused_count = 0;
         for (uint32_t i = 0; i < size; ++i) {
             epoch_unused_count +=
                 epoch_flags[i] && !collection_flags.bit_is_set(i);
         }
         // If there are more non-deleted entries in the current epoch than the
         // epoch size, then allow_erase on all elements in the old epoch (marked
         // false) and move all elements in the current epoch to the old epoch
         // but do not call allow_erase on their indices.
         if (epoch_unused_count >= epoch_size) {
             for (uint32_t i = 0; i < size; ++i) {
                 if (epoch_flags[i]) {
                     epoch_flags[i] = false;
                 } else {
                     allow_erase(i);
                 }
             }
             epoch_heuristic_counter = epoch_size;
         } else {
             // reset the epoch_heuristic_counter to next do a scan when worst
             // case behavior (no intermittent erases) would exceed epoch size,
             // with a reasonable minimum scan size. Ordinarily, we would have to
             // sanity check std::min(epoch_size, epoch_unused_count), but we
             // already know that `epoch_unused_count < epoch_size` in this
             // branch
             epoch_heuristic_counter = std::max(
                 1u, std::max(epoch_size / 16, epoch_size - epoch_unused_count));
         }
     }
 
 public:
     /**
      * You must always construct a cache with some elements via a subsequent
      * call to setup or setup_bytes, otherwise operations may segfault.
      */
     cache()
         : table(), size(), collection_flags(0), epoch_flags(),
           epoch_heuristic_counter(), epoch_size(), depth_limit(0),
           hash_function() {}
 
     /**
      * setup initializes the container to store no more than new_size
      * elements.
      *
      * setup should only be called once.
      *
      * @param new_size the desired number of elements to store
      * @returns the maximum number of elements storable
      */
     uint32_t setup(uint32_t new_size) {
         // depth_limit must be at least one otherwise errors can occur.
         depth_limit = static_cast<uint8_t>(
             std::log2(static_cast<float>(std::max((uint32_t)2, new_size))));
         size = std::max<uint32_t>(2, new_size);
         table.resize(size);
         collection_flags.setup(size);
         epoch_flags.resize(size);
         // Set to 45% as described above
         epoch_size = std::max((uint32_t)1, (45 * size) / 100);
         // Initially set to wait for a whole epoch
         epoch_heuristic_counter = epoch_size;
         return size;
     }
 
     /**
      * setup_bytes is a convenience function which accounts for internal memory
      * usage when deciding how many elements to store. It isn't perfect because
      * it doesn't account for any overhead (struct size, MallocUsage, collection
      * and epoch flags). This was done to simplify selecting a power of two
      * size. In the expected use case, an extra two bits per entry should be
      * negligible compared to the size of the elements.
      *
      * @param bytes the approximate number of bytes to use for this data
      * structure
      * @returns the maximum number of elements storable (see setup()
      * documentation for more detail)
      */
     uint32_t setup_bytes(size_t bytes) {
         return setup(bytes / sizeof(Element));
     }
 
     /**
      * insert loops at most depth_limit times trying to insert a hash at various
      * locations in the table via a variant of the Cuckoo Algorithm with eight
      * hash locations.
      *
      * It drops the last tried element if it runs out of depth before
      * encountering an open slot.
      *
      * Thus:
      *
      * ```
      * insert(x);
      * return contains(x, false);
      * ```
      *
      * is not guaranteed to return true.
      *
      * @param e the element to insert
      * @post one of the following: All previously inserted elements and e are
      * now in the table, one previously inserted element is evicted from the
      * table, the entry attempted to be inserted is evicted.
      */
     inline void insert(Element e) {
         epoch_check();
         uint32_t last_loc = invalid();
         bool last_epoch = true;
         std::array<uint32_t, 8> locs = compute_hashes(e);
         // Make sure we have not already inserted this element.
         // If we have, make sure that it does not get deleted.
         for (const uint32_t loc : locs) {
-            if (table[loc] == e) {
+            if (table[loc].matchKey(e)) {
                 please_keep(loc);
                 epoch_flags[loc] = last_epoch;
                 return;
             }
         }
         for (uint8_t depth = 0; depth < depth_limit; ++depth) {
             // First try to insert to an empty slot, if one exists
             for (const uint32_t loc : locs) {
                 if (!collection_flags.bit_is_set(loc)) {
                     continue;
                 }
                 table[loc] = std::move(e);
                 please_keep(loc);
                 epoch_flags[loc] = last_epoch;
                 return;
             }
             /**
              * Swap with the element at the location that was not the last one
              * looked at. Example:
              *
              * 1. On first iteration, last_loc == invalid(), find returns last,
              * so last_loc defaults to locs[0].
              * 2. On further iterations, where last_loc == locs[k], last_loc
              * will go to locs[k+1 % 8], i.e., next of the 8 indices wrapping
              * around to 0 if needed.
              *
              * This prevents moving the element we just put in.
              *
              * The swap is not a move -- we must switch onto the evicted element
              * for the next iteration.
              */
             last_loc =
                 locs[(1 + (std::find(locs.begin(), locs.end(), last_loc) -
                            locs.begin())) &
                      7];
             std::swap(table[last_loc], e);
             // Can't std::swap a std::vector<bool>::reference and a bool&.
             bool epoch = last_epoch;
             last_epoch = epoch_flags[last_loc];
             epoch_flags[last_loc] = epoch;
 
             // Recompute the locs -- unfortunately happens one too many times!
             locs = compute_hashes(e);
         }
     }
 
     /**
      * contains iterates through the hash locations for a given element and
      * checks to see if it is present.
      *
      * contains does not check garbage collected state (in other words, garbage
      * is only collected when the space is needed), so:
      *
      * ```
      * insert(x);
      * if (contains(x, true))
      *     return contains(x, false);
      * else
      *     return true;
      * ```
      *
      * executed on a single thread will always return true!
      *
      * This is a great property for re-org performance for example.
      *
      * contains returns a bool set true if the element was found.
      *
      * @param e the element to check
      * @param erase whether to attempt setting the garbage collect flag
      *
      * @post if erase is true and the element is found, then the garbage collect
      * flag is set
      * @returns true if the element is found, false otherwise
      */
     inline bool contains(const Element &e, const bool erase) const {
         std::array<uint32_t, 8> locs = compute_hashes(e);
         for (const uint32_t loc : locs) {
-            if (table[loc] == e) {
+            if (table[loc].matchKey(e)) {
                 if (erase) {
                     allow_erase(loc);
                 }
                 return true;
             }
         }
         return false;
     }
 };
+
+/**
+ * Helper class used when we only want the cache to be a set rather than a map.
+ */
+template <typename T> struct KeyOnly : public T {
+    // Deliberately non-explicit: a plain T must convert implicitly to KeyOnly.
+    KeyOnly() = default;
+    KeyOnly(const T &x) : T(x) {}
+
+    // matchKey is what cache::insert() and cache::contains() call on a slot.
+    bool matchKey(const T &key) const { return *this == key; }
+};
+
 } // namespace CuckooCache
 
 #endif // BITCOIN_CUCKOOCACHE_H
diff --git a/src/script/scriptcache.cpp b/src/script/scriptcache.cpp
index b1f450634..1795a5f09 100644
--- a/src/script/scriptcache.cpp
+++ b/src/script/scriptcache.cpp
@@ -1,61 +1,62 @@
 // Copyright (c) 2017 The Bitcoin developers
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #include <script/scriptcache.h>
 
 #include <crypto/sha256.h>
 #include <cuckoocache.h>
 #include <primitives/transaction.h>
 #include <random.h>
 #include <script/sigcache.h>
 #include <sync.h>
 #include <util/system.h>
 #include <validation.h>
 
-static CuckooCache::cache<uint256, SignatureCacheHasher> scriptExecutionCache;
+static CuckooCache::cache<CuckooCache::KeyOnly<uint256>, SignatureCacheHasher>
+    scriptExecutionCache;
 static uint256 scriptExecutionCacheNonce(GetRandHash());
 
 void InitScriptExecutionCache() {
     // nMaxCacheSize is unsigned. If -maxscriptcachesize is set to zero,
     // setup_bytes creates the minimum possible cache (2 elements).
     size_t nMaxCacheSize =
         std::min(
             std::max(int64_t(0), gArgs.GetArg("-maxscriptcachesize",
                                               DEFAULT_MAX_SCRIPT_CACHE_SIZE)),
             MAX_MAX_SCRIPT_CACHE_SIZE) *
         (size_t(1) << 20);
     size_t nElems = scriptExecutionCache.setup_bytes(nMaxCacheSize);
     LogPrintf("Using %zu MiB out of %zu requested for script execution cache, "
               "able to store %zu elements\n",
               (nElems * sizeof(uint256)) >> 20, nMaxCacheSize >> 20, nElems);
 }
 
 uint256 GetScriptCacheKey(const CTransaction &tx, uint32_t flags) {
     uint256 key;
     // We only use the first 19 bytes of nonce to avoid a second SHA round -
     // giving us 19 + 32 + 4 = 55 bytes (+ 8 + 1 = 64)
     static_assert(55 - sizeof(flags) - 32 >= 128 / 8,
                   "Want at least 128 bits of nonce for script execution cache");
     CSHA256()
         .Write(scriptExecutionCacheNonce.begin(), 55 - sizeof(flags) - 32)
         .Write(tx.GetHash().begin(), 32)
         .Write((uint8_t *)&flags, sizeof(flags))
         .Finalize(key.begin());
 
     return key;
 }
 
 bool IsKeyInScriptCache(uint256 key, bool erase) {
     // TODO: Remove this requirement by making CuckooCache not require external
     // locks
     AssertLockHeld(cs_main);
     return scriptExecutionCache.contains(key, erase);
 }
 
 void AddKeyInScriptCache(uint256 key) {
     // TODO: Remove this requirement by making CuckooCache not require external
     // locks
     AssertLockHeld(cs_main);
     scriptExecutionCache.insert(key);
 }
diff --git a/src/script/sigcache.cpp b/src/script/sigcache.cpp
index dca305bc3..f7754f1b1 100644
--- a/src/script/sigcache.cpp
+++ b/src/script/sigcache.cpp
@@ -1,115 +1,117 @@
 // Copyright (c) 2009-2010 Satoshi Nakamoto
 // Copyright (c) 2009-2016 The Bitcoin Core developers
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #include <script/sigcache.h>
 
 #include <cuckoocache.h>
 #include <memusage.h>
 #include <pubkey.h>
 #include <random.h>
 #include <uint256.h>
 #include <util/system.h>
 
 #include <boost/thread/shared_mutex.hpp>
 
 namespace {
 
 /**
  * Valid signature cache, to avoid doing expensive ECDSA signature checking
  * twice for every transaction (once when accepted into memory pool, and
  * again when accepted into the block chain)
  */
 class CSignatureCache {
 private:
     //! Entries are SHA256(nonce || signature hash || public key || signature):
     uint256 nonce;
-    typedef CuckooCache::cache<uint256, SignatureCacheHasher> map_type;
+    typedef CuckooCache::cache<CuckooCache::KeyOnly<uint256>,
+                               SignatureCacheHasher>
+        map_type;
     map_type setValid;
     boost::shared_mutex cs_sigcache;
 
 public:
     CSignatureCache() { GetRandBytes(nonce.begin(), 32); }
 
     void ComputeEntry(uint256 &entry, const uint256 &hash,
                       const std::vector<uint8_t> &vchSig,
                       const CPubKey &pubkey) {
         CSHA256()
             .Write(nonce.begin(), 32)
             .Write(hash.begin(), 32)
             .Write(&pubkey[0], pubkey.size())
             .Write(&vchSig[0], vchSig.size())
             .Finalize(entry.begin());
     }
 
     bool Get(const uint256 &entry, const bool erase) {
         boost::shared_lock<boost::shared_mutex> lock(cs_sigcache);
         return setValid.contains(entry, erase);
     }
 
     void Set(uint256 &entry) {
         boost::unique_lock<boost::shared_mutex> lock(cs_sigcache);
         setValid.insert(entry);
     }
     uint32_t setup_bytes(size_t n) { return setValid.setup_bytes(n); }
 };
 
 /**
  * In previous versions of this code, signatureCache was a local static variable
  * in CachingTransactionSignatureChecker::VerifySignature. We initialize
  * signatureCache outside of VerifySignature to avoid the atomic operation per
  * call overhead associated with local static variables even though
  * signatureCache could be made local to VerifySignature.
  */
 static CSignatureCache signatureCache;
 } // namespace
 
 // To be called once in AppInitMain/BasicTestingSetup to initialize the
 // signatureCache.
 void InitSignatureCache() {
     // nMaxCacheSize is unsigned. If -maxsigcachesize is set to zero,
     // setup_bytes creates the minimum possible cache (2 elements).
     size_t nMaxCacheSize =
         std::min(std::max(int64_t(0), gArgs.GetArg("-maxsigcachesize",
                                                    DEFAULT_MAX_SIG_CACHE_SIZE)),
                  MAX_MAX_SIG_CACHE_SIZE) *
         (size_t(1) << 20);
     size_t nElems = signatureCache.setup_bytes(nMaxCacheSize);
     LogPrintf("Using %zu MiB out of %zu requested for signature cache, able to "
               "store %zu elements\n",
               (nElems * sizeof(uint256)) >> 20, nMaxCacheSize >> 20, nElems);
 }
 
 template <typename F>
 bool RunMemoizedCheck(const std::vector<uint8_t> &vchSig, const CPubKey &pubkey,
                       const uint256 &sighash, bool storeOrErase, const F &fun) {
     uint256 entry;
     signatureCache.ComputeEntry(entry, sighash, vchSig, pubkey);
     if (signatureCache.Get(entry, !storeOrErase)) {
         return true;
     }
     if (!fun()) {
         return false;
     }
     if (storeOrErase) {
         signatureCache.Set(entry);
     }
     return true;
 }
 
 bool CachingTransactionSignatureChecker::IsCached(
     const std::vector<uint8_t> &vchSig, const CPubKey &pubkey,
     const uint256 &sighash) const {
     return RunMemoizedCheck(vchSig, pubkey, sighash, true,
                             [] { return false; });
 }
 
 bool CachingTransactionSignatureChecker::VerifySignature(
     const std::vector<uint8_t> &vchSig, const CPubKey &pubkey,
     const uint256 &sighash) const {
     return RunMemoizedCheck(vchSig, pubkey, sighash, store, [&] {
         return TransactionSignatureChecker::VerifySignature(vchSig, pubkey,
                                                             sighash);
     });
 }
diff --git a/src/test/cuckoocache_tests.cpp b/src/test/cuckoocache_tests.cpp
index 90e7b126b..47176e9df 100644
--- a/src/test/cuckoocache_tests.cpp
+++ b/src/test/cuckoocache_tests.cpp
@@ -1,395 +1,399 @@
 // Copyright (c) 2012-2016 The Bitcoin Core developers
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #include <cuckoocache.h>
 
 #include <random.h>
 #include <script/sigcache.h>
 
 #include <test/test_bitcoin.h>
 
 #include <boost/test/unit_test.hpp>
 #include <boost/thread/shared_mutex.hpp>
 
 #include <cstring>
 #include <deque>
 #include <thread>
 #include <vector>
 
 /**
  * Test Suite for CuckooCache
  *
  * 1. All tests should have a deterministic result (using insecure rand
  * with deterministic seeds)
  * 2. Some test methods are templated to allow for easier testing
  * against new versions / comparing
  * 3. Results should be treated as a regression test, i.e., did the behavior
  * change significantly from what was expected. This can be OK, depending on
  * the nature of the change, but requires updating the tests to reflect the new
  * expected behavior. For example improving the hit rate may cause some tests
  * using BOOST_CHECK_CLOSE to fail.
  */
 BOOST_AUTO_TEST_SUITE(cuckoocache_tests);
 
 /**
  * Test that no values not inserted into the cache are read out of it.
  *
  * After deterministic seeding, 100000 values drawn from InsecureRand256() are
  * inserted into a cache set up with 4 << 20 bytes. The next 100000 values from
  * the same generator are then looked up with the erase flag off, and none of
  * them may be reported as present.
  *
  * NOTE(review): this assumes there are no repeats in the first 200000
  * InsecureRand256() outputs after seeding -- confirm if the generator changes.
  */
 BOOST_AUTO_TEST_CASE(test_cuckoocache_no_fakes) {
     SeedInsecureRand(true);
-    CuckooCache::cache<uint256, SignatureCacheHasher> cc{};
+    CuckooCache::cache<CuckooCache::KeyOnly<uint256>, SignatureCacheHasher>
+        cc{};
     size_t megabytes = 4;
     cc.setup_bytes(megabytes << 20);
     // Populate the cache with the first batch of pseudo-random values.
     for (int x = 0; x < 100000; ++x) {
         cc.insert(InsecureRand256());
     }
     // Values that were never inserted must not be found.
     for (int x = 0; x < 100000; ++x) {
         BOOST_CHECK(!cc.contains(InsecureRand256(), false));
     }
 };
 
 /**
  * This helper returns the hit rate when megabytes*load worth of entries are
  * inserted into a megabytes sized cache
  */
 template <typename Cache>
 static double test_cache(size_t megabytes, double load) {
     SeedInsecureRand(true);
     std::vector<uint256> hashes;
     Cache set{};
     size_t bytes = megabytes * (1 << 20);
     set.setup_bytes(bytes);
     uint32_t n_insert = static_cast<uint32_t>(load * (bytes / sizeof(uint256)));
     hashes.resize(n_insert);
     for (uint32_t i = 0; i < n_insert; ++i) {
         uint32_t *ptr = (uint32_t *)hashes[i].begin();
         for (uint8_t j = 0; j < 8; ++j) {
             *(ptr++) = InsecureRand32();
         }
     }
     /**
      * We make a copy of the hashes because future optimizations of the
      * cuckoocache may overwrite the inserted element, so the test is "future
      * proofed".
      */
     std::vector<uint256> hashes_insert_copy = hashes;
     /** Do the insert */
     for (const uint256 &h : hashes_insert_copy) {
         set.insert(h);
     }
     /** Count the hits */
     uint32_t count = 0;
     for (const uint256 &h : hashes) {
         count += set.contains(h, false);
     }
     double hit_rate = double(count) / double(n_insert);
     return hit_rate;
 }
 
 /**
  * The normalized hit rate for a given load.
  *
  * The raw hit rate is scaled by the load whenever the load exceeds 1.0 and is
  * left untouched otherwise.
  *
  * Examples:
  *
  * 1. at load 0.5, we expect a perfect hit rate, so the scale factor is 1.0
  * 2. at load 2.0, we expect to see half the entries, so a perfect hit rate
  * would be 0.5. Therefore, if we see a hit rate of 0.4, 0.4*2.0 = 0.8 is the
  * normalized hit rate.
  *
  * This is basically the right semantics, but has a bit of a glitch depending on
  * how you measure around load 1.0 as after load 1.0 your normalized hit rate
  * becomes effectively perfect, ignoring freshness.
  *
  * @param hits raw hit rate
  * @param load load the hit rate was measured at
  * @return hits multiplied by the larger of load and 1.0
  */
 static double normalize_hit_rate(double hits, double load) {
     const double scale = (load < 1.0) ? 1.0 : load;
     return hits * scale;
 }
 
 /**
  * Check the hit rate at loads 0.1, 0.2, 0.4, 0.8 and 1.6: the load starts at
  * 0.1, doubles every iteration and the loop stops before it reaches 2.
  */
 BOOST_AUTO_TEST_CASE(cuckoocache_hit_rate_ok) {
     /**
      * Arbitrarily selected Hit Rate threshold that happens to work for this
      * test as a lower bound on performance.
      */
     double HitRateThresh = 0.98;
     size_t megabytes = 4;
     for (double load = 0.1; load < 2; load *= 2) {
         double hits =
-            test_cache<CuckooCache::cache<uint256, SignatureCacheHasher>>(
-                megabytes, load);
+            test_cache<CuckooCache::cache<CuckooCache::KeyOnly<uint256>,
+                                          SignatureCacheHasher>>(megabytes,
+                                                                 load);
         // The normalized (load-scaled) hit rate must beat the threshold.
         BOOST_CHECK(normalize_hit_rate(hits, load) > HitRateThresh);
     }
 }
 
 /**
  * This helper checks that erased elements are preferentially inserted onto and
  * that the hit rate of "fresher" keys is reasonable.
  */
 template <typename Cache> static void test_cache_erase(size_t megabytes) {
     double load = 1;
     SeedInsecureRand(true);
     std::vector<uint256> hashes;
     Cache set{};
     size_t bytes = megabytes * (1 << 20);
     set.setup_bytes(bytes);
     uint32_t n_insert = static_cast<uint32_t>(load * (bytes / sizeof(uint256)));
     hashes.resize(n_insert);
     for (uint32_t i = 0; i < n_insert; ++i) {
         uint32_t *ptr = (uint32_t *)hashes[i].begin();
         for (uint8_t j = 0; j < 8; ++j) {
             *(ptr++) = InsecureRand32();
         }
     }
     /**
      * We make a copy of the hashes because future optimizations of the
      * cuckoocache may overwrite the inserted element, so the test is
      * "future proofed".
      */
     std::vector<uint256> hashes_insert_copy = hashes;
 
     /** Insert the first half */
     for (uint32_t i = 0; i < (n_insert / 2); ++i) {
         set.insert(hashes_insert_copy[i]);
     }
     /** Erase the first quarter */
     for (uint32_t i = 0; i < (n_insert / 4); ++i) {
         set.contains(hashes[i], true);
     }
     /** Insert the second half */
     for (uint32_t i = (n_insert / 2); i < n_insert; ++i) {
         set.insert(hashes_insert_copy[i]);
     }
 
     /** elements that we marked as erased but are still there */
     size_t count_erased_but_contained = 0;
     /** elements that we did not erase but are older */
     size_t count_stale = 0;
     /** elements that were most recently inserted */
     size_t count_fresh = 0;
 
     for (uint32_t i = 0; i < (n_insert / 4); ++i) {
         count_erased_but_contained += set.contains(hashes[i], false);
     }
     for (uint32_t i = (n_insert / 4); i < (n_insert / 2); ++i) {
         count_stale += set.contains(hashes[i], false);
     }
     for (uint32_t i = (n_insert / 2); i < n_insert; ++i) {
         count_fresh += set.contains(hashes[i], false);
     }
 
     double hit_rate_erased_but_contained =
         double(count_erased_but_contained) / (double(n_insert) / 4.0);
     double hit_rate_stale = double(count_stale) / (double(n_insert) / 4.0);
     double hit_rate_fresh = double(count_fresh) / (double(n_insert) / 2.0);
 
     // Check that our hit_rate_fresh is perfect
     BOOST_CHECK_EQUAL(hit_rate_fresh, 1.0);
     // Check that we have a more than 2x better hit rate on stale elements than
     // erased elements.
     BOOST_CHECK(hit_rate_stale > 2 * hit_rate_erased_but_contained);
 }
 
 /** Run the single-threaded erase helper, test_cache_erase, with megabytes = 4. */
 BOOST_AUTO_TEST_CASE(cuckoocache_erase_ok) {
     size_t megabytes = 4;
-    test_cache_erase<CuckooCache::cache<uint256, SignatureCacheHasher>>(
-        megabytes);
+    test_cache_erase<CuckooCache::cache<CuckooCache::KeyOnly<uint256>,
+                                        SignatureCacheHasher>>(megabytes);
 }
 
 template <typename Cache>
 static void test_cache_erase_parallel(size_t megabytes) {
     double load = 1;
     SeedInsecureRand(true);
     std::vector<uint256> hashes;
     Cache set{};
     size_t bytes = megabytes * (1 << 20);
     set.setup_bytes(bytes);
     uint32_t n_insert = static_cast<uint32_t>(load * (bytes / sizeof(uint256)));
     hashes.resize(n_insert);
     for (uint32_t i = 0; i < n_insert; ++i) {
         uint32_t *ptr = (uint32_t *)hashes[i].begin();
         for (uint8_t j = 0; j < 8; ++j) {
             *(ptr++) = InsecureRand32();
         }
     }
     /**
      * We make a copy of the hashes because future optimizations of the
      * cuckoocache may overwrite the inserted element, so the test is
      * "future proofed".
      */
     std::vector<uint256> hashes_insert_copy = hashes;
     boost::shared_mutex mtx;
 
     {
         /** Grab lock to make sure we release inserts */
         boost::unique_lock<boost::shared_mutex> l(mtx);
         /** Insert the first half */
         for (uint32_t i = 0; i < (n_insert / 2); ++i) {
             set.insert(hashes_insert_copy[i]);
         }
     }
 
     /**
      * Spin up 3 threads to run contains with erase.
      */
     std::vector<std::thread> threads;
     /** Erase the first quarter */
     for (uint32_t x = 0; x < 3; ++x)
         /** Each thread is emplaced with x copy-by-value */
         threads.emplace_back([&, x] {
             boost::shared_lock<boost::shared_mutex> l(mtx);
             size_t ntodo = (n_insert / 4) / 3;
             size_t start = ntodo * x;
             size_t end = ntodo * (x + 1);
             for (uint32_t i = start; i < end; ++i) {
                 set.contains(hashes[i], true);
             }
         });
 
     /** Wait for all threads to finish */
     for (std::thread &t : threads) {
         t.join();
     }
     /** Grab lock to make sure we observe erases */
     boost::unique_lock<boost::shared_mutex> l(mtx);
     /** Insert the second half */
     for (uint32_t i = (n_insert / 2); i < n_insert; ++i) {
         set.insert(hashes_insert_copy[i]);
     }
 
     /** elements that we marked erased but that are still there */
     size_t count_erased_but_contained = 0;
     /** elements that we did not erase but are older */
     size_t count_stale = 0;
     /** elements that were most recently inserted */
     size_t count_fresh = 0;
 
     for (uint32_t i = 0; i < (n_insert / 4); ++i) {
         count_erased_but_contained += set.contains(hashes[i], false);
     }
     for (uint32_t i = (n_insert / 4); i < (n_insert / 2); ++i) {
         count_stale += set.contains(hashes[i], false);
     }
     for (uint32_t i = (n_insert / 2); i < n_insert; ++i) {
         count_fresh += set.contains(hashes[i], false);
     }
 
     double hit_rate_erased_but_contained =
         double(count_erased_but_contained) / (double(n_insert) / 4.0);
     double hit_rate_stale = double(count_stale) / (double(n_insert) / 4.0);
     double hit_rate_fresh = double(count_fresh) / (double(n_insert) / 2.0);
 
     // Check that our hit_rate_fresh is perfect
     BOOST_CHECK_EQUAL(hit_rate_fresh, 1.0);
     // Check that we have a more than 2x better hit rate on stale elements than
     // erased elements.
     BOOST_CHECK(hit_rate_stale > 2 * hit_rate_erased_but_contained);
 }
 
 /** Run the multi-threaded erase helper, test_cache_erase_parallel, with megabytes = 4. */
 BOOST_AUTO_TEST_CASE(cuckoocache_erase_parallel_ok) {
     size_t megabytes = 4;
-    test_cache_erase_parallel<
-        CuckooCache::cache<uint256, SignatureCacheHasher>>(megabytes);
+    test_cache_erase_parallel<CuckooCache::cache<CuckooCache::KeyOnly<uint256>,
+                                                 SignatureCacheHasher>>(
+        megabytes);
 }
 
 template <typename Cache> static void test_cache_generations() {
     // This test checks that for a simulation of network activity, the fresh hit
     // rate is never below 99%, and the number of times that it is worse than
     // 99.9% are less than 1% of the time.
     double min_hit_rate = 0.99;
     double tight_hit_rate = 0.999;
     double max_rate_less_than_tight_hit_rate = 0.01;
     // A cache that meets this specification is therefore shown to have a hit
     // rate of at least tight_hit_rate * (1 - max_rate_less_than_tight_hit_rate)
     // +
     // min_hit_rate*max_rate_less_than_tight_hit_rate = 0.999*99%+0.99*1% ==
     // 99.89%
     // hit rate with low variance.
 
     // We use deterministic values, but this test has also passed on many
     // iterations with non-deterministic values, so it isn't "overfit" to the
     // specific entropy in FastRandomContext(true) and implementation of the
     // cache.
     SeedInsecureRand(true);
 
     // block_activity models a chunk of network activity. n_insert elements are
     // added to the cache. The first and last n/4 are stored for removal later
     // and the middle n/2 are not stored. This models a network which uses half
     // the signatures of recently (since the last block) added transactions
     // immediately and never uses the other half.
     struct block_activity {
         std::vector<uint256> reads;
         block_activity(uint32_t n_insert, Cache &c) : reads() {
             std::vector<uint256> inserts;
             inserts.resize(n_insert);
             reads.reserve(n_insert / 2);
             for (uint32_t i = 0; i < n_insert; ++i) {
                 uint32_t *ptr = (uint32_t *)inserts[i].begin();
                 for (uint8_t j = 0; j < 8; ++j) {
                     *(ptr++) = InsecureRand32();
                 }
             }
             for (uint32_t i = 0; i < n_insert / 4; ++i) {
                 reads.push_back(inserts[i]);
             }
             for (uint32_t i = n_insert - (n_insert / 4); i < n_insert; ++i) {
                 reads.push_back(inserts[i]);
             }
             for (const auto &h : inserts) {
                 c.insert(h);
             }
         }
     };
 
     const uint32_t BLOCK_SIZE = 1000;
     // We expect window size 60 to perform reasonably given that each epoch
     // stores 45% of the cache size (~472k).
     const uint32_t WINDOW_SIZE = 60;
     const uint32_t POP_AMOUNT = (BLOCK_SIZE / WINDOW_SIZE) / 2;
     const double load = 10;
     const size_t megabytes = 4;
     const size_t bytes = megabytes * (1 << 20);
     const uint32_t n_insert =
         static_cast<uint32_t>(load * (bytes / sizeof(uint256)));
 
     std::vector<block_activity> hashes;
     Cache set{};
     set.setup_bytes(bytes);
     hashes.reserve(n_insert / BLOCK_SIZE);
     std::deque<block_activity> last_few;
     uint32_t out_of_tight_tolerance = 0;
     uint32_t total = n_insert / BLOCK_SIZE;
     // we use the deque last_few to model a sliding window of blocks. at each
     // step, each of the last WINDOW_SIZE block_activities checks the cache for
     // POP_AMOUNT of the hashes that they inserted, and marks these erased.
     for (uint32_t i = 0; i < total; ++i) {
         if (last_few.size() == WINDOW_SIZE) {
             last_few.pop_front();
         }
         last_few.emplace_back(BLOCK_SIZE, set);
         uint32_t count = 0;
         for (auto &act : last_few) {
             for (uint32_t k = 0; k < POP_AMOUNT; ++k) {
                 count += set.contains(act.reads.back(), true);
                 act.reads.pop_back();
             }
         }
         // We use last_few.size() rather than WINDOW_SIZE for the correct
         // behavior on the first WINDOW_SIZE iterations where the deque is not
         // full yet.
         double hit = double(count) / (last_few.size() * POP_AMOUNT);
         // Loose Check that hit rate is above min_hit_rate
         BOOST_CHECK(hit > min_hit_rate);
         // Tighter check, count number of times we are less than tight_hit_rate
         // (and implicitly, greater than min_hit_rate)
         out_of_tight_tolerance += hit < tight_hit_rate;
     }
     // Check that being out of tolerance happens less than
     // max_rate_less_than_tight_hit_rate of the time
     BOOST_CHECK(double(out_of_tight_tolerance) / double(total) <
                 max_rate_less_than_tight_hit_rate);
 }
 /** Run the sliding-window generations helper, test_cache_generations. */
 BOOST_AUTO_TEST_CASE(cuckoocache_generations) {
-    test_cache_generations<CuckooCache::cache<uint256, SignatureCacheHasher>>();
+    test_cache_generations<CuckooCache::cache<CuckooCache::KeyOnly<uint256>,
+                                              SignatureCacheHasher>>();
 }
 
 BOOST_AUTO_TEST_SUITE_END();