Commit 107eb11

tier admission

1 parent acdfa0b · commit 107eb11

22 files changed: 368 additions, 49 deletions

cachelib/allocator/Cache.h

Lines changed: 6 additions & 0 deletions
@@ -93,6 +93,12 @@ class CacheBase {
   //
   // @param poolId The pool id to query
   virtual const MemoryPool& getPool(PoolId poolId) const = 0;
+
+  // Get the reference to a memory pool using a tier id, for stats purposes
+  //
+  // @param poolId The pool id to query
+  // @param tierId The tier of the pool id
+  virtual const MemoryPool& getPoolByTid(PoolId poolId, TierId tid) const = 0;
 
   // Get Pool specific stats (regular pools). This includes stats from the
   // Memory Pool and also the cache.
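
The override in CacheAllocator.h (below) routes this through the chosen tier's allocator. As a rough illustration of the intended stats use, a hypothetical helper, not part of this commit, could sum a pool's footprint across tiers; poolSizeAcrossTiers and its numTiers parameter are made up here, and getPoolSize() is assumed to be the existing MemoryPool accessor:

#include "cachelib/allocator/Cache.h"

using namespace facebook::cachelib;

// Hypothetical helper, not part of this commit: aggregate a pool's size
// across all memory tiers via the new per-tier accessor.
size_t poolSizeAcrossTiers(const CacheBase& cache, PoolId pid, TierId numTiers) {
  size_t total = 0;
  for (TierId tid = 0; tid < numTiers; ++tid) {
    total += cache.getPoolByTid(pid, tid).getPoolSize();  // assumed accessor
  }
  return total;
}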

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 107 additions & 22 deletions
@@ -362,7 +362,7 @@ CacheAllocator<CacheTrait>::allocate(PoolId poolId,
     creationTime = util::getCurrentTimeSec();
   }
   return allocateInternal(poolId, key, size, creationTime,
-                          ttlSecs == 0 ? 0 : creationTime + ttlSecs);
+                          ttlSecs == 0 ? 0 : creationTime + ttlSecs, false);
 }
 
 template <typename CacheTrait>
@@ -372,7 +372,8 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
                                              typename Item::Key key,
                                              uint32_t size,
                                              uint32_t creationTime,
-                                             uint32_t expiryTime) {
+                                             uint32_t expiryTime,
+                                             bool fromEvictorThread) {
   util::LatencyTracker tracker{stats().allocateLatency_};
 
   SCOPE_FAIL { stats_.invalidAllocs.inc(); };
@@ -387,14 +388,27 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
   // TODO: per-tier
   (*stats_.allocAttempts)[pid][cid].inc();
 
-  void* memory = allocator_[tid]->allocate(pid, requiredSize);
+  void* memory = nullptr;
+
+  if (tid == 0 && config_.acTopTierEvictionWatermark > 0.0
+      && getAllocationClassStats(tid, pid, cid)
+             .approxFreePercent < config_.acTopTierEvictionWatermark) {
+    memory = findEviction(tid, pid, cid);
+  }
+
+  if (memory == nullptr) {
+    // TODO: should we try to allocate the item even if this will result in
+    // violating acTopTierEvictionWatermark?
+    memory = allocator_[tid]->allocate(pid, requiredSize);
+  }
+
   // TODO: Today disableEviction means do not evict from memory (DRAM).
   // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)?
   if (memory == nullptr && !config_.disableEviction) {
     memory = findEviction(tid, pid, cid);
   }
 
-  ItemHandle handle;
+  WriteHandle handle;
   if (memory != nullptr) {
     // At this point, we have a valid memory allocation that is ready for use.
     // Ensure that when we abort from here under any circumstances, we free up
@@ -431,18 +445,71 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
 }
 
 template <typename CacheTrait>
-typename CacheAllocator<CacheTrait>::WriteHandle
-CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
+TierId
+CacheAllocator<CacheTrait>::getTargetTierForItem(PoolId pid,
                                              typename Item::Key key,
                                              uint32_t size,
                                              uint32_t creationTime,
                                              uint32_t expiryTime) {
-  auto tid = 0; /* TODO: consult admission policy */
-  for(TierId tid = 0; tid < numTiers_; ++tid) {
-    auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime);
-    if (handle) return handle;
+  if (numTiers_ == 1)
+    return 0;
+
+  if (config_.forceAllocationTier != UINT64_MAX) {
+    return config_.forceAllocationTier;
   }
-  return {};
+
+  const TierId defaultTargetTier = 0;
+
+  const auto requiredSize = Item::getRequiredSize(key, size);
+  const auto cid = allocator_[defaultTargetTier]->getAllocationClassId(pid, requiredSize);
+
+  auto freePercentage = getAllocationClassStats(defaultTargetTier, pid, cid).approxFreePercent;
+
+  // TODO: could we implement a BG worker which would move slabs around
+  // so that there is a similar amount of free space in each pool/AC?
+  // Should this be the responsibility of the BG evictor?
+
+  if (freePercentage >= config_.maxAcAllocationWatermark)
+    return defaultTargetTier;
+
+  if (freePercentage <= config_.minAcAllocationWatermark)
+    return defaultTargetTier + 1;
+
+  // TODO: we could even create different allocation classes for PMEM
+  // and look at possible fragmentation when deciding where to put the item
+  if (config_.sizeThresholdPolicy)
+    return requiredSize < config_.sizeThresholdPolicy ? defaultTargetTier : defaultTargetTier + 1;
+
+  // TODO: (e.g. always put chained items to PMEM)
+  // if (chainedItemsPolicy)
+  //   return item.isChainedItem() ? defaultTargetTier + 1 : defaultTargetTier;
+
+  // TODO:
+  // if (expiryTimePolicy)
+  //   return (expiryTime - creationTime) < expiryTimePolicy ? defaultTargetTier : defaultTargetTier + 1;
+
+  // TODO:
+  // if (keyPolicy) // this can be based on key length or some other properties
+  //   return getTargetTierForKey(key);
+
+  // TODO:
+  // if (compressabilityPolicy) // if it compresses well, store it in PMEM? latency will be higher anyway
+  //   return TODO;
+
+  // TODO: only works for 2 tiers
+  return (folly::Random::rand32() % 100) < config_.defaultTierChancePercentage ? defaultTargetTier : defaultTargetTier + 1;
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
+                                             typename Item::Key key,
+                                             uint32_t size,
+                                             uint32_t creationTime,
+                                             uint32_t expiryTime,
+                                             bool fromEvictorThread) {
+  auto tid = getTargetTierForItem(pid, key, size, creationTime, expiryTime);
+  return allocateInternalTier(tid, pid, key, size, creationTime, expiryTime, fromEvictorThread);
 }
 
 template <typename CacheTrait>
@@ -1608,25 +1675,38 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
   return true;
 }
 
+template <typename CacheTrait>
+bool CacheAllocator<CacheTrait>::shouldEvictToNextMemoryTier(
+    TierId sourceTierId, TierId targetTierId, PoolId pid, Item& item)
+{
+  if (config_.disableEvictionToMemory)
+    return false;
+
+  // TODO: implement more advanced admission policies for memory tiers
+  return true;
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
 CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
-    TierId tid, PoolId pid, Item& item) {
-  if (item.isChainedItem()) return {}; // TODO: We do not support ChainedItem yet
+    TierId tid, PoolId pid, Item& item, bool fromEvictorThread) {
   if (item.isExpired()) return acquire(&item);
 
-  TierId nextTier = tid; // TODO - calculate this based on some admission policy
+  TierId nextTier = tid;
   while (++nextTier < numTiers_) { // try to evict down to the next memory tiers
+    if (!shouldEvictToNextMemoryTier(tid, nextTier, pid, item))
+      continue;
+
     // allocateInternal might trigger another eviction
     auto newItemHdl = allocateInternalTier(nextTier, pid,
                                            item.getKey(),
                                            item.getSize(),
                                            item.getCreationTime(),
-                                           item.getExpiryTime());
+                                           item.getExpiryTime(),
+                                           fromEvictorThread);
 
     if (newItemHdl) {
       XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
-
       return moveRegularItemOnEviction(item, newItemHdl);
     }
   }
@@ -1636,10 +1716,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
 
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
-CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(Item& item) {
+CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(Item& item, bool fromEvictorThread) {
   auto tid = getTierId(item);
   auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId;
-  return tryEvictToNextMemoryTier(tid, pid, item);
+  return tryEvictToNextMemoryTier(tid, pid, item, fromEvictorThread);
 }
 
 template <typename CacheTrait>
@@ -2361,6 +2441,10 @@ void CacheAllocator<CacheTrait>::createMMContainers(const PoolId pid,
                         .getAllocsPerSlab()
                   : 0);
   for (TierId tid = 0; tid < numTiers_; tid++) {
+    if constexpr (std::is_same_v<MMConfig, MMLru::Config> || std::is_same_v<MMConfig, MM2Q::Config>) {
+      config.lruInsertionPointSpec = config_.memoryTierConfigs[tid].lruInsertionPointSpec;
+      config.markUsefulChance = config_.memoryTierConfigs[tid].markUsefulChance;
+    }
     mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_));
   }
 }
@@ -2415,7 +2499,7 @@ std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIds() const {
   folly::SharedMutex::ReadHolder r(poolsResizeAndRebalanceLock_);
   // TODO - get rid of the duplication - right now, each tier
   // holds pool objects with mostly the same info
-  return filterCompactCachePools(allocator_[0]->getPoolIds());
+  return filterCompactCachePools(allocator_[currentTier()]->getPoolIds());
 }
 
 template <typename CacheTrait>
@@ -2828,7 +2912,8 @@ CacheAllocator<CacheTrait>::allocateNewItemForOldItem(const Item& oldItem) {
                                          oldItem.getKey(),
                                          oldItem.getSize(),
                                          oldItem.getCreationTime(),
-                                         oldItem.getExpiryTime());
+                                         oldItem.getExpiryTime(),
+                                         false);
   if (!newItemHdl) {
     return {};
   }
@@ -2961,14 +3046,14 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::ItemHandle
 CacheAllocator<CacheTrait>::evictNormalItem(Item& item,
-                                            bool skipIfTokenInvalid) {
+                                            bool skipIfTokenInvalid, bool fromEvictorThread) {
   XDCHECK(item.isMoving());
 
   if (item.isOnlyMoving()) {
     return ItemHandle{};
  }
 
-  auto evictHandle = tryEvictToNextMemoryTier(item);
+  auto evictHandle = tryEvictToNextMemoryTier(item, fromEvictorThread);
   if (evictHandle) return evictHandle;
 
   auto predicate = [](const Item& it) { return it.getRefCount() == 0; };
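
Read together, these hunks form the admission path: allocateInternal() now consults getTargetTierForItem(), which picks a tier from the free-space watermarks of the top tier's allocation class, an optional size threshold, and finally a biased coin flip; allocateInternalTier() may additionally evict from DRAM ahead of allocation once acTopTierEvictionWatermark is crossed. The decision core can be condensed into a standalone sketch; TierAdmissionConfig and pickTier are illustrative stand-ins for this commit's config_ fields, not CacheLib API:

#include <cstdint>
#include <random>

// Illustrative stand-in for the new CacheAllocatorConfig knobs.
struct TierAdmissionConfig {
  double minAcAllocationWatermark{0.0};   // at or below this free %: use the lower tier
  double maxAcAllocationWatermark{0.0};   // at or above this free %: stay in the top tier
  uint64_t sizeThresholdPolicy{0};        // 0 disables the size-based policy
  double defaultTierChancePercentage{50.0};
};

// Returns 0 (top tier, DRAM) or 1 (next tier) for a 2-tier setup, mirroring
// the order of checks in getTargetTierForItem() above.
int pickTier(const TierAdmissionConfig& cfg, double freePercent,
             uint32_t requiredSize, std::mt19937& rng) {
  if (freePercent >= cfg.maxAcAllocationWatermark) return 0;
  if (freePercent <= cfg.minAcAllocationWatermark) return 1;
  if (cfg.sizeThresholdPolicy)
    return requiredSize < cfg.sizeThresholdPolicy ? 0 : 1;
  // Between the watermarks with no size policy: biased coin flip.
  return (rng() % 100) < cfg.defaultTierChancePercentage ? 0 : 1;
}

Note that with the default watermarks (both 0.0), freePercent >= maxAcAllocationWatermark always holds, so every allocation lands in tier 0; the multi-tier policy is effectively opt-in.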

cachelib/allocator/CacheAllocator.h

Lines changed: 19 additions & 6 deletions
@@ -36,7 +36,6 @@
 #include <folly/Format.h>
 #include <folly/Range.h>
 #pragma GCC diagnostic pop
-
 #include "cachelib/allocator/CCacheManager.h"
 #include "cachelib/allocator/Cache.h"
 #include "cachelib/allocator/CacheAllocatorConfig.h"
@@ -988,6 +987,10 @@ class CacheAllocator : public CacheBase {
   const MemoryPool& getPool(PoolId pid) const override final {
     return allocator_[currentTier()]->getPool(pid);
   }
+
+  const MemoryPool& getPoolByTid(PoolId pid, TierId tid) const override final {
+    return allocator_[tid]->getPool(pid);
+  }
 
   // calculate the number of slabs to be advised/reclaimed in each pool
   PoolAdviseReclaimData calcNumSlabsToAdviseReclaim() override final {
@@ -1222,6 +1225,11 @@ class CacheAllocator : public CacheBase {
   // allocator and executes the necessary callbacks. no-op if it is nullptr.
   FOLLY_ALWAYS_INLINE void release(Item* it, bool isNascent);
 
+  TierId getTargetTierForItem(PoolId pid, typename Item::Key key,
+                              uint32_t size,
+                              uint32_t creationTime,
+                              uint32_t expiryTime);
+
   // This is the last step in item release. We also use this for the eviction
   // scenario where we have to do everything, but not release the allocation
   // to the allocator and instead recycle it for another new allocation. If
@@ -1326,7 +1334,8 @@ class CacheAllocator : public CacheBase {
                              Key key,
                              uint32_t size,
                              uint32_t creationTime,
-                             uint32_t expiryTime);
+                             uint32_t expiryTime,
+                             bool fromEvictorThread);
 
   // create a new cache allocation on specific memory tier.
   // For description see allocateInternal.
@@ -1337,7 +1346,8 @@ class CacheAllocator : public CacheBase {
                                  Key key,
                                  uint32_t size,
                                  uint32_t creationTime,
-                                 uint32_t expiryTime);
+                                 uint32_t expiryTime,
+                                 bool fromEvictorThread);
 
   // Allocate a chained item
   //
@@ -1577,15 +1587,18 @@ class CacheAllocator : public CacheBase {
   //
   // @return valid handle to the item. This will be the last
   //         handle to the item. On failure an empty handle.
-  WriteHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, Item& item);
+  WriteHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, Item& item, bool fromEvictorThread);
 
   // Try to move the item down to the next memory tier
   //
   // @param item the item to evict
   //
   // @return valid handle to the item. This will be the last
   //         handle to the item. On failure an empty handle.
-  WriteHandle tryEvictToNextMemoryTier(Item& item);
+  WriteHandle tryEvictToNextMemoryTier(Item& item, bool fromEvictorThread);
+
+  bool shouldEvictToNextMemoryTier(TierId sourceTierId,
+                                   TierId targetTierId, PoolId pid, Item& item);
 
   size_t memoryTierSize(TierId tid) const;
 
@@ -1714,7 +1727,7 @@ class CacheAllocator : public CacheBase {
   //
   // @return last handle corresponding to the item on success. empty handle on
   //         failure. caller can retry if needed.
-  ItemHandle evictNormalItem(Item& item, bool skipIfTokenInvalid = false);
+  ItemHandle evictNormalItem(Item& item, bool skipIfTokenInvalid = false, bool fromEvictorThread = false);
 
   // Helper function to evict a child item for slab release
   // As a side effect, the parent item is also evicted

cachelib/allocator/CacheAllocatorConfig.h

Lines changed: 9 additions & 0 deletions
@@ -585,6 +585,15 @@ class CacheAllocatorConfig {
   // skip promote children items in chained when parent fail to promote
   bool skipPromoteChildrenWhenParentFailed{false};
 
+  bool disableEvictionToMemory{false};
+
+  double minAcAllocationWatermark{0.0};
+  double maxAcAllocationWatermark{0.0};
+  double acTopTierEvictionWatermark{0.0}; // TODO: make it per TIER?
+  uint64_t sizeThresholdPolicy{0};
+  double defaultTierChancePercentage{50.0};
+  uint64_t forceAllocationTier{UINT64_MAX};
+
   friend CacheT;
 
  private:
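
All of the new knobs default to values that preserve single-tier behavior. A hedged usage sketch, assuming these stay plain public data members (they sit above the private: section, and this commit adds no setter helpers); the values are examples only:

#include "cachelib/allocator/CacheAllocator.h"

using Cache = facebook::cachelib::LruAllocator;

Cache::Config makeTieredConfig() {
  Cache::Config config;
  // Assumed direct member assignment; no setters exist for these yet.
  config.minAcAllocationWatermark = 5.0;     // <= 5% free in the DRAM AC: use the next tier
  config.maxAcAllocationWatermark = 20.0;    // >= 20% free: always allocate in DRAM
  config.acTopTierEvictionWatermark = 2.0;   // evict ahead of allocation when nearly full
  config.defaultTierChancePercentage = 50.0; // between watermarks: unbiased coin flip
  return config;
}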

cachelib/allocator/MM2Q-inl.h

Lines changed: 28 additions & 5 deletions
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <folly/Random.h>
+
 namespace facebook {
 namespace cachelib {
 
@@ -104,6 +106,10 @@ bool MM2Q::Container<T, HookPtr>::recordAccess(T& node,
       return false;
     }
 
+    // TODO: % 100 is not very accurate
+    if (config_.markUsefulChance < 100.0 && folly::Random::rand32() % 100 >= config_.markUsefulChance)
+      return false;
+
     return lruMutex_->lock_combine(func);
   }
   return false;
@@ -211,15 +217,32 @@ void MM2Q::Container<T, HookPtr>::rebalance() noexcept {
 template <typename T, MM2Q::Hook<T> T::*HookPtr>
 bool MM2Q::Container<T, HookPtr>::add(T& node) noexcept {
   const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
-  return lruMutex_->lock_combine([this, &node, currTime]() {
+
+  auto insertToList = [this, &node] {
+    if (config_.lruInsertionPointSpec == 0) {
+      markHot(node);
+      unmarkCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Hot).linkAtHead(node);
+    } else if (config_.lruInsertionPointSpec == 1) {
+      unmarkHot(node);
+      unmarkCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Warm).linkAtHead(node);
+    } else {
+      unmarkHot(node);
+      markCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Cold).linkAtHead(node);
+    }
+  };
+
+  return lruMutex_->lock_combine([this, &node, currTime, &insertToList]() {
     if (node.isInMMContainer()) {
      return false;
    }
 
-    markHot(node);
-    unmarkCold(node);
-    unmarkTail(node);
-    lru_.getList(LruType::Hot).linkAtHead(node);
+    insertToList();
     rebalance();
 
     node.markInMMContainer();
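
Two behavioral changes ride along in MM2Q: add() can now insert a node at the head of the Hot (lruInsertionPointSpec == 0), Warm (== 1), or Cold (anything else) queue, and recordAccess() promotes only a configurable fraction of accesses. A minimal sketch of that probabilistic gate, using the same integer-modulo approximation the TODO in the diff calls out:

#include <folly/Random.h>

// Mirrors the gate added to recordAccess(): with markUsefulChance = 10.0,
// roughly 90% of accesses skip LRU promotion (and the lock_combine section),
// trading recency accuracy for lower lock contention.
bool shouldMarkUseful(double markUsefulChance) {
  return markUsefulChance >= 100.0 ||
         folly::Random::rand32() % 100 < markUsefulChance;
}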
