Skip to content

Commit 5bc9f79

Browse files
vinser52byrnedj
authored andcommitted
Implemented async Item movement between tiers (with corrected
behaivour of tryEvictToNextMemoryTier). Fix ReaperSkippingSlabTraversalWhileSlabReleasing test The issue was caused by incorrect behaviour of the CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier method in case the evicted item is expired. We cannot simply return a handle to it, but we need to remove it from the access container and MM container. The next commits will fix small issues (they will be combined with this one) Fix issue with "Destorying an unresolved handle" The issue happened when ReadHandleImpl ctor needs to destroy waitContext_ because addWaitContextForMovingItem() returns false. So before destroying waitContext_ we are calling discard method to notify ~ItemWaitContext() that Item is ready. Fix slab release code Get tier id of item before calling any function on allocator (which needs the tierID). fix async fix for transparent sync
1 parent b456aba commit 5bc9f79

File tree

8 files changed

+424
-18
lines changed

8 files changed

+424
-18
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 231 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ CacheAllocator<CacheTrait>::CacheAllocator(
8585
config.chainedItemAccessConfig)),
8686
chainedItemLocks_(config_.chainedItemsLockPower,
8787
std::make_shared<MurmurHash2>()),
88+
movesMap_(kShards),
89+
moveLock_(kShards),
8890
cacheCreationTime_{
8991
type != InitMemType::kMemAttach
9092
? util::getCurrentTimeSec()
@@ -1026,6 +1028,25 @@ bool CacheAllocator<CacheTrait>::replaceInMMContainer(Item& oldItem,
10261028
}
10271029
}
10281030

1031+
template <typename CacheTrait>
1032+
bool CacheAllocator<CacheTrait>::replaceInMMContainer(Item* oldItem,
1033+
Item& newItem) {
1034+
return replaceInMMContainer(*oldItem, newItem);
1035+
}
1036+
1037+
template <typename CacheTrait>
1038+
bool CacheAllocator<CacheTrait>::replaceInMMContainer(EvictionIterator& oldItemIt,
1039+
Item& newItem) {
1040+
auto& oldContainer = getMMContainer(*oldItemIt);
1041+
auto& newContainer = getMMContainer(newItem);
1042+
1043+
// This function is used for eviction across tiers
1044+
XDCHECK(&oldContainer != &newContainer);
1045+
oldContainer.remove(oldItemIt);
1046+
1047+
return newContainer.add(newItem);
1048+
}
1049+
10291050
template <typename CacheTrait>
10301051
bool CacheAllocator<CacheTrait>::replaceChainedItemInMMContainer(
10311052
Item& oldItem, Item& newItem) {
@@ -1171,6 +1192,157 @@ CacheAllocator<CacheTrait>::insertOrReplace(const WriteHandle& handle) {
11711192
return replaced;
11721193
}
11731194

1195+
/* Next two methods are used to asynchronously move Item between memory tiers.
1196+
*
1197+
* The thread, which moves Item, allocates new Item in the tier we are moving to
1198+
* and calls moveRegularItemOnEviction() method. This method does the following:
1199+
* 1. Create MoveCtx and put it to the movesMap.
1200+
* 2. Update the access container with the new item from the tier we are
1201+
* moving to. This Item has kIncomplete flag set.
1202+
* 3. Copy data from the old Item to the new one.
1203+
* 4. Unset the kIncomplete flag and Notify MoveCtx
1204+
*
1205+
* Concurrent threads which are getting handle to the same key:
1206+
* 1. When a handle is created it checks if the kIncomplete flag is set
1207+
* 2. If so, Handle implementation creates waitContext and adds it to the
1208+
* MoveCtx by calling addWaitContextForMovingItem() method.
1209+
* 3. Wait until the moving thread will complete its job.
1210+
*/
1211+
template <typename CacheTrait>
1212+
bool CacheAllocator<CacheTrait>::addWaitContextForMovingItem(
1213+
folly::StringPiece key, std::shared_ptr<WaitContext<ReadHandle>> waiter) {
1214+
auto shard = getShardForKey(key);
1215+
auto& movesMap = getMoveMapForShard(shard);
1216+
auto lock = getMoveLockForShard(shard);
1217+
auto it = movesMap.find(key);
1218+
if (it == movesMap.end()) {
1219+
return false;
1220+
}
1221+
auto ctx = it->second.get();
1222+
ctx->addWaiter(std::move(waiter));
1223+
return true;
1224+
}
1225+
1226+
template <typename CacheTrait>
1227+
template <typename ItemPtr>
1228+
typename CacheAllocator<CacheTrait>::WriteHandle
1229+
CacheAllocator<CacheTrait>::moveRegularItemOnEviction(
1230+
ItemPtr& oldItemPtr, WriteHandle& newItemHdl) {
1231+
// TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
1232+
// ??? util::LatencyTracker tracker{stats_.evictRegularLatency_};
1233+
1234+
Item& oldItem = *oldItemPtr;
1235+
if (!oldItem.isAccessible() || oldItem.isExpired()) {
1236+
return {};
1237+
}
1238+
1239+
XDCHECK_EQ(newItemHdl->getSize(), oldItem.getSize());
1240+
XDCHECK_NE(getTierId(oldItem), getTierId(*newItemHdl));
1241+
1242+
// take care of the flags before we expose the item to be accessed. this
1243+
// will ensure that when another thread removes the item from RAM, we issue
1244+
// a delete accordingly. See D7859775 for an example
1245+
if (oldItem.isNvmClean()) {
1246+
newItemHdl->markNvmClean();
1247+
}
1248+
1249+
folly::StringPiece key(oldItem.getKey());
1250+
auto shard = getShardForKey(key);
1251+
auto& movesMap = getMoveMapForShard(shard);
1252+
MoveCtx* ctx(nullptr);
1253+
{
1254+
auto lock = getMoveLockForShard(shard);
1255+
auto res = movesMap.try_emplace(key, std::make_unique<MoveCtx>());
1256+
if (!res.second) {
1257+
return {};
1258+
}
1259+
ctx = res.first->second.get();
1260+
}
1261+
1262+
auto resHdl = WriteHandle{};
1263+
auto guard = folly::makeGuard([key, this, ctx, shard, &resHdl]() {
1264+
auto& movesMap = getMoveMapForShard(shard);
1265+
if (resHdl)
1266+
resHdl->unmarkIncomplete();
1267+
auto lock = getMoveLockForShard(shard);
1268+
ctx->setItemHandle(std::move(resHdl));
1269+
movesMap.erase(key);
1270+
});
1271+
1272+
// TODO: Possibly we can use markMoving() instead. But today
1273+
// moveOnSlabRelease logic assume that we mark as moving old Item
1274+
// and than do copy and replace old Item with the new one in access
1275+
// container. Furthermore, Item can be marked as Moving only
1276+
// if it is linked to MM container. In our case we mark the new Item
1277+
// and update access container before the new Item is ready (content is
1278+
// copied).
1279+
newItemHdl->markIncomplete();
1280+
1281+
// Inside the access container's lock, this checks if the old item is
1282+
// accessible and its refcount is zero. If the item is not accessible,
1283+
// there is no point to replace it since it had already been removed
1284+
// or in the process of being removed. If the item is in cache but the
1285+
// refcount is non-zero, it means user could be attempting to remove
1286+
// this item through an API such as remove(ItemHandle). In this case,
1287+
// it is unsafe to replace the old item with a new one, so we should
1288+
// also abort.
1289+
if (!accessContainer_->replaceIf(oldItem, *newItemHdl,
1290+
itemExclusivePredicate)) {
1291+
return {};
1292+
}
1293+
1294+
if (config_.moveCb) {
1295+
// Execute the move callback. We cannot make any guarantees about the
1296+
// consistency of the old item beyond this point, because the callback can
1297+
// do more than a simple memcpy() e.g. update external references. If there
1298+
// are any remaining handles to the old item, it is the caller's
1299+
// responsibility to invalidate them. The move can only fail after this
1300+
// statement if the old item has been removed or replaced, in which case it
1301+
// should be fine for it to be left in an inconsistent state.
1302+
config_.moveCb(oldItem, *newItemHdl, nullptr);
1303+
} else {
1304+
std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(),
1305+
oldItem.getSize());
1306+
}
1307+
1308+
// Inside the MM container's lock, this checks if the old item exists to
1309+
// make sure that no other thread removed it, and only then replaces it.
1310+
if (!replaceInMMContainer(oldItemPtr, *newItemHdl)) {
1311+
accessContainer_->remove(*newItemHdl);
1312+
return {};
1313+
}
1314+
1315+
// Replacing into the MM container was successful, but someone could have
1316+
// called insertOrReplace() or remove() before or after the
1317+
// replaceInMMContainer() operation, which would invalidate newItemHdl.
1318+
if (!newItemHdl->isAccessible()) {
1319+
removeFromMMContainer(*newItemHdl);
1320+
return {};
1321+
}
1322+
1323+
// no one can add or remove chained items at this point
1324+
if (oldItem.hasChainedItem()) {
1325+
// safe to acquire handle for a moving Item
1326+
auto oldHandle = acquire(&oldItem);
1327+
XDCHECK_EQ(1u, oldHandle->getRefCount()) << oldHandle->toString();
1328+
XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString();
1329+
try {
1330+
auto l = chainedItemLocks_.lockExclusive(oldItem.getKey());
1331+
transferChainLocked(oldHandle, newItemHdl);
1332+
} catch (const std::exception& e) {
1333+
// this should never happen because we drained all the handles.
1334+
XLOGF(DFATAL, "{}", e.what());
1335+
throw;
1336+
}
1337+
1338+
XDCHECK(!oldItem.hasChainedItem());
1339+
XDCHECK(newItemHdl->hasChainedItem());
1340+
}
1341+
newItemHdl.unmarkNascent();
1342+
resHdl = std::move(newItemHdl); // guard will assign it to ctx under lock
1343+
return acquire(&oldItem);
1344+
}
1345+
11741346
template <typename CacheTrait>
11751347
bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
11761348
WriteHandle& newItemHdl) {
@@ -1491,6 +1663,47 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
14911663
return true;
14921664
}
14931665

1666+
template <typename CacheTrait>
1667+
template <typename ItemPtr>
1668+
typename CacheAllocator<CacheTrait>::WriteHandle
1669+
CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
1670+
TierId tid, PoolId pid, ItemPtr& item) {
1671+
if(item->isChainedItem()) return {}; // TODO: We do not support ChainedItem yet
1672+
if(item->isExpired()) {
1673+
auto handle = removeIf(*item, [](const Item& it) {
1674+
return it.getRefCount() == 0;
1675+
});
1676+
1677+
if (handle) { return handle; }
1678+
}
1679+
1680+
TierId nextTier = tid; // TODO - calculate this based on some admission policy
1681+
while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers
1682+
// allocateInternal might trigger another eviction
1683+
auto newItemHdl = allocateInternalTier(nextTier, pid,
1684+
item->getKey(),
1685+
item->getSize(),
1686+
item->getCreationTime(),
1687+
item->getExpiryTime());
1688+
1689+
if (newItemHdl) {
1690+
XDCHECK_EQ(newItemHdl->getSize(), item->getSize());
1691+
1692+
return moveRegularItemOnEviction(item, newItemHdl);
1693+
}
1694+
}
1695+
1696+
return {};
1697+
}
1698+
1699+
template <typename CacheTrait>
1700+
typename CacheAllocator<CacheTrait>::WriteHandle
1701+
CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(Item* item) {
1702+
auto tid = getTierId(*item);
1703+
auto pid = allocator_[tid]->getAllocInfo(item->getMemory()).poolId;
1704+
return tryEvictToNextMemoryTier(tid, pid, item);
1705+
}
1706+
14941707
template <typename CacheTrait>
14951708
typename CacheAllocator<CacheTrait>::RemoveRes
14961709
CacheAllocator<CacheTrait>::remove(typename Item::Key key) {
@@ -2923,6 +3136,21 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
29233136
}
29243137
}
29253138

3139+
template <typename CacheTrait>
3140+
template <typename Fn>
3141+
typename CacheAllocator<CacheTrait>::WriteHandle
3142+
CacheAllocator<CacheTrait>::removeIf(Item& item, Fn&& predicate) {
3143+
auto handle = accessContainer_->removeIf(item, std::forward<Fn>(predicate));
3144+
3145+
if (handle) {
3146+
XDCHECK_EQ(reinterpret_cast<uintptr_t>(handle.get()),
3147+
reinterpret_cast<uintptr_t>(&item));
3148+
removeFromMMContainer(item);
3149+
}
3150+
3151+
return handle;
3152+
}
3153+
29263154
template <typename CacheTrait>
29273155
bool CacheAllocator<CacheTrait>::removeIfExpired(const ReadHandle& handle) {
29283156
if (!handle) {
@@ -2931,14 +3159,7 @@ bool CacheAllocator<CacheTrait>::removeIfExpired(const ReadHandle& handle) {
29313159

29323160
// We remove the item from both access and mm containers.
29333161
// We want to make sure the caller is the only one holding the handle.
2934-
auto removedHandle =
2935-
accessContainer_->removeIf(*(handle.getInternal()), itemExpiryPredicate);
2936-
if (removedHandle) {
2937-
removeFromMMContainer(*(handle.getInternal()));
2938-
return true;
2939-
}
2940-
2941-
return false;
3162+
return (bool)removeIf(*(handle.getInternal()), itemExpiryPredicate);
29423163
}
29433164

29443165
template <typename CacheTrait>
@@ -2957,15 +3178,14 @@ bool CacheAllocator<CacheTrait>::markMovingForSlabRelease(
29573178
// At first, we assume this item was already freed
29583179
bool itemFreed = true;
29593180
bool markedMoving = false;
2960-
TierId tid = 0;
2961-
const auto fn = [&markedMoving, &itemFreed, &tid, this /* TODO - necessary for getTierId */](void* memory) {
3181+
TierId tid = getTierId(alloc);
3182+
const auto fn = [&markedMoving, &itemFreed](void* memory) {
29623183
// Since this callback is executed, the item is not yet freed
29633184
itemFreed = false;
29643185
Item* item = static_cast<Item*>(memory);
29653186
if (item->markMoving()) {
29663187
markedMoving = true;
29673188
}
2968-
tid = getTierId(*item);
29693189
};
29703190

29713191
auto startTime = util::getCurrentTimeSec();

0 commit comments

Comments
 (0)