Skip to content

Commit 5c13fd1

Browse files
committed
SERVER-33973 Force cleanup of possibly remaining partial data after failed collection/database drop
1 parent bcde4c4 commit 5c13fd1

File tree

5 files changed

+287
-310
lines changed

5 files changed

+287
-310
lines changed

src/mongo/db/s/config/configsvr_drop_collection_command.cpp

Lines changed: 4 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -53,33 +53,6 @@ namespace {
5353

5454
MONGO_FAIL_POINT_DEFINE(setDropCollDistLockWait);
5555

56-
template <typename F>
57-
auto staleExceptionRetry(OperationContext* opCtx, StringData opStr, F&& f) {
58-
uassert(ErrorCodes::IllegalOperation,
59-
str::stream() << "Command " << opStr << " does not support database versioning",
60-
!OperationShardingState::get(opCtx).hasDbVersion());
61-
uassert(ErrorCodes::IllegalOperation,
62-
str::stream() << "Command " << opStr << " does not support collection versioning",
63-
!OperationShardingState::get(opCtx).hasShardVersion());
64-
65-
for (int tries = 0;; ++tries) {
66-
// Try kMaxStaleExceptionTries times and on the last try, rethrow the exception
67-
const bool canRetry = tries < kMaxNumStaleVersionRetries - 1;
68-
try {
69-
return f();
70-
} catch (const ExceptionForCat<ErrorCategory::StaleShardVersionError>& ex) {
71-
log() << "Attempt " << tries << " of " << opStr << " received StaleShardVersion error"
72-
<< causedBy(ex);
73-
74-
if (canRetry) {
75-
continue;
76-
}
77-
throw;
78-
}
79-
MONGO_UNREACHABLE;
80-
}
81-
}
82-
8356
/**
8457
* Internal sharding command run on config servers to drop a collection from a database.
8558
*/
@@ -158,8 +131,7 @@ class ConfigSvrDropCollectionCommand : public BasicCommand {
158131
ON_BLOCK_EXIT(
159132
[opCtx, nss] { Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); });
160133

161-
staleExceptionRetry(
162-
opCtx, "_configsvrDropCollection", [&] { _dropCollection(opCtx, nss); });
134+
_dropCollection(opCtx, nss);
163135

164136
return true;
165137
}
@@ -172,72 +144,14 @@ class ConfigSvrDropCollectionCommand : public BasicCommand {
172144
catalogClient->getCollection(opCtx, nss, repl::ReadConcernArgs::get(opCtx).getLevel());
173145
if (collStatus == ErrorCodes::NamespaceNotFound) {
174146
// We checked the sharding catalog and found that this collection doesn't exist. This
175-
// may be because it never existed, or because a drop command was sent previously. This
176-
// data might not be majority committed though, so we will set the client's last optime
177-
// to the system's last optime to ensure the client waits for the writeConcern to be
178-
// satisfied.
179-
repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
180-
181-
// If the DB isn't in the sharding catalog either, consider the drop a success.
182-
auto dbStatus = catalogClient->getDatabase(
183-
opCtx, nss.db().toString(), repl::ReadConcernArgs::get(opCtx).getLevel());
184-
if (dbStatus == ErrorCodes::NamespaceNotFound) {
185-
return;
186-
}
187-
uassertStatusOK(dbStatus);
188-
189-
// If we found the DB but not the collection, the collection might exist and not be
190-
// sharded, so send the command to the primary shard.
191-
_dropUnshardedCollectionFromShard(opCtx, dbStatus.getValue().value.getPrimary(), nss);
147+
// may be because it never existed, or because a drop command was sent previously.
148+
ShardingCatalogManager::get(opCtx)->ensureDropCollectionCompleted(opCtx, nss);
192149
} else {
193150
uassertStatusOK(collStatus);
194-
uassertStatusOK(ShardingCatalogManager::get(opCtx)->dropCollection(opCtx, nss));
151+
ShardingCatalogManager::get(opCtx)->dropCollection(opCtx, nss);
195152
}
196153
}
197154

198-
static void _dropUnshardedCollectionFromShard(OperationContext* opCtx,
199-
const ShardId& shardId,
200-
const NamespaceString& nss) {
201-
202-
const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
203-
204-
const auto dropCommandBSON = [shardRegistry, opCtx, &shardId, &nss] {
205-
BSONObjBuilder builder;
206-
builder.append("drop", nss.coll());
207-
208-
// Append the chunk version for the specified namespace indicating that we believe it is
209-
// not sharded. Collections residing on the config server are never sharded so do not
210-
// send the shard version.
211-
if (shardId != shardRegistry->getConfigShard()->getId()) {
212-
ChunkVersion::UNSHARDED().appendToCommand(&builder);
213-
}
214-
215-
if (!opCtx->getWriteConcern().usedDefault) {
216-
builder.append(WriteConcernOptions::kWriteConcernField,
217-
opCtx->getWriteConcern().toBSON());
218-
}
219-
220-
return builder.obj();
221-
}();
222-
223-
const auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, shardId));
224-
225-
auto cmdDropResult = uassertStatusOK(shard->runCommandWithFixedRetryAttempts(
226-
opCtx,
227-
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
228-
nss.db().toString(),
229-
dropCommandBSON,
230-
Shard::RetryPolicy::kIdempotent));
231-
232-
// If the collection doesn't exist, consider the drop a success.
233-
if (cmdDropResult.commandStatus == ErrorCodes::NamespaceNotFound) {
234-
return;
235-
}
236-
237-
uassertStatusOK(cmdDropResult.commandStatus);
238-
uassertStatusOK(cmdDropResult.writeConcernStatus);
239-
};
240-
241155
} configsvrDropCollectionCmd;
242156

243157
} // namespace

src/mongo/db/s/config/configsvr_drop_database_command.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ class ConfigSvrDropDatabaseCommand : public BasicCommand {
150150
opCtx, dbname, repl::ReadConcernArgs::get(opCtx).getLevel())) {
151151
auto collDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock(
152152
opCtx, nss.ns(), "dropCollection", DistLockManager::kDefaultLockTimeout));
153-
uassertStatusOK(catalogManager->dropCollection(opCtx, nss));
153+
catalogManager->dropCollection(opCtx, nss);
154154
}
155155

156156
// Drop the database from the primary shard first.

src/mongo/db/s/config/sharding_catalog_manager.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -254,12 +254,24 @@ class ShardingCatalogManager {
254254
/**
255255
* Drops the specified collection from the collection metadata store.
256256
*
257-
* Returns Status::OK if successful or any error code indicating the failure. These are
258-
* some of the known failures:
259-
* - NamespaceNotFound - collection does not exist
257+
* Throws a DBException for any failures. These are some of the known failures:
258+
* - NamespaceNotFound - Collection does not exist
260259
*/
261-
Status dropCollection(OperationContext* opCtx, const NamespaceString& nss);
260+
void dropCollection(OperationContext* opCtx, const NamespaceString& nss);
262261

262+
/**
263+
* Ensures that a namespace that has received a dropCollection, but no longer has an entry in
264+
* config.collections, has cleared all relevant metadata entries for the corresponding
265+
* collection. As part of this, sends dropCollection, setShardVersion, and unsetSharding to all
266+
* shards -- in case shards didn't receive these commands as part of the original
267+
* dropCollection.
268+
*
269+
* This function does not guarantee that all shards will eventually receive setShardVersion,
270+
* unless the client infinitely retries until hearing back success. This function does, however,
271+
* increase the likelihood of shards having received setShardVersion.
272+
*/
273+
274+
void ensureDropCollectionCompleted(OperationContext* opCtx, const NamespaceString& nss);
263275

264276
/**
265277
* Shards collection with namespace 'nss' and implicitly assumes that the database is enabled

0 commit comments

Comments
 (0)