@@ -220,12 +220,13 @@ class CompletionHandler : public folly::EventHandler {
 // Per-thread context for AsyncIO like libaio or io_uring
 class AsyncIoContext : public IoContext {
  public:
-  AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
-                 size_t id,
-                 folly::EventBase* evb,
-                 size_t capacity,
-                 bool useIoUring,
-                 std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec);
+  AsyncIoContext(
+      std::unique_ptr<folly::AsyncBase>&& asyncBase,
+      size_t id,
+      folly::EventBase* evb,
+      size_t capacity,
+      bool useIoUring,
+      const std::unordered_map<int, std::shared_ptr<FdpNvme>>& fdpNvmeDevs);

   ~AsyncIoContext() override = default;

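With this change the per-thread context borrows the fd-keyed device registry owned by FileDevice by const reference instead of copying a vector, and devices are looked up by the file descriptor an op targets. A minimal sketch of the lookup pattern, with an illustrative stub type and helper name (not part of the patch):

  #include <memory>
  #include <unordered_map>

  struct FdpNvmeStub {}; // stand-in for the real FdpNvme

  using FdpMap = std::unordered_map<int, std::shared_ptr<FdpNvmeStub>>;

  const std::shared_ptr<FdpNvmeStub>& lookupByFd(const FdpMap& devs, int fd) {
    // at() throws std::out_of_range for an unregistered fd, surfacing a
    // routing bug instead of silently using a default device index.
    return devs.at(fd);
  }
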
@@ -279,18 +280,16 @@ class AsyncIoContext : public IoContext {
   size_t numSubmitted_ = 0;
   size_t numCompleted_ = 0;

-  // Device info vector for FDP support
-  const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
-  // As of now, only one FDP enabled Device is supported
-  static constexpr uint16_t kDefaultFdpIdx = 0u;
+  // Map of file descriptors to FdpNvme device objects
+  const std::unordered_map<int, std::shared_ptr<FdpNvme>>& fdpNvmeDevs_;
 };

 // A FileDevice manages direct I/O to either a single or multiple (RAID0)
 // block device(s) or regular file(s).
 class FileDevice : public Device {
  public:
   FileDevice(std::vector<folly::File>&& fvec,
-             std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
+             std::unordered_map<int, std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
              uint64_t size,
              uint32_t blockSize,
              uint32_t stripeSize,
@@ -317,8 +316,8 @@ class FileDevice : public Device {
   // File vector for devices or regular files
   const std::vector<folly::File> fvec_{};

-  // Device info vector for FDP support
-  const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
+  // Map of file descriptors to FdpNvme device objects
+  const std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs_;

   // RAID stripe size when multiple devices are used
   const uint32_t stripeSize_;
@@ -750,20 +749,21 @@ bool SyncIoContext::submitIo(IOOp& op) {
 /*
  * AsyncIoContext
  */
-AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
-                               size_t id,
-                               folly::EventBase* evb,
-                               size_t capacity,
-                               bool useIoUring,
-                               std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec)
+AsyncIoContext::AsyncIoContext(
+    std::unique_ptr<folly::AsyncBase>&& asyncBase,
+    size_t id,
+    folly::EventBase* evb,
+    size_t capacity,
+    bool useIoUring,
+    const std::unordered_map<int, std::shared_ptr<FdpNvme>>& fdpNvmeDevs)
     : asyncBase_(std::move(asyncBase)),
       id_(id),
       qDepth_(capacity),
       useIoUring_(useIoUring),
-      fdpNvmeVec_(fdpNvmeVec) {
+      fdpNvmeDevs_(fdpNvmeDevs) {
 #ifdef CACHELIB_IOURING_DISABLE
   // io_uring is not available on the system
-  XDCHECK(!useIoUring_ && !(fdpNvmeVec_.size() > 0));
+  XDCHECK(!useIoUring_ && !(fdpNvmeDevs_.size() > 0));
   useIoUring_ = false;
 #endif
   if (evb) {
@@ -781,7 +781,7 @@ AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
       "[{}] Created new async io context with qdepth {}{} io_engine {} {}",
       getName(), qDepth_, qDepth_ == 1 ? " (sync wait)" : "",
       useIoUring_ ? "io_uring" : "libaio",
-      (fdpNvmeVec_.size() > 0) ? "FDP enabled" : "");
+      (fdpNvmeDevs_.size() > 0) ? "FDP enabled" : "");
 }

 void AsyncIoContext::pollCompletion() {
@@ -820,7 +820,7 @@ void AsyncIoContext::handleCompletion(
   }

   auto len = aop->result();
-  if (fdpNvmeVec_.size() > 0) {
+  if (fdpNvmeDevs_.size() > 0) {
     // 0 means success here, so get the completed size from iop
     len = !len ? iop->size_ : 0;
   }
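The result translation exists because, per the comment, an NVMe passthrough completion reports 0 for success (a command status) rather than a byte count the way a regular read/write completion does. A small sketch of that rule under this assumption (the helper name is hypothetical):

  #include <sys/types.h>

  // result: 0 on success for passthrough commands; bytes transferred for
  // regular libaio/io_uring read/write completions.
  ssize_t completedLen(ssize_t result, size_t requested, bool passthrough) {
    if (!passthrough) {
      return result;                                   // byte count as-is
    }
    return result == 0 ? static_cast<ssize_t>(requested) : 0;
  }
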
@@ -869,7 +869,7 @@ bool AsyncIoContext::submitIo(IOOp& op) {
 }

 std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepAsyncIo(IOOp& op) {
-  if (fdpNvmeVec_.size() > 0) {
+  if (fdpNvmeDevs_.size() > 0) {
     return prepNvmeIo(op);
   }

@@ -905,10 +905,10 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
   iouringCmdOp->initBase();
   struct io_uring_sqe& sqe = iouringCmdOp->getSqe();
   if (req.opType_ == OpType::READ) {
-    fdpNvmeVec_[kDefaultFdpIdx]->prepReadUringCmdSqe(sqe, op.data_, op.size_,
-                                                     op.offset_);
+    fdpNvmeDevs_.at(op.fd_)->prepReadUringCmdSqe(sqe, op.data_, op.size_,
+                                                 op.offset_);
   } else {
-    fdpNvmeVec_[kDefaultFdpIdx]->prepWriteUringCmdSqe(
+    fdpNvmeDevs_.at(op.fd_)->prepWriteUringCmdSqe(
         sqe, op.data_, op.size_, op.offset_, op.placeHandle_.value_or(-1));
   }
   io_uring_sqe_set_data(&sqe, iouringCmdOp.get());
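Each IOOp carries the fd of the stripe file it was routed to, and that fd now doubles as the key selecting which FdpNvme prepares the uring-cmd SQE; the old code always went through index 0 and so could only support one FDP device. A minimal sketch of the fd-based dispatch (Op and Dev are illustrative stand-ins for IOOp and FdpNvme):

  #include <memory>
  #include <unordered_map>

  struct Dev {
    void prepRead() {}
    void prepWrite() {}
  };
  struct Op { int fd; bool isRead; };

  void prepFor(const std::unordered_map<int, std::shared_ptr<Dev>>& devs,
               const Op& op) {
    const auto& dev = devs.at(op.fd); // fd was picked by the RAID0 striper
    op.isRead ? dev->prepRead() : dev->prepWrite();
  }
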
@@ -921,23 +921,24 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
 /*
  * FileDevice
  */
-FileDevice::FileDevice(std::vector<folly::File>&& fvec,
-                       std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
-                       uint64_t fileSize,
-                       uint32_t blockSize,
-                       uint32_t stripeSize,
-                       uint32_t maxIOSize,
-                       uint32_t maxDeviceWriteSize,
-                       IoEngine ioEngine,
-                       uint32_t qDepthPerContext,
-                       std::shared_ptr<DeviceEncryptor> encryptor)
+FileDevice::FileDevice(
+    std::vector<folly::File>&& fvec,
+    std::unordered_map<int, std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
+    uint64_t fileSize,
+    uint32_t blockSize,
+    uint32_t stripeSize,
+    uint32_t maxIOSize,
+    uint32_t maxDeviceWriteSize,
+    IoEngine ioEngine,
+    uint32_t qDepthPerContext,
+    std::shared_ptr<DeviceEncryptor> encryptor)
     : Device(fileSize * fvec.size(),
              std::move(encryptor),
              blockSize,
              maxIOSize,
              maxDeviceWriteSize),
       fvec_(std::move(fvec)),
-      fdpNvmeVec_(std::move(fdpNvmeVec)),
+      fdpNvmeDevs_(std::move(fdpNvmeDevs)),
       stripeSize_(stripeSize),
       ioEngine_(ioEngine),
       qDepthPerContext_(qDepthPerContext) {
@@ -974,7 +975,7 @@ FileDevice::FileDevice(std::vector<folly::File>&& fvec,
       "num_fdp_devices {}",
       fvec_.size(), getSize(), blockSize, stripeSize, maxDeviceWriteSize,
       maxIOSize, getIoEngineName(ioEngine_), qDepthPerContext_,
-      fdpNvmeVec_.size());
+      fdpNvmeDevs_.size());
 }

 bool FileDevice::readImpl(uint64_t offset, uint32_t size, void* value) {
@@ -1030,7 +1031,7 @@ IoContext* FileDevice::getIoContext() {
     std::unique_ptr<folly::AsyncBase> asyncBase;
     if (useIoUring) {
 #ifndef CACHELIB_IOURING_DISABLE
-      if (fdpNvmeVec_.size() > 0) {
+      if (fdpNvmeDevs_.size() > 0) {
         // Big sqe/cqe is mandatory for NVMe passthrough
         // https://elixir.bootlin.com/linux/v6.7/source/drivers/nvme/host/ioctl.c#L742
         folly::IoUringOp::Options options;
@@ -1051,7 +1052,7 @@ IoContext* FileDevice::getIoContext() {
     auto idx = incrementalIdx_++;
     tlContext_.reset(new AsyncIoContext(std::move(asyncBase), idx, evb,
                                         qDepthPerContext_, useIoUring,
-                                        fdpNvmeVec_));
+                                        fdpNvmeDevs_));

     {
       // Keep pointers in a vector to ease the gdb debugging
@@ -1067,10 +1068,20 @@ IoContext* FileDevice::getIoContext() {
 }

 int FileDevice::allocatePlacementHandle() {
-  static constexpr uint16_t kDefaultFdpIdx = 0u;
 #ifndef CACHELIB_IOURING_DISABLE
-  if (fdpNvmeVec_.size() > 0) {
-    return fdpNvmeVec_[kDefaultFdpIdx]->allocateFdpHandle();
+  if (fdpNvmeDevs_.size() > 0) {
+    auto fdpHandle = -1;
+    // Ensure the same FDP placement handle is allocated on every FdpNvme
+    // device so a RAID stripe uses one handle across all devices; return
+    // the allocated handle on success, or -1 on a conflict
+    for (auto& nvmeFdp : fdpNvmeDevs_) {
+      auto tempHandle = nvmeFdp.second->allocateFdpHandle();
+      if (fdpHandle != -1 && (tempHandle != fdpHandle)) {
+        return -1;
+      }
+      fdpHandle = tempHandle;
+    }
+    return fdpHandle;
   }
 #endif
   return -1;
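Since each device allocates placement handles independently, a RAID0 stripe is only coherent if every member returns the same handle id; the loop above enforces that all-or-nothing invariant. The same idea as a standalone sketch, with a hypothetical allocator callback standing in for FdpNvme::allocateFdpHandle():

  #include <functional>
  #include <vector>

  // Returns the common handle id, or -1 if any device disagrees (the
  // caller then falls back to writing without a placement hint).
  int allocateConsistentHandle(
      const std::vector<std::function<int()>>& allocators) {
    int handle = -1;
    for (const auto& alloc : allocators) {
      const int h = alloc();
      if (handle != -1 && h != handle) {
        return -1;
      }
      handle = h;
    }
    return handle; // -1 when the list is empty
  }
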
@@ -1186,31 +1197,25 @@ std::unique_ptr<Device> createDirectIoFileDevice(
   XDCHECK(folly::isPowTwo(blockSize));

   uint32_t maxIOSize = maxDeviceWriteSize;
-  std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec{};
+  std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs;
 #ifndef CACHELIB_IOURING_DISABLE
   if (isFDPEnabled) {
     try {
-      if (filePaths.size() > 1) {
-        throw std::invalid_argument(folly::sformat(
-            "{} input files; but FDP mode does not support RAID files yet",
-            filePaths.size()));
-      }
-
-      for (const auto& path : filePaths) {
-        auto fdpNvme = std::make_shared<FdpNvme>(path);
+      for (size_t i = 0; i < filePaths.size(); i++) {
+        auto fdpNvme = std::make_shared<FdpNvme>(filePaths[i]);

         auto maxDevIOSize = fdpNvme->getMaxIOSize();
         if (maxDevIOSize != 0u &&
             (maxIOSize == 0u || maxDevIOSize < maxIOSize)) {
           maxIOSize = maxDevIOSize;
         }

-        fdpNvmeVec.push_back(std::move(fdpNvme));
+        fdpNvmeDevs.insert({fVec[i].fd(), std::move(fdpNvme)});
       }
     } catch (const std::exception& e) {
       XLOGF(ERR, "NVMe FDP mode could not be enabled {}, Errno: {}", e.what(),
             errno);
-      fdpNvmeVec.clear();
+      fdpNvmeDevs.clear();
       maxIOSize = 0u;
     }
   }
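The map is keyed on the fds of the files already opened into fVec, relying on fVec and filePaths being index-aligned; that key is what lets prepNvmeIo route each op via op.fd_ later. A condensed sketch of the construction under that alignment assumption (buildFdpMap is an illustrative helper, and the snippet assumes the surrounding file's includes for folly::File and FdpNvme):

  std::unordered_map<int, std::shared_ptr<FdpNvme>> buildFdpMap(
      const std::vector<std::string>& filePaths,
      const std::vector<folly::File>& fVec) {
    std::unordered_map<int, std::shared_ptr<FdpNvme>> devs;
    for (size_t i = 0; i < filePaths.size(); i++) {
      // Key on the open fd of the i-th file, which backs filePaths[i]
      devs.emplace(fVec[i].fd(), std::make_shared<FdpNvme>(filePaths[i]));
    }
    return devs;
  }
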
@@ -1221,7 +1226,7 @@ std::unique_ptr<Device> createDirectIoFileDevice(
   }

   return std::make_unique<FileDevice>(std::move(fVec),
-                                      std::move(fdpNvmeVec),
+                                      std::move(fdpNvmeDevs),
                                       fileSize,
                                       blockSize,
                                       stripeSize,