Skip to content

Commit cb681d5

Browse files
author
FengHao
committed
GDS-fixed
1 parent 2e0834b commit cb681d5

File tree

12 files changed

+33
-27
lines changed

12 files changed

+33
-27
lines changed

ucm/store/device/cuda/cuda_device.cu

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
#include <cstring>
3333
#include <unordered_map>
3434
#include <cstdlib>
35-
#include "infra/template/sharded_handle_recorder.h"
35+
#include "infra/template/handle_recorder.h"
3636

3737
#define CUDA_TRANS_UNIT_SIZE (sizeof(uint64_t) * 2)
3838
#define CUDA_TRANS_BLOCK_NUMBER (32)
@@ -311,13 +311,17 @@ private:
311311
cudaStream_t stream_;
312312
};
313313

314+
void DeviceFactory::Setup(bool useDirect)
315+
{
316+
if (useDirect) {
317+
CudaDevice::InitGdsOnce();
318+
}
319+
}
320+
314321
std::unique_ptr<IDevice> DeviceFactory::Make(const int32_t deviceId, const size_t bufferSize,
315-
const size_t bufferNumber, bool transferUseDirect)
322+
const size_t bufferNumber)
316323
{
317324
try {
318-
if (transferUseDirect) {
319-
CudaDevice::InitGdsOnce();
320-
}
321325
return std::make_unique<CudaDevice>(deviceId, bufferSize, bufferNumber);
322326
} catch (const std::exception& e) {
323327
UC_ERROR("Failed({}) to make cuda device({},{},{}).", e.what(), deviceId, bufferSize,

ucm/store/device/idevice.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,9 @@ class IDevice {
6161

6262
class DeviceFactory {
6363
public:
64+
static void Setup(bool useDirect = false);
6465
static std::unique_ptr<IDevice> Make(const int32_t deviceId, const size_t bufferSize,
65-
const size_t bufferNumber, bool transferUseDirect = false);
66+
const size_t bufferNumber);
6667
};
6768

6869
} // namespace UC

ucm/store/nfsstore/cc/api/nfsstore.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class NFSStoreImpl : public NFSStore {
4444
status =
4545
this->transMgr_.Setup(config.transferDeviceId, config.transferStreamNumber,
4646
config.transferIoSize, config.transferBufferNumber,
47-
this->spaceMgr_.GetSpaceLayout(), config.transferTimeoutMs, config.transferUseDirect);
47+
this->spaceMgr_.GetSpaceLayout(), config.transferTimeoutMs, config.useDirect);
4848
if (status.Failure()) {
4949
UC_ERROR("Failed({}) to setup TsfTaskManager.", status);
5050
return status.Underlying();

ucm/store/nfsstore/cc/api/nfsstore.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,14 @@ class NFSStore : public CCStore {
4343
bool tempDumpDirEnable;
4444
bool hotnessEnable;
4545
size_t hotnessInterval;
46-
bool transferUseDirect;
46+
bool useDirect;
4747

4848
Config(const std::vector<std::string>& storageBackends, const size_t kvcacheBlockSize,
4949
const bool transferEnable)
5050
: storageBackends{storageBackends}, kvcacheBlockSize{kvcacheBlockSize},
5151
transferEnable{transferEnable}, transferDeviceId{-1}, transferStreamNumber{32},
5252
transferIoSize{262144}, transferBufferNumber{512}, transferTimeoutMs{30000},
53-
tempDumpDirEnable{false}, hotnessEnable{true}, hotnessInterval{60}, transferUseDirect{false}
53+
tempDumpDirEnable{false}, hotnessEnable{true}, hotnessInterval{60}, useDirect{false}
5454
{
5555
}
5656
};

ucm/store/nfsstore/cc/domain/trans/directstorage_queue.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@
3131
namespace UC {
3232

3333
Status DirectStorageQueue::Setup(const int32_t deviceId, const size_t bufferSize, const size_t bufferNumber,
34-
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool transferUseDirect)
34+
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool useDirect)
3535
{
3636
this->deviceId_ = deviceId;
3737
this->bufferSize_ = bufferSize;
3838
this->bufferNumber_ = bufferNumber;
3939
this->failureSet_ = failureSet;
4040
this->layout_ = layout;
41-
this->transferUseDirect_ = transferUseDirect;
41+
this->useDirect = useDirect;
4242
auto success =
4343
this->backend_.SetWorkerInitFn([this](auto& device) { return this->Init(device); })
4444
.SetWorkerFn([this](auto& shard, const auto& device) { this->Work(shard, device); })
@@ -52,7 +52,8 @@ void DirectStorageQueue::Push(std::list<Task::Shard>& shards) noexcept { this->b
5252
bool DirectStorageQueue::Init(Device& device)
5353
{
5454
if (this->deviceId_ < 0) { return true; }
55-
device = DeviceFactory::Make(this->deviceId_, this->bufferSize_, this->bufferNumber_, this->transferUseDirect_);
55+
DeviceFactory::Setup(useDirect);
56+
device = DeviceFactory::Make(this->deviceId_, this->bufferSize_, this->bufferNumber_);
5657
if (!device) { return false; }
5758
return device->Setup().Success();
5859
}

ucm/store/nfsstore/cc/domain/trans/directstorage_queue.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
#include <unordered_map>
3535
#include <mutex>
3636
#include <string>
37-
#include "infra/template/sharded_handle_recorder.h"
37+
#include "infra/template/handle_recorder.h"
3838

3939
namespace UC {
4040

@@ -46,11 +46,11 @@ class DirectStorageQueue : public TaskQueue {
4646
TaskSet* failureSet_{nullptr};
4747
const SpaceLayout* layout_{nullptr};
4848
ThreadPool<Task::Shard, Device> backend_{};
49-
bool transferUseDirect_{false};
49+
bool useDirect{false};
5050

5151
public:
5252
Status Setup(const int32_t deviceId, const size_t bufferSize, const size_t bufferNumber,
53-
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool transferUseDirect);
53+
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool useDirect);
5454
void Push(std::list<Task::Shard>& shards) noexcept override;
5555

5656
private:

ucm/store/nfsstore/cc/domain/trans/posix_queue.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ bool IsAligned(const T value)
3535
}
3636

3737
Status PosixQueue::Setup(const int32_t deviceId, const size_t bufferSize, const size_t bufferNumber,
38-
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool transferUseDirect)
38+
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool useDirect)
3939
{
4040
this->deviceId_ = deviceId;
4141
this->bufferSize_ = bufferSize;
4242
this->bufferNumber_ = bufferNumber;
4343
this->failureSet_ = failureSet;
4444
this->layout_ = layout;
45-
this->transferUseDirect_ = transferUseDirect;
45+
this->useDirect = useDirect;
4646
auto success =
4747
this->backend_.SetWorkerInitFn([this](auto& device) { return this->Init(device); })
4848
.SetWorkerFn([this](auto& shard, const auto& device) { this->Work(shard, device); })
@@ -56,7 +56,7 @@ void PosixQueue::Push(std::list<Task::Shard>& shards) noexcept { this->backend_.
5656
bool PosixQueue::Init(Device& device)
5757
{
5858
if (this->deviceId_ < 0) { return true; }
59-
device = DeviceFactory::Make(this->deviceId_, this->bufferSize_, this->bufferNumber_, this->transferUseDirect_);
59+
device = DeviceFactory::Make(this->deviceId_, this->bufferSize_, this->bufferNumber_);
6060
if (!device) { return false; }
6161
return device->Setup().Success();
6262
}

ucm/store/nfsstore/cc/domain/trans/posix_queue.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@ class PosixQueue : public TaskQueue {
4141
TaskSet* failureSet_{nullptr};
4242
const SpaceLayout* layout_{nullptr};
4343
ThreadPool<Task::Shard, Device> backend_{};
44-
bool transferUseDirect_{false};
44+
bool useDirect{false};
4545

4646
public:
4747
Status Setup(const int32_t deviceId, const size_t bufferSize, const size_t bufferNumber,
48-
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool transferUseDirect);
48+
TaskSet* failureSet, const SpaceLayout* layout, const size_t timeoutMs, bool useDirect);
4949
void Push(std::list<Task::Shard>& shards) noexcept override;
5050

5151
private:

ucm/store/nfsstore/cc/domain/trans/trans_manager.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,16 @@ namespace UC {
3333
class TransManager : public TaskManager {
3434
public:
3535
Status Setup(const int32_t deviceId, const size_t streamNumber, const size_t ioSize,
36-
const size_t bufferNumber, const SpaceLayout* layout, const size_t timeoutMs, bool transferUseDirect)
36+
const size_t bufferNumber, const SpaceLayout* layout, const size_t timeoutMs, bool useDirect)
3737
{
3838
this->timeoutMs_ = timeoutMs;
3939
auto status = Status::OK();
40-
if(transferUseDirect)
40+
if(useDirect)
4141
{
4242
for (size_t i = 0; i < streamNumber; i++) {
4343
auto q = std::make_shared<DirectStorageQueue>();
4444
status =
45-
q->Setup(deviceId, ioSize, bufferNumber, &this->failureSet_, layout, timeoutMs, transferUseDirect);
45+
q->Setup(deviceId, ioSize, bufferNumber, &this->failureSet_, layout, timeoutMs, useDirect);
4646
if (status.Failure()) { break; }
4747
this->queues_.emplace_back(std::move(q));
4848
}
@@ -52,7 +52,7 @@ class TransManager : public TaskManager {
5252
for (size_t i = 0; i < streamNumber; i++) {
5353
auto q = std::make_shared<PosixQueue>();
5454
status =
55-
q->Setup(deviceId, ioSize, bufferNumber, &this->failureSet_, layout, timeoutMs, transferUseDirect);
55+
q->Setup(deviceId, ioSize, bufferNumber, &this->failureSet_, layout, timeoutMs, useDirect);
5656
if (status.Failure()) { break; }
5757
this->queues_.emplace_back(std::move(q));
5858
}

0 commit comments

Comments
 (0)