Skip to content

Commit 92b44e1

Browse files
lyeeeeee and liyi.neek authored
decrease max bytes read from hdfs (#1633)
Co-authored-by: liyi.neek <liyi.neek@bytedance.com>
1 parent b9c150b commit 92b44e1

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

tensorflow_io/core/filesystems/hdfs/hadoop_filesystem.cc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -313,10 +313,11 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n,
313313
// concurrent readers.
314314
absl::MutexLock l(&hdfs_file->mu);
315315
auto handle = hdfs_file->handle;
316-
// Max read length is INT_MAX-2, for hdfsPread function take a parameter
317-
// of int32. -2 offset can avoid JVM OutOfMemoryError.
316+
// Max read length is bounded by INT_MAX, since hdfsPread takes an int32 length.
317+
// The actual max array size in Java depends on the JVM's implementation,
318+
// So we choose INT_MAX-8, which is the maximum "safe" number.
318319
size_t read_n =
319-
(std::min)(n, static_cast<size_t>(std::numeric_limits<int>::max() - 2));
320+
(std::min)(n, static_cast<size_t>(std::numeric_limits<int>::max() - 8));
320321
int64_t r = libhdfs->hdfsPread(fs, handle, static_cast<tOffset>(offset),
321322
dst, static_cast<tSize>(read_n));
322323
if (r > 0) {
@@ -389,9 +390,11 @@ void Append(const TF_WritableFile* file, const char* buffer, size_t n,
389390

390391
size_t cur_pos = 0, write_len = 0;
391392
bool retry = false;
392-
// max() - 2 can avoid OutOfMemoryError in JVM .
393+
394+
// The actual max array size in Java depends on the JVM's implementation,
395+
// So we choose INT_MAX-8, which is the maximum "safe" number.
393396
static const size_t max_len_once =
394-
static_cast<size_t>(std::numeric_limits<tSize>::max() - 2);
397+
static_cast<size_t>(std::numeric_limits<tSize>::max() - 8);
395398
while (cur_pos < n) {
396399
write_len = (std::min)(n - cur_pos, max_len_once);
397400
tSize w = libhdfs->hdfsWrite(fs, handle, buffer + cur_pos,

0 commit comments

Comments
 (0)