Commit 75d0a44

support newline and backslash character in Glacier logs

Signed-off-by: Utkarsh Srivastava <srivastavautkarsh8097@gmail.com>

add support for legacy logs as well

Signed-off-by: Utkarsh Srivastava <srivastavautkarsh8097@gmail.com>

(cherry picked from commit a80d1bb)

1 parent 0cce309 commit 75d0a44

File tree: 4 files changed (+217, -15 lines)

src/sdk/glacier.js
src/sdk/glacier_tapecloud.js
src/sdk/namespace_fs.js
src/test/unit_tests/nsfs/test_nsfs_glacier_backend.js

src/sdk/glacier.js (26 additions, 0 deletions)

@@ -300,6 +300,32 @@ class Glacier {
         return restore_status.state === Glacier.RESTORE_STATUS_CAN_RESTORE;
     }
 
+    /**
+     * encode_log takes in data intended for the backend and encodes
+     * it.
+     *
+     * This method must be overwritten for all the backends if they need
+     * different encodings for their logs.
+     * @param {string} data
+     * @returns {string}
+     */
+    encode_log(data) {
+        return data;
+    }
+
+    /**
+     * decode_log takes in data intended for the backend and decodes
+     * it.
+     *
+     * This method must be overwritten for all the backends if they need
+     * different encodings for their logs.
+     * @param {string} data
+     * @returns {string}
+     */
+    decode_log(data) {
+        return data;
+    }
+
     /**
      * get_restore_status returns status of the object at the given
      * file_path
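
The base class ships these hooks as identity functions, so backends that need no special encoding keep writing raw entries. As an illustrative sketch (a hypothetical subclass, not part of this commit, assuming the Glacier base class above is in scope), a backend would opt in by overriding both hooks symmetrically, for example with base64:

    class Base64Glacier extends Glacier {
        // Hypothetical override: base64 makes any byte sequence safe to
        // store one entry per line in the write-ahead log.
        encode_log(data) {
            return Buffer.from(data, 'utf8').toString('base64');
        }

        // Must mirror encode_log exactly so that WAL consumers get the
        // original entry back.
        decode_log(data) {
            return Buffer.from(data, 'base64').toString('utf8');
        }
    }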

src/sdk/glacier_tapecloud.js (65 additions, 11 deletions)

@@ -191,6 +191,8 @@ class TapeCloudUtils {
 }
 
 class TapeCloudGlacier extends Glacier {
+    static LOG_DELIM = ' -- ';
+
     /**
      * @param {nb.NativeFSContext} fs_context
      * @param {LogFile} log_file

@@ -200,8 +202,14 @@ class TapeCloudGlacier extends Glacier {
     async stage_migrate(fs_context, log_file, failure_recorder) {
         dbg.log2('TapeCloudGlacier.stage_migrate starting for', log_file.log_path);
 
+        // Wrap failure recorder to make sure we correctly encode the entries
+        // before appending them to the failure log
+        const encoded_failure_recorder = async failure => failure_recorder(this.encode_log(failure));
+
         try {
             await log_file.collect(Glacier.MIGRATE_STAGE_WAL_NAME, async (entry, batch_recorder) => {
+                entry = this.decode_log(entry);
+
                 let entry_fh;
                 let should_migrate = true;
                 try {

@@ -230,7 +238,7 @@ class TapeCloudGlacier extends Glacier {
                     // Can't really do anything if this fails - provider
                     // needs to make sure that appropriate error handling
                     // is being done there
-                    await failure_recorder(entry);
+                    await encoded_failure_recorder(entry);
                     return;
                 }
 
@@ -240,14 +248,14 @@ class TapeCloudGlacier extends Glacier {
                 // Mark the file staged
                 try {
                     await entry_fh.replacexattr(fs_context, { [Glacier.XATTR_STAGE_MIGRATE]: Date.now().toString() });
-                    await batch_recorder(entry);
+                    await batch_recorder(this.encode_log(entry));
                 } catch (error) {
                     dbg.error('failed to mark the entry migrate staged', error);
 
                     // Can't really do anything if this fails - provider
                     // needs to make sure that appropriate error handling
                     // is being done there
-                    await failure_recorder(entry);
+                    await encoded_failure_recorder(entry);
                 } finally {
                     entry_fh?.close(fs_context);
                 }

@@ -268,16 +276,23 @@ class TapeCloudGlacier extends Glacier {
      */
     async migrate(fs_context, log_file, failure_recorder) {
         dbg.log2('TapeCloudGlacier.migrate starting for', log_file.log_path);
+
+        // Wrap failure recorder to make sure we correctly encode the entries
+        // before appending them to the failure log
+        const encoded_failure_recorder = async failure => failure_recorder(this.encode_log(failure));
+
         try {
             // This will throw error only if our eeadm error handler
             // panics as well and at that point it's okay to
             // not handle the error and rather keep the log file around
-            await this._migrate(log_file.log_path, failure_recorder);
+            await this._migrate(log_file.log_path, encoded_failure_recorder);
 
             // Un-stage all the files - We don't need to deal with the cases
             // where some files have migrated and some have not as that is
             // not important for staging/un-staging.
             await log_file.collect_and_process(async entry => {
+                entry = this.decode_log(entry);
+
                 let fh;
                 try {
                     fh = await nb_native().fs.open(fs_context, entry);

@@ -293,7 +308,7 @@ class TapeCloudGlacier extends Glacier {
                     // Add the enty to the failure log - This could be wasteful as it might
                     // add entries which have already been migrated but this is a better
                     // retry.
-                    await failure_recorder(entry);
+                    await encoded_failure_recorder(entry);
                 } finally {
                     await fh?.close(fs_context);
                 }

@@ -315,8 +330,14 @@ class TapeCloudGlacier extends Glacier {
     async stage_restore(fs_context, log_file, failure_recorder) {
         dbg.log2('TapeCloudGlacier.stage_restore starting for', log_file.log_path);
 
+        // Wrap failure recorder to make sure we correctly encode the entries
+        // before appending them to the failure log
+        const encoded_failure_recorder = async failure => failure_recorder(this.encode_log(failure));
+
         try {
             await log_file.collect(Glacier.RESTORE_STAGE_WAL_NAME, async (entry, batch_recorder) => {
+                entry = this.decode_log(entry);
+
                 let fh;
                 try {
                     fh = await nb_native().fs.open(fs_context, entry);

@@ -343,9 +364,9 @@ class TapeCloudGlacier extends Glacier {
                     // 3. If we read corrupt value then either the file is getting staged or is
                     // getting un-staged - In either case we must requeue.
                     if (stat.xattr[Glacier.XATTR_STAGE_MIGRATE]) {
-                        await failure_recorder(entry);
+                        await encoded_failure_recorder(entry);
                     } else {
-                        await batch_recorder(entry);
+                        await batch_recorder(this.encode_log(entry));
                     }
                 } catch (error) {
                     if (error.code === 'ENOENT') {

@@ -357,7 +378,7 @@ class TapeCloudGlacier extends Glacier {
                         'adding log entry', entry,
                         'to failure recorder due to error', error,
                     );
-                    await failure_recorder(entry);
+                    await encoded_failure_recorder(entry);
                 } finally {
                     await fh?.close(fs_context);
                 }

@@ -379,25 +400,32 @@ class TapeCloudGlacier extends Glacier {
     async restore(fs_context, log_file, failure_recorder) {
         dbg.log2('TapeCloudGlacier.restore starting for', log_file.log_path);
 
+        // Wrap failure recorder to make sure we correctly encode the entries
+        // before appending them to the failure log
+        const encoded_failure_recorder = async failure => failure_recorder(this.encode_log(failure));
+
         try {
             const success = await this._recall(
                 log_file.log_path,
                 async entry_path => {
+                    entry_path = this.decode_log(entry_path);
                     dbg.log2('TapeCloudGlacier.restore.partial_failure - entry:', entry_path);
-                    await failure_recorder(entry_path);
+                    await encoded_failure_recorder(entry_path);
                 },
                 async entry_path => {
+                    entry_path = this.decode_log(entry_path);
                     dbg.log2('TapeCloudGlacier.restore.partial_success - entry:', entry_path);
-                    await this._finalize_restore(fs_context, entry_path, failure_recorder);
+                    await this._finalize_restore(fs_context, entry_path, encoded_failure_recorder);
                 }
             );
 
             // We will iterate through the entire log file iff and we get a success message from
             // the recall call.
             if (success) {
                 await log_file.collect_and_process(async (entry_path, batch_recorder) => {
+                    entry_path = this.decode_log(entry_path);
                     dbg.log2('TapeCloudGlacier.restore.batch - entry:', entry_path);
-                    await this._finalize_restore(fs_context, entry_path, failure_recorder);
+                    await this._finalize_restore(fs_context, entry_path, encoded_failure_recorder);
                 });
             }
 
@@ -421,6 +449,32 @@ class TapeCloudGlacier extends Glacier {
         return result.toLowerCase().trim() === 'true';
     }
 
+    /**
+     * encode_log takes string of data and escapes all the backslash and newline
+     * characters
+     * @example
+     * // /Users/noobaa/data/buc/obj\nfile => /Users/noobaa/data/buc/obj\\nfile
+     * // /Users/noobaa/data/buc/obj\file => /Users/noobaa/data/buc/obj\\file
+     * @param {string} data
+     * @returns {string}
+     */
+    encode_log(data) {
+        const encoded = data.replace(/\\/g, '\\\\').replace(/\n/g, '\\n');
+        return `${TapeCloudGlacier.LOG_DELIM}${encoded}`;
+    }
+
+    /**
+     *
+     * @param {string} data
+     * @returns {string}
+     */
+    decode_log(data) {
+        if (!data.startsWith(TapeCloudGlacier.LOG_DELIM)) return data;
+        return data.substring(TapeCloudGlacier.LOG_DELIM.length)
+            .replace(/\\n/g, '\n')
+            .replace(/\\\\/g, '\\');
+    }
+
     // ============= PRIVATE FUNCTIONS =============
 
     /**
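
As a minimal round-trip sketch (a standalone illustration, not part of the commit), the TapeCloudGlacier encoding keeps a newline-containing key on a single WAL line, and the ' -- ' delimiter doubles as a marker that lets legacy, unprefixed log lines pass through decode_log untouched:

    const backend = new TapeCloudGlacier();

    // A key with a literal newline would otherwise split into two WAL lines.
    const entry = '/Users/noobaa/data/buc/obj\nfile';
    const encoded = backend.encode_log(entry);
    // encoded === ' -- /Users/noobaa/data/buc/obj\\nfile' (one line, no raw newline)

    console.log(backend.decode_log(encoded) === entry); // true

    // Legacy entries written before this change lack the ' -- ' prefix
    // and are returned as-is:
    console.log(backend.decode_log('/old/plain/entry')); // '/old/plain/entry'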

src/sdk/namespace_fs.js (2 additions, 2 deletions)

@@ -3702,13 +3702,13 @@ class NamespaceFS {
     async append_to_migrate_wal(entry) {
         if (!config.NSFS_GLACIER_LOGS_ENABLED) return;
 
-        await NamespaceFS.migrate_wal.append(entry);
+        await NamespaceFS.migrate_wal.append(Glacier.getBackend().encode_log(entry));
     }
 
     async append_to_restore_wal(entry) {
         if (!config.NSFS_GLACIER_LOGS_ENABLED) return;
 
-        await NamespaceFS.restore_wal.append(entry);
+        await NamespaceFS.restore_wal.append(Glacier.getBackend().encode_log(entry));
     }
 
     static get migrate_wal() {
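
Together with the backend changes above, the write and read sides stay symmetric. Roughly (a sketch, assuming TapeCloudGlacier is the backend returned by Glacier.getBackend()):

    // Write path (NamespaceFS): the raw key becomes one encoded WAL line.
    const entry = '/data/bucket/obj\nfile';
    await NamespaceFS.migrate_wal.append(Glacier.getBackend().encode_log(entry));

    // Read path (TapeCloudGlacier.stage_migrate and friends): each WAL line
    // is passed through this.decode_log(entry) before any filesystem access,
    // recovering the original key.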

src/test/unit_tests/nsfs/test_nsfs_glacier_backend.js (124 additions, 2 deletions)

@@ -166,6 +166,8 @@ mocha.describe('nsfs_glacier', function() {
     });
 
     mocha.describe('nsfs_glacier_tapecloud', async function() {
+        const restore_key_spcl_char_1 = 'restore_key_2_\n';
+        const restore_key_spcl_char_2 = 'restore_key_2_\n_2';
         const upload_key = 'upload_key_1';
         const restore_key = 'restore_key_1';
         const xattr = { key: 'value', key2: 'value2' };

@@ -280,10 +282,10 @@ mocha.describe('nsfs_glacier', function() {
             failure_backend._process_expired = async () => { /**noop*/ };
             failure_backend._recall = async (_file, failure_recorder, success_recorder) => {
                 // This unintentionally also replicates duplicate entries in WAL
-                await failure_recorder(failed_file_path);
+                await failure_recorder(failure_backend.encode_log(failed_file_path));
 
                 // This unintentionally also replicates duplicate entries in WAL
-                await success_recorder(success_file_path);
+                await success_recorder(failure_backend.encode_log(success_file_path));
 
                 return false;
             };

@@ -324,6 +326,126 @@ mocha.describe('nsfs_glacier', function() {
             assert(failure_stats.xattr[Glacier.XATTR_RESTORE_REQUEST]);
         });
 
+        mocha.it('restore-object should successfully restore objects with special characters', async function() {
+            const now = Date.now();
+            const data = crypto.randomBytes(100);
+            const all_params = [
+                {
+                    bucket: upload_bkt,
+                    key: restore_key_spcl_char_1,
+                    storage_class: s3_utils.STORAGE_CLASS_GLACIER,
+                    xattr,
+                    days: 1,
+                    source_stream: buffer_utils.buffer_to_read_stream(data)
+                },
+                {
+                    bucket: upload_bkt,
+                    key: restore_key_spcl_char_2,
+                    storage_class: s3_utils.STORAGE_CLASS_GLACIER,
+                    xattr,
+                    days: 1,
+                    source_stream: buffer_utils.buffer_to_read_stream(data)
+                }
+            ];
+
+            for (const params of all_params) {
+                const upload_res = await glacier_ns.upload_object(params, dummy_object_sdk);
+                console.log('upload_object response', inspect(upload_res));
+
+                const restore_res = await glacier_ns.restore_object(params, dummy_object_sdk);
+                assert(restore_res);
+
+                // Issue restore
+                await backend.perform(glacier_ns.prepare_fs_context(dummy_object_sdk), "RESTORE");
+
+                // Ensure object is restored
+                const md = await glacier_ns.read_object_md(params, dummy_object_sdk);
+
+                assert(!md.restore_status.ongoing);
+
+                const expected_expiry = Glacier.generate_expiry(new Date(), params.days, '', config.NSFS_GLACIER_EXPIRY_TZ);
+                assert(expected_expiry.getTime() >= md.restore_status.expiry_time.getTime());
+                assert(now <= md.restore_status.expiry_time.getTime());
+            }
+        });
+
+        mocha.it('restore-object should not restore failed item with special characters', async function() {
+            const now = Date.now();
+            const data = crypto.randomBytes(100);
+            const failed_restore_key = `${restore_key_spcl_char_1}_failured`;
+            const success_restore_key = `${restore_key_spcl_char_1}_success`;
+
+            const failed_params = {
+                bucket: upload_bkt,
+                key: failed_restore_key,
+                storage_class: s3_utils.STORAGE_CLASS_GLACIER,
+                xattr,
+                days: 1,
+                source_stream: buffer_utils.buffer_to_read_stream(data)
+            };
+
+            const success_params = {
+                bucket: upload_bkt,
+                key: success_restore_key,
+                storage_class: s3_utils.STORAGE_CLASS_GLACIER,
+                xattr,
+                days: 1,
+                source_stream: buffer_utils.buffer_to_read_stream(data)
+            };
+
+            const failed_file_path = glacier_ns._get_file_path(failed_params);
+            const success_file_path = glacier_ns._get_file_path(success_params);
+
+            const failure_backend = new TapeCloudGlacier();
+            failure_backend._migrate = async () => true;
+            failure_backend._process_expired = async () => { /**noop*/ };
+            failure_backend._recall = async (_file, failure_recorder, success_recorder) => {
+                // This unintentionally also replicates duplicate entries in WAL
+                await failure_recorder(failure_backend.encode_log(failed_file_path));
+
+                // This unintentionally also replicates duplicate entries in WAL
+                await success_recorder(failure_backend.encode_log(success_file_path));
+
+                return false;
+            };
+
+            const upload_res_1 = await glacier_ns.upload_object(failed_params, dummy_object_sdk);
+            console.log('upload_object response', inspect(upload_res_1));
+
+            const upload_res_2 = await glacier_ns.upload_object(success_params, dummy_object_sdk);
+            console.log('upload_object response', inspect(upload_res_2));
+
+            const restore_res_1 = await glacier_ns.restore_object(failed_params, dummy_object_sdk);
+            assert(restore_res_1);
+
+            const restore_res_2 = await glacier_ns.restore_object(success_params, dummy_object_sdk);
+            assert(restore_res_2);
+
+            const fs_context = glacier_ns.prepare_fs_context(dummy_object_sdk);
+
+            // Issue restore
+            await failure_backend.perform(glacier_ns.prepare_fs_context(dummy_object_sdk), "RESTORE");
+
+            // Ensure success object is restored
+            const success_md = await glacier_ns.read_object_md(success_params, dummy_object_sdk);
+
+            assert(!success_md.restore_status.ongoing);
+
+            const expected_expiry = Glacier.generate_expiry(new Date(), success_params.days, '', config.NSFS_GLACIER_EXPIRY_TZ);
+            assert(expected_expiry.getTime() >= success_md.restore_status.expiry_time.getTime());
+            assert(now <= success_md.restore_status.expiry_time.getTime());
+
+            // Ensure failed object is NOT restored
+            const failure_stats = await nb_native().fs.stat(
+                fs_context,
+                failed_file_path,
+            );
+
+            assert(!failure_stats.xattr[Glacier.XATTR_RESTORE_EXPIRY] || failure_stats.xattr[Glacier.XATTR_RESTORE_EXPIRY] === '');
+            assert(failure_stats.xattr[Glacier.XATTR_RESTORE_REQUEST]);
+        });
+
         mocha.it('_finalize_restore should tolerate deleted objects', async function() {
             // should not throw error if the path does not exist
             await backend._finalize_restore(glacier_ns.prepare_fs_context(dummy_object_sdk), '/path/does/not/exist');
