Skip to content

Commit 2960f2e

Browse files
tr: add full support for ranges
tr currently only handles ranges if they are the only construct in the string containing them (see `contains_single_range`). Add proper handling of ranges: multiple ranges in one string, and ranges from one character type to another (e.g. tr -d '\060-9 A-Z'). Fix handling of octal sequences above \377. Add validation to ensure equiv (in "[=equiv=]" constructs) is only one character. Add validation to ensure ranges are not "backwards" (the ending character must be greater than or equal to the starting character). Parse and validate string1 and string2 before reading from stdin.
1 parent 0a07286 commit 2960f2e

File tree

2 files changed

+1184
-509
lines changed

2 files changed

+1184
-509
lines changed

text/tests/tr/mod.rs

Lines changed: 181 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,37 @@
1111
use plib::{run_test, TestPlan};
1212

1313
fn tr_test(args: &[&str], test_data: &str, expected_output: &str) {
14-
let str_args: Vec<String> = args.iter().map(|s| String::from(*s)).collect();
14+
let str_args = args
15+
.iter()
16+
.map(|st| st.to_owned().to_owned())
17+
.collect::<Vec<String>>();
1518

1619
run_test(TestPlan {
17-
cmd: String::from("tr"),
20+
cmd: "tr".to_owned(),
1821
args: str_args,
19-
stdin_data: String::from(test_data),
20-
expected_out: String::from(expected_output),
21-
expected_err: String::from(""),
22+
stdin_data: test_data.to_owned(),
23+
expected_out: expected_output.to_owned(),
24+
expected_err: String::new(),
2225
expected_exit_code: 0,
2326
});
2427
}
2528

29+
fn tr_bad_arguments_failure_test(args: &[&str], expected_stderr: &str) {
30+
let str_args = args
31+
.iter()
32+
.map(|st| st.to_owned().to_owned())
33+
.collect::<Vec<_>>();
34+
35+
run_test(TestPlan {
36+
cmd: "tr".to_owned(),
37+
args: str_args,
38+
stdin_data: String::new(),
39+
expected_out: String::new(),
40+
expected_err: expected_stderr.to_owned(),
41+
expected_exit_code: 1,
42+
});
43+
}
44+
2645
#[test]
2746
fn test_tr_1() {
2847
tr_test(&["abcd", "[]*]"], "abcd", "]]]]");
@@ -387,3 +406,160 @@ fn tr_left_square_bracket_literal() {
387406
fn tr_multiple_transformations() {
388407
tr_test(&["3[:lower:]", "![:upper:]"], "abc123", "ABC12!");
389408
}
409+
410+
#[test]
411+
fn tr_equiv_not_one_char() {
412+
tr_bad_arguments_failure_test(
413+
&["-d", "[=aa=]"],
414+
"tr: aa: equivalence class operand must be a single character\n",
415+
);
416+
}
417+
418+
#[test]
419+
fn tr_backwards_range_normal() {
420+
tr_bad_arguments_failure_test(
421+
&["-d", "b-a"],
422+
"tr: range-endpoints of 'b-a' are in reverse collating sequence order\n",
423+
);
424+
}
425+
426+
#[test]
427+
fn tr_backwards_range_backslash() {
428+
tr_bad_arguments_failure_test(
429+
&["-d", r"\t-\b"],
430+
r"tr: range-endpoints of '\t-\u{8}' are in reverse collating sequence order
431+
",
432+
);
433+
}
434+
435+
#[test]
436+
fn tr_backwards_range_octal() {
437+
tr_bad_arguments_failure_test(
438+
&["-d", r"\045-\044"],
439+
"tr: range-endpoints of '%-$' are in reverse collating sequence order\n",
440+
);
441+
}
442+
443+
#[test]
444+
fn tr_backwards_range_mixed() {
445+
tr_bad_arguments_failure_test(
446+
&["-d", r"A-\t"],
447+
r"tr: range-endpoints of 'A-\t' are in reverse collating sequence order
448+
",
449+
);
450+
}
451+
452+
#[test]
453+
fn tr_mixed_range() {
454+
tr_test(
455+
&["-d", r"\044-Z"],
456+
"$123456789ABCDEFGHIabcdefghi",
457+
"abcdefghi",
458+
);
459+
}
460+
461+
#[test]
462+
fn tr_two_ranges() {
463+
tr_test(&["ab12", r"\044-\045Y-Z"], "21ba", "ZY%$");
464+
}
465+
466+
#[test]
467+
fn tr_bad_octal_range() {
468+
tr_bad_arguments_failure_test(
469+
&["-d", r"\046-\048"],
470+
r"tr: range-endpoints of '&-\u{4}' are in reverse collating sequence order
471+
",
472+
);
473+
}
474+
475+
#[test]
476+
fn tr_bad_x_n_construct_decimal() {
477+
tr_bad_arguments_failure_test(
478+
&["-d", "[a*100000000000000000000]"],
479+
"tr: invalid repeat count ‘100000000000000000000’ in [c*n] construct\n",
480+
);
481+
}
482+
483+
#[test]
484+
fn tr_bad_x_n_construct_octal() {
485+
tr_bad_arguments_failure_test(
486+
&["-d", "[a*010000000000000000000000]"],
487+
"tr: invalid repeat count ‘010000000000000000000000’ in [c*n] construct\n",
488+
);
489+
}
490+
491+
#[test]
492+
fn tr_bad_x_n_construct_non_decimal_non_octal() {
493+
tr_bad_arguments_failure_test(
494+
&["-d", "[a*a]"],
495+
"tr: invalid repeat count ‘a’ in [c*n] construct\n",
496+
);
497+
}
498+
499+
#[test]
500+
fn tr_trailing_hyphen() {
501+
tr_test(&["ab", "c-"], "abc123", "c-c123");
502+
}
503+
504+
#[test]
505+
fn tr_backslash_range() {
506+
tr_test(
507+
&["1-9", r"\b-\r"],
508+
r"\ 987654321 -",
509+
"\\ \x0D\x0D\x0D\x0D\x0C\x0B\x0A\x09\x08 -",
510+
);
511+
}
512+
513+
#[test]
514+
fn tr_fill_with_last_char() {
515+
tr_test(&["1-34-8", "A-C!"], "987654321", "9!!!!!CBA");
516+
}
517+
518+
#[test]
519+
fn tr_octal_above_one_byte_value() {
520+
let args = &["-d", r"\501"];
521+
522+
let str_args = args
523+
.iter()
524+
.map(|st| st.to_owned().to_owned())
525+
.collect::<Vec<String>>();
526+
527+
run_test(TestPlan {
528+
cmd: "tr".to_owned(),
529+
args: str_args,
530+
stdin_data: "(1Ł)".to_owned(),
531+
expected_out: "Ł)".to_owned(),
532+
expected_err: r"tr: warning: the ambiguous octal escape \501 is being interpreted as the 2-byte sequence \050, 1
533+
".to_owned(),
534+
expected_exit_code: 0,
535+
});
536+
}
537+
538+
#[test]
539+
fn tr_short_octal_with_non_octal_digits_after() {
540+
// Interpret as \004, '8', and the range from '1' through '3'
541+
tr_test(&["-d", r"\0481-3"], "A 123 \x04 456 789 Z", "A 456 79 Z");
542+
}
543+
544+
#[test]
545+
fn tr_octal_parsing_ambiguous() {
546+
// "If an ordinary digit (representing itself) is to follow an octal sequence, the octal sequence must use the full three digits to avoid ambiguity."
547+
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/tr.html
548+
// Interpret as \123, not \012 and '3'
549+
tr_test(
550+
&["-d", r"\123"],
551+
"321 \\ \x0A \x53 \x50 \x02 \x01 \\ CBA",
552+
"321 \\ \x0A \x50 \x02 \x01 \\ CBA",
553+
);
554+
}
555+
556+
#[test]
557+
fn tr_octal_parsing_non_ambiguous() {
558+
// See above
559+
// Interpret as \012 and 'A'
560+
tr_test(
561+
&["-d", r"\12A"],
562+
"321 \\ \x0A \x53 \x50 \x02 \x01 \\ CBA",
563+
"321 \\ \x53 \x50 \x02 \x01 \\ CB",
564+
);
565+
}

0 commit comments

Comments
 (0)