Skip to content

Commit f1aab0f

Browse files
grep: add -o option
1 parent 6c791e7 commit f1aab0f

File tree

2 files changed

+208
-49
lines changed

2 files changed

+208
-49
lines changed

text/grep.rs

Lines changed: 170 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@
99

1010
use clap::Parser;
1111
use gettextrs::{bind_textdomain_codeset, textdomain};
12-
use libc::{regcomp, regex_t, regexec, regfree, REG_EXTENDED, REG_ICASE, REG_NOMATCH};
12+
use libc::{regcomp, regex_t, regexec, regfree, regmatch_t, REG_EXTENDED, REG_ICASE, REG_NOMATCH};
1313
use plib::PROJECT_NAME;
1414
use std::{
1515
ffi::CString,
1616
fs::File,
17-
io::{self, BufRead, BufReader},
17+
io::{self, BufRead, BufReader, StdoutLock, Write},
1818
path::{Path, PathBuf},
19-
ptr,
2019
};
2120

2221
/// grep - search a file for a pattern.
@@ -47,15 +46,19 @@ struct Args {
4746
#[arg(short, long)]
4847
ignore_case: bool,
4948

50-
/// Write only the names of input_files containing selected lines to standard output.
49+
/// Write only the names of input files containing selected lines to standard output.
5150
#[arg(short = 'l', long)]
5251
files_with_matches: bool,
5352

5453
/// Precede each output line by its relative line number in the file, each file starting at line 1.
5554
#[arg(short = 'n', long)]
5655
line_number: bool,
5756

58-
/// Write only the names of input_files containing selected lines to standard output.
57+
/// Only print the matching characters in each line.
58+
#[arg(short = 'o', long = "only-matching")]
59+
only_matching: bool,
60+
61+
/// Do not print to standard output. The presence or absence of a match is communicated with the exit status.
5962
#[arg(short, long)]
6063
quiet: bool,
6164

@@ -106,7 +109,7 @@ impl Args {
106109
return Err("Options '-l' and '-q' cannot be used together".to_string());
107110
}
108111
if self.regexp.is_empty() && self.file.is_empty() && self.single_pattern.is_none() {
109-
return Err("Required at least one pattern list or file".to_string());
112+
return Err("A pattern list or at least one file is required".to_string());
110113
}
111114
Ok(())
112115
}
@@ -198,6 +201,8 @@ impl Args {
198201
output_mode,
199202
patterns,
200203
input_files: self.input_files,
204+
stdout_lock: io::stdout().lock(),
205+
only_matching: self.only_matching,
201206
})
202207
}
203208
}
@@ -298,28 +303,118 @@ impl Patterns {
298303
/// # Returns
299304
///
300305
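/// Returns a tuple: [bool](bool) - `true` if input matches present patterns, else `false`; and the matched substrings as bytes, which are collected only when `collect_matching_substrings` is `true` (otherwise the list is empty).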
301-
fn matches(&self, input: impl AsRef<str>) -> bool {
306+
fn matches(
307+
&self,
308+
input: impl AsRef<str>,
309+
collect_matching_substrings: bool,
310+
) -> (bool, Vec<Vec<u8>>) {
302311
let input = input.as_ref();
312+
303313
match self {
304314
Patterns::Fixed(patterns, ignore_case, line_regexp) => {
305315
let input = if *ignore_case {
306316
input.to_lowercase()
307317
} else {
308318
input.to_string()
309319
};
310-
patterns.iter().any(|p| {
320+
321+
let mut matching_substrings = Vec::<Vec<u8>>::new();
322+
323+
let mut any_pattern_matched = false;
324+
325+
for pattern in patterns {
311326
if *line_regexp {
312-
input == *p
327+
if input != *pattern {
328+
continue;
329+
}
330+
331+
if !collect_matching_substrings {
332+
return (true, Vec::<Vec<u8>>::new());
333+
}
334+
335+
any_pattern_matched = true;
336+
337+
matching_substrings.push(pattern.as_bytes().to_vec());
313338
} else {
314-
input.contains(p)
339+
for st in input.matches(pattern) {
340+
if !collect_matching_substrings {
341+
return (true, Vec::<Vec<u8>>::new());
342+
}
343+
344+
any_pattern_matched = true;
345+
346+
matching_substrings.push(st.as_bytes().to_vec());
347+
}
315348
}
316-
})
349+
}
350+
351+
(any_pattern_matched, matching_substrings)
317352
}
318353
Patterns::Regex(patterns) => {
319-
let c_input = CString::new(input).unwrap();
320-
patterns.iter().any(|p| unsafe {
321-
regexec(p, c_input.as_ptr(), 0, ptr::null_mut(), 0) != REG_NOMATCH
322-
})
354+
let nmatch_to_use = if collect_matching_substrings { 1 } else { 0 };
355+
356+
let input_slice = input.as_bytes();
357+
358+
let mut matching_substrings = Vec::<Vec<u8>>::new();
359+
360+
let mut any_pattern_matched = false;
361+
362+
'next_pattern: for p in patterns {
363+
let mut current_string_index = 0_usize;
364+
365+
loop {
366+
let current_string_slice = &input_slice[current_string_index..];
367+
368+
let current_string_c_string = CString::new(current_string_slice).unwrap();
369+
370+
let mut regmatch_t_vec = vec![
371+
regmatch_t {
372+
rm_so: -1,
373+
rm_eo: -1,
374+
};
375+
nmatch_to_use
376+
];
377+
378+
let regmatch_vec_pointer = regmatch_t_vec.as_mut_ptr();
379+
380+
let regexec_return_value = unsafe {
381+
regexec(
382+
p,
383+
current_string_c_string.as_ptr(),
384+
nmatch_to_use,
385+
regmatch_vec_pointer,
386+
0,
387+
)
388+
};
389+
390+
if regexec_return_value != 0 {
391+
debug_assert!(regexec_return_value == REG_NOMATCH);
392+
393+
continue 'next_pattern;
394+
}
395+
396+
if !collect_matching_substrings {
397+
return (true, Vec::<Vec<u8>>::new());
398+
}
399+
400+
any_pattern_matched = true;
401+
402+
let regmatch_t = regmatch_t_vec.first().unwrap();
403+
404+
let start = usize::try_from(regmatch_t.rm_so).unwrap();
405+
let end = usize::try_from(regmatch_t.rm_eo).unwrap();
406+
407+
debug_assert!(end > 0_usize);
408+
409+
matching_substrings.push(current_string_slice[start..end].to_vec());
410+
411+
debug_assert!(end > current_string_index);
412+
413+
current_string_index += end;
414+
}
415+
}
416+
417+
(any_pattern_matched, matching_substrings)
323418
}
324419
}
325420
}
@@ -358,6 +453,8 @@ struct GrepModel {
358453
output_mode: OutputMode,
359454
patterns: Patterns,
360455
input_files: Vec<String>,
456+
stdout_lock: StdoutLock<'static>,
457+
only_matching: bool,
361458
}
362459

363460
impl GrepModel {
@@ -399,85 +496,110 @@ impl GrepModel {
399496
}
400497
}
401498

499+
fn print_line_prefix(&mut self, input_name: &str, line_number: u64) {
500+
if self.multiple_inputs {
501+
write!(self.stdout_lock, "{input_name}:").unwrap();
502+
}
503+
504+
if self.line_number {
505+
write!(self.stdout_lock, "{line_number}:").unwrap();
506+
}
507+
}
508+
402509
/// Reads lines from buffer and processes them.
403510
///
404511
/// # Arguments
405512
///
406513
/// * `input_name` - [str](str) that represents content source name.
407514
/// * `reader` - [Box](Box) that contains object that implements [BufRead] and reads lines.
408515
fn process_input(&mut self, input_name: &str, mut reader: Box<dyn BufRead>) {
409-
let mut line_number: u64 = 0;
516+
let mut line_number = 0_u64;
517+
518+
let mut line = String::new();
519+
410520
loop {
411-
let mut line = String::new();
412521
line_number += 1;
522+
523+
line.clear();
524+
525+
// TODO
526+
// Probably should work on non-UTF-8 input
413527
match reader.read_line(&mut line) {
414528
Ok(n_read) => {
415529
if n_read == 0 {
416530
break;
417531
}
418-
let trimmed = if line.ends_with('\n') {
419-
&line[..line.len() - 1]
420-
} else {
421-
&line
532+
533+
let mut chars = line.chars();
534+
535+
let line_without_newline = match chars.next_back() {
536+
Some('\n') => chars.as_str(),
537+
_ => line.as_str(),
422538
};
423539

424-
let init_matches = self.patterns.matches(trimmed);
540+
let (line_matches_any_pattern, matching_substrings) = self.patterns.matches(
541+
line_without_newline,
542+
self.only_matching && matches!(self.output_mode, OutputMode::Default),
543+
);
544+
425545
let matches = if self.invert_match {
426-
!init_matches
546+
!line_matches_any_pattern
427547
} else {
428-
init_matches
548+
line_matches_any_pattern
429549
};
550+
430551
if matches {
431552
self.any_matches = true;
553+
432554
match &mut self.output_mode {
433555
OutputMode::Count(count) => {
434556
*count += 1;
435557
}
436558
OutputMode::FilesWithMatches => {
437-
println!("{input_name}");
559+
writeln!(&mut self.stdout_lock, "{input_name}").unwrap();
560+
438561
break;
439562
}
440563
OutputMode::Quiet => {
441564
return;
442565
}
443566
OutputMode::Default => {
444-
let result = format!(
445-
"{}{}{}",
446-
if self.multiple_inputs {
447-
format!("{input_name}:")
448-
} else {
449-
String::new()
450-
},
451-
if self.line_number {
452-
format!("{line_number}:")
453-
} else {
454-
String::new()
455-
},
456-
trimmed
457-
);
458-
println!("{result}");
567+
if self.only_matching {
568+
for matching_substring in matching_substrings {
569+
self.print_line_prefix(input_name, line_number);
570+
571+
self.stdout_lock
572+
.write_all(matching_substring.as_slice())
573+
.unwrap();
574+
575+
self.stdout_lock.write_all(b"\n").unwrap();
576+
}
577+
} else {
578+
self.print_line_prefix(input_name, line_number);
579+
580+
writeln!(self.stdout_lock, "{line_without_newline}").unwrap();
581+
}
459582
}
460583
}
461584
}
462-
line.clear();
463585
}
464586
Err(err) => {
465587
self.any_errors = true;
588+
466589
if !self.no_messages {
467-
eprintln!(
468-
"{}: Error reading line {} ({})",
469-
input_name, line_number, err
470-
);
590+
eprintln!("{input_name}: Error reading line {line_number} ({err})",);
471591
}
472592
}
473593
}
474594
}
595+
475596
if let OutputMode::Count(count) = &mut self.output_mode {
476597
if self.multiple_inputs {
477-
println!("{input_name}:{count}");
598+
writeln!(&mut self.stdout_lock, "{input_name}:{count}").unwrap();
478599
} else {
479-
println!("{count}");
600+
writeln!(&mut self.stdout_lock, "{count}").unwrap();
480601
}
602+
481603
*count = 0;
482604
}
483605
}
@@ -501,7 +623,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
501623
})
502624
.map(|mut grep_model| grep_model.grep())
503625
.unwrap_or_else(|err| {
504-
eprintln!("{}", err);
626+
eprintln!("{err}");
505627
2
506628
});
507629

text/tests/grep/mod.rs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ fn test_absent_pattern() {
8181
&[],
8282
"",
8383
"",
84-
"Required at least one pattern list or file\n",
84+
"A pattern list or at least one file is required\n",
8585
2,
8686
);
8787
}
@@ -1389,3 +1389,40 @@ fn test_long_names_files() {
13891389
0,
13901390
);
13911391
}
1392+
1393+
// Checks `-o`: every occurrence of the pattern is printed on its own output line, nothing is
// printed when the pattern is absent or when `-v` is combined with `-o`, and each output line
// carries the input name when more than one input is given.
#[test]
fn test_dash_o() {
    const INPUT: &str = concat!(
        "Contains KEYWORD here and also here: KEYWORD\n",
        "Output is not organized: one KEYWORD per output line\n",
        "Regardless of which input line it was found in\n",
    );

    // Three occurrences over two input lines give three output lines.
    const EXPECTED_SINGLE_INPUT: &str = "KEYWORD\nKEYWORD\nKEYWORD\n";

    // Standard input is named twice, so every output line is prefixed with the input name.
    const EXPECTED_MULTIPLE_INPUTS: &str = concat!(
        "(standard input):KEYWORD\n",
        "(standard input):KEYWORD\n",
        "(standard input):KEYWORD\n",
    );

    grep_test(&["-o", "KEYWORD"], INPUT, EXPECTED_SINGLE_INPUT, "", 0_i32);

    // No occurrence: no output and exit status 1.
    grep_test(&["-o", "NOT PRESENT"], INPUT, "", "", 1_i32);

    // Inverted match: lines are selected, but there is no matching text to print.
    grep_test(&["-o", "-v", "NOT PRESENT"], INPUT, "", "", 0_i32);

    grep_test(
        &["-o", "KEYWORD", "--", "-", "-"],
        INPUT,
        EXPECTED_MULTIPLE_INPUTS,
        "",
        0_i32,
    );
}

0 commit comments

Comments
 (0)