Skip to content

Commit aefacfc

Browse files
grep: add -o option
1 parent 87b53d4 commit aefacfc

File tree

2 files changed

+213
-54
lines changed

2 files changed

+213
-54
lines changed

text/grep.rs

Lines changed: 175 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
//
99

1010
use clap::Parser;
11-
use gettextrs::{bind_textdomain_codeset, setlocale, textdomain, LocaleCategory};
12-
use libc::{regcomp, regex_t, regexec, regfree, REG_EXTENDED, REG_ICASE, REG_NOMATCH};
11+
use gettextrs::{bind_textdomain_codeset, textdomain};
12+
use libc::{regcomp, regex_t, regexec, regfree, regmatch_t, REG_EXTENDED, REG_ICASE, REG_NOMATCH};
13+
use plib::PROJECT_NAME;
1314
use std::{
1415
ffi::CString,
1516
fs::File,
16-
io::{self, BufRead, BufReader},
17+
io::{self, BufRead, BufReader, StdoutLock, Write},
1718
path::{Path, PathBuf},
18-
ptr,
1919
};
2020

2121
/// grep - search a file for a pattern.
@@ -46,15 +46,19 @@ struct Args {
4646
#[arg(short, long)]
4747
ignore_case: bool,
4848

49-
/// Write only the names of input_files containing selected lines to standard output.
49+
/// Write only the names of input files containing selected lines to standard output.
5050
#[arg(short = 'l', long)]
5151
files_with_matches: bool,
5252

5353
/// Precede each output line by its relative line number in the file, each file starting at line 1.
5454
#[arg(short = 'n', long)]
5555
line_number: bool,
5656

57-
/// Write only the names of input_files containing selected lines to standard output.
57+
/// Only print the matching characters in each line.
58+
#[arg(short = 'o', long = "only-matching")]
59+
only_matching: bool,
60+
61+
/// Do not print to standard output. The presence or absence of a match is communicated with the exit status.
5862
#[arg(short, long)]
5963
quiet: bool,
6064

@@ -105,7 +109,7 @@ impl Args {
105109
return Err("Options '-l' and '-q' cannot be used together".to_string());
106110
}
107111
if self.regexp.is_empty() && self.file.is_empty() && self.single_pattern.is_none() {
108-
return Err("Required at least one pattern list or file".to_string());
112+
return Err("A pattern list or at least one file is required".to_string());
109113
}
110114
Ok(())
111115
}
@@ -197,6 +201,8 @@ impl Args {
197201
output_mode,
198202
patterns,
199203
input_files: self.input_files,
204+
stdout_lock: io::stdout().lock(),
205+
only_matching: self.only_matching,
200206
})
201207
}
202208
}
@@ -297,28 +303,118 @@ impl Patterns {
297303
/// # Returns
298304
///
299305
/// Returns [bool](bool) - `true` if input matches present patterns, else `false`.
300-
fn matches(&self, input: impl AsRef<str>) -> bool {
306+
fn matches(
307+
&self,
308+
input: impl AsRef<str>,
309+
collect_matching_substrings: bool,
310+
) -> (bool, Vec<Vec<u8>>) {
301311
let input = input.as_ref();
312+
302313
match self {
303314
Patterns::Fixed(patterns, ignore_case, line_regexp) => {
304315
let input = if *ignore_case {
305316
input.to_lowercase()
306317
} else {
307318
input.to_string()
308319
};
309-
patterns.iter().any(|p| {
320+
321+
let mut matching_substrings = Vec::<Vec<u8>>::new();
322+
323+
let mut any_pattern_matched = false;
324+
325+
for pattern in patterns {
310326
if *line_regexp {
311-
input == *p
327+
if input != *pattern {
328+
continue;
329+
}
330+
331+
if !collect_matching_substrings {
332+
return (true, Vec::<Vec<u8>>::new());
333+
}
334+
335+
any_pattern_matched = true;
336+
337+
matching_substrings.push(pattern.as_bytes().to_vec());
312338
} else {
313-
input.contains(p)
339+
for st in input.matches(pattern) {
340+
if !collect_matching_substrings {
341+
return (true, Vec::<Vec<u8>>::new());
342+
}
343+
344+
any_pattern_matched = true;
345+
346+
matching_substrings.push(st.as_bytes().to_vec());
347+
}
314348
}
315-
})
349+
}
350+
351+
(any_pattern_matched, matching_substrings)
316352
}
317353
Patterns::Regex(patterns) => {
318-
let c_input = CString::new(input).unwrap();
319-
patterns.iter().any(|p| unsafe {
320-
regexec(p, c_input.as_ptr(), 0, ptr::null_mut(), 0) != REG_NOMATCH
321-
})
354+
let nmatch_to_use = if collect_matching_substrings { 1 } else { 0 };
355+
356+
let input_slice = input.as_bytes();
357+
358+
let mut matching_substrings = Vec::<Vec<u8>>::new();
359+
360+
let mut any_pattern_matched = false;
361+
362+
'next_pattern: for p in patterns {
363+
let mut current_string_index = 0_usize;
364+
365+
loop {
366+
let current_string_slice = &input_slice[current_string_index..];
367+
368+
let current_string_c_string = CString::new(current_string_slice).unwrap();
369+
370+
let mut regmatch_t_vec = vec![
371+
regmatch_t {
372+
rm_so: -1,
373+
rm_eo: -1,
374+
};
375+
nmatch_to_use
376+
];
377+
378+
let regmatch_vec_pointer = regmatch_t_vec.as_mut_ptr();
379+
380+
let regexec_return_value = unsafe {
381+
regexec(
382+
p,
383+
current_string_c_string.as_ptr(),
384+
nmatch_to_use,
385+
regmatch_vec_pointer,
386+
0,
387+
)
388+
};
389+
390+
if regexec_return_value != 0 {
391+
debug_assert!(regexec_return_value == REG_NOMATCH);
392+
393+
continue 'next_pattern;
394+
}
395+
396+
if !collect_matching_substrings {
397+
return (true, Vec::<Vec<u8>>::new());
398+
}
399+
400+
any_pattern_matched = true;
401+
402+
let regmatch_t = regmatch_t_vec.first().unwrap();
403+
404+
let start = usize::try_from(regmatch_t.rm_so).unwrap();
405+
let end = usize::try_from(regmatch_t.rm_eo).unwrap();
406+
407+
debug_assert!(end > 0_usize);
408+
409+
matching_substrings.push(current_string_slice[start..end].to_vec());
410+
411+
debug_assert!(end > current_string_index);
412+
413+
current_string_index += end;
414+
}
415+
}
416+
417+
(any_pattern_matched, matching_substrings)
322418
}
323419
}
324420
}
@@ -357,6 +453,8 @@ struct GrepModel {
357453
output_mode: OutputMode,
358454
patterns: Patterns,
359455
input_files: Vec<String>,
456+
stdout_lock: StdoutLock<'static>,
457+
only_matching: bool,
360458
}
361459

362460
impl GrepModel {
@@ -398,85 +496,110 @@ impl GrepModel {
398496
}
399497
}
400498

499+
fn print_line_prefix(&mut self, input_name: &str, line_number: u64) {
500+
if self.multiple_inputs {
501+
write!(self.stdout_lock, "{input_name}:").unwrap();
502+
}
503+
504+
if self.line_number {
505+
write!(self.stdout_lock, "{line_number}:").unwrap();
506+
}
507+
}
508+
401509
/// Reads lines from buffer and processes them.
402510
///
403511
/// # Arguments
404512
///
405513
/// * `input_name` - [str](str) that represents content source name.
406514
/// * `reader` - [Box](Box) that contains object that implements [BufRead] and reads lines.
407515
fn process_input(&mut self, input_name: &str, mut reader: Box<dyn BufRead>) {
408-
let mut line_number: u64 = 0;
516+
let mut line_number = 0_u64;
517+
518+
let mut line = String::new();
519+
409520
loop {
410-
let mut line = String::new();
411521
line_number += 1;
522+
523+
line.clear();
524+
525+
// TODO
526+
// Probably should work on non-UTF-8 input
412527
match reader.read_line(&mut line) {
413528
Ok(n_read) => {
414529
if n_read == 0 {
415530
break;
416531
}
417-
let trimmed = if line.ends_with('\n') {
418-
&line[..line.len() - 1]
419-
} else {
420-
&line
532+
533+
let mut chars = line.chars();
534+
535+
let line_without_newline = match chars.next_back() {
536+
Some('\n') => chars.as_str(),
537+
_ => line.as_str(),
421538
};
422539

423-
let init_matches = self.patterns.matches(trimmed);
540+
let (line_matches_any_pattern, matching_substrings) = self.patterns.matches(
541+
line_without_newline,
542+
self.only_matching && matches!(self.output_mode, OutputMode::Default),
543+
);
544+
424545
let matches = if self.invert_match {
425-
!init_matches
546+
!line_matches_any_pattern
426547
} else {
427-
init_matches
548+
line_matches_any_pattern
428549
};
550+
429551
if matches {
430552
self.any_matches = true;
553+
431554
match &mut self.output_mode {
432555
OutputMode::Count(count) => {
433556
*count += 1;
434557
}
435558
OutputMode::FilesWithMatches => {
436-
println!("{input_name}");
559+
writeln!(&mut self.stdout_lock, "{input_name}").unwrap();
560+
437561
break;
438562
}
439563
OutputMode::Quiet => {
440564
return;
441565
}
442566
OutputMode::Default => {
443-
let result = format!(
444-
"{}{}{}",
445-
if self.multiple_inputs {
446-
format!("{input_name}:")
447-
} else {
448-
String::new()
449-
},
450-
if self.line_number {
451-
format!("{line_number}:")
452-
} else {
453-
String::new()
454-
},
455-
trimmed
456-
);
457-
println!("{result}");
567+
if self.only_matching {
568+
for matching_substring in matching_substrings {
569+
self.print_line_prefix(input_name, line_number);
570+
571+
self.stdout_lock
572+
.write_all(matching_substring.as_slice())
573+
.unwrap();
574+
575+
self.stdout_lock.write_all(b"\n").unwrap();
576+
}
577+
} else {
578+
self.print_line_prefix(input_name, line_number);
579+
580+
writeln!(self.stdout_lock, "{line_without_newline}").unwrap();
581+
}
458582
}
459583
}
460584
}
461-
line.clear();
462585
}
463586
Err(err) => {
464587
self.any_errors = true;
588+
465589
if !self.no_messages {
466-
eprintln!(
467-
"{}: Error reading line {} ({})",
468-
input_name, line_number, err
469-
);
590+
eprintln!("{input_name}: Error reading line {line_number} ({err})",);
470591
}
471592
}
472593
}
473594
}
595+
474596
if let OutputMode::Count(count) = &mut self.output_mode {
475597
if self.multiple_inputs {
476-
println!("{input_name}:{count}");
598+
writeln!(&mut self.stdout_lock, "{input_name}:{count}").unwrap();
477599
} else {
478-
println!("{count}");
600+
writeln!(&mut self.stdout_lock, "{count}").unwrap();
479601
}
602+
480603
*count = 0;
481604
}
482605
}
@@ -487,10 +610,9 @@ impl GrepModel {
487610
// 1 - No lines were selected.
488611
// >1 - An error occurred.
489612
fn main() -> Result<(), Box<dyn std::error::Error>> {
490-
setlocale(LocaleCategory::LcAll, "");
491-
textdomain(env!("PROJECT_NAME"))?;
492-
bind_textdomain_codeset(env!("PROJECT_NAME"), "UTF-8")?;
493-
613+
textdomain(PROJECT_NAME)?;
614+
bind_textdomain_codeset(PROJECT_NAME, "UTF-8")?;
615+
// Parse command line arguments
494616
let mut args = Args::parse();
495617

496618
let exit_code = args
@@ -501,7 +623,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
501623
})
502624
.map(|mut grep_model| grep_model.grep())
503625
.unwrap_or_else(|err| {
504-
eprintln!("{}", err);
626+
eprintln!("{err}");
505627
2
506628
});
507629

0 commit comments

Comments
 (0)