99
1010use clap:: Parser ;
1111use gettextrs:: { bind_textdomain_codeset, textdomain} ;
12- use libc:: { regcomp, regex_t, regexec, regfree, REG_EXTENDED , REG_ICASE , REG_NOMATCH } ;
12+ use libc:: { regcomp, regex_t, regexec, regfree, regmatch_t , REG_EXTENDED , REG_ICASE , REG_NOMATCH } ;
1313use plib:: PROJECT_NAME ;
1414use std:: {
1515 ffi:: CString ,
1616 fs:: File ,
17- io:: { self , BufRead , BufReader } ,
17+ io:: { self , BufRead , BufReader , StdoutLock , Write } ,
1818 path:: { Path , PathBuf } ,
19- ptr,
2019} ;
2120
2221/// grep - search a file for a pattern.
@@ -47,15 +46,19 @@ struct Args {
4746 #[ arg( short, long) ]
4847 ignore_case : bool ,
4948
50- /// Write only the names of input_files containing selected lines to standard output.
49+ /// Write only the names of input files containing selected lines to standard output.
5150 #[ arg( short = 'l' , long) ]
5251 files_with_matches : bool ,
5352
5453 /// Precede each output line by its relative line number in the file, each file starting at line 1.
5554 #[ arg( short = 'n' , long) ]
5655 line_number : bool ,
5756
58- /// Write only the names of input_files containing selected lines to standard output.
57+ /// Only print the matching characters in each line.
58+ #[ arg( short = 'o' , long = "only-matching" ) ]
59+ only_matching : bool ,
60+
61+ /// Do not print to standard output. The presence or absence of a match is communicated with the exit status.
5962 #[ arg( short, long) ]
6063 quiet : bool ,
6164
@@ -106,7 +109,7 @@ impl Args {
106109 return Err ( "Options '-l' and '-q' cannot be used together" . to_string ( ) ) ;
107110 }
108111 if self . regexp . is_empty ( ) && self . file . is_empty ( ) && self . single_pattern . is_none ( ) {
109- return Err ( "Required at least one pattern list or file " . to_string ( ) ) ;
112+ return Err ( "A pattern list or at least one file is required " . to_string ( ) ) ;
110113 }
111114 Ok ( ( ) )
112115 }
@@ -198,6 +201,8 @@ impl Args {
198201 output_mode,
199202 patterns,
200203 input_files : self . input_files ,
204+ stdout_lock : io:: stdout ( ) . lock ( ) ,
205+ only_matching : self . only_matching ,
201206 } )
202207 }
203208}
@@ -298,28 +303,118 @@ impl Patterns {
298303 /// # Returns
299304 ///
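    /// Returns `(bool, Vec<Vec<u8>>)` - the flag is `true` if input matches any of the present patterns, else `false`; the vector holds the matched substrings (as bytes) when `collect_matching_substrings` is `true` and is empty otherwise.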
301- fn matches ( & self , input : impl AsRef < str > ) -> bool {
306+ fn matches (
307+ & self ,
308+ input : impl AsRef < str > ,
309+ collect_matching_substrings : bool ,
310+ ) -> ( bool , Vec < Vec < u8 > > ) {
302311 let input = input. as_ref ( ) ;
312+
303313 match self {
304314 Patterns :: Fixed ( patterns, ignore_case, line_regexp) => {
305315 let input = if * ignore_case {
306316 input. to_lowercase ( )
307317 } else {
308318 input. to_string ( )
309319 } ;
310- patterns. iter ( ) . any ( |p| {
320+
321+ let mut matching_substrings = Vec :: < Vec < u8 > > :: new ( ) ;
322+
323+ let mut any_pattern_matched = false ;
324+
325+ for pattern in patterns {
311326 if * line_regexp {
312- input == * p
327+ if input != * pattern {
328+ continue ;
329+ }
330+
331+ if !collect_matching_substrings {
332+ return ( true , Vec :: < Vec < u8 > > :: new ( ) ) ;
333+ }
334+
335+ any_pattern_matched = true ;
336+
337+ matching_substrings. push ( pattern. as_bytes ( ) . to_vec ( ) ) ;
313338 } else {
314- input. contains ( p)
339+ for st in input. matches ( pattern) {
340+ if !collect_matching_substrings {
341+ return ( true , Vec :: < Vec < u8 > > :: new ( ) ) ;
342+ }
343+
344+ any_pattern_matched = true ;
345+
346+ matching_substrings. push ( st. as_bytes ( ) . to_vec ( ) ) ;
347+ }
315348 }
316- } )
349+ }
350+
351+ ( any_pattern_matched, matching_substrings)
317352 }
318353 Patterns :: Regex ( patterns) => {
319- let c_input = CString :: new ( input) . unwrap ( ) ;
320- patterns. iter ( ) . any ( |p| unsafe {
321- regexec ( p, c_input. as_ptr ( ) , 0 , ptr:: null_mut ( ) , 0 ) != REG_NOMATCH
322- } )
354+ let nmatch_to_use = if collect_matching_substrings { 1 } else { 0 } ;
355+
356+ let input_slice = input. as_bytes ( ) ;
357+
358+ let mut matching_substrings = Vec :: < Vec < u8 > > :: new ( ) ;
359+
360+ let mut any_pattern_matched = false ;
361+
362+ ' next_pattern: for p in patterns {
363+ let mut current_string_index = 0_usize ;
364+
365+ loop {
366+ let current_string_slice = & input_slice[ current_string_index..] ;
367+
368+ let current_string_c_string = CString :: new ( current_string_slice) . unwrap ( ) ;
369+
370+ let mut regmatch_t_vec = vec ! [
371+ regmatch_t {
372+ rm_so: -1 ,
373+ rm_eo: -1 ,
374+ } ;
375+ nmatch_to_use
376+ ] ;
377+
378+ let regmatch_vec_pointer = regmatch_t_vec. as_mut_ptr ( ) ;
379+
380+ let regexec_return_value = unsafe {
381+ regexec (
382+ p,
383+ current_string_c_string. as_ptr ( ) ,
384+ nmatch_to_use,
385+ regmatch_vec_pointer,
386+ 0 ,
387+ )
388+ } ;
389+
390+ if regexec_return_value != 0 {
391+ debug_assert ! ( regexec_return_value == REG_NOMATCH ) ;
392+
393+ continue ' next_pattern;
394+ }
395+
396+ if !collect_matching_substrings {
397+ return ( true , Vec :: < Vec < u8 > > :: new ( ) ) ;
398+ }
399+
400+ any_pattern_matched = true ;
401+
402+ let regmatch_t = regmatch_t_vec. first ( ) . unwrap ( ) ;
403+
404+ let start = usize:: try_from ( regmatch_t. rm_so ) . unwrap ( ) ;
405+ let end = usize:: try_from ( regmatch_t. rm_eo ) . unwrap ( ) ;
406+
407+ debug_assert ! ( end > 0_usize ) ;
408+
409+ matching_substrings. push ( current_string_slice[ start..end] . to_vec ( ) ) ;
410+
411+ debug_assert ! ( end > current_string_index) ;
412+
413+ current_string_index += end;
414+ }
415+ }
416+
417+ ( any_pattern_matched, matching_substrings)
323418 }
324419 }
325420 }
@@ -358,6 +453,8 @@ struct GrepModel {
358453 output_mode : OutputMode ,
359454 patterns : Patterns ,
360455 input_files : Vec < String > ,
456+ stdout_lock : StdoutLock < ' static > ,
457+ only_matching : bool ,
361458}
362459
363460impl GrepModel {
@@ -399,85 +496,110 @@ impl GrepModel {
399496 }
400497 }
401498
499+ fn print_line_prefix ( & mut self , input_name : & str , line_number : u64 ) {
500+ if self . multiple_inputs {
501+ write ! ( self . stdout_lock, "{input_name}:" ) . unwrap ( ) ;
502+ }
503+
504+ if self . line_number {
505+ write ! ( self . stdout_lock, "{line_number}:" ) . unwrap ( ) ;
506+ }
507+ }
508+
402509 /// Reads lines from buffer and processes them.
403510 ///
404511 /// # Arguments
405512 ///
406513 /// * `input_name` - [str](str) that represents content source name.
407514 /// * `reader` - [Box](Box) that contains object that implements [BufRead] and reads lines.
408515 fn process_input ( & mut self , input_name : & str , mut reader : Box < dyn BufRead > ) {
409- let mut line_number: u64 = 0 ;
516+ let mut line_number = 0_u64 ;
517+
518+ let mut line = String :: new ( ) ;
519+
410520 loop {
411- let mut line = String :: new ( ) ;
412521 line_number += 1 ;
522+
523+ line. clear ( ) ;
524+
525+ // TODO
526+ // Probably should work on non-UTF-8 input
413527 match reader. read_line ( & mut line) {
414528 Ok ( n_read) => {
415529 if n_read == 0 {
416530 break ;
417531 }
418- let trimmed = if line. ends_with ( '\n' ) {
419- & line[ ..line. len ( ) - 1 ]
420- } else {
421- & line
532+
533+ let mut chars = line. chars ( ) ;
534+
535+ let line_without_newline = match chars. next_back ( ) {
536+ Some ( '\n' ) => chars. as_str ( ) ,
537+ _ => line. as_str ( ) ,
422538 } ;
423539
424- let init_matches = self . patterns . matches ( trimmed) ;
540+ let ( line_matches_any_pattern, matching_substrings) = self . patterns . matches (
541+ line_without_newline,
542+ self . only_matching && matches ! ( self . output_mode, OutputMode :: Default ) ,
543+ ) ;
544+
425545 let matches = if self . invert_match {
426- !init_matches
546+ !line_matches_any_pattern
427547 } else {
428- init_matches
548+ line_matches_any_pattern
429549 } ;
550+
430551 if matches {
431552 self . any_matches = true ;
553+
432554 match & mut self . output_mode {
433555 OutputMode :: Count ( count) => {
434556 * count += 1 ;
435557 }
436558 OutputMode :: FilesWithMatches => {
437- println ! ( "{input_name}" ) ;
559+ writeln ! ( & mut self . stdout_lock, "{input_name}" ) . unwrap ( ) ;
560+
438561 break ;
439562 }
440563 OutputMode :: Quiet => {
441564 return ;
442565 }
443566 OutputMode :: Default => {
444- let result = format ! (
445- "{}{}{}" ,
446- if self . multiple_inputs {
447- format! ( "{input_name}:" )
448- } else {
449- String :: new ( )
450- } ,
451- if self . line_number {
452- format! ( "{line_number}:" )
453- } else {
454- String :: new ( )
455- } ,
456- trimmed
457- ) ;
458- println ! ( "{result}" ) ;
567+ if self . only_matching {
568+ for matching_substring in matching_substrings {
569+ self . print_line_prefix ( input_name , line_number ) ;
570+
571+ self . stdout_lock
572+ . write_all ( matching_substring . as_slice ( ) )
573+ . unwrap ( ) ;
574+
575+ self . stdout_lock . write_all ( b" \n " ) . unwrap ( ) ;
576+ }
577+ } else {
578+ self . print_line_prefix ( input_name , line_number ) ;
579+
580+ writeln ! ( self . stdout_lock , "{line_without_newline}" ) . unwrap ( ) ;
581+ }
459582 }
460583 }
461584 }
462- line. clear ( ) ;
463585 }
464586 Err ( err) => {
465587 self . any_errors = true ;
588+
466589 if !self . no_messages {
467- eprintln ! (
468- "{}: Error reading line {} ({})" ,
469- input_name, line_number, err
470- ) ;
590+ eprintln ! ( "{input_name}: Error reading line {line_number} ({err})" , ) ;
471591 }
472592 }
473593 }
474594 }
595+
475596 if let OutputMode :: Count ( count) = & mut self . output_mode {
476597 if self . multiple_inputs {
477- println ! ( "{input_name}:{count}" ) ;
598+ writeln ! ( & mut self . stdout_lock , "{input_name}:{count}" ) . unwrap ( ) ;
478599 } else {
479- println ! ( "{count}" ) ;
600+ writeln ! ( & mut self . stdout_lock , "{count}" ) . unwrap ( ) ;
480601 }
602+
481603 * count = 0 ;
482604 }
483605 }
@@ -501,7 +623,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
501623 } )
502624 . map ( |mut grep_model| grep_model. grep ( ) )
503625 . unwrap_or_else ( |err| {
504- eprintln ! ( "{}" , err ) ;
626+ eprintln ! ( "{err}" ) ;
505627 2
506628 } ) ;
507629
0 commit comments