88//
99
1010use clap:: Parser ;
11- use gettextrs:: { bind_textdomain_codeset, setlocale, textdomain, LocaleCategory } ;
12- use libc:: { regcomp, regex_t, regexec, regfree, REG_EXTENDED , REG_ICASE , REG_NOMATCH } ;
11+ use gettextrs:: { bind_textdomain_codeset, textdomain} ;
12+ use libc:: { regcomp, regex_t, regexec, regfree, regmatch_t, REG_EXTENDED , REG_ICASE , REG_NOMATCH } ;
13+ use plib:: PROJECT_NAME ;
1314use std:: {
1415 ffi:: CString ,
1516 fs:: File ,
16- io:: { self , BufRead , BufReader } ,
17+ io:: { self , BufRead , BufReader , StdoutLock , Write } ,
1718 path:: { Path , PathBuf } ,
18- ptr,
1919} ;
2020
2121/// grep - search a file for a pattern.
@@ -46,15 +46,19 @@ struct Args {
4646 #[ arg( short, long) ]
4747 ignore_case : bool ,
4848
49- /// Write only the names of input_files containing selected lines to standard output.
49+ /// Write only the names of input files containing selected lines to standard output.
5050 #[ arg( short = 'l' , long) ]
5151 files_with_matches : bool ,
5252
5353 /// Precede each output line by its relative line number in the file, each file starting at line 1.
5454 #[ arg( short = 'n' , long) ]
5555 line_number : bool ,
5656
57- /// Write only the names of input_files containing selected lines to standard output.
57+ /// Only print the matching characters in each line.
58+ #[ arg( short = 'o' , long = "only-matching" ) ]
59+ only_matching : bool ,
60+
61+ /// Do not print to standard output. The presence or absence of a match is communicated with the exit status.
5862 #[ arg( short, long) ]
5963 quiet : bool ,
6064
@@ -105,7 +109,7 @@ impl Args {
105109 return Err ( "Options '-l' and '-q' cannot be used together" . to_string ( ) ) ;
106110 }
107111 if self . regexp . is_empty ( ) && self . file . is_empty ( ) && self . single_pattern . is_none ( ) {
108- return Err ( "Required at least one pattern list or file " . to_string ( ) ) ;
112+ return Err ( "A pattern list or at least one file is required " . to_string ( ) ) ;
109113 }
110114 Ok ( ( ) )
111115 }
@@ -197,6 +201,8 @@ impl Args {
197201 output_mode,
198202 patterns,
199203 input_files : self . input_files ,
204+ stdout_lock : io:: stdout ( ) . lock ( ) ,
205+ only_matching : self . only_matching ,
200206 } )
201207 }
202208}
@@ -297,28 +303,118 @@ impl Patterns {
297303 /// # Returns
298304 ///
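    /// Returns a `(bool, Vec<Vec<u8>>)` tuple: the flag is `true` if input matches any of the present patterns, else `false`; the vector holds the bytes of each matched substring and is always empty when `collect_matching_substrings` is `false`.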
300- fn matches ( & self , input : impl AsRef < str > ) -> bool {
306+ fn matches (
307+ & self ,
308+ input : impl AsRef < str > ,
309+ collect_matching_substrings : bool ,
310+ ) -> ( bool , Vec < Vec < u8 > > ) {
301311 let input = input. as_ref ( ) ;
312+
302313 match self {
303314 Patterns :: Fixed ( patterns, ignore_case, line_regexp) => {
304315 let input = if * ignore_case {
305316 input. to_lowercase ( )
306317 } else {
307318 input. to_string ( )
308319 } ;
309- patterns. iter ( ) . any ( |p| {
320+
321+ let mut matching_substrings = Vec :: < Vec < u8 > > :: new ( ) ;
322+
323+ let mut any_pattern_matched = false ;
324+
325+ for pattern in patterns {
310326 if * line_regexp {
311- input == * p
327+ if input != * pattern {
328+ continue ;
329+ }
330+
331+ if !collect_matching_substrings {
332+ return ( true , Vec :: < Vec < u8 > > :: new ( ) ) ;
333+ }
334+
335+ any_pattern_matched = true ;
336+
337+ matching_substrings. push ( pattern. as_bytes ( ) . to_vec ( ) ) ;
312338 } else {
313- input. contains ( p)
339+ for st in input. matches ( pattern) {
340+ if !collect_matching_substrings {
341+ return ( true , Vec :: < Vec < u8 > > :: new ( ) ) ;
342+ }
343+
344+ any_pattern_matched = true ;
345+
346+ matching_substrings. push ( st. as_bytes ( ) . to_vec ( ) ) ;
347+ }
314348 }
315- } )
349+ }
350+
351+ ( any_pattern_matched, matching_substrings)
316352 }
317353 Patterns :: Regex ( patterns) => {
318- let c_input = CString :: new ( input) . unwrap ( ) ;
319- patterns. iter ( ) . any ( |p| unsafe {
320- regexec ( p, c_input. as_ptr ( ) , 0 , ptr:: null_mut ( ) , 0 ) != REG_NOMATCH
321- } )
354+ let nmatch_to_use = if collect_matching_substrings { 1 } else { 0 } ;
355+
356+ let input_slice = input. as_bytes ( ) ;
357+
358+ let mut matching_substrings = Vec :: < Vec < u8 > > :: new ( ) ;
359+
360+ let mut any_pattern_matched = false ;
361+
362+ ' next_pattern: for p in patterns {
363+ let mut current_string_index = 0_usize ;
364+
365+ loop {
366+ let current_string_slice = & input_slice[ current_string_index..] ;
367+
368+ let current_string_c_string = CString :: new ( current_string_slice) . unwrap ( ) ;
369+
370+ let mut regmatch_t_vec = vec ! [
371+ regmatch_t {
372+ rm_so: -1 ,
373+ rm_eo: -1 ,
374+ } ;
375+ nmatch_to_use
376+ ] ;
377+
378+ let regmatch_vec_pointer = regmatch_t_vec. as_mut_ptr ( ) ;
379+
380+ let regexec_return_value = unsafe {
381+ regexec (
382+ p,
383+ current_string_c_string. as_ptr ( ) ,
384+ nmatch_to_use,
385+ regmatch_vec_pointer,
386+ 0 ,
387+ )
388+ } ;
389+
390+ if regexec_return_value != 0 {
391+ debug_assert ! ( regexec_return_value == REG_NOMATCH ) ;
392+
393+ continue ' next_pattern;
394+ }
395+
396+ if !collect_matching_substrings {
397+ return ( true , Vec :: < Vec < u8 > > :: new ( ) ) ;
398+ }
399+
400+ any_pattern_matched = true ;
401+
402+ let regmatch_t = regmatch_t_vec. first ( ) . unwrap ( ) ;
403+
404+ let start = usize:: try_from ( regmatch_t. rm_so ) . unwrap ( ) ;
405+ let end = usize:: try_from ( regmatch_t. rm_eo ) . unwrap ( ) ;
406+
407+ debug_assert ! ( end > 0_usize ) ;
408+
409+ matching_substrings. push ( current_string_slice[ start..end] . to_vec ( ) ) ;
410+
411+ debug_assert ! ( end > current_string_index) ;
412+
413+ current_string_index += end;
414+ }
415+ }
416+
417+ ( any_pattern_matched, matching_substrings)
322418 }
323419 }
324420 }
@@ -357,6 +453,8 @@ struct GrepModel {
357453 output_mode : OutputMode ,
358454 patterns : Patterns ,
359455 input_files : Vec < String > ,
456+ stdout_lock : StdoutLock < ' static > ,
457+ only_matching : bool ,
360458}
361459
362460impl GrepModel {
@@ -398,85 +496,110 @@ impl GrepModel {
398496 }
399497 }
400498
499+ fn print_line_prefix ( & mut self , input_name : & str , line_number : u64 ) {
500+ if self . multiple_inputs {
501+ write ! ( self . stdout_lock, "{input_name}:" ) . unwrap ( ) ;
502+ }
503+
504+ if self . line_number {
505+ write ! ( self . stdout_lock, "{line_number}:" ) . unwrap ( ) ;
506+ }
507+ }
508+
401509 /// Reads lines from buffer and processes them.
402510 ///
403511 /// # Arguments
404512 ///
405513 /// * `input_name` - [str](str) that represents content source name.
406514 /// * `reader` - [Box](Box) that contains object that implements [BufRead] and reads lines.
407515 fn process_input ( & mut self , input_name : & str , mut reader : Box < dyn BufRead > ) {
408- let mut line_number: u64 = 0 ;
516+ let mut line_number = 0_u64 ;
517+
518+ let mut line = String :: new ( ) ;
519+
409520 loop {
410- let mut line = String :: new ( ) ;
411521 line_number += 1 ;
522+
523+ line. clear ( ) ;
524+
525+ // TODO
526+ // Probably should work on non-UTF-8 input
412527 match reader. read_line ( & mut line) {
413528 Ok ( n_read) => {
414529 if n_read == 0 {
415530 break ;
416531 }
417- let trimmed = if line. ends_with ( '\n' ) {
418- & line[ ..line. len ( ) - 1 ]
419- } else {
420- & line
532+
533+ let mut chars = line. chars ( ) ;
534+
535+ let line_without_newline = match chars. next_back ( ) {
536+ Some ( '\n' ) => chars. as_str ( ) ,
537+ _ => line. as_str ( ) ,
421538 } ;
422539
423- let init_matches = self . patterns . matches ( trimmed) ;
540+ let ( line_matches_any_pattern, matching_substrings) = self . patterns . matches (
541+ line_without_newline,
542+ self . only_matching && matches ! ( self . output_mode, OutputMode :: Default ) ,
543+ ) ;
544+
424545 let matches = if self . invert_match {
425- !init_matches
546+ !line_matches_any_pattern
426547 } else {
427- init_matches
548+ line_matches_any_pattern
428549 } ;
550+
429551 if matches {
430552 self . any_matches = true ;
553+
431554 match & mut self . output_mode {
432555 OutputMode :: Count ( count) => {
433556 * count += 1 ;
434557 }
435558 OutputMode :: FilesWithMatches => {
436- println ! ( "{input_name}" ) ;
559+ writeln ! ( & mut self . stdout_lock, "{input_name}" ) . unwrap ( ) ;
560+
437561 break ;
438562 }
439563 OutputMode :: Quiet => {
440564 return ;
441565 }
442566 OutputMode :: Default => {
443- let result = format ! (
444- "{}{}{}" ,
445- if self . multiple_inputs {
446- format! ( "{input_name}:" )
447- } else {
448- String :: new ( )
449- } ,
450- if self . line_number {
451- format! ( "{line_number}:" )
452- } else {
453- String :: new ( )
454- } ,
455- trimmed
456- ) ;
457- println ! ( "{result}" ) ;
567+ if self . only_matching {
568+ for matching_substring in matching_substrings {
569+ self . print_line_prefix ( input_name , line_number ) ;
570+
571+ self . stdout_lock
572+ . write_all ( matching_substring . as_slice ( ) )
573+ . unwrap ( ) ;
574+
575+ self . stdout_lock . write_all ( b" \n " ) . unwrap ( ) ;
576+ }
577+ } else {
578+ self . print_line_prefix ( input_name , line_number ) ;
579+
580+ writeln ! ( self . stdout_lock , "{line_without_newline}" ) . unwrap ( ) ;
581+ }
458582 }
459583 }
460584 }
461- line. clear ( ) ;
462585 }
463586 Err ( err) => {
464587 self . any_errors = true ;
588+
465589 if !self . no_messages {
466- eprintln ! (
467- "{}: Error reading line {} ({})" ,
468- input_name, line_number, err
469- ) ;
590+ eprintln ! ( "{input_name}: Error reading line {line_number} ({err})" , ) ;
470591 }
471592 }
472593 }
473594 }
595+
474596 if let OutputMode :: Count ( count) = & mut self . output_mode {
475597 if self . multiple_inputs {
476- println ! ( "{input_name}:{count}" ) ;
598+ writeln ! ( & mut self . stdout_lock , "{input_name}:{count}" ) . unwrap ( ) ;
477599 } else {
478- println ! ( "{count}" ) ;
600+ writeln ! ( & mut self . stdout_lock , "{count}" ) . unwrap ( ) ;
479601 }
602+
480603 * count = 0 ;
481604 }
482605 }
@@ -487,10 +610,9 @@ impl GrepModel {
487610// 1 - No lines were selected.
488611// >1 - An error occurred.
489612fn main ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
490- setlocale ( LocaleCategory :: LcAll , "" ) ;
491- textdomain ( env ! ( "PROJECT_NAME" ) ) ?;
492- bind_textdomain_codeset ( env ! ( "PROJECT_NAME" ) , "UTF-8" ) ?;
493-
613+ textdomain ( PROJECT_NAME ) ?;
614+ bind_textdomain_codeset ( PROJECT_NAME , "UTF-8" ) ?;
615+ // Parse command line arguments
494616 let mut args = Args :: parse ( ) ;
495617
496618 let exit_code = args
@@ -501,7 +623,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
501623 } )
502624 . map ( |mut grep_model| grep_model. grep ( ) )
503625 . unwrap_or_else ( |err| {
504- eprintln ! ( "{}" , err ) ;
626+ eprintln ! ( "{err}" ) ;
505627 2
506628 } ) ;
507629
0 commit comments