From 848c3b7ca3f04badba5d4d008716ca1bdf7a267f Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Wed, 8 Oct 2025 10:46:19 +0100 Subject: [PATCH 01/57] Bump ExtUtils::ParseXS to 3.61 --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Constants.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/CountLines.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Cmd.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/InputMap.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/OutputMap.pm | 2 +- dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Type.pm | 2 +- dist/ExtUtils-ParseXS/lib/perlxs.pod | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 935615442187..04043a17318c 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -64,7 +64,7 @@ use Symbol; our $VERSION; BEGIN { - $VERSION = '3.60'; + $VERSION = '3.61'; require ExtUtils::ParseXS::Constants; ExtUtils::ParseXS::Constants->VERSION($VERSION); require ExtUtils::ParseXS::CountLines; ExtUtils::ParseXS::CountLines->VERSION($VERSION); require ExtUtils::ParseXS::Node; ExtUtils::ParseXS::Node->VERSION($VERSION); diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Constants.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Constants.pm index 3ead8417bc5e..cff4afe97828 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Constants.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Constants.pm @@ -3,7 +3,7 @@ use strict; use warnings; use Symbol; -our $VERSION = '3.60'; +our $VERSION = '3.61'; =head1 NAME diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/CountLines.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/CountLines.pm index 6908c6e01306..6075ece1f332 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/CountLines.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/CountLines.pm @@ -1,7 +1,7 @@ package ExtUtils::ParseXS::CountLines; use strict; -our $VERSION = '3.60'; +our $VERSION = '3.61'; our $SECTION_END_MARKER; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm index 5b91d39f7f44..25b2285afd16 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm @@ -2,7 +2,7 @@ package ExtUtils::ParseXS::Eval; use strict; use warnings; -our $VERSION = '3.60'; +our $VERSION = '3.61'; =head1 NAME diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 4e5583cd1f83..36f94ad86aea 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -2,7 +2,7 @@ package ExtUtils::ParseXS::Node; use strict; use warnings; -our $VERSION = '3.60'; +our $VERSION = '3.61'; =head1 NAME diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm index 1bdb82cb01e9..87074eedbc75 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm @@ -5,7 +5,7 @@ use Exporter; use File::Spec; use ExtUtils::ParseXS::Constants (); -our $VERSION = '3.60'; +our $VERSION = '3.61'; our (@ISA, @EXPORT_OK); @ISA = qw(Exporter); diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps.pm index 3d6c4b4f1e39..d4d9283ddeed 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps.pm @@ -2,7 +2,7 @@ package ExtUtils::Typemaps; use 5.006001; use strict; use warnings; -our $VERSION = '3.60'; +our $VERSION = '3.61'; require ExtUtils::ParseXS; require ExtUtils::ParseXS::Constants; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Cmd.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Cmd.pm index dfe51adab9af..54adbe0fda6b 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Cmd.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Cmd.pm @@ -2,7 +2,7 @@ package ExtUtils::Typemaps::Cmd; use 5.006001; use strict; use warnings; -our $VERSION = '3.60'; +our $VERSION = '3.61'; use ExtUtils::Typemaps; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/InputMap.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/InputMap.pm index 20fa69fb3b5c..956da8c5e528 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/InputMap.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/InputMap.pm @@ -2,7 +2,7 @@ package ExtUtils::Typemaps::InputMap; use 5.006001; use strict; use warnings; -our $VERSION = '3.60'; +our $VERSION = '3.61'; =head1 NAME diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/OutputMap.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/OutputMap.pm index 40d323eb0076..6beecaaa5961 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/OutputMap.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/OutputMap.pm @@ -2,7 +2,7 @@ package ExtUtils::Typemaps::OutputMap; use 5.006001; use strict; use warnings; -our $VERSION = '3.60'; +our $VERSION = '3.61'; =head1 NAME diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Type.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Type.pm index e6e8dbb31e2f..82f091b91a8b 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Type.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/Typemaps/Type.pm @@ -4,7 +4,7 @@ use strict; use warnings; require ExtUtils::Typemaps; -our $VERSION = '3.60'; +our $VERSION = '3.61'; =head1 NAME diff --git a/dist/ExtUtils-ParseXS/lib/perlxs.pod b/dist/ExtUtils-ParseXS/lib/perlxs.pod index 26b8e19a06d3..01848f99974e 100644 --- a/dist/ExtUtils-ParseXS/lib/perlxs.pod +++ b/dist/ExtUtils-ParseXS/lib/perlxs.pod @@ -2231,7 +2231,7 @@ this model, the less likely conflicts will occur. =head1 XS VERSION This document covers features supported by C -(also known as C) 3.60. +(also known as C) 3.61. =head1 AUTHOR DIAGNOSTICS From c45207cf02ff3479e1eafa3d92c1e72927ef3be3 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Wed, 8 Oct 2025 10:33:14 +0100 Subject: [PATCH 02/57] ParseXS: refactor: add stub top-level node (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add ExtUtils::ParseXS::Node::XS_file class. It doesn't do a lot yet. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 11 +++++++++ .../lib/ExtUtils/ParseXS/Node.pm | 24 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 04043a17318c..0e6e4ab71fa5 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -178,6 +178,9 @@ BEGIN { # File-scoped parsing state: + 'AST', # the Node::XS_file object representing the AST + # tree for the whole XS file + 'typemaps_object', # An ExtUtils::Typemaps object: the result of # reading in the standard (or other) typemap. @@ -404,6 +407,14 @@ sub process_file { $self->{config_allow_exceptions} = $Options{except}; $self->{config_optimize} = $Options{optimize}; + + my $AST = $self->{AST} = ExtUtils::ParseXS::Node::XS_file->new(); + $AST->parse($self) + or $self->death("Failed to parse XS file\n"); + + $AST->as_code($self); + + # Identify the version of xsubpp used print <( +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + 1; +} + + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + +} + + # ====================================================================== package ExtUtils::ParseXS::Node::xsub; From 38eb7ac40a08e39bb37b16e1af68b6d19a238dde Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 10 Oct 2025 12:54:37 +0100 Subject: [PATCH 03/57] ParseXS: refactor: add Node::preamble (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add a stub Node subclass which is responsible for emitting the preamble at the start of the generated C file. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 17 ------ .../lib/ExtUtils/ParseXS/Node.pm | 58 +++++++++++++++++++ 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 0e6e4ab71fa5..0ef0ec4777eb 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -414,23 +414,6 @@ sub process_file { $AST->as_code($self); - - # Identify the version of xsubpp used - print <{in_filename}. Do not edit this file, edit $self->{in_filename} instead. - * - * ANY CHANGES MADE HERE WILL BE LOST! - * - */ - -EOM - - - print("#line 1 \"" . escape_file_for_line_directive($self->{in_pathname}) . "\"\n") - if $self->{config_WantLineNumbers}; - # Open the input file (using $self->{in_filename} which # is a basename'd $Options{filename} due to chdir above) { diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index afdc7640407f..e3147339df19 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -373,12 +373,25 @@ package ExtUtils::ParseXS::Node::XS_file; # Top-level AST node representing an entire XS file BEGIN { $build_subclass->( + 'preamble', # Node::preamble object which emits preamble C code )}; sub parse { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; + $self->{line_no} = 1; + $self->{file} = $pxs->{in_pathname}; + + # "Parse" the start of the file. Doesn't actually consume any lines: + # just a placeholder for emitting preamble later + + my $preamble = ExtUtils::ParseXS::Node::preamble->new(); + $self->{preamble} = $preamble; + $preamble->parse($pxs, $self) + or return; + push @{$self->{kids}}, $preamble; + 1; } @@ -387,6 +400,51 @@ sub as_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; + $_->as_code($pxs, $self) for @{$self->{kids}}; + +} +# ====================================================================== + +package ExtUtils::ParseXS::Node::preamble; + +# AST node representing the boilerplate C code preamble at the start of +# the file. Parsing doesn't actually consume any lines; it exists just for +# its as_code() method which emits the preamble into the C file. + +BEGIN { $build_subclass->( +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->{line_no} = 1; + $self->{file} = $pxs->{in_pathname}; + 1; +} + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # Emit preamble at start of C file, including the + # version it was generated by. + + print ExtUtils::ParseXS::Q(<<"EOM"); + |/* + | * This file was generated automatically by ExtUtils::ParseXS version $ExtUtils::ParseXS::VERSION from the + | * contents of $pxs->{in_filename}. Do not edit this file, edit $pxs->{in_filename} instead. + | * + | * ANY CHANGES MADE HERE WILL BE LOST! + | * + | */ + | +EOM + + print("#line 1 \"" . + ExtUtils::ParseXS::Utilities::escape_file_for_line_directive( + $self->{file}) . "\"\n") + if $pxs->{config_WantLineNumbers}; } From e05aac7a4a4f590eb6bbb49dbb84fadc82d27d9d Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 10 Oct 2025 16:05:24 +0100 Subject: [PATCH 04/57] ParseXS: refactor: add Node::C_part (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add these new AST node types: ExtUtils::ParseXS::Node::C_part; ExtUtils::ParseXS::Node::C_part_POD; ExtUtils::ParseXS::Node::C_part_code; and add this method: ExtUtils::ParseXS::Node::is_xs_module_line() These are collectively used to parse and hold the "C" part of the XS file - i.e. everything that comes before the first MODULE line. A C_part node has children consisting of C_part_POD and C_part_code nodes. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 72 +------ .../lib/ExtUtils/ParseXS/Node.pm | 189 ++++++++++++++++++ 2 files changed, 198 insertions(+), 63 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 0ef0ec4777eb..4b3ca636b0d3 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -408,12 +408,6 @@ sub process_file { $self->{config_optimize} = $Options{optimize}; - my $AST = $self->{AST} = ExtUtils::ParseXS::Node::XS_file->new(); - $AST->parse($self) - or $self->death("Failed to parse XS file\n"); - - $AST->as_code($self); - # Open the input file (using $self->{in_filename} which # is a basename'd $Options{filename} due to chdir above) { @@ -424,72 +418,24 @@ sub process_file { or die "cannot open $self->{in_filename}: $!\n"; } - # ---------------------------------------------------------------- - # Process the first (C language) half of the XS file, up until the first - # MODULE: line - # ---------------------------------------------------------------- - - FIRSTMODULE: - while (readline($self->{in_fh})) { - if (/^=/) { - my $podstartline = $.; - do { - if (/^=cut\s*$/) { - # We can't just write out a /* */ comment, as our embedded - # POD might itself be in a comment. We can't put a /**/ - # comment inside #if 0, as the C standard says that the source - # file is decomposed into preprocessing characters in the stage - # before preprocessing commands are executed. - # I don't want to leave the text as barewords, because the spec - # isn't clear whether macros are expanded before or after - # preprocessing commands are executed, and someone pathological - # may just have defined one of the 3 words as a macro that does - # something strange. Multiline strings are illegal in C, so - # the "" we write must be a string literal. And they aren't - # concatenated until 2 steps later, so we are safe. - # - Nicholas Clark - print("#if 0\n \"Skipped embedded POD.\"\n#endif\n"); - printf("#line %d \"%s\"\n", $. + 1, escape_file_for_line_directive($self->{in_pathname})) - if $self->{config_WantLineNumbers}; - next FIRSTMODULE; - } - - } while (readline($self->{in_fh})); - - # At this point $. is at end of file so die won't state the start - # of the problem, and as we haven't yet read any lines &death won't - # show the correct line in the message either. - die ("Error: Unterminated pod in $self->{in_filename}, line $podstartline\n") - unless $self->{lastline}; - } + my $AST = $self->{AST} = ExtUtils::ParseXS::Node::XS_file->new(); + $AST->parse($self) + or $self->death("Failed to parse XS file\n"); - last if ($self->{PACKAGE_name}, $self->{PREFIX_pattern}) = - /^MODULE\s*=\s*[\w:]+(?:\s+PACKAGE\s*=\s*([\w:]+))?(?:\s+PREFIX\s*=\s*(\S+))?\s*$/; + # At this point, $_ should hold the first MODULE line - print $_; - } - - unless (defined $_) { - warn "Didn't find a 'MODULE ... PACKAGE ... PREFIX' line\n"; - exit 0; # Not a fatal error for the caller process - } + $self->{lastline} = $_; + $self->{lastline_no} = $.; + $self->{XS_parse_stack_top_if_idx} = 0; + my $cpp_next_tmp_define = 'XSubPPtmpAAAA'; - print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" - if $self->{config_WantLineNumbers}; + $AST->as_code($self); standard_XS_defs(); print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" if $self->{config_WantLineNumbers}; - $self->{lastline} = $_; - $self->{lastline_no} = $.; - - $self->{XS_parse_stack_top_if_idx} = 0; - - my $cpp_next_tmp_define = 'XSubPPtmpAAAA'; - - # ---------------------------------------------------------------- # Main loop: for each iteration, read in a paragraph's worth of XSUB # definition or XS/CPP directives into @{ $self->{line} }, then try to diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index e3147339df19..bb7cb9de51bd 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -362,6 +362,20 @@ sub parse_keywords { return @kids; } +# return (module, package, prefix) values if the line +# is a valid 'MODULE = ...' line + +sub is_xs_module_line { + my __PACKAGE__ $self = shift; + my $line = shift; + + $line =~ + /^ MODULE \s* = \s* [\w:]+ + (?: \s+ PACKAGE \s* = \s* ( [\w:]+ ) )? + (?: \s+ PREFIX \s* = \s* ( \S+ ) )? + \s* $/x; +} + sub as_code { } @@ -374,6 +388,7 @@ package ExtUtils::ParseXS::Node::XS_file; BEGIN { $build_subclass->( 'preamble', # Node::preamble object which emits preamble C code + 'C_part', # the C part of the XS file, before the first MODULE )}; sub parse { @@ -392,6 +407,15 @@ sub parse { or return; push @{$self->{kids}}, $preamble; + # Process the first (C language) half of the XS file, up until the first + # MODULE: line + + my $C_part = ExtUtils::ParseXS::Node::C_part->new(); + $self->{C_part} = $C_part; + $C_part->parse($pxs, $self) + or return; + push @{$self->{kids}}, $C_part; + 1; } @@ -448,6 +472,171 @@ EOM } +# ====================================================================== + +package ExtUtils::ParseXS::Node::C_part; + +# A node representing the C part of the XS file - i.e. everything +# before the first MODULE line + +BEGIN { $build_subclass->( +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->{line_no} = 1; + $self->{file} = $pxs->{in_pathname}; + + # Read in lines until the first MODULE line, creating a list of + # Node::C_part_code and Node::C_part_POD nodes as children. + # Returns with $_ holding the (unprocessed) next line (or undef for + # EOF) + + $_ = readline($pxs->{in_fh}); + + while (defined $_) { + return 1 if $self->is_xs_module_line($_); + + my $node = + /^=/ ? ExtUtils::ParseXS::Node::C_part_POD->new() + : ExtUtils::ParseXS::Node::C_part_code->new(); + + # Read in next block of code or POD lines + $node->parse($pxs) + or return; + push @{$self->{kids}}, $node; + } + + warn "Didn't find a 'MODULE ... PACKAGE ... PREFIX' line\n"; + exit 0; # Not a fatal error for the caller process +} + + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $_->as_code($pxs, $self) for @{$self->{kids}}; + + print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" + if $pxs->{config_WantLineNumbers}; +} + + +# ====================================================================== + +package ExtUtils::ParseXS::Node::C_part_POD; + +# A node representing a section of POD within the C part of the XS file + +BEGIN { $build_subclass->( + 'pod_lines', # array of lines containing pod, including start and end + # '=foo' lines +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->{line_no} = $.; + $self->{file} = $pxs->{in_pathname}; + + # This method is called with $_ holding the first line of POD + # and returns with $_ holding the (unprocessed) next line + + do { + push @{$self->{pod_lines}}, $_; + if (/^=cut\s*$/) { + $_ = readline($pxs->{in_fh}); + return 1; + } + } while (readline($pxs->{in_fh})); + + # At this point $. is at end of file so die won't state the start + # of the problem, and as we haven't yet read any lines &death won't + # show the correct line in the message either. + die ( "Error: Unterminated pod in $pxs->{in_filename}, " + . "line $self->{line_no}\n"); +} + + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # Emit something in the C file to indicate that a section of POD has + # been elided, while maintaining the correct lines numbers using + # #line. + # + # We can't just write out a /* */ comment, as our embedded POD might + # itself be in a comment. We can't put a /**/ comment inside #if 0, as + # the C standard says that the source file is decomposed into + # preprocessing characters in the stage before preprocessing commands + # are executed. + # + # I don't want to leave the text as barewords, because the spec isn't + # clear whether macros are expanded before or after preprocessing + # commands are executed, and someone pathological may just have + # defined one of the 3 words as a macro that does something strange. + # Multiline strings are illegal in C, so the "" we write must be a + # string literal. And they aren't concatenated until 2 steps later, so + # we are safe. + # - Nicholas Clark + + print ExtUtils::ParseXS::Q(<<"EOF"); + |#if 0 + | "Skipped embedded POD." + |#endif +EOF + + printf("#line %d \"%s\"\n", + $self->{line_no} + @{$self->{pod_lines}}, + ExtUtils::ParseXS::Utilities::escape_file_for_line_directive( + $pxs->{in_pathname})) + if $pxs->{config_WantLineNumbers}; +} + + +# ====================================================================== + +package ExtUtils::ParseXS::Node::C_part_code; + +# A node representing a section of C code within the C part of the XS file + +BEGIN { $build_subclass->( + 'code_lines', # array of lines containing C code +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->{line_no} = $.; + $self->{file} = $pxs->{in_pathname}; + + # This method is called with $_ holding the first line of C code + # and returns with $_ holding the (unprocessed) next line + + do { + return 1 if $self->is_xs_module_line($_); + return 1 if /^=/; + push @{$self->{code_lines}}, $_; + } while (readline($pxs->{in_fh})); + + 1; +} + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + print @{$self->{code_lines}}; +} + + + # ====================================================================== package ExtUtils::ParseXS::Node::xsub; From 1c983254423b2a23d169ee2e5e34ec53a3580423 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 10 Oct 2025 17:11:44 +0100 Subject: [PATCH 05/57] ParseXS: refactor: add Node::postamble (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add a stub Node subclass which is responsible for emitting the postamble to the C file following any C code which has been copied as-is from the C part of the XS file --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 6 --- .../lib/ExtUtils/ParseXS/Node.pm | 48 +++++++++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 4b3ca636b0d3..5271adff40c8 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -79,7 +79,6 @@ use ExtUtils::ParseXS::Utilities qw( valid_proto_string process_typemaps map_type - standard_XS_defs analyze_preprocessor_statement set_cond Warn @@ -431,11 +430,6 @@ sub process_file { $AST->as_code($self); - standard_XS_defs(); - - print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" - if $self->{config_WantLineNumbers}; - # ---------------------------------------------------------------- # Main loop: for each iteration, read in a paragraph's worth of XSUB # definition or XS/CPP directives into @{ $self->{line} }, then try to diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index bb7cb9de51bd..749fa90088e5 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -389,6 +389,8 @@ package ExtUtils::ParseXS::Node::XS_file; BEGIN { $build_subclass->( 'preamble', # Node::preamble object which emits preamble C code 'C_part', # the C part of the XS file, before the first MODULE + 'C_part_postamble',# Node::C_part_postamble object which emits + # boilerplate code following the C code )}; sub parse { @@ -416,6 +418,16 @@ sub parse { or return; push @{$self->{kids}}, $C_part; + # "Parse" the start of the file. Doesn't actually consume any lines: + # just a placeholder for emitting postamble later + + my $C_part_postamble = ExtUtils::ParseXS::Node::C_part_postamble->new(); + $self->{C_part_postamble} = $C_part_postamble; + $C_part_postamble->parse($pxs, $self) + or return; + push @{$self->{kids}}, $C_part_postamble; + + 1; } @@ -637,6 +649,42 @@ sub as_code { +# ====================================================================== + +package ExtUtils::ParseXS::Node::C_part_postamble; + +# AST node representing the boilerplate C code postamble following any +# initial C code contained within the C part of the XS file. +# This node's parse() method doesn't actually consume any lines; the node +# exists just for its as_code() method to emit the postamble into the C +# file. + +BEGIN { $build_subclass->( +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->{line_no} = 1; + $self->{file} = $pxs->{in_pathname}; + 1; +} + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # Emit boilerplate postamble following any code passed through from + # the 'C' part of the XS file + + ExtUtils::ParseXS::Utilities::standard_XS_defs(); + + print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" + if $pxs->{config_WantLineNumbers}; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::xsub; From 201fc0c9c30cfea7d4e7ddb7ffb30c2ecade52cd Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 10 Oct 2025 17:52:51 +0100 Subject: [PATCH 06/57] ParseXS: refactor: add Node::global_cpp_line (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) This node stores a single "#foo BAR" C preprocessor line which appears within the XS part of an XS file, and which is in global (file) scope (as opposed to being within code in an XSUB or BOOT). --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 11 ++---- .../lib/ExtUtils/ParseXS/Node.pm | 39 +++++++++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 5271adff40c8..e1f95847c7bd 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -438,20 +438,17 @@ sub process_file { PARAGRAPH: while ($self->fetch_para()) { + # Process and emit any initial C-preprocessor lines and blank # lines. Also, keep track of #if/#else/#endif nesting, updating: # $self->{XS_parse_stack} # $self->{XS_parse_stack_top_if_idx} # $self->{bootcode_early} # $self->{bootcode_later} - while (@{ $self->{line} } && $self->{line}->[0] !~ /^[^\#]/) { - my $ln = shift(@{ $self->{line} }); - print $ln, "\n"; - next unless $ln =~ /^\#\s*((if)(?:n?def)?|elsif|else|endif)\b/; - my $statement = $+; - # update global tracking of #if/#else etc - $self->analyze_preprocessor_statement($statement); + my $node = ExtUtils::ParseXS::Node::global_cpp_line->new(); + $node->parse($self); + $node->as_code($self); } next PARAGRAPH unless @{ $self->{line} }; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 749fa90088e5..3644e3fe6e92 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -685,6 +685,45 @@ sub as_code { } +# ====================================================================== + +package ExtUtils::ParseXS::Node::global_cpp_line; + +# AST node representing a single C-preprocessor line in file (global) +# scope. (A "single" line can actually include embedded "\\\n"'s from line +# continuations). + +BEGIN { $build_subclass->( + 'cpp_line', # the text of the "# foo" CPP line +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no + + my $ln = $self->{cpp_line} = shift(@{$pxs->{line}}); + + # Update global tracking of *conditional* CPP directives;s + # i.e. #if/#else etc + + return 1 unless $ln =~ /^\#\s*((if)(?:n?def)?|elsif|else|endif)\b/; + my $directive = $+; # one of "if", "elsif", "else", "endif" + $pxs->analyze_preprocessor_statement($directive); + + 1; +} + + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + print $self->{cpp_line}, "\n"; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::xsub; From 51357f9acd690b49cc6d7945aafe2c24530d60eb Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 10 Oct 2025 18:10:01 +0100 Subject: [PATCH 07/57] ParseXS: refactor: simplify double-negative regex This condition: match a line which doesn't start with anything other than '#' can be more simply expressed as: match a line which starts with '#' They're equivalent. Or at least I think they are. This was doing my head in. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index e1f95847c7bd..5cea67472975 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -445,7 +445,7 @@ sub process_file { # $self->{XS_parse_stack_top_if_idx} # $self->{bootcode_early} # $self->{bootcode_later} - while (@{ $self->{line} } && $self->{line}->[0] !~ /^[^\#]/) { + while (@{$self->{line}} && $self->{line}[0] =~ /^#/) { my $node = ExtUtils::ParseXS::Node::global_cpp_line->new(); $node->parse($self); $node->as_code($self); From c153926dac08efc7080a0c58ac7887e655d2d2c1 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sun, 12 Oct 2025 15:29:31 +0100 Subject: [PATCH 08/57] ParseXS: refactor: add Node::BOOT (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Remove the old BOOT_handler() and replace it with a new Node type. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 31 +------- .../lib/ExtUtils/ParseXS/Node.pm | 78 +++++++++++++++++++ 2 files changed, 82 insertions(+), 27 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 5cea67472975..100d6c08a7d7 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -544,7 +544,10 @@ sub process_file { } if ($self->check_keyword("BOOT")) { - $self->BOOT_handler(); + my $node = ExtUtils::ParseXS::Node::BOOT->new(); + unshift @{$self->{line}}, $_; + $node->parse($self); + $node->as_code($self); # BOOT: is a file-scoped keyword which consumes all the lines # following it in the current paragraph (as opposed to just until # the next keyword, like CODE: etc). @@ -836,32 +839,6 @@ sub check_keyword { } -# Handle BOOT: keyword. -# Save all the remaining lines in the paragraph to the bootcode_later -# array, and prepend a '#line' if necessary. - -sub BOOT_handler { - my ExtUtils::ParseXS $self = shift; - - # Check all the @{ $self->{line}} lines for balance: all the - # #if, #else, #endif etc within the BOOT should balance out. - $self->check_conditional_preprocessor_statements(); - - # prepend a '#line' directive if needed - if ( $self->{config_WantLineNumbers} - && $self->{line}->[0] !~ /^\s*#\s*line\b/) - { - push @{ $self->{bootcode_later} }, - sprintf "#line %d \"%s\"\n", - $self->{line_no}->[@{ $self->{line_no} } - @{ $self->{line} }], - escape_file_for_line_directive($self->{in_pathname}); - } - - # Save all the BOOT lines plus trailing empty line to be emitted later. - push @{ $self->{bootcode_later} }, "$_\n" for @{ $self->{line} }, ""; -} - - # ST(): helper function for the various INPUT / OUTPUT code emitting # parts. Generate an "ST(n)" string. This is normally just: # diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 3644e3fe6e92..bd6f66812bf9 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -724,6 +724,84 @@ sub as_code { } +# ====================================================================== + +package ExtUtils::ParseXS::Node::BOOT; + +# Store the code lines associated with the BOOT keyword +# +# Note that unlike other codeblock-like Node classes, BOOT consumes +# *all* lines remaining in the current paragraph, rather than stopping +# at the next keyword, if any. +# It's also file-scoped rather than XSUB-scoped. + +BEGIN { $build_subclass->( + 'lines', # Array ref of all code lines making up the BOOT +)}; + + +# Consume all the remaining lines and store in @$lines. + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no + + # Check all the @{$pxs->{line}} lines for balance: all the + # #if, #else, #endif etc within the BOOT should balance out. + ExtUtils::ParseXS::check_conditional_preprocessor_statements(); + + # Suck in all remaining lines + + $self->{lines} = [ @{$pxs->{line}} ]; + @{$pxs->{line}} = (); + + # Ignore any text following the keyword on the same line. + # XXX this quietly ignores any such text - really it should + # warn, but not yet for backwards compatibility. + shift @{$self->{lines}}; + + 1; +} + + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # Save all the BOOT lines to be emitted later. + push @{$pxs->{bootcode_later}}, $self->boot_code($pxs); +} + + +sub boot_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + my @lines; + + # Prepend a '#line' directive if not already present + if ( $pxs->{config_WantLineNumbers} + && @{$self->{lines}} + && $self->{lines}[0] !~ /^\s*#\s*line\b/ + ) + { + push @lines, + sprintf "#line %d \"%s\"\n", + $self->{line_no} + 1, + ExtUtils::ParseXS::Utilities::escape_file_for_line_directive( + $self->{file}); + } + + # Save all the BOOT lines (plus trailing empty line) to be emitted + # later. + push @lines, "$_\n" for @{$self->{lines}}, ""; + + return @lines; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::xsub; From 30c7d5a82ffe1ec5b7cfc5625f41f8ea377cc3f6 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Mon, 13 Oct 2025 10:23:11 +0100 Subject: [PATCH 09/57] ParseXS: refactor: regularise BOOT processing (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) For some reason most of the file-scoped keywords (PROTOTYPES etc) are handled together in a single 'while /REQUIRE|PROTOTYPES|.../' loop, but then BOOT is checked for separately after that loop has completed. This commit just moves checking for and handling BOOT into that same loop. The only significant difference between BOOT and the other keywords is that BOOT consumes all line until the end of the paragraph, rather than stopping at the next keyword. But this shouldn't make any difference to the top-level parsing loop. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 100d6c08a7d7..83e7f5460952 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -523,7 +523,7 @@ sub process_file { # INCLUDE ones, which open a new file and skip any leading blank # lines. - while (my $kwd = $self->check_keyword("REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK|VERSIONCHECK|INCLUDE(?:_COMMAND)?|SCOPE")) { + while (my $kwd = $self->check_keyword("BOOT|REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK|VERSIONCHECK|INCLUDE(?:_COMMAND)?|SCOPE")) { my $class = "ExtUtils::ParseXS::Node::$kwd"; if ($class->can('parse')) { @@ -543,17 +543,6 @@ sub process_file { $_ = shift(@{ $self->{line} }); } - if ($self->check_keyword("BOOT")) { - my $node = ExtUtils::ParseXS::Node::BOOT->new(); - unshift @{$self->{line}}, $_; - $node->parse($self); - $node->as_code($self); - # BOOT: is a file-scoped keyword which consumes all the lines - # following it in the current paragraph (as opposed to just until - # the next keyword, like CODE: etc). - next PARAGRAPH; - } - # ---------------------------------------------------------------- # Parse and code-emit an XSUB # ---------------------------------------------------------------- From 863754e1a4fb9b95fdd93518f875b04625c0df4f Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Mon, 13 Oct 2025 11:06:35 +0100 Subject: [PATCH 10/57] ParseXS: refactor: add Node::FALLBACK (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Remove the old FALLBACK_handler() and replace it with a new Node type. Also, change $pxs->{map_package_to_fallback_string} so that it now holds logical values ('TRUE' etc) rather than the actual strings to later emit into the code ('&PL_sv_yes' etc). Then do the translation at the point the code is emitted. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 35 ++++------------ .../lib/ExtUtils/ParseXS/Node.pm | 40 +++++++++++++++++++ 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 83e7f5460952..a04a424c6b47 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -205,7 +205,7 @@ BEGIN { 'map_package_to_fallback_string', # Hash: for every package, maps it to # the overload fallback state for that package (if # specified). Each value is one of the strings - # "&PL_sv_yes", "&PL_sv_no", "&PL_sv_undef". + # "TRUE", "FALSE", "UNDEF". 'proto_behaviour_specified', # Bool: prototype behaviour has been # specified by the -prototypes switch and/or @@ -728,8 +728,13 @@ EOF # Set ${'Foo::()'} to the fallback value for each overloaded # package 'Foo' (or undef if not specified). # But see the 'XXX' comments above about fallback and $(). - my $fallback = $self->{map_package_to_fallback_string}->{$package} - || "&PL_sv_undef"; + + my $fallback = $self->{map_package_to_fallback_string}{$package}; + $fallback = 'UNDEF' unless defined $fallback; + $fallback = $fallback eq 'TRUE' ? '&PL_sv_yes' + : $fallback eq 'FALSE' ? '&PL_sv_no' + : '&PL_sv_undef'; + print Q(<<"EOF"); | /* The magic for overload gets a GV* via gv_fetchmeth as */ | /* mentioned above, and looks in the SV* slot of it for */ @@ -854,30 +859,6 @@ sub ST { } -sub FALLBACK_handler { - my ExtUtils::ParseXS $self = shift; - my ($setting) = @_; - - # the rest of the current line should contain either TRUE, - # FALSE or UNDEF - - trim_whitespace($setting); - $setting = uc($setting); - - my %map = ( - TRUE => "&PL_sv_yes", 1 => "&PL_sv_yes", - FALSE => "&PL_sv_no", 0 => "&PL_sv_no", - UNDEF => "&PL_sv_undef", - ); - - # check for valid FALLBACK value - $self->death("Error: FALLBACK: TRUE/FALSE/UNDEF") unless exists $map{$setting}; - - $self->{map_package_to_fallback_string}->{$self->{PACKAGE_name}} - = $map{$setting}; -} - - sub REQUIRE_handler { my ExtUtils::ParseXS $self = shift; # the rest of the current line should contain a version number diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index bd6f66812bf9..c7ea272945b5 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -3886,6 +3886,46 @@ sub as_code { } +# ====================================================================== + +package ExtUtils::ParseXS::Node::FALLBACK; + +# Process the 'FALLBACK' keyword. +# Its main effect is to update $pxs->{map_package_to_fallback_string} with +# the fallback value for the current package. That is later used to plant +# boot code to set ${package}::() to a true/false/undef value. + +BEGIN { $build_subclass->(-parent => 'oneline', + 'value', # Str: TRUE, FALSE or UNDEF +)}; + + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no/text + + # The rest of the current line should contain either TRUE, + # FALSE or UNDEF, but we also secretly allow 0 or 1 and lower/mixed + # case. + + my $s = $self->{text}; + + $s = 'TRUE' if $s eq '1'; + $s = 'FALSE' if $s eq '0'; + $s = uc($s); + + $self->death("Error: FALLBACK: TRUE/FALSE/UNDEF") + unless $s =~ /^(TRUE|FALSE|UNDEF)$/; + + $self->{value} = $s; + $pxs->{map_package_to_fallback_string}{$pxs->{PACKAGE_name}} = $s; + + 1; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::enable; From 8f8270f62ead21512184b19b85d2d2163fd1899c Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Mon, 13 Oct 2025 11:24:12 +0100 Subject: [PATCH 11/57] ParseXS: refactor: add Node::REQUIRE (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Remove the old REQUIRE_handler() and replace it with a new Node type. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 19 ---------- .../lib/ExtUtils/ParseXS/Node.pm | 36 +++++++++++++++++++ 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index a04a424c6b47..7cd5f0b439aa 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -859,25 +859,6 @@ sub ST { } -sub REQUIRE_handler { - my ExtUtils::ParseXS $self = shift; - # the rest of the current line should contain a version number - my ($ver) = @_; - - trim_whitespace($ver); - - $self->death("Error: REQUIRE expects a version number") - unless $ver; - - # check that the version number is of the form n.n - $self->death("Error: REQUIRE: expected a number, got '$ver'") - unless $ver =~ /^\d+(\.\d*)?/; - - $self->death("Error: xsubpp $ver (or better) required--this is only $VERSION.") - unless $VERSION >= $ver; -} - - # Push an entry on the @{ $self->{XS_parse_stack} } array containing the # current file state, in preparation for INCLUDEing a new file. (Note that # it doesn't handle type => 'if' style entries, only file entries.) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index c7ea272945b5..b4acccdbecc8 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -3926,6 +3926,42 @@ sub parse { } +# ====================================================================== + +package ExtUtils::ParseXS::Node::REQUIRE; + +# Process the 'REQUIRE' keyword. + +BEGIN { $build_subclass->(-parent => 'oneline', + 'version', # Str: the minimum version allowed, e.g.'1.23' +)}; + + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no/text + + my $ver = $self->{text}; + + $pxs->death("Error: REQUIRE expects a version number") + unless length $ver; + + # check that the version number is of the form n.n + $pxs->death("Error: REQUIRE: expected a number, got '$ver'") + unless $ver =~ /^\d+(\.\d*)?/; + + my $got = $ExtUtils::ParseXS::VERSION; + $pxs->death("Error: xsubpp $ver (or better) required--this is only $got.") + unless $got >= $ver; + + $self->{version} = $ver; + + 1; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::enable; From 583011c5b14d53ef1b657d2eed0c7a4ed11c8c4e Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Mon, 20 Oct 2025 10:21:53 +0100 Subject: [PATCH 12/57] ParseXS: refactor: store current pkg in Node::xsub (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) In preparation for code generation to be done only after the whole file has been parsed (rather than after each XSUB is parsed, as at present), store the current package name in each Node::xsub object. Then use that value, rather than $pxs->{PACKAGE_name}, to specify the package name in various bits of boot code and in the $Package variable in typemaps. This is because by the end of parsing, $pxs->{PACKAGE_name} will have the value which was extracted from the *last* seen MODULE line in the file, and not the value at the point where an XSUB was parsed. Add several tests to confirm things work, and will continue to work in a few commits' time, when we switch to code generation coming after *all* parsing. --- .../lib/ExtUtils/ParseXS/Eval.pm | 20 +-- .../lib/ExtUtils/ParseXS/Node.pm | 21 ++- dist/ExtUtils-ParseXS/t/001-basic.t | 163 ++++++++++++++++++ 3 files changed, 185 insertions(+), 19 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm index 25b2285afd16..6591c9d118c2 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Eval.pm @@ -27,12 +27,9 @@ variables. Warns the contents of C<$@> if any. Not all these variables are necessarily considered "public" wrt. use in -typemaps, so beware. Variables set up from the ExtUtils::ParseXS object: +typemaps, so beware. Variables set up from C<$other_hashref>: $Package $func_name $Full_func_name $pname - -Variables set up from C<$other_hashref>: - $var $type $ntype $subtype $arg $ALIAS =cut @@ -40,9 +37,8 @@ Variables set up from C<$other_hashref>: sub eval_output_typemap_code { my ($_pxs, $_code, $_other) = @_; - my ($Package) = @{$_pxs}{qw(PACKAGE_name)}; - my ($var, $type, $ntype, $subtype, $arg, $ALIAS, $func_name, $Full_func_name, $pname) - = @{$_other}{qw(var type ntype subtype arg alias func_name full_C_name full_perl_name)}; + my ($Package, $var, $type, $ntype, $subtype, $arg, $ALIAS, $func_name, $Full_func_name, $pname) + = @{$_other}{qw(Package var type ntype subtype arg alias func_name full_C_name full_perl_name)}; my $rv = eval $_code; warn $@ if $@; @@ -59,12 +55,9 @@ variables. Warns the contents of C<$@> if any. Not all these variables are necessarily considered "public" wrt. use in -typemaps, so beware. Variables set up from the ExtUtils::ParseXS object: +typemaps, so beware. Variables set up from C<$other_hashref>: $Package $func_name $Full_func_name $pname - -Variables set up from C<$other_hashref>: - $var $type $ntype $subtype $num $init $printed_name $arg $argoff $ALIAS =cut @@ -72,9 +65,8 @@ Variables set up from C<$other_hashref>: sub eval_input_typemap_code { my ($_pxs, $_code, $_other) = @_; - my ($Package) = @{$_pxs}{qw(PACKAGE_name)}; - my ($var, $type, $num, $init, $printed_name, $arg, $ntype, $argoff, $subtype, $ALIAS, $func_name, $Full_func_name, $pname) - = @{$_other}{qw(var type num init printed_name arg ntype argoff subtype alias func_name full_C_name full_perl_name)}; + my ($Package, $var, $type, $num, $init, $printed_name, $arg, $ntype, $argoff, $subtype, $ALIAS, $func_name, $Full_func_name, $pname) + = @{$_other}{qw(Package var type num init printed_name arg ntype argoff subtype alias func_name full_C_name full_perl_name)}; my $rv = eval $_code; warn $@ if $@; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index b4acccdbecc8..9e7e0bd80ba6 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -888,9 +888,14 @@ BEGIN { $build_subclass->( # "2": empty prototype # other: a specific prototype. + # Misc + 'SCOPE_enabled', # Bool: "SCOPE: ENABLE" seen, in either the # file or XSUB part of the XS file + 'PACKAGE_name', # value of $pxs->{PACKAGE_name} at parse time + 'PACKAGE_C_name', # value of $pxs->{PACKAGE_C_name} at parse time + )}; @@ -900,6 +905,10 @@ sub parse { $self->SUPER::parse($pxs); # set file/line_no + # record what package we're in + $self->{PACKAGE_name} = $pxs->{PACKAGE_name}; + $self->{PACKAGE_C_name} = $pxs->{PACKAGE_C_name}; + # Initially inherit the prototype behaviour for the XSUB from the # global PROTOTYPES default $self->{prototype} = $pxs->{PROTOTYPES_value}; @@ -1209,7 +1218,7 @@ EOF my $attrs = "@{$self->{attributes}}"; push(@code, ExtUtils::ParseXS::Q(<<"EOF")); | cv = $newXS(\"$pname\", XS_$cname$file_arg$proto_arg); - | apply_attrs_string("$pxs->{PACKAGE_name}", cv, "$attrs", 0); + | apply_attrs_string("$self->{PACKAGE_name}", cv, "$attrs", 0); EOF $pxs->{need_boot_cv} = 1; } @@ -1222,7 +1231,7 @@ EOF %{ $self->{map_interface_name_short_to_original} }) { my $value = $self->{map_interface_name_short_to_original}{$yname}; - $yname = "$pxs->{PACKAGE_name}\::$yname" unless $yname =~ /::/; + $yname = "$self->{PACKAGE_name}\::$yname" unless $yname =~ /::/; my $macro = $self->{interface_macro_set}; $macro = 'XSINTERFACE_FUNC_SET' unless defined $macro; @@ -1259,9 +1268,9 @@ EOF for my $operator (sort keys %{ $self->{overload_name_seen} }) { - $pxs->{map_overloaded_package_to_C_package}->{$pxs->{PACKAGE_name}} - = $pxs->{PACKAGE_C_name}; - my $overload = "$pxs->{PACKAGE_name}\::($operator"; + $pxs->{map_overloaded_package_to_C_package}->{$self->{PACKAGE_name}} + = $self->{PACKAGE_C_name}; + my $overload = "$self->{PACKAGE_name}\::($operator"; push(@code, " (void)$newXS(\"$overload\", XS_$cname$file_arg$proto_arg);\n"); } @@ -1789,6 +1798,7 @@ sub lookup_input_typemap { func_name => $xsub->{decl}{name}, full_perl_name => $xsub->{decl}{full_perl_name}, full_C_name => $xsub->{decl}{full_C_name}, + Package => $xsub->{PACKAGE_name}, }; # The type looked up in the eval is Foo__Bar rather than Foo::Bar @@ -2088,6 +2098,7 @@ sub lookup_output_typemap { func_name => $xsub->{decl}{name}, full_perl_name => $xsub->{decl}{full_perl_name}, full_C_name => $xsub->{decl}{full_C_name}, + Package => $xsub->{PACKAGE_name}, }; diff --git a/dist/ExtUtils-ParseXS/t/001-basic.t b/dist/ExtUtils-ParseXS/t/001-basic.t index 20d599b1229a..5e62d332da66 100644 --- a/dist/ExtUtils-ParseXS/t/001-basic.t +++ b/dist/ExtUtils-ParseXS/t/001-basic.t @@ -5233,4 +5233,167 @@ EOF } +{ + # Check for correct package name; i.e. use the current package name, + # not the last one seen in the file. + + my $preamble = Q(<<'EOF'); + |MODULE = Foo PACKAGE = Foo + | + |PROTOTYPES: DISABLE + | + |TYPEMAP: < Date: Mon, 13 Oct 2025 12:52:11 +0100 Subject: [PATCH 13/57] ParseXS: refactor: add Node::include etc (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) This commit adds these new Node types: ExtUtils::ParseXS::Node::include ExtUtils::ParseXS::Node::INCLUDE ExtUtils::ParseXS::Node::INCLUDE_COMMAND and removes these two (now obsolete) methods: INCLUDE_handler INCLUDE_COMMAND_handler INCLUDE_COMMAND_handler() looks to have been a cut+paste of INCLUDE_handler() with the relevant bits changed to handle a piped command rather than a filename. I've combined their functionality back into a single parse() method within the Node::include base class, then made Node::INCLUDE and Node::INCLUDE_COMMAND trivial subclasses which just set (or not) $self->{is_cmd} and then let the base class do all the heavy lifting. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 113 ------------ .../lib/ExtUtils/ParseXS/Node.pm | 170 ++++++++++++++++++ 2 files changed, 170 insertions(+), 113 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 7cd5f0b439aa..e2925b4784b3 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -883,67 +883,6 @@ sub push_parse_stack { } -sub INCLUDE_handler { - my ExtUtils::ParseXS $self = shift; - $_ = shift; - # the rest of the current line should contain a valid filename - - trim_whitespace($_); - - $self->death("INCLUDE: filename missing") - unless $_; - - $self->death("INCLUDE: output pipe is illegal") - if /^\s*\|/; - - # simple minded recursion detector - $self->death("INCLUDE loop detected") - if $self->{IncludedFiles}->{$_}; - - ++$self->{IncludedFiles}->{$_} unless /\|\s*$/; - - if (/\|\s*$/ && /^\s*perl\s/) { - Warn( $self, "The INCLUDE directive with a command is discouraged." . - " Use INCLUDE_COMMAND instead! In particular using 'perl'" . - " in an 'INCLUDE: ... |' directive is not guaranteed to pick" . - " up the correct perl. The INCLUDE_COMMAND directive allows" . - " the use of \$^X as the currently running perl, see" . - " 'perldoc perlxs' for details."); - } - - $self->push_parse_stack(); - - $self->{in_fh} = Symbol::gensym(); - - # open the new file - open($self->{in_fh}, $_) or $self->death("Cannot open '$_': $!"); - - print Q(<<"EOF"); - | - |/* INCLUDE: Including '$_' from '$self->{in_filename}' */ - | -EOF - - $self->{in_filename} = $_; - $self->{in_pathname} = ( $^O =~ /^mswin/i ) - # See CPAN RT #61908: gcc doesn't like - # backslashes on win32? - ? qq($self->{dir}/$self->{in_filename}) - : File::Spec->catfile($self->{dir}, $self->{in_filename}); - - # Prime the pump by reading the first - # non-blank line - - # skip leading blank lines - while (readline($self->{in_fh})) { - last unless /^\s*$/; - } - - $self->{lastline} = $_; - $self->{lastline_no} = $.; -} - - # Quote a command-line to be suitable for VMS sub QuoteArgs { @@ -980,58 +919,6 @@ sub QuoteArgs { } -sub INCLUDE_COMMAND_handler { - my ExtUtils::ParseXS $self = shift; - $_ = shift; - # the rest of the current line should contain a valid command - - trim_whitespace($_); - - $_ = QuoteArgs($_) if $^O eq 'VMS'; - - $self->death("INCLUDE_COMMAND: command missing") - unless $_; - - $self->death("INCLUDE_COMMAND: pipes are illegal") - if /^\s*\|/ or /\|\s*$/; - - $self->push_parse_stack( IsPipe => 1 ); - - $self->{in_fh} = Symbol::gensym(); - - # If $^X is used in INCLUDE_COMMAND, we know it's supposed to be - # the same perl interpreter as we're currently running - my $X = $self->_safe_quote($^X); # quotes if has spaces - s/^\s*\$\^X/$X/; - - # open the new file - open ($self->{in_fh}, "-|", $_) - or $self->death( $self, "Cannot run command '$_' to include its output: $!"); - - print Q(<<"EOF"); - | - |/* INCLUDE_COMMAND: Including output of '$_' from '$self->{in_filename}' */ - | -EOF - - $self->{in_filename} = $_; - $self->{in_pathname} = $self->{in_filename}; - #$self->{in_pathname} =~ s/\"/\\"/g; # Fails? See CPAN RT #53938: MinGW Broken after 2.21 - $self->{in_pathname} =~ s/\\/\\\\/g; # Works according to reporter of #53938 - - # Prime the pump by reading the first - # non-blank line - - # skip leading blank lines - while (readline($self->{in_fh})) { - last unless /^\s*$/; - } - - $self->{lastline} = $_; - $self->{lastline_no} = $.; -} - - # Pop the type => 'file' entry off the top of the @{ $self->{XS_parse_stack} } # array following the end of processing an INCLUDEd file, and restore the # former state. diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 9e7e0bd80ba6..e05693240c4a 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -1,6 +1,7 @@ package ExtUtils::ParseXS::Node; use strict; use warnings; +use Symbol; our $VERSION = '3.61'; @@ -3973,6 +3974,175 @@ sub parse { } +# ====================================================================== + +package ExtUtils::ParseXS::Node::include; + +# Common base class for the 'INCLUDE' and 'INCLUDE_COMMAND' keywords + +BEGIN { $build_subclass->(-parent => 'oneline', + 'is_cmd', # Bool: is INCLUDE_COMMAND + 'inc_filename', # Str: the file/command to be included + 'old_filename', # Str: the previous file +)}; + + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no/text + + my $f = $self->{text}; + my $is_cmd = $self->{is_cmd}; + + if ($is_cmd) { + $f = ExtUtils::ParseXS::QuoteArgs($f) if $^O eq 'VMS'; + + $pxs->death("INCLUDE_COMMAND: command missing") + unless length $f; + + $pxs->death("INCLUDE_COMMAND: pipes are illegal") + if $f =~ /^\s*\|/ or $f =~ /\|\s*$/; + } + else { + $pxs->death("INCLUDE: filename missing") + unless length $f; + + $pxs->death("INCLUDE: output pipe is illegal") + if $f =~ /^\s*\|/; + + # simple minded recursion detector + $pxs->death("INCLUDE loop detected") + if $pxs->{IncludedFiles}{$f}; + + ++$pxs->{IncludedFiles}->{$f} unless $f =~ /\|\s*$/; + + if ($f =~ /\|\s*$/ && $f =~ /^\s*perl\s/) { + $pxs->Warn( + "The INCLUDE directive with a command is discouraged." + . " Use INCLUDE_COMMAND instead! In particular using 'perl'" + . " in an 'INCLUDE: ... |' directive is not guaranteed to pick" + . " up the correct perl. The INCLUDE_COMMAND directive allows" + . " the use of \$^X as the currently running perl, see" + . " 'perldoc perlxs' for details." + ); + } + } + + $pxs->push_parse_stack($is_cmd ? (IsPipe => 1) : ()); + + $pxs->{in_fh} = Symbol::gensym(); + + # Open the new file / pipe + + if ($is_cmd) { + # Expand the special token '$^X' into the full path of the + # currently running perl interpreter + my $X = $pxs->_safe_quote($^X); # quotes if has spaces + $f =~ s/^\s*\$\^X/$X/; + + open ($pxs->{in_fh}, "-|", $f) + or $pxs->death( + "Cannot run command '$f' to include its output: $!"); + } + else { + open($pxs->{in_fh}, $f) + or $pxs->death("Cannot open '$f': $!"); + } + + $self->{old_filename} = $pxs->{in_filename}; + $self->{inc_filename} = $f; + $pxs->{in_filename} = $f; + + my $path = $f; + if ($is_cmd) { + #$path =~ s/\"/\\"/g; # Fails? See CPAN RT #53938: MinGW Broken after 2.21 + $path =~ s/\\/\\\\/g; # Works according to reporter of #53938 + } + else { + $path = ($^O =~ /^mswin/i) + # See CPAN RT #61908: gcc doesn't like + # backslashes on win32? + ? "$pxs->{dir}/$path" + : File::Spec->catfile($pxs->{dir}, $path); + } + $pxs->{in_pathname} = $path; + + # Prime the pump by reading the first non-blank line + + # skip leading blank lines + while (readline($pxs->{in_fh})) { + last unless /^\s*$/; + } + + $pxs->{lastline} = $_; + $pxs->{lastline_no} = $.; + + 1; +} + + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + my $comment = $self->{is_cmd} + ? "INCLUDE_COMMAND: Including output of" + : "INCLUDE: Including"; + + $comment .= " '$self->{inc_filename}' from '$self->{old_filename}'"; + + print ExtUtils::ParseXS::Q(<<"EOF"); + | + |/* $comment */ + | +EOF +} + + +# ====================================================================== + +package ExtUtils::ParseXS::Node::INCLUDE; + +# Process the 'INCLUDE' keyword. Most processing is actually done by the +# parent 'include' class which handles INCLUDE_COMMAND too. + +BEGIN { $build_subclass->(-parent => 'include', +)}; + + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->{is_cmd} = 0; + $self->SUPER::parse($pxs); # main parsing done by Node::include + 1; +} + + +# ====================================================================== + +package ExtUtils::ParseXS::Node::INCLUDE_COMMAND; + +# Process the 'INCLUDE_COMMAND' keyword. Most processing is actually done +# by the parent 'include' class which handles INCLUDE too. + +BEGIN { $build_subclass->(-parent => 'include', +)}; + + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->{is_cmd} = 1; + $self->SUPER::parse($pxs); # main parsing done by Node::include + 1; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::enable; From e65af0e346cd2aca64c338c16f61a8a63ec54df5 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Mon, 13 Oct 2025 15:57:53 +0100 Subject: [PATCH 14/57] ParseXS: refactor: remove residual FOO_handler (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Remove a branch which says "if present, call new-style Node::FOO::parse(), else call old-style call FOO_handler()", since there are now AST nodes for all keywords. Also update some of the code comments to remove mention of FOO_handler and that way of working. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 42 +++++++------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index e2925b4784b3..6ef3829b4c1e 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -10,13 +10,14 @@ use warnings; # xsubpp. It also makes it easier to test the individual components. # # The bulk of this file is taken up with the process_file() method which -# does the whole job of reading in a .xs file and outputting a .c file. -# It in turn relies on fetch_para() to read chunks of lines from the -# input, and on a bunch of FOO_handler() methods which process each of the -# main XS FOO keywords when encountered. +# does the whole job of reading in a .xs file and outputting a .c file. It +# in turn relies on fetch_para() to read chunks of lines from the input, +# and on various ExtUtils::ParseXS::Node::FOO::parse() methods which build +# up an AST representing the parsed XS file. Then a bunch of as_code() +# methods walk that tree, emitting C code. # -# The remainder of this file mainly consists of helper functions for the -# handlers, and functions to help with outputting stuff. +# The remainder of this file mainly consists of helper functions and +# functions to help with outputting stuff. # # Of particular note is the Q() function, which is typically used to # process escaped ("quoted") heredoc text of C code fragments to be @@ -35,8 +36,7 @@ use warnings; # # ParseXS::Node This and its subclasses provide the nodes # which make up the Abstract Syntax Tree (AST) -# generated by the parser. XXX as of Sep 2024, this -# is very much a Work In Progress. +# generated by the parser. # # ParseXS::Constants defines a few constants used here, such the regex # patterns used to detect a new XS keyword. @@ -515,30 +515,16 @@ sub process_file { # fetch_para(). # # This loop repeatedly: skips any blank lines and then calls - # $self->FOO_handler() if it finds any of the file-scoped keywords - # in the passed pattern. $_ is updated and is available to the - # handlers. - # - # Each of the handlers acts on just the current line, apart from the - # INCLUDE ones, which open a new file and skip any leading blank - # lines. + # the relevant Node::FOO::Parse() method if it finds any of the + # file-scoped keywords in the passed pattern. while (my $kwd = $self->check_keyword("BOOT|REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK|VERSIONCHECK|INCLUDE(?:_COMMAND)?|SCOPE")) { my $class = "ExtUtils::ParseXS::Node::$kwd"; - if ($class->can('parse')) { - # this branch handles the newer AST-oriented keyword processing - my $node = $class->new(); - unshift @{$self->{line}}, $_; - $node->parse($self); - $node->as_code($self) if $class->can('as_code'); - } - else { - # this branch handles the older KEYWORD_handler()-oriented processing - my $method = $kwd . "_handler"; - $self->$method($_); # $_ contains the rest of the line after KEYWORD: - } - + my $node = $class->new(); + unshift @{$self->{line}}, $_; + $node->parse($self); + $node->as_code($self) if $class->can('as_code'); next PARAGRAPH unless @{ $self->{line} }; $_ = shift(@{ $self->{line} }); } From 9724016a3d988f7e1fc4d1ca31940f03d8e25f75 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 14 Oct 2025 14:10:22 +0100 Subject: [PATCH 15/57] ParseXS: refactor: remove check_keyword() method (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) This method used to be used for scanning for keywords and then invoking old-style FOO_handler()s. Replace its last usage with the new parse_keywords() method (which looks for keywords and creates Node::FOO's instead), and remove the method itself. For now, create a temporary node to accumulate all the file-scoped keywords in a continuous run, and then call as_code() once at the end. I.e for the input: PROTOTYPES: X REQUIRE: Y .. formerly a temporary Node::PROTOTYPES node was created, parsed, and then as_code() immediately called on it, then freed. Then similarly for REQUIRE etc. After this commit, a temporary parent node is created, child nodes are added to it for each of PROTOTYPES, REQUIRE, etc, and finally as_code() is called once which calls as_code() for each child. This is a small step in the process of moving towards deferring calling as_code() until the whole file has been read in and parsed. The next commit will make the temporary parent node into something better. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 48 +++++-------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 6ef3829b4c1e..b717f8257df6 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -503,10 +503,6 @@ sub process_file { $self->{file_SCOPE_enabled} = 0; - # Process next line - - $_ = shift(@{ $self->{line} }); - # ---------------------------------------------------------------- # Process file-scoped keywords # ---------------------------------------------------------------- @@ -518,26 +514,27 @@ sub process_file { # the relevant Node::FOO::Parse() method if it finds any of the # file-scoped keywords in the passed pattern. - while (my $kwd = $self->check_keyword("BOOT|REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK|VERSIONCHECK|INCLUDE(?:_COMMAND)?|SCOPE")) { + my $tmp_obj = ExtUtils::ParseXS::Node::XS_file->new(); + $tmp_obj->parse_keywords( + $self, + undef, undef, # xsub and xbody: not needed for non XSUB keywords + undef, # implies process as many keywords as possible + "BOOT|REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK" + . "|VERSIONCHECK|INCLUDE|INCLUDE_COMMAND|SCOPE" - my $class = "ExtUtils::ParseXS::Node::$kwd"; - my $node = $class->new(); - unshift @{$self->{line}}, $_; - $node->parse($self); - $node->as_code($self) if $class->can('as_code'); - next PARAGRAPH unless @{ $self->{line} }; - $_ = shift(@{ $self->{line} }); - } + ); + $tmp_obj->as_code($self); + + + next PARAGRAPH unless @{ $self->{line} }; # ---------------------------------------------------------------- # Parse and code-emit an XSUB # ---------------------------------------------------------------- - unshift @{$self->{line}}, $_; my $xsub = ExtUtils::ParseXS::Node::xsub->new(); $xsub->parse($self) or next PARAGRAPH; - $_ = shift @{$self->{line}}; $xsub->as_code($self); $self->{seen_an_XSUB} = 1; # encountered at least one XSUB @@ -798,27 +795,6 @@ sub report_error_count { *errors = \&report_error_count; -# $self->check_keyword("FOO|BAR") -# -# Return a keyword if the next non-blank line matches one of the passed -# keywords, or return undef otherwise. -# -# Expects $_ to be set to the current line. Skip any initial blank lines, -# (consuming @{$self->{line}} and updating $_). -# -# Then if it matches FOO: etc, strip the keyword and any comment from the -# line (leaving any argument in $_) and return the keyword. Return false -# otherwise. - -sub check_keyword { - my ExtUtils::ParseXS $self = shift; - # skip blank lines - $_ = shift(@{ $self->{line} }) while !/\S/ && @{ $self->{line} }; - - s/^(\s*)($_[0])\s*:\s*(?:#.*)?/$1/s && $2; -} - - # ST(): helper function for the various INPUT / OUTPUT code emitting # parts. Generate an "ST(n)" string. This is normally just: # From b4490392ab0dec2893f418ff14eec8bbc8e597d2 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 18 Oct 2025 16:27:35 +0100 Subject: [PATCH 16/57] ParseXS: add Node::as_concise method For debugging: displays the node and its children in a compact style similar to perl -MO=Concise --- .../lib/ExtUtils/ParseXS/Node.pm | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index e05693240c4a..f45211a8c93d 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -19,6 +19,7 @@ ExtUtils::ParseXS::Node - Classes for nodes of an Abstract Syntax Tree $foo->parse(...) or die; $foo->as_code(...); + $foo->as_concise(1); =head1 DESCRIPTION @@ -95,6 +96,9 @@ parameters. Some subclasses may have additional helper methods. +The as_concise() method returns a line-per-node string representation of +the node and any children. It is intended mainly for debugging. + =head2 Class Hierachy C and its sub-classes form the following inheritance hierarchy. @@ -381,6 +385,72 @@ sub is_xs_module_line { sub as_code { } +# as_concise(): for debugging: +# +# Return a string representing a concise line-per-node representation +# of the node and any children, in the spirit of 'perl -MO=Concise'. +# Intended to be human- rather than machine-readable. +# +# The single optional parameter, depth, is for indentation purposes + +sub as_concise { + my __PACKAGE__ $self = shift; + my $depth = shift; + $depth = 0 unless defined $depth; + + my $f = $self->{file}; + $f = '??' unless defined $f; + $f =~ s{^.*/}{}; + substr($f,0,10) = '' if length($f) > 10; + + my $l = $self->{line_no}; + $l = defined $l ? sprintf("%-3d", $l) : '?? '; + + my $s = sprintf "%-15s", "$f:$l"; + $s .= (' ' x $depth); + + my $class = ref $self; + $class =~ s/^.*:://g; + $s .= "${class}: "; + + my @kv; + + for my $key (sort grep !/^(file|line_no|kids)$/, keys %$self) { + my $v = $self->{$key}; + + # some basic pretty-printing + + if (!defined $v) { + $v = '-'; + } + elsif (ref $v) { + $v = "[ref]"; + } + elsif ($v =~ /^-?\d+(\.\d+)?$/) { + # leave as-is + } + else { + $v = "$v"; + $v =~ s/"/\\"/g; + my $max = 20; + substr($v, $max) = '...' if length($v) > $max; + $v = qq("$v"); + } + + push @kv, "$key=$v"; + } + + $s .= join '; ', @kv; + $s .= "\n"; + + if ($self->{kids}) { + $s .= $_->as_concise($depth+1) for @{$self->{kids}}; + } + + $s; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::XS_file; From 552401e86879d5fa88dfc27925244e39f77634d7 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 14 Oct 2025 14:55:49 +0100 Subject: [PATCH 17/57] ParseXS: refactor: add Node::cpp_scope (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add a new node type, ExtUtils::ParseXS::Node::cpp_scope. This node takes on responsibility for the handling the main file-scoped parsing loop, which looks for file-scoped keywords, CPP directives, the start of XSUB declarations etc. It is the main child of the top-most Node::XS_file node type. The bulk of this commit consists of cutting and pasting the main parsing loop (including fetch_para()) out from ExtUtils::ParseXS::process_file() and into ExtUtils::ParseXS::Node::cpp_scope::parse(). For now this as a bit hacky - all tests pass, but mainly by ensuring (temporarily) that various bits of code are emitted during the parsing phase, rather than via a call to as_code(), in order to preserve the correct ordering. The next few commits will gradually clean things up. This commit also converts the lexical variable $cpp_next_tmp_define into a field of the ExtUtils::ParseXS object, as it now needs accessing outside of process_file(). The intent for this node type is that it represents anywhere within a single C preprocessor #if / #else / #endif branch / scope. For this commit, there's only a single cpp_scope node, representing the whole XS part of the main file. Over the next few commits, further nodes will be added: during INCLUDE, which is a whole separate scope, and between CPP conditional directives. When all is complete, it will allow for simpler handling of handling "duplicate" XSUB declarations in different #if branches, and the XS_parse_stack mechanism can be eliminated. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 123 +---------- .../lib/ExtUtils/ParseXS/Node.pm | 193 +++++++++++++++++- 2 files changed, 200 insertions(+), 116 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index b717f8257df6..412ab674cecd 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -192,6 +192,10 @@ BEGIN { # the top element of the stack, since that also # includes elements for each INCLUDE etc. + 'cpp_next_tmp_define',# the next string like XSubPPtmpAAAA + # to use as CPP defines for distringuishing + # similar calls to newXS() etc + 'MODULE_cname', # MODULE canonical name (i.e. after s/\W/_/g). 'PACKAGE_name', # PACKAGE name. 'PACKAGE_C_name', # Ditto, but with tr/:/_/. @@ -426,125 +430,14 @@ sub process_file { $self->{lastline} = $_; $self->{lastline_no} = $.; $self->{XS_parse_stack_top_if_idx} = 0; - my $cpp_next_tmp_define = 'XSubPPtmpAAAA'; + $self->{cpp_next_tmp_define} = 'XSubPPtmpAAAA'; $AST->as_code($self); - # ---------------------------------------------------------------- - # Main loop: for each iteration, read in a paragraph's worth of XSUB - # definition or XS/CPP directives into @{ $self->{line} }, then try to - # interpret those lines. - # ---------------------------------------------------------------- - - PARAGRAPH: - while ($self->fetch_para()) { - - # Process and emit any initial C-preprocessor lines and blank - # lines. Also, keep track of #if/#else/#endif nesting, updating: - # $self->{XS_parse_stack} - # $self->{XS_parse_stack_top_if_idx} - # $self->{bootcode_early} - # $self->{bootcode_later} - while (@{$self->{line}} && $self->{line}[0] =~ /^#/) { - my $node = ExtUtils::ParseXS::Node::global_cpp_line->new(); - $node->parse($self); - $node->as_code($self); - } - - next PARAGRAPH unless @{ $self->{line} }; - - if ( $self->{XS_parse_stack_top_if_idx} - && !$self->{XS_parse_stack}->[$self->{XS_parse_stack_top_if_idx}]{varname}) - { - # We are inside an #if, but have not yet #defined its xsubpp variable. - # - # At the start of every '#if ...' which is external to an XSUB, - # we emit '#define XSubPPtmpXXXX 1', for increasing XXXX. - # Later, when emitting initialisation code in places like a boot - # block, it can then be made conditional via, e.g. - # #if XSubPPtmpXXXX - # newXS(...); - # #endif - # So that only the defined XSUBs get added to the symbol table. - print "#define $cpp_next_tmp_define 1\n\n"; - push(@{ $self->{bootcode_early} }, "#if $cpp_next_tmp_define\n"); - push(@{ $self->{bootcode_later} }, "#if $cpp_next_tmp_define\n"); - $self->{XS_parse_stack}->[$self->{XS_parse_stack_top_if_idx}]{varname} - = $cpp_next_tmp_define++; - } - - # This will die on something like - # - # | CODE: - # | foo(); - # | - # |#define X - # | bar(); - # - # due to the define starting at column 1 and being preceded by a blank - # line: so the define and bar() aren't parsed as part of the CODE - # block. - - $self->death( - "Code is not inside a function" - ." (maybe last function was ended by a blank line " - ." followed by a statement on column one?)") - if $self->{line}->[0] =~ /^\s/; - - # The SCOPE keyword can appear both in file scope (just before an - # XSUB) and as an XSUB keyword. This field maintains the state of the - # former: reset it at the start of processing any file-scoped - # keywords just before the XSUB (i.e. without any blank lines, e.g. - # SCOPE: ENABLE - # int - # foo(...) - # These semantics may not be particularly sensible, but they maintain - # backwards compatibility for now. - - $self->{file_SCOPE_enabled} = 0; - - # ---------------------------------------------------------------- - # Process file-scoped keywords - # ---------------------------------------------------------------- - - # Note that MODULE and TYPEMAP will already have been processed by - # fetch_para(). - # - # This loop repeatedly: skips any blank lines and then calls - # the relevant Node::FOO::Parse() method if it finds any of the - # file-scoped keywords in the passed pattern. - - my $tmp_obj = ExtUtils::ParseXS::Node::XS_file->new(); - $tmp_obj->parse_keywords( - $self, - undef, undef, # xsub and xbody: not needed for non XSUB keywords - undef, # implies process as many keywords as possible - "BOOT|REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK" - . "|VERSIONCHECK|INCLUDE|INCLUDE_COMMAND|SCOPE" - - ); - $tmp_obj->as_code($self); - - - next PARAGRAPH unless @{ $self->{line} }; - - # ---------------------------------------------------------------- - # Parse and code-emit an XSUB - # ---------------------------------------------------------------- - - my $xsub = ExtUtils::ParseXS::Node::xsub->new(); - $xsub->parse($self) - or next PARAGRAPH; - - $xsub->as_code($self); - $self->{seen_an_XSUB} = 1; # encountered at least one XSUB - - # ---------------------------------------------------------------- - # end of XSUB - # ---------------------------------------------------------------- - - } # END 'PARAGRAPH' 'while' loop + my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({type => 'main'}); + $cpp_scope->parse($self); + $cpp_scope->as_code($self); # ---------------------------------------------------------------- # End of main loop and at EOF: all paragraphs (and thus XSUBs) have now diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index f45211a8c93d..1359b20d5193 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -756,6 +756,192 @@ sub as_code { } +# ====================================================================== + +package ExtUtils::ParseXS::Node::cpp_scope; + +# Node representing a part of an XS file which is all in the same C +# preprocessor scope as regards C preprocessor (CPP) conditionals, i.e. +# #if/#elsif/#else/#endif etc. +# +# Note that this only considers file-scoped C preprocessor directives; +# ones within a code block such as CODE or BOOT don't contribute to the +# state maintained here. +# +# Initially the whole XS part of the main XS file is considered a single +# scope, so the single main cpp_scope node would have, as children, all +# the file-scoped nodes such as Node::PROTOTYPES and any Node::xsub's. +# +# After an INCLUDE, the new XS file is considered as being in a separate +# scope, and gets its own child cpp_scope node. +# +# Once an XS file starts having file-scope CPP conditionals, then each +# branch of the conditional is considered a separate scope and gets its +# own cpp_scope node. Nested conditionals cause nested cpp_scope objects +# in the AST. +# +# The main reason for this node type is to separate out the AST into +# separate sections which can each have the same named XSUB without a +# 'duplicate XSUB' warning, and where newXS()-type calls can be added to +# to the boot code for *both* XSUBs, guarded by suitable #ifdef's. +# +# This node is the main high-level node where file-scoped parsing takes +# place: its parse() method contains a fetch_para() loop which does all +# the looking for file-scoped keywords, CPP directives, and XSUB +# declarations. It implements a recursive-decent parser by creating child +# cpp_scope nodes and recursing into that child's parse() method (which +# does its own fetch_para() calls). + +BEGIN { $build_subclass->( + 'type', # what sort of scope: 'main', 'include' or 'if' +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + + # ---------------------------------------------------------------- + # Main loop: for each iteration, read in a paragraph's worth of XSUB + # definition or XS/CPP directives into @{ $self->{line} }, then try to + # interpret those lines. + # ---------------------------------------------------------------- + + my $iters; + + PARAGRAPH: + while ($pxs->fetch_para()) { + + if (!$iters++) { + # set file/line_no after first call to fetch_para() + $self->SUPER::parse($pxs); + } + + # Process and emit any initial C-preprocessor lines and blank + # lines. Note that any non-CPP lines starting with '#' will + # already have been filtered out by fetch_para(). + # + # Also, keep track of #if/#else/#endif nesting, updating: + # $pxs->{XS_parse_stack} + # $pxs->{XS_parse_stack_top_if_idx} + # $pxs->{bootcode_early} + # $pxs->{bootcode_later} + while (@{$pxs->{line}} && $pxs->{line}[0] =~ /^#/) { + my $node = ExtUtils::ParseXS::Node::global_cpp_line->new(); + $node->parse($pxs); + push @{$self->{kids}}, $node; + + # XXX tmp prematurely emit code + $_->as_code($pxs, $self) for @{$self->{kids}}; + @{$self->{kids}} = (); + } + + next PARAGRAPH unless @{ $pxs->{line} }; + + if ( $pxs->{XS_parse_stack_top_if_idx} + && !$pxs->{XS_parse_stack}->[$pxs->{XS_parse_stack_top_if_idx}]{varname}) + { + # We are inside an #if, but have not yet #defined its xsubpp variable. + # + # At the start of every '#if ...' which is external to an XSUB, + # we emit '#define XSubPPtmpXXXX 1', for increasing XXXX. + # Later, when emitting initialisation code in places like a boot + # block, it can then be made conditional via, e.g. + # #if XSubPPtmpXXXX + # newXS(...); + # #endif + # So that only the defined XSUBs get added to the symbol table. + print "#define $pxs->{cpp_next_tmp_define} 1\n\n"; + push(@{ $pxs->{bootcode_early} }, "#if $pxs->{cpp_next_tmp_define}\n"); + push(@{ $pxs->{bootcode_later} }, "#if $pxs->{cpp_next_tmp_define}\n"); + $pxs->{XS_parse_stack}->[$pxs->{XS_parse_stack_top_if_idx}]{varname} + = $pxs->{cpp_next_tmp_define}++; + } + + # This will die on something like + # + # | CODE: + # | foo(); + # | + # |#define X + # | bar(); + # + # due to the define starting at column 1 and being preceded by a blank + # line: so the define and bar() aren't parsed as part of the CODE + # block. + + $pxs->death( + "Code is not inside a function" + ." (maybe last function was ended by a blank line " + ." followed by a statement on column one?)") + if $pxs->{line}->[0] =~ /^\s/; + + # The SCOPE keyword can appear both in file scope (just before an + # XSUB) and as an XSUB keyword. This field maintains the state of the + # former: reset it at the start of processing any file-scoped + # keywords just before the XSUB (i.e. without any blank lines, e.g. + # SCOPE: ENABLE + # int + # foo(...) + # These semantics may not be particularly sensible, but they maintain + # backwards compatibility for now. + + $pxs->{file_SCOPE_enabled} = 0; + + # Process file-scoped keywords + + # ---------------------------------------------------------------- + # Process file-scoped keywords + # ---------------------------------------------------------------- + + # Note that MODULE and TYPEMAP will already have been processed by + # fetch_para(). + # + # This loop repeatedly: skips any blank lines and then calls + # the relevant Node::FOO::parse() method if it finds any of the + # file-scoped keywords in the passed pattern. + + $self->parse_keywords( + $pxs, + undef, undef, # xsub and xbody: not needed for non XSUB keywords + undef, # implies process as many keywords as possible + "BOOT|REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK" + . "|VERSIONCHECK|INCLUDE|INCLUDE_COMMAND|SCOPE" + ); + + next PARAGRAPH unless @{ $pxs->{line} }; + + # ---------------------------------------------------------------- + # Parse and code-emit an XSUB + # ---------------------------------------------------------------- + + my $xsub = ExtUtils::ParseXS::Node::xsub->new(); + $xsub->parse($pxs) + or next PARAGRAPH; + push @{$self->{kids}}, $xsub; + + $pxs->{seen_an_XSUB} = 1; # encountered at least one XSUB + + # XXX tmp prematurely emit code + $_->as_code($pxs, $self) for @{$self->{kids}}; + @{$self->{kids}} = (); + + } # END 'PARAGRAPH' 'while' loop + # XXX tmp prematurely emit code + $_->as_code($pxs, $self) for @{$self->{kids}}; + + 1; +} + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # XXX tmp prematurely emit code + # XXX $_->as_code($pxs, $self) for @{$self->{kids}}; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::global_cpp_line; @@ -4149,11 +4335,16 @@ sub parse { $pxs->{lastline} = $_; $pxs->{lastline_no} = $.; + # XXX tmp prematurely emit code + $self->XXX_as_code($pxs); + 1; } -sub as_code { +# XXX tmp prematurely emit code +sub as_code {} +sub XXX_as_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; From a78e4348d497df9fb3b57fdde035b4b926b64969 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Wed, 15 Oct 2025 18:08:47 +0100 Subject: [PATCH 18/57] ParseXS: refactor: use Node::cpp_scope for INCLUDE (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) This commit makes the INCLUDE keyword's parse() method use a new Node::cpp_scope node as a child to hold the nodes generated by parsing the included file. So whereas before those nodes were flattened into the parent list of nodes, they now form a distinct subtree. This involves recursively calling Node::cpp_scope::parse() and fetch_para(), rather than the included file being processed in the main loop. For example, given a main file which contains two declared XSUBs, main1 and main2, and an included file with XSUBs inc1 and inc2; then previously, the parse tree would have had this nesting: XS_file cpp_scope[type=main] xsub main1 include xsub inc1 xsub inc2 xsub main2 whereas it now looks like: XS_file cpp_scope[type=main] xsub main1 include cpp_scope[type=include] xsub inc1 xsub inc2 xsub main2 This commit introduces a slight bug in terms of the order in which BOOT blocks in main and included files get emitted; this will be fixed in a few commits' time. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 3 +- .../lib/ExtUtils/ParseXS/Node.pm | 30 +++++++++++++++---- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 412ab674cecd..91d68d389e53 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -1023,9 +1023,10 @@ sub fetch_para { $self->death("Error: Unterminated '#if/#ifdef/#ifndef'") if !defined $self->{lastline} && $self->{XS_parse_stack}->[-1]{type} eq 'if'; + return 0 if not defined $self->{lastline}; # EOF + @{ $self->{line} } = (); @{ $self->{line_no} } = (); - return $self->PopFile() if not defined $self->{lastline}; # EOF if ($self->{lastline} =~ /^MODULE\s*=\s*([\w:]+)(?:\s+PACKAGE\s*=\s*([\w:]+))?(?:\s+PREFIX\s*=\s*(\S+))?\s*$/) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 1359b20d5193..61d3089e8dd1 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -793,7 +793,8 @@ package ExtUtils::ParseXS::Node::cpp_scope; # does its own fetch_para() calls). BEGIN { $build_subclass->( - 'type', # what sort of scope: 'main', 'include' or 'if' + 'type', # Str: what sort of scope: 'main', 'include' or 'if' + 'is_cmd', # Bool: for include type, it's INCLUDE_COMMAND )}; sub parse { @@ -4288,6 +4289,10 @@ sub parse { $pxs->push_parse_stack($is_cmd ? (IsPipe => 1) : ()); + #XXX + my $old_lines = $pxs->{line}; + $pxs->{line} = []; + $pxs->{in_fh} = Symbol::gensym(); # Open the new file / pipe @@ -4323,21 +4328,36 @@ sub parse { ? "$pxs->{dir}/$path" : File::Spec->catfile($pxs->{dir}, $path); } - $pxs->{in_pathname} = $path; + $pxs->{in_pathname} = $self->{file} = $path; # Prime the pump by reading the first non-blank line - - # skip leading blank lines while (readline($pxs->{in_fh})) { last unless /^\s*$/; } $pxs->{lastline} = $_; - $pxs->{lastline_no} = $.; + chomp $pxs->{lastline}; + $pxs->{lastline_no} = $self->{line_no} = $.; # XXX tmp prematurely emit code $self->XXX_as_code($pxs); + my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({ + type => 'include', + is_cmd => $self->{is_cmd}, + }); + $cpp_scope->parse($pxs); + push @{$self->{kids}}, $cpp_scope; + + $pxs->PopFile(); + #XXX + $pxs->{line} = $old_lines; + + + # XXX tmp prematurely emit code + $cpp_scope->as_code($pxs); + pop @{$self->{kids}}; + 1; } From 7add86a0af8411fbbcdc08d72c36a640c2ab816e Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Wed, 15 Oct 2025 20:00:43 +0100 Subject: [PATCH 19/57] ParseXS: refactor: rm push_parse_stack, PopFile (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Delete the ExtUtils::ParseXS methods push_parse_stack() and PopFile(), and inline their single use each in ExtUtils::ParseXS::Node::include::parse(). This is commit is just a crude cut+paste and s/$self/$pxs/g. Subsequent commits will clean the code up. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 69 ------------------- .../lib/ExtUtils/ParseXS/Node.pm | 51 +++++++++++++- 2 files changed, 48 insertions(+), 72 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 91d68d389e53..5ec42edbb85a 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -714,30 +714,6 @@ sub ST { } -# Push an entry on the @{ $self->{XS_parse_stack} } array containing the -# current file state, in preparation for INCLUDEing a new file. (Note that -# it doesn't handle type => 'if' style entries, only file entries.) - -sub push_parse_stack { - my ExtUtils::ParseXS $self = shift; - my %args = @_; - # Save the current file context. - push(@{ $self->{XS_parse_stack} }, { - type => 'file', - LastLine => $self->{lastline}, - LastLineNo => $self->{lastline_no}, - Line => $self->{line}, - LineNo => $self->{line_no}, - Filename => $self->{in_filename}, - Filepathname => $self->{in_pathname}, - Handle => $self->{in_fh}, - IsPipe => scalar($self->{in_filename} =~ /\|\s*$/), - %args, - }); - -} - - # Quote a command-line to be suitable for VMS sub QuoteArgs { @@ -774,51 +750,6 @@ sub QuoteArgs { } -# Pop the type => 'file' entry off the top of the @{ $self->{XS_parse_stack} } -# array following the end of processing an INCLUDEd file, and restore the -# former state. - -sub PopFile { - my ExtUtils::ParseXS $self = shift; - - return 0 unless $self->{XS_parse_stack}->[-1]{type} eq 'file'; - - my $data = pop @{ $self->{XS_parse_stack} }; - my $ThisFile = $self->{in_filename}; - my $isPipe = $data->{IsPipe}; - - --$self->{IncludedFiles}->{$self->{in_filename}} - unless $isPipe; - - close $self->{in_fh}; - - $self->{in_fh} = $data->{Handle}; - # $in_filename is the leafname, which for some reason is used for diagnostic - # messages, whereas $in_pathname is the full pathname, and is used for - # #line directives. - $self->{in_filename} = $data->{Filename}; - $self->{in_pathname} = $data->{Filepathname}; - $self->{lastline} = $data->{LastLine}; - $self->{lastline_no} = $data->{LastLineNo}; - @{ $self->{line} } = @{ $data->{Line} }; - @{ $self->{line_no} } = @{ $data->{LineNo} }; - - if ($isPipe and $? ) { - --$self->{lastline_no}; - print STDERR "Error reading from pipe '$ThisFile': $! in $self->{in_filename}, line $self->{lastline_no}\n" ; - exit 1; - } - - print Q(<<"EOF"); - | - |/* INCLUDE: Returning to '$self->{in_filename}' from '$ThisFile' */ - | -EOF - - return 1; -} - - # Unescape a string (typically a heredoc): # - strip leading ' |' (any number of leading spaces) # - and replace [[ and ]] diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 61d3089e8dd1..bc873cfa3df7 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -4287,7 +4287,21 @@ sub parse { } } - $pxs->push_parse_stack($is_cmd ? (IsPipe => 1) : ()); + { + # Save the current file context. + push(@{ $pxs->{XS_parse_stack} }, { + type => 'file', + LastLine => $pxs->{lastline}, + LastLineNo => $pxs->{lastline_no}, + Line => $pxs->{line}, + LineNo => $pxs->{line_no}, + Filename => $pxs->{in_filename}, + Filepathname => $pxs->{in_pathname}, + Handle => $pxs->{in_fh}, + IsPipe => $is_cmd + || scalar($pxs->{in_filename} =~ /\|\s*$/), + }); + } #XXX my $old_lines = $pxs->{line}; @@ -4349,11 +4363,42 @@ sub parse { $cpp_scope->parse($pxs); push @{$self->{kids}}, $cpp_scope; - $pxs->PopFile(); + { + my $data = pop @{ $pxs->{XS_parse_stack} }; + my $ThisFile = $pxs->{in_filename}; + my $isPipe = $data->{IsPipe}; + + --$pxs->{IncludedFiles}->{$pxs->{in_filename}} + unless $isPipe; + + close $pxs->{in_fh}; + + $pxs->{in_fh} = $data->{Handle}; + # $in_filename is the leafname, which for some reason is used for diagnostic + # messages, whereas $in_pathname is the full pathname, and is used for + # #line directives. + $pxs->{in_filename} = $data->{Filename}; + $pxs->{in_pathname} = $data->{Filepathname}; + $pxs->{lastline} = $data->{LastLine}; + $pxs->{lastline_no} = $data->{LastLineNo}; + @{ $pxs->{line} } = @{ $data->{Line} }; + @{ $pxs->{line_no} } = @{ $data->{LineNo} }; + + if ($isPipe and $? ) { + --$pxs->{lastline_no}; + print STDERR "Error reading from pipe '$ThisFile': $! in $pxs->{in_filename}, line $pxs->{lastline_no}\n" ; + exit 1; + } + + print ExtUtils::ParseXS::Q(<<"EOF"); + | + |/* INCLUDE: Returning to '$self->{old_filename}' from '$ThisFile' */ + | +EOF + } #XXX $pxs->{line} = $old_lines; - # XXX tmp prematurely emit code $cpp_scope->as_code($pxs); pop @{$self->{kids}}; From 9753f419bbe4a035bfb7167e8bf651af79fa8d53 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 17 Oct 2025 06:38:26 +0100 Subject: [PATCH 20/57] ParseXS: refactor: INCLUDE: save state locally (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Previously whenever INCLUDE or INCLUDE_COMMAND parsed a new file, the old state (filename, old lines etc) were saved in a new entry pushed onto the XS_parse_stack. The previous commit inlined this push and pull into Node::include::parse(), which now looked a bit like: sub parse() { ... push @{$pxs->{XS_parse_stack}}, { type =>' file', Filename => $pxs->{in_filename}, ... } ... open new file and parse ... my $data = pop @{$pxs->{XS_parse_stack}}; $pxs->{in_pathname} = $data->{Filepathname}; ... } The intention over the next few commits is to eliminate XS_parse_stack completely: the functionality of the stack will be replaced by a nested series of Node::cpp_scope nodes within the AST. For now, this commit still pushes/pops an entry (so that #if / #endif scope analysing continues to work), but saves all the values in a local var. So after this commit, Node::include::parse() looks like: sub parse() { ... push @{$pxs->{XS_parse_stack}}, { type =>' file' }; my @saved = ($pxs->{in_filename}, ...); ... open new file and parse ... pop @{$pxs->{XS_parse_stack}}; ($pxs->{in_filename}, ...) = @saved; } The rest of this commit is just general tidying up and reordering of some of the code within parse() without change in functionality. Except I also added $pxs->{line_no} = []; in addition to the already present $pxs->{line} = []; --- .../lib/ExtUtils/ParseXS/Node.pm | 89 ++++++++----------- 1 file changed, 37 insertions(+), 52 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index bc873cfa3df7..f7201c6d6009 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -4287,30 +4287,24 @@ sub parse { } } - { - # Save the current file context. - push(@{ $pxs->{XS_parse_stack} }, { - type => 'file', - LastLine => $pxs->{lastline}, - LastLineNo => $pxs->{lastline_no}, - Line => $pxs->{line}, - LineNo => $pxs->{line_no}, - Filename => $pxs->{in_filename}, - Filepathname => $pxs->{in_pathname}, - Handle => $pxs->{in_fh}, - IsPipe => $is_cmd - || scalar($pxs->{in_filename} =~ /\|\s*$/), - }); - } + # XXX tmp: maintain state for #if scope processing + push @{$pxs->{XS_parse_stack}}, { type => 'file' }; - #XXX - my $old_lines = $pxs->{line}; - $pxs->{line} = []; + # Save the current file context. - $pxs->{in_fh} = Symbol::gensym(); + my @save_keys = qw(in_fh in_filename in_pathname + lastline lastline_no line line_no); + my @saved = @$pxs{@save_keys}; + + my $isPipe = $is_cmd || $pxs->{in_filename} =~ /\|\s*$/; + + $pxs->{line} = []; + $pxs->{line_no} = []; # Open the new file / pipe + $pxs->{in_fh} = Symbol::gensym(); + if ($is_cmd) { # Expand the special token '$^X' into the full path of the # currently running perl interpreter @@ -4356,6 +4350,8 @@ sub parse { # XXX tmp prematurely emit code $self->XXX_as_code($pxs); + # Parse included file + my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({ type => 'include', is_cmd => $self->{is_cmd}, @@ -4363,41 +4359,30 @@ sub parse { $cpp_scope->parse($pxs); push @{$self->{kids}}, $cpp_scope; - { - my $data = pop @{ $pxs->{XS_parse_stack} }; - my $ThisFile = $pxs->{in_filename}; - my $isPipe = $data->{IsPipe}; - - --$pxs->{IncludedFiles}->{$pxs->{in_filename}} - unless $isPipe; - - close $pxs->{in_fh}; - - $pxs->{in_fh} = $data->{Handle}; - # $in_filename is the leafname, which for some reason is used for diagnostic - # messages, whereas $in_pathname is the full pathname, and is used for - # #line directives. - $pxs->{in_filename} = $data->{Filename}; - $pxs->{in_pathname} = $data->{Filepathname}; - $pxs->{lastline} = $data->{LastLine}; - $pxs->{lastline_no} = $data->{LastLineNo}; - @{ $pxs->{line} } = @{ $data->{Line} }; - @{ $pxs->{line_no} } = @{ $data->{LineNo} }; - - if ($isPipe and $? ) { - --$pxs->{lastline_no}; - print STDERR "Error reading from pipe '$ThisFile': $! in $pxs->{in_filename}, line $pxs->{lastline_no}\n" ; - exit 1; - } + --$pxs->{IncludedFiles}->{$pxs->{in_filename}} + unless $isPipe; - print ExtUtils::ParseXS::Q(<<"EOF"); - | - |/* INCLUDE: Returning to '$self->{old_filename}' from '$ThisFile' */ - | -EOF + close $pxs->{in_fh}; + + # Restore the current file context. + + @$pxs{@save_keys} = @saved; + + if ($isPipe and $? ) { + --$pxs->{lastline_no}; + print STDERR "Error reading from pipe '$self->{inc_filename}': $! in $pxs->{in_filename}, line $pxs->{lastline_no}\n" ; + exit 1; } - #XXX - $pxs->{line} = $old_lines; + + # XXX tmp: maintain state for #if scope processing + pop @{$pxs->{XS_parse_stack}}; + + # XXX this needs to go in as_code() + print ExtUtils::ParseXS::Q(<<"EOF"); + | + |/* INCLUDE: Returning to '$self->{old_filename}' from '$self->{inc_filename}' */ + | +EOF # XXX tmp prematurely emit code $cpp_scope->as_code($pxs); From eb877633e5cdd876d4f9a7fab0adde6096534d3b Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 18 Oct 2025 11:24:24 +0100 Subject: [PATCH 21/57] ParseXS: refactor: global_cpp_line: parse CPP line (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Make the recently-added Node::global_cpp_line::parse() method split a C preprocessor line into its components and set some flags indicating whether its part of a conditional and if so, an if or variant. Save these values as new fields in the node. They will come in useful shortly. --- .../lib/ExtUtils/ParseXS/Node.pm | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index f7201c6d6009..b741d5c479a6 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -952,7 +952,12 @@ package ExtUtils::ParseXS::Node::global_cpp_line; # continuations). BEGIN { $build_subclass->( - 'cpp_line', # the text of the "# foo" CPP line + 'cpp_line', # Str: the full text of the "# foo" CPP line + 'directive', # Str: one of 'define', 'endif' etc + 'rest', # Str: the rest of the line following the directive + 'is_cond', # Bool: it's an ifdef/else/endif etc + 'is_if', # Bool: it's an if/ifdef/ifndef + 'is_endif' # Bool: it's an endif )}; sub parse { @@ -961,14 +966,22 @@ sub parse { $self->SUPER::parse($pxs); # set file/line_no - my $ln = $self->{cpp_line} = shift(@{$pxs->{line}}); + my $line = shift @{$pxs->{line}}; + + my ($directive, $rest) = $line =~ /^ \# \s* (\w+) (?:\s+ (.*) \s* $)?/sx + or $pxs->death("Internal error: can't parse CPP line: $line\n"); + $rest = '' unless defined $rest; + my $is_cond = $directive =~ /^(if|ifdef|ifndef|elsif|else|endif)$/; + my $is_if = $directive =~ /^(if|ifdef|ifndef)$/; + my $is_endif = $directive =~ /^endif$/; + @$self{qw(cpp_line directive rest is_cond is_if is_endif)} + = ($line, $directive, $rest, $is_cond, $is_if, $is_endif); - # Update global tracking of *conditional* CPP directives;s + # Update global tracking of *conditional* CPP directives; # i.e. #if/#else etc - return 1 unless $ln =~ /^\#\s*((if)(?:n?def)?|elsif|else|endif)\b/; - my $directive = $+; # one of "if", "elsif", "else", "endif" - $pxs->analyze_preprocessor_statement($directive); + return 1 unless $is_cond; + $pxs->analyze_preprocessor_statement($is_if ? 'if' : $directive); 1; } From df8d3d7e961875c2b6d648cc0afd53aa9de9fc7e Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 18 Oct 2025 11:59:12 +0100 Subject: [PATCH 22/57] ParseXS: fix #elif processing The XS parser is supposed to be able to do basic processing of file-scoped C preprocessor directives: in particular, to handle alternative XSUB declarations; e.g.: #if X void foo(short i) #elif Y void foo(long i) #else void foo(int i) #endif But due to a bug present since this feature was added in perl5.003 (and which no one has complained about for 29 years), the XS parser was incorrectly looking for the string '#elsif' rather than '#elif'. So the above would fail, but something using only 'else' would compile okay: #if X void foo(short i) #else void foo(int i) #endif This commit fixes the bug and adds some tests. --- .../lib/ExtUtils/ParseXS/Node.pm | 2 +- dist/ExtUtils-ParseXS/t/001-basic.t | 81 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index b741d5c479a6..b889e2bede1c 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -971,7 +971,7 @@ sub parse { my ($directive, $rest) = $line =~ /^ \# \s* (\w+) (?:\s+ (.*) \s* $)?/sx or $pxs->death("Internal error: can't parse CPP line: $line\n"); $rest = '' unless defined $rest; - my $is_cond = $directive =~ /^(if|ifdef|ifndef|elsif|else|endif)$/; + my $is_cond = $directive =~ /^(if|ifdef|ifndef|elif|else|endif)$/; my $is_if = $directive =~ /^(if|ifdef|ifndef)$/; my $is_endif = $directive =~ /^endif$/; @$self{qw(cpp_line directive rest is_cond is_if is_endif)} diff --git a/dist/ExtUtils-ParseXS/t/001-basic.t b/dist/ExtUtils-ParseXS/t/001-basic.t index 5e62d332da66..13ae70c1f054 100644 --- a/dist/ExtUtils-ParseXS/t/001-basic.t +++ b/dist/ExtUtils-ParseXS/t/001-basic.t @@ -5232,6 +5232,87 @@ EOF test_many($preamble, 'XS_Foo_', \@test_fns); } +{ + # Test C-preprocessor parsing + + my $preamble = Q(<<'EOF'); + |MODULE = Foo PACKAGE = Foo + | + |PROTOTYPES: DISABLE + | +EOF + + my @test_fns = ( + [ + "CPP basic", + [ Q(<<'EOF') ], + |#ifdef USE_SHORT + | + |short foo() + | + |#elif USE_LONG + | + |long foo() + | + |#else + | + |int foo() + | + |#endif +EOF + [ 0, 0, qr{ + ^ \#ifdef\ USE_SHORT \n + ^ \#define\ XSubPPtmpAAAA\ 1 \n + + .* + + ^ \s* short \s+ RETVAL; \s* \n + + .* + + ^ \#elif\ USE_LONG \n + ^ \#define\ XSubPPtmpAAAB\ 1 \n + + .* + + ^ \s* long \s+ RETVAL; \s* \n + + .* + + ^ \#else \n + ^ \#define\ XSubPPtmpAAAC\ 1 \n + + .* + + ^ \s* int \s+ RETVAL; \s* \n + + .* + ^ \#endif \n + + }smx, + "has corrrect XSubPPtmpAAAA etc definitions" + ], + + [ 0, 0, qr{ + ^ \#if\ XSubPPtmpAAAA \n + .* newXS .* + ^ \#endif \n + ^ \#if\ XSubPPtmpAAAB \n + .* newXS .* + ^ \#endif \n + ^ \#if\ XSubPPtmpAAAC \n + .* newXS .* + ^ \#endif \n + + }smx, + "has corrrect XSubPPtmpAAAA etc boot usage" + ], + ], + ); + + test_many($preamble, undef, \@test_fns); +} + { # Check for correct package name; i.e. use the current package name, From 21967f3fcfe525b8b5d683df21aba6ab9a8a0e7e Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 21 Oct 2025 13:43:03 +0100 Subject: [PATCH 23/57] ParseXS: add more #if/#endif tests Add more tests for how the XS parser processes conditional C preprocessor directives in file scope. Note that some of these tests will start to fail after the next commit, which re-implements the processing. The fixups to these tests will indicate what has changed. --- dist/ExtUtils-ParseXS/t/001-basic.t | 202 ++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) diff --git a/dist/ExtUtils-ParseXS/t/001-basic.t b/dist/ExtUtils-ParseXS/t/001-basic.t index 13ae70c1f054..8c805196f382 100644 --- a/dist/ExtUtils-ParseXS/t/001-basic.t +++ b/dist/ExtUtils-ParseXS/t/001-basic.t @@ -5308,6 +5308,208 @@ EOF "has corrrect XSubPPtmpAAAA etc boot usage" ], ], + + [ + "CPP two independent branches", + [ Q(<<'EOF') ], + |#ifdef USE_SHORT + |short foo() + |#endif + |#if USE_LONG + |long foo() + |#endif +EOF + [ 1, 0, qr{Warning: duplicate function definition}, + "got expected warning" ], + ], + + [ + "CPP one branch, one main", + [ Q(<<'EOF') ], + |#ifdef USE_SHORT + |short foo() + |#endif + |long foo() +EOF + [ 1, 0, qr{Warning: duplicate function definition}, + "got expected warning" ], + ], + + [ + "CPP two in one branch", + [ Q(<<'EOF') ], + |#ifdef USE_SHORT + |short foo() + | + |long foo() + |#endif +EOF + [ 1, 0, qr{Warning: duplicate function definition}, + "got expected warning" ], + ], + + [ + "CPP two in main", + [ Q(<<'EOF') ], + |short foo() + | + |long foo() +EOF + [ 1, 0, qr{Warning: duplicate function definition}, + "got expected warning" ], + ], + + [ + "CPP nested conditions", + [ Q(<<'EOF') ], + |#ifdef C1 + | + |short foo() + | + |#ifdef C2 + | + |long foo() + | + |#endif + | + |int foo() + | + |#endif +EOF + [ 1, 0, qr{Warning: duplicate function definition}, + "got expected warning" ], + ], + + [ + "CPP nested conditions, different fns", + [ Q(<<'EOF') ], + |#ifdef C1 + | + |short foo() + | + |#ifdef C2 + | + |long bar() + | + |#endif + | + |int baz() + | + |#endif +EOF + [ 0, 0, qr{ + ^ \#ifdef\ C1 \n + ^ \#define\ XSubPPtmpAAAA\ 1 \n + .* + ^ \s* short \s+ RETVAL; \s* \n + .* + ^ \#ifdef\ C2 \n + ^ \#define\ XSubPPtmpAAAB\ 1 \n + .* + ^ \s* long \s+ RETVAL; \s* \n + .* + ^ \#endif \n + .* + ^ \s* int \s+ RETVAL; \s* \n + .* + ^ \#endif \n + }smx, + "ifdefs in order" ], + ], + + [ + "CPP with indentation", + [ Q(<<'EOF') ], + |#ifdef C1 + |# ifdef C2 + |long bar() + |# endif + |#endif +EOF + [ 0, 0, qr{ + ^ \#ifdef\ C1 \n + ^ \#\ \ ifdef\ C2 \n + ^ \#define\ XSubPPtmpAAAA\ 1 \n + .* + ^ \s* long \s+ RETVAL; \s* \n + .* + ^ \#\ \ endif \n + ^ \#endif \n + }smx, + "ifdefs in order" ], + ], + + [ + "CPP: trivial branch", + [ Q(<<'EOF') ], + |#ifdef C1 + |#define BLAH1 + |#endif +EOF + [ 0, 1, qr{XSubPPtmpAAA}, "no guard" ], + ], + + [ + "CPP: guard and other CPP ordering", + [ Q(<<'EOF') ], + |#ifdef C1 + |#define BLAH1 + | + |short foo() + | + |#endif +EOF + + [ 0, 0, qr{ + ^ \#ifdef\ C1 \n + .* + ^ \#define\ BLAH1\n + .* + ^ \#define\ XSubPPtmpAAAA\ 1 \n + .* + ^ \s* short \s+ RETVAL; \s* \n + .* + ^ \#endif \n + }smx, + "ifdefs in order" ], + ], + + [ + "CPP balanced else", + [ Q(<<'EOF') ], + |#else + | + |short foo() +EOF + [ 1, 0, qr{Error: 'else' with no matching 'if'}, + "got expected err" ], + ], + + [ + "CPP balanced if", + [ Q(<<'EOF') ], + |#ifdef + | + |short foo() +EOF + [ 1, 0, qr{Error: Unterminated '#if/#ifdef/#ifndef'}, + "got expected err" ], + ], + + [ + "stray CPP / indented XSUB", + [ Q(<<'EOF') ], + |#define FOO + | int +EOF + [ 1, 0, qr{\QCode is not inside a function\E + \Q (maybe last function was ended by a blank line \E + \Q followed by a statement on column one?)\E + }x, + "got expected err" ], + ], + + ); test_many($preamble, undef, \@test_fns); From a085cc8c93b855d465b536deb9459103cd6dc0b7 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 18 Oct 2025 15:58:13 +0100 Subject: [PATCH 24/57] ParseXS: refactor: reimplement #if processing (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) This commit includes a few significant changes to ParseXS which are not separable into individual commits. Some of these also involve minor changes in behaviour. Hence a rather long commit message, using pseudo-POD headers to break things up a bit and give a bit of structure. =head1 Background The XS parser does some basic analysis of file-scoped C preprocessor conditional directives such as #if, #elif, #endif. This is mainly for the purpose of handling otherwise duplicate XSUBs, e.g. #ifdef USE_2ARG int foo(int i, int j) #else int foo(int i) #endif For the above, the parser suppresses any 'duplicate' warning, and adds a 'guard' define at the start of each branch, such as the following in the emitted C code: #ifdef USE_2ARG #define XSubPPtmpAAAA 1 XS_EUPXS(XS_Foo__Bar_foo) { ... } #else #define XSubPPtmpAAAB 1 XS_EUPXS(XS_Foo__Bar_foo) { ... } #endif It then uses those guards in the boot code to ensure that only the correct XSUB(s) (and aliases etc) are registered: #if XSubPPtmpAAAA newXS_deffile("Foo::Bar::foo", XS_Foo__Bar_foo); #endif #if XSubPPtmpAAAB newXS_deffile("Foo::Bar::foo", XS_Foo__Bar_foo); #endif =head1 The changes in this commit =head2 Add a Node::cpp_scope node between branches The XS code above was, before this commit, compiled into an AST which looked something like: XS_file preamble C_part postamble cpp_scope[type=main] global_cpp_line [if] xsub global_cpp_line [else] xsub global_cpp_line [endif] After this commit, all nodes between each conditional CPP line are now hung off a new cpp_scope node: XS_file preamble C_part postamble cpp_scope[type=main] global_cpp_line [if] cpp_scope[type=if] xsub global_cpp_line [else] cpp_scope[type=if] xsub global_cpp_line [endif] =head2 The logic to do the #if/#endif processing is completely reimplemented from scratch. Formerly, @{pxs->{XS_parse_stack}} was a stack that was pushed or popped for every INCLUDE and #if/#endif, and the state about conditionals was contained in that stack. After this commit, the state is instead implied by the structure of the AST; in particular, in the currently active cpp_scope node. The cpp_scope::parse() method has the fetch_para() loop, and each time it encounters an #if etc it, it creates a child cpp_scope node and recursively calls its parse() method. That method returns when it sees the end of a scope, such as encountering an #elif, #else or #endif or EOF. Now, virtually all of the logic involved in maintaining '#if' state is in cpp_scope::parse(), and all the logic to emit '#define XSubPPtmpAAAB 1' etc code is in cpp_scope::as_code(). This is in contrast to previously, where the logic and code generation were liberally spread around in ParseXS.pm and Node.pm To facilitate this change, two fields have been added to the cpp_scope node type: guard_name - the 'XSubPPtmpAAAB' or whatever to use for this branch; seen_xsubs - hash of XSUB names seen in this branch. After this commit, the data in @{pxs->{XS_parse_stack}} isn't used; the next commit will remove it completely. =head2 Almost all code is now generated after parsing is finished. At the start of this git branch, code was emitted after each XSUB was parsed. Now (with a few residual exceptions to be addressed shortly) the whole file is first parsed into a single AST, and *then* the top-level as_code() method is called to generate the entire C file from the AST. Some of the changes in this commit represent the removal of temporary shims that preserved the old parse/emit ordering, and which are now no longer required. =head1 Visible changes in behaviour Although this commit is mostly intended to be just a refactoring, some changes in behaviour (in terms of what C code is generated and what warnings are issued) has occurred. These are made visible in the changes this commit makes to t/001-basic.t, which deliberately contains tests for the behaviours which were about to change. The changes are: =head2 fewer warnings The "Warning: duplicate function definition" warning is now emitted less often, to avoid false positives. In particular, this fixes GH 19661. That ticket concerned code like #if C1 void foo() #endif #if C2 void foo() #endif The logic was such that only one of C1 and C2 could ever be true (basically DBD:MariaDB was adding a missing DBI function if the installed DBI was an old one). But the XS parser can't know that. So it used to warn. The big change in this commit is to now warn only for two identically named XSUBs which appear in the *same* cpp_scope. This removes false positives. It may add false negatives, but that is fairly harmless, as it just means that bad C code will be generated, and the C compiler will eventually complain anyway. The guard definitions are still generated as before. This change in behaviour was intentional, but was also the easiest: it meant just checking the seen_xsubs hash for the current cpp_scope node. For more complex behaviour (if you are considering implementing it), it may require having the concept of a 'current' cpp_scope, e.g. $pxs->{cur_cpp_scope}, plus a chain of pointers from the current scope back to outer ones. (This is slightly problematic, as weak refs aren't available, as Scalar::Util may not be built yet - so it might require higher level node(s) to have a destructor which searches for those links and deletes them first) Once the chain is in place, then it's possible for a newly-added XSUB to update the parent seen_xsubs hashes too, etc. =head2 The sequencing of XSubPPtmpAAA, XSubPPtmpAAB etc may change Previously, the sequence of the guard defines was strictly in the order they appear in the C file. Now, ones in a nested scope come before outer ones, e.g. #ifdef C1 #define XSubPPtmpAAB 1 # ifdef C2 # define XSubPPtmpAAA 1 ... This should be harmless. =head2 XSubPPtmpAAA etc may be placed slightly differently Previously in something like #ifdef C1 #define X 1 #define Y 2 the guard would be added after any other CPP lines; the above would cause this C code to be emitted: #ifdef C1 #define X 1 #define Y 2 #define XSubPPtmpAAA 1 but is now: #ifdef C1 #define XSubPPtmpAAA 1 #define X 1 #define Y 2 i.e.the guard now always comes *immediately* after the conditional at the start of the branch it will guard. This should be harmless. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 11 +- .../lib/ExtUtils/ParseXS/Node.pm | 200 +++++++++++------- dist/ExtUtils-ParseXS/t/001-basic.t | 46 ++-- 3 files changed, 159 insertions(+), 98 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 5ec42edbb85a..718681d455f8 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -432,12 +432,11 @@ sub process_file { $self->{XS_parse_stack_top_if_idx} = 0; $self->{cpp_next_tmp_define} = 'XSubPPtmpAAAA'; - $AST->as_code($self); - - my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({type => 'main'}); $cpp_scope->parse($self); - $cpp_scope->as_code($self); + push @{$AST->{kids}}, $cpp_scope; + + $AST->as_code($self); # ---------------------------------------------------------------- # End of main loop and at EOF: all paragraphs (and thus XSUBs) have now @@ -950,10 +949,6 @@ sub _maybe_parse_typemap_block { sub fetch_para { my ExtUtils::ParseXS $self = shift; - # unmatched #if at EOF - $self->death("Error: Unterminated '#if/#ifdef/#ifndef'") - if !defined $self->{lastline} && $self->{XS_parse_stack}->[-1]{type} eq 'if'; - return 0 if not defined $self->{lastline}; # EOF @{ $self->{line} } = (); diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index b889e2bede1c..ae6cfae98518 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -793,71 +793,108 @@ package ExtUtils::ParseXS::Node::cpp_scope; # does its own fetch_para() calls). BEGIN { $build_subclass->( - 'type', # Str: what sort of scope: 'main', 'include' or 'if' - 'is_cmd', # Bool: for include type, it's INCLUDE_COMMAND + 'type', # Str: what sort of scope: 'main', 'include' or 'if' + 'is_cmd', # Bool: for include type, it's INCLUDE_COMMAND + 'guard_name', # Str: the name of the XSubPPtmpAAAA guard define + 'seen_xsubs', # Hash: the names of any XSUBs seen in this scope )}; sub parse { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; - # ---------------------------------------------------------------- # Main loop: for each iteration, read in a paragraph's worth of XSUB # definition or XS/CPP directives into @{ $self->{line} }, then try to # interpret those lines. # ---------------------------------------------------------------- - my $iters; - PARAGRAPH: - while ($pxs->fetch_para()) { + while ( ($pxs->{line} && @{$pxs->{line}}) || $pxs->fetch_para()) { - if (!$iters++) { - # set file/line_no after first call to fetch_para() + if ( !defined($self->{line_no}) + && defined $pxs->{line_no}[0] + ) { + # set file/line_no after line number info is available: + # typically after the first call to fetch_para() $self->SUPER::parse($pxs); } - # Process and emit any initial C-preprocessor lines and blank + # Process any initial C-preprocessor lines and blank # lines. Note that any non-CPP lines starting with '#' will # already have been filtered out by fetch_para(). # - # Also, keep track of #if/#else/#endif nesting, updating: - # $pxs->{XS_parse_stack} - # $pxs->{XS_parse_stack_top_if_idx} - # $pxs->{bootcode_early} - # $pxs->{bootcode_later} + # Also, keep track of #if/#else/#endif nesting. + while (@{$pxs->{line}} && $pxs->{line}[0] =~ /^#/) { my $node = ExtUtils::ParseXS::Node::global_cpp_line->new(); $node->parse($pxs); push @{$self->{kids}}, $node; - # XXX tmp prematurely emit code - $_->as_code($pxs, $self) for @{$self->{kids}}; - @{$self->{kids}} = (); - } + next unless $node->{is_cond}; - next PARAGRAPH unless @{ $pxs->{line} }; + # Parse branches of a CPP conditionals within a nested scope - if ( $pxs->{XS_parse_stack_top_if_idx} - && !$pxs->{XS_parse_stack}->[$pxs->{XS_parse_stack_top_if_idx}]{varname}) - { - # We are inside an #if, but have not yet #defined its xsubpp variable. - # - # At the start of every '#if ...' which is external to an XSUB, - # we emit '#define XSubPPtmpXXXX 1', for increasing XXXX. - # Later, when emitting initialisation code in places like a boot - # block, it can then be made conditional via, e.g. - # #if XSubPPtmpXXXX - # newXS(...); - # #endif - # So that only the defined XSUBs get added to the symbol table. - print "#define $pxs->{cpp_next_tmp_define} 1\n\n"; - push(@{ $pxs->{bootcode_early} }, "#if $pxs->{cpp_next_tmp_define}\n"); - push(@{ $pxs->{bootcode_later} }, "#if $pxs->{cpp_next_tmp_define}\n"); - $pxs->{XS_parse_stack}->[$pxs->{XS_parse_stack_top_if_idx}]{varname} - = $pxs->{cpp_next_tmp_define}++; - } + if (not $node->{is_if}) { + $pxs->death("Error: '". $node->{directive} + . "' with no matching 'if'") + if $self->{type} ne 'if'; + + # we should already be within a nested scope; this + # CPP condition keyword just ends that scope. Our + # (recursive) caller will handle processing any further + # branches if it's an elif/else rather than endif + + return 1 + } + + # So it's an 'if'/'ifdef' etc node. Start a new + # Node::cpp_scope sub-parse to handle that branch and then any + # other branches of the same conditional. + + while (1) { + # Parse the branch in new scope + my $scope = ExtUtils::ParseXS::Node::cpp_scope->new( + {type => 'if'}); + $scope->parse($pxs) + or return 1; + + # Sub-parsing of that branch should have terminated + # at an elif/endif line rather than falling off the + # end of the file + my $last = $scope->{kids}[-1]; + unless ( + defined $last + && $last->isa( + 'ExtUtils::ParseXS::Node::global_cpp_line') + && $last->{is_cond} + && !$last->{is_if} + ) { + $pxs->death("Error: Unterminated '#if/#ifdef/#ifndef'") + } + + # Move the CPP line which terminated the branch from + # the end of the inner scope to the current scope + pop @{$scope->{kids}}; + push @{$self->{kids}}, $scope, $last; + + if (grep { ref($_) !~ /::global_cpp_line$/ } + @{$scope->{kids}} ) + { + # the inner scope has some content, so needs + # a '#define XSubPPtmpAAAA 1'-style guard + $scope->{guard_name} = $pxs->{cpp_next_tmp_define}++; + } + + # any more branches to process of current if? + last if $last->{is_endif}; + } # while 1 + } # while /^#/ + + # skip blank lines + shift @{$pxs->{line}} while @{$pxs->{line}} && $pxs->{line}[0] !~ /\S/; + + next PARAGRAPH unless @{ $pxs->{line} }; # This will die on something like # @@ -910,6 +947,9 @@ sub parse { . "|VERSIONCHECK|INCLUDE|INCLUDE_COMMAND|SCOPE" ); + # skip blank lines + shift @{$pxs->{line}} while @{$pxs->{line}} && $pxs->{line}[0] !~ /\S/; + next PARAGRAPH unless @{ $pxs->{line} }; # ---------------------------------------------------------------- @@ -921,15 +961,28 @@ sub parse { or next PARAGRAPH; push @{$self->{kids}}, $xsub; - $pxs->{seen_an_XSUB} = 1; # encountered at least one XSUB + # Check for a duplicate function definition in this scope + { + my $name = $xsub->{decl}{full_C_name}; + if ($self->{seen_xsubs}{$name}) { + (my $short = $name) =~ s/^$pxs->{PACKAGE_C_name}_//; + $pxs->Warn( "Warning: duplicate function definition " + . "'$short' detected"); + } + $self->{seen_xsubs}{$name} = 1; + } + - # XXX tmp prematurely emit code - $_->as_code($pxs, $self) for @{$self->{kids}}; - @{$self->{kids}} = (); + # should just be a fake END token left + die "Internal error: bad END token\n" + unless $pxs->{line} + && @{$pxs->{line}} == 1 + && $pxs->{line}[0] eq "$ExtUtils::ParseXS::END:"; + pop @{$pxs->{line}}; + + $pxs->{seen_an_XSUB} = 1; # encountered at least one XSUB } # END 'PARAGRAPH' 'while' loop - # XXX tmp prematurely emit code - $_->as_code($pxs, $self) for @{$self->{kids}}; 1; } @@ -938,8 +991,21 @@ sub as_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; - # XXX tmp prematurely emit code - # XXX $_->as_code($pxs, $self) for @{$self->{kids}}; + my $g = $self->{guard_name}; + + if (defined $g) { + print "#define $g 1\n\n" if defined $g; + push @{$pxs->{bootcode_early}}, "#if $g\n"; + push @{$pxs->{bootcode_later}}, "#if $g\n"; + } + + $_->as_code($pxs, $self) for @{$self->{kids}}; + + if (defined $g) { + push @{$pxs->{bootcode_early}}, "#endif\n"; + push @{$pxs->{bootcode_later}}, "#endif\n"; + } + } @@ -977,12 +1043,6 @@ sub parse { @$self{qw(cpp_line directive rest is_cond is_if is_endif)} = ($line, $directive, $rest, $is_cond, $is_if, $is_endif); - # Update global tracking of *conditional* CPP directives; - # i.e. #if/#else etc - - return 1 unless $is_cond; - $pxs->analyze_preprocessor_statement($is_if ? 'if' : $directive); - 1; } @@ -1648,15 +1708,6 @@ sub parse { # $self->{full_C_name} "BAR__BAZ_bar" # $params_text "param1, param2, param3" - # Check for a duplicate function definition, but ignoring multiple - # definitions within the branches of an #if/#else/#endif - for my $tmp (@{ $pxs->{XS_parse_stack} }) { - next unless defined $tmp->{functions}{$full_cname}; - $pxs->Warn( "Warning: duplicate function definition " - . "'$clean_func_name' detected"); - last; - } - # mark C function name as used $pxs->{XS_parse_stack}-> [$pxs->{XS_parse_stack_top_if_idx}]{functions}{$full_cname}++; @@ -4360,9 +4411,6 @@ sub parse { chomp $pxs->{lastline}; $pxs->{lastline_no} = $self->{line_no} = $.; - # XXX tmp prematurely emit code - $self->XXX_as_code($pxs); - # Parse included file my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({ @@ -4390,24 +4438,11 @@ sub parse { # XXX tmp: maintain state for #if scope processing pop @{$pxs->{XS_parse_stack}}; - # XXX this needs to go in as_code() - print ExtUtils::ParseXS::Q(<<"EOF"); - | - |/* INCLUDE: Returning to '$self->{old_filename}' from '$self->{inc_filename}' */ - | -EOF - - # XXX tmp prematurely emit code - $cpp_scope->as_code($pxs); - pop @{$self->{kids}}; - 1; } -# XXX tmp prematurely emit code -sub as_code {} -sub XXX_as_code { +sub as_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; @@ -4422,6 +4457,15 @@ sub XXX_as_code { |/* $comment */ | EOF + + $_->as_code($pxs) for @{$self->{kids}}; + + print ExtUtils::ParseXS::Q(<<"EOF"); + | + |/* INCLUDE: Returning to '$self->{old_filename}' from '$self->{inc_filename}' */ + | +EOF + } diff --git a/dist/ExtUtils-ParseXS/t/001-basic.t b/dist/ExtUtils-ParseXS/t/001-basic.t index 8c805196f382..09b49bff2b6f 100644 --- a/dist/ExtUtils-ParseXS/t/001-basic.t +++ b/dist/ExtUtils-ParseXS/t/001-basic.t @@ -318,8 +318,7 @@ like $stderr, '/Error: no INPUT definition/', "Exercise typemap error"; $stderr = PrimitiveCapture::capture_stderr(sub { $pxs->process_file(filename => $filename, output => \*FH, prototypes => 1); }); - TODO: { - local $TODO = 'GH 19661'; + { unlike $stderr, qr/Warning: duplicate function definition 'do' detected in \Q$filename\E/, "No 'duplicate function definition' warning observed in $filename"; @@ -330,8 +329,7 @@ like $stderr, '/Error: no INPUT definition/', "Exercise typemap error"; $stderr = PrimitiveCapture::capture_stderr(sub { $pxs->process_file(filename => $filename, output => \*FH, prototypes => 1); }); - TODO: { - local $TODO = 'GH 19661'; + { unlike $stderr, qr/Warning: duplicate function definition 'do' detected in \Q$filename\E/, "No 'duplicate function definition' warning observed in $filename"; @@ -5319,8 +5317,21 @@ EOF |long foo() |#endif EOF - [ 1, 0, qr{Warning: duplicate function definition}, - "got expected warning" ], + [ 0, 0, qr{ + ^ \#ifdef\ USE_SHORT \n + ^ \#define\ XSubPPtmpAAAA\ 1 \n + .* + ^ \s* short \s+ RETVAL; \s* \n + .* + ^ \#endif \n + ^ \#if\ USE_LONG \n + ^ \#define\ XSubPPtmpAAAB\ 1 \n + .* + ^ \s* long \s+ RETVAL; \s* \n + .* + ^ \#endif \n + }smx, + "ifdefs in order" ], ], [ @@ -5331,8 +5342,17 @@ EOF |#endif |long foo() EOF - [ 1, 0, qr{Warning: duplicate function definition}, - "got expected warning" ], + [ 0, 0, qr{ + ^ \#ifdef\ USE_SHORT \n + ^ \#define\ XSubPPtmpAAAA\ 1 \n + .* + ^ \s* short \s+ RETVAL; \s* \n + .* + ^ \#endif \n + .* + ^ \s* long \s+ RETVAL; \s* \n + }smx, + "ifdefs in order" ], ], [ @@ -5399,12 +5419,12 @@ EOF EOF [ 0, 0, qr{ ^ \#ifdef\ C1 \n - ^ \#define\ XSubPPtmpAAAA\ 1 \n + ^ \#define\ XSubPPtmpAAAB\ 1 \n .* ^ \s* short \s+ RETVAL; \s* \n .* ^ \#ifdef\ C2 \n - ^ \#define\ XSubPPtmpAAAB\ 1 \n + ^ \#define\ XSubPPtmpAAAA\ 1 \n .* ^ \s* long \s+ RETVAL; \s* \n .* @@ -5428,6 +5448,8 @@ EOF EOF [ 0, 0, qr{ ^ \#ifdef\ C1 \n + ^ \#define\ XSubPPtmpAAAB\ 1 \n + ^ \s* \n ^ \#\ \ ifdef\ C2 \n ^ \#define\ XSubPPtmpAAAA\ 1 \n .* @@ -5463,10 +5485,10 @@ EOF [ 0, 0, qr{ ^ \#ifdef\ C1 \n .* - ^ \#define\ BLAH1\n - .* ^ \#define\ XSubPPtmpAAAA\ 1 \n .* + ^ \#define\ BLAH1\n + .* ^ \s* short \s+ RETVAL; \s* \n .* ^ \#endif \n From 08815518f8dd81af8922935975160586128ee30b Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Mon, 20 Oct 2025 16:37:09 +0100 Subject: [PATCH 25/57] ParseXS: refactor: remove parser stack (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) The previous commits have changed how the parser tracks file-scoped #if / #endif sections - this is now simply part of the structure of the AST. So this commit removes the old parser stack, @{$pxs->{XS_parse_stack}}, which used to be used to track this info. The stack was still present, but was no longer used to deduce #if state. It also removes the analyze_preprocessor_statements() method and the test file which used to test it, t/111-analyze_preprocessor_statements.t. One minor regression in this commit is that a warning about about unbalanced #if/#endif within an XSUB or BOOT block used to sometimes emit an extra hint about what the problem might be: print STDERR " (precede it with a blank line if the matching #if is outside the function)\n" if $self->{XS_parse_stack}->[-1]{type} eq 'if'; Since it's no longer trivial to check whether we're currently within a global #if/endif section, I've kept the 'unbalanced' warning, but deleted the additional hint. If anybody wants to add it back, you'll need to add a field to the EU::PXS class such as $pxs->{current_cpp_scope} which keeps track of which is the current (innermost) Node::cpp_scope node being parsed. Then check_conditional_preprocessor_statements() can use it in place of the "$self->{XS_parse_stack}->[-1]{type} eq 'if';" test. --- MANIFEST | 1 - dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 17 ---- .../lib/ExtUtils/ParseXS/Node.pm | 14 +-- .../lib/ExtUtils/ParseXS/Utilities.pm | 90 ------------------- .../t/111-analyze_preprocessor_statements.t | 15 ---- .../t/113-check_cond_preproc_statements.t | 17 +--- 6 files changed, 3 insertions(+), 151 deletions(-) delete mode 100644 dist/ExtUtils-ParseXS/t/111-analyze_preprocessor_statements.t diff --git a/MANIFEST b/MANIFEST index a07c286fce6f..7be48425118e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4202,7 +4202,6 @@ dist/ExtUtils-ParseXS/t/105-valid_proto_string.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/106-process_typemaps.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/108-map_type.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/109-standard_XS_defs.t ExtUtils::ParseXS tests -dist/ExtUtils-ParseXS/t/111-analyze_preprocessor_statements.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/112-set_cond.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/113-check_cond_preproc_statements.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/114-blurt_death_Warn.t ExtUtils::ParseXS tests diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 718681d455f8..cce0660d22bd 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -79,14 +79,12 @@ use ExtUtils::ParseXS::Utilities qw( valid_proto_string process_typemaps map_type - analyze_preprocessor_statement set_cond Warn WarnHint current_line_number blurt death - check_conditional_preprocessor_statements escape_file_for_line_directive report_typemap_failure ); @@ -185,13 +183,6 @@ BEGIN { 'error_count', # Num: count of number of errors seen so far. - 'XS_parse_stack', # Array of hashes: nested INCLUDE and #if states. - - 'XS_parse_stack_top_if_idx', # Index of the current top-most '#if' on the - # XS_parse_stack. Note that it's not necessarily - # the top element of the stack, since that also - # includes elements for each INCLUDE etc. - 'cpp_next_tmp_define',# the next string like XSubPPtmpAAAA # to use as CPP defines for distringuishing # similar calls to newXS() etc @@ -320,13 +311,6 @@ sub process_file { $ExtUtils::ParseXS::VMS_SymSet = ExtUtils::XSSymSet->new(28); } - # XS_parse_stack is an array of hashes. Each hash records the current - # state when a new file is INCLUDEd, or when within a (possibly nested) - # file-scoped #if / #ifdef. - # The 'type' field of each hash is either 'file' for INCLUDE, or 'if' - # for within an #if / #endif. - @{ $self->{XS_parse_stack} } = ({type => 'none'}); - $self->{bootcode_early} = []; $self->{bootcode_later} = []; @@ -429,7 +413,6 @@ sub process_file { $self->{lastline} = $_; $self->{lastline_no} = $.; - $self->{XS_parse_stack_top_if_idx} = 0; $self->{cpp_next_tmp_define} = 'XSubPPtmpAAAA'; my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({type => 'main'}); diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index ae6cfae98518..c18354eb1d46 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -1081,7 +1081,7 @@ sub parse { # Check all the @{$pxs->{line}} lines for balance: all the # #if, #else, #endif etc within the BOOT should balance out. - ExtUtils::ParseXS::check_conditional_preprocessor_statements(); + ExtUtils::ParseXS::Utilities::check_conditional_preprocessor_statements(); # Suck in all remaining lines @@ -1267,7 +1267,7 @@ sub parse { # Check all the @{ $pxs->{line}} lines for balance: all the # #if, #else, #endif etc within the XSUB should balance out. - ExtUtils::ParseXS::check_conditional_preprocessor_statements(); + ExtUtils::ParseXS::Utilities::check_conditional_preprocessor_statements(); # ---------------------------------------------------------------- # Each iteration of this loop will process 1 optional CASE: line, @@ -1708,10 +1708,6 @@ sub parse { # $self->{full_C_name} "BAR__BAZ_bar" # $params_text "param1, param2, param3" - # mark C function name as used - $pxs->{XS_parse_stack}-> - [$pxs->{XS_parse_stack_top_if_idx}]{functions}{$full_cname}++; - # ---------------------------------------------------------------- # Process the XSUB's signature. # @@ -4351,9 +4347,6 @@ sub parse { } } - # XXX tmp: maintain state for #if scope processing - push @{$pxs->{XS_parse_stack}}, { type => 'file' }; - # Save the current file context. my @save_keys = qw(in_fh in_filename in_pathname @@ -4435,9 +4428,6 @@ sub parse { exit 1; } - # XXX tmp: maintain state for #if scope processing - pop @{$pxs->{XS_parse_stack}}; - 1; } diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm index 87074eedbc75..86da930c48ca 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm @@ -17,7 +17,6 @@ our (@ISA, @EXPORT_OK); process_typemaps map_type standard_XS_defs - analyze_preprocessor_statement set_cond Warn WarnHint @@ -43,7 +42,6 @@ ExtUtils::ParseXS::Utilities - Subroutines used with ExtUtils::ParseXS process_typemaps map_type standard_XS_defs - analyze_preprocessor_statement set_cond Warn blurt @@ -483,92 +481,6 @@ EOF return 1; } -=head2 C - -=over 4 - -=item * Purpose - -Process a CPP conditional line (C<#if> etc), to keep track of conditional -nesting. In particular, it updates C<< @{$self->{XS_parse_stack}} >> which -contains the current list of nested conditions, and -C<< $self->{XS_parse_stack_top_if_idx} >> which indicates the most recent -C in that stack. So an C<#if> pushes, an C<#endif> pops, an C<#else> -modifies etc. Each element is a hash of the form: - - { - type => 'if', - varname => 'XSubPPtmpAAAA', # maintained by caller - - # XS functions defined within this branch of the - # conditional (maintained by caller) - functions => { - 'Foo::Bar::baz' => 1, - ... - } - # XS functions seen within any previous branch - other_functions => {... } - -It also updates C<< $self->{bootcode_early} >> and -C<< $self->{bootcode_late} >> with extra CPP directives. - -=item * Arguments - - $self->analyze_preprocessor_statement($statement); - -=back - -=cut - -sub analyze_preprocessor_statement { - my ExtUtils::ParseXS $self = shift; - my ($statement) = @_; - - my $ix = $self->{XS_parse_stack_top_if_idx}; - - if ($statement eq 'if') { - # #if or #ifdef - $ix = @{ $self->{XS_parse_stack} }; - push(@{ $self->{XS_parse_stack} }, {type => 'if'}); - } - else { - # An #else/#elsif/#endif. - - $self->death("Error: '$statement' with no matching 'if'") - if $self->{XS_parse_stack}->[-1]{type} ne 'if'; - - if ($self->{XS_parse_stack}->[-1]{varname}) { - # close any '#ifdef XSubPPtmpAAAA' inserted earlier into boot code. - push(@{ $self->{bootcode_early} }, "#endif\n"); - push(@{ $self->{bootcode_later} }, "#endif\n"); - } - - my(@fns) = keys %{$self->{XS_parse_stack}->[-1]{functions}}; - - if ($statement ne 'endif') { - # Add current functions to the hash of functions seen in previous - # branch limbs, then reset for this next limb of the branch. - @{$self->{XS_parse_stack}->[-1]{other_functions}}{@fns} = (1) x @fns; - @{$self->{XS_parse_stack}->[-1]}{qw(varname functions)} = ('', {}); - } - else { - # #endif - pop stack and update new top entry - my($tmp) = pop(@{ $self->{XS_parse_stack} }); - 0 while (--$ix - && $self->{XS_parse_stack}->[$ix]{type} ne 'if'); - - # For all functions declared within any limb of the just-popped - # if/endif, mark them as having appeared within this limb of the - # outer nested branch. - push(@fns, keys %{$tmp->{other_functions}}); - @{$self->{XS_parse_stack}->[$ix]{functions}}{@fns} = (1) x @fns; - } - } - - $self->{XS_parse_stack_top_if_idx} = $ix; -} - - =head2 C =over 4 @@ -776,8 +688,6 @@ sub check_conditional_preprocessor_statements { } elsif (!$cpplevel) { $self->Warn("Warning: #else/elif/endif without #if in this function"); - print STDERR " (precede it with a blank line if the matching #if is outside the function)\n" - if $self->{XS_parse_stack}->[-1]{type} eq 'if'; return; } elsif ($cpp =~ /^\#\s*endif/) { diff --git a/dist/ExtUtils-ParseXS/t/111-analyze_preprocessor_statements.t b/dist/ExtUtils-ParseXS/t/111-analyze_preprocessor_statements.t deleted file mode 100644 index ebd631cb61ed..000000000000 --- a/dist/ExtUtils-ParseXS/t/111-analyze_preprocessor_statements.t +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; -$| = 1; -use Test::More qw(no_plan); # tests => 7; -use ExtUtils::ParseXS::Utilities qw( - analyze_preprocessor_statement -); - -# XXX not yet tested -# $self->analyze_preprocessor_statement($statement); - -pass("Passed all tests in $0"); - - diff --git a/dist/ExtUtils-ParseXS/t/113-check_cond_preproc_statements.t b/dist/ExtUtils-ParseXS/t/113-check_cond_preproc_statements.t index 870aff04909b..9b4a18f22e99 100644 --- a/dist/ExtUtils-ParseXS/t/113-check_cond_preproc_statements.t +++ b/dist/ExtUtils-ParseXS/t/113-check_cond_preproc_statements.t @@ -3,7 +3,7 @@ use strict; use warnings; use File::Spec; use lib (-d 't' ? File::Spec->catdir(qw(t lib)) : 'lib'); -use Test::More tests => 13; +use Test::More tests => 11; use ExtUtils::ParseXS; use ExtUtils::ParseXS::Utilities qw( check_conditional_preprocessor_statements @@ -12,8 +12,6 @@ use PrimitiveCapture; my $self = bless({} => 'ExtUtils::ParseXS'); $self->{line} = []; -$self->{XS_parse_stack} = []; -$self->{XS_parse_stack}->[0] = {}; { $self->{line} = [ @@ -26,7 +24,6 @@ $self->{XS_parse_stack}->[0] = {}; "#endif this_is_an_endif_statement", ]; $self->{line_no} = [ 17 .. 23 ]; - $self->{XS_parse_stack}->[-1]{type} = 'if'; $self->{in_filename} = 'myfile1'; my $rv; @@ -49,7 +46,6 @@ $self->{XS_parse_stack}->[0] = {}; "#endif this_is_an_endif_statement", ]; $self->{line_no} = [ 17 .. 23 ]; - $self->{XS_parse_stack}->[-1]{type} = 'if'; $self->{in_filename} = 'myfile1'; my $rv; @@ -70,7 +66,6 @@ $self->{XS_parse_stack}->[0] = {}; "#endif this_is_an_endif_statement", ]; $self->{line_no} = [ 17 .. 22 ]; - $self->{XS_parse_stack}->[-1]{type} = 'if'; $self->{in_filename} = 'myfile1'; my $rv; @@ -83,10 +78,6 @@ $self->{XS_parse_stack}->[0] = {}; qr/Warning: #else\/elif\/endif without #if in this function/, "Got expected warning: lack of #if" ); - like( $stderr, - qr/precede it with a blank line/s, - "Got expected warning: advice re blank line" - ); } { @@ -99,7 +90,6 @@ $self->{XS_parse_stack}->[0] = {}; "#endif this_is_an_endif_statement", ]; $self->{line_no} = [ 17 .. 22 ]; - $self->{XS_parse_stack}->[-1]{type} = 'file'; $self->{in_filename} = 'myfile1'; my $rv; @@ -112,10 +102,6 @@ $self->{XS_parse_stack}->[0] = {}; qr/Warning: #else\/elif\/endif without #if in this function/, "Got expected warning: lack of #if" ); - unlike( $stderr, - qr/precede it with a blank line/s, - "Did not get unexpected stderr" - ); } { @@ -128,7 +114,6 @@ $self->{XS_parse_stack}->[0] = {}; "Gamma this is not an if/elif/elsif/endif", ]; $self->{line_no} = [ 17 .. 22 ]; - $self->{XS_parse_stack}->[-1]{type} = 'if'; $self->{in_filename} = 'myfile1'; my $rv; From 71642350e5036759f00204211565ee7a69bb8a05 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Thu, 23 Oct 2025 15:14:19 +0100 Subject: [PATCH 26/57] ParseXS: refactor: move main parsing into XS_file (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Move the parsing of the XS part of the main file from process_file() into cpp_scope::parse(). No functional change. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 11 ----------- .../lib/ExtUtils/ParseXS/Node.pm | 19 +++++++++++++++++-- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index cce0660d22bd..c44320829447 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -408,17 +408,6 @@ sub process_file { my $AST = $self->{AST} = ExtUtils::ParseXS::Node::XS_file->new(); $AST->parse($self) or $self->death("Failed to parse XS file\n"); - - # At this point, $_ should hold the first MODULE line - - $self->{lastline} = $_; - $self->{lastline_no} = $.; - $self->{cpp_next_tmp_define} = 'XSubPPtmpAAAA'; - - my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({type => 'main'}); - $cpp_scope->parse($self); - push @{$AST->{kids}}, $cpp_scope; - $AST->as_code($self); # ---------------------------------------------------------------- diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index c18354eb1d46..622ca1727596 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -462,6 +462,7 @@ BEGIN { $build_subclass->( 'C_part', # the C part of the XS file, before the first MODULE 'C_part_postamble',# Node::C_part_postamble object which emits # boilerplate code following the C code + 'cpp_scope', # node holding all the XS part of the main file )}; sub parse { @@ -471,6 +472,9 @@ sub parse { $self->{line_no} = 1; $self->{file} = $pxs->{in_pathname}; + # Initialise the sequence of guard defines used by cpp_scope + $pxs->{cpp_next_tmp_define} = 'XSubPPtmpAAAA'; + # "Parse" the start of the file. Doesn't actually consume any lines: # just a placeholder for emitting preamble later @@ -489,8 +493,8 @@ sub parse { or return; push @{$self->{kids}}, $C_part; - # "Parse" the start of the file. Doesn't actually consume any lines: - # just a placeholder for emitting postamble later + # "Parse" the bit following any C code. Doesn't actually consume any + # lines: just a placeholder for emitting postamble code. my $C_part_postamble = ExtUtils::ParseXS::Node::C_part_postamble->new(); $self->{C_part_postamble} = $C_part_postamble; @@ -498,6 +502,17 @@ sub parse { or return; push @{$self->{kids}}, $C_part_postamble; + # At this point, $_ should hold the first MODULE line + + $pxs->{lastline} = $_; + $pxs->{lastline_no} = $.; + + # Parse the XS half of the file + + my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({type => 'main'}); + $self->{cpp_scope} = $cpp_scope; + $cpp_scope->parse($pxs); + push @{$self->{kids}}, $cpp_scope; 1; } From 7436cf56a053d24c7f99b979a0bd0b8b2996c7c3 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Thu, 23 Oct 2025 15:50:08 +0100 Subject: [PATCH 27/57] ParseXS: refactor: add Node::pre_boot (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add a stub Node subclass which is responsible for emitting any C code which comes after the XS file has been parsed (and any user XSUBs emiited), but before the boot XSUB is emitted. Currently this code is just concerned with adding overload methods. The code has just been moved from process_file() to the node's as_code method() and is otherwise unchanged. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 59 ------------ .../lib/ExtUtils/ParseXS/Node.pm | 92 +++++++++++++++++++ 2 files changed, 92 insertions(+), 59 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index c44320829447..aa5254a5dac5 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -410,65 +410,6 @@ sub process_file { or $self->death("Failed to parse XS file\n"); $AST->as_code($self); - # ---------------------------------------------------------------- - # End of main loop and at EOF: all paragraphs (and thus XSUBs) have now - # been read in and processed. Do any final post-processing. - # ---------------------------------------------------------------- - - # Process any overloading. - # - # For each package FOO which has had at least one overloaded method - # specified: - # - create a stub XSUB in that package called nil; - # - generate code to be added to the boot XSUB which links that XSUB - # to the symbol table entry *{"FOO::()"}. This mimics the action in - # overload::import() which creates the stub method as a quick way to - # check whether an object is overloaded (including via inheritance), - # by doing $self->can('()'). - # - Further down, we add a ${"FOO:()"} scalar containing the value of - # 'fallback' (or undef if not specified). - # - # XXX In 5.18.0, this arrangement was changed in overload.pm, but hasn't - # been updated here. The *() glob was being used for two different - # purposes: a sub to do a quick check of overloadability, and a scalar - # to indicate what 'fallback' value was specified (even if it wasn't - # specified). The commits: - # v5.16.0-87-g50853fa94f - # v5.16.0-190-g3866ea3be5 - # v5.17.1-219-g79c9643d87 - # changed this so that overloadability is checked by &((, while fallback - # is checked by $() (and not present unless specified by 'fallback' - # as opposed to the always being present, but sometimes undef). - # Except that, in the presence of fallback, &() is added too for - # backcompat reasons (which I don't fully understand - DAPM). - # See overload.pm's import() and OVERLOAD() methods for more detail. - # - # So this code needs updating to match. - - for my $package (sort keys %{ $self->{map_overloaded_package_to_C_package} }) - { - # make them findable with fetchmethod - my $packid = $self->{map_overloaded_package_to_C_package}->{$package}; - print Q(<<"EOF"); - |XS_EUPXS(XS_${packid}_nil); /* prototype to pass -Wmissing-prototypes */ - |XS_EUPXS(XS_${packid}_nil) - |{ - | dXSARGS; - | PERL_UNUSED_VAR(items); - | XSRETURN_EMPTY; - |} - | -EOF - - unshift(@{ $self->{bootcode_early} }, Q(<<"EOF")); - | /* Making a sub named "${package}::()" allows the package */ - | /* to be findable via fetchmethod(), and causes */ - | /* overload::Overloaded("$package") to return true. */ - | (void)newXS_deffile("${package}::()", XS_${packid}_nil); -EOF - } - - # ---------------------------------------------------------------- # Emit the boot XSUB initialization routine # ---------------------------------------------------------------- diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 622ca1727596..376ace5c8747 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -463,6 +463,7 @@ BEGIN { $build_subclass->( 'C_part_postamble',# Node::C_part_postamble object which emits # boilerplate code following the C code 'cpp_scope', # node holding all the XS part of the main file + 'pre_boot', # node holding code after user XSUBs but before boot XSUB )}; sub parse { @@ -514,6 +515,19 @@ sub parse { $cpp_scope->parse($pxs); push @{$self->{kids}}, $cpp_scope; + # Now at EOF: all paragraphs (and thus XSUBs) have now been read in + # and processed. Do any final post-processing. + + # "Parse" the bit following any C code. Doesn't actually consume any + # lines: just a placeholder for emitting any code which should follow + # user XSUBs but which comes before the boot XSUB + + my $pre_boot = ExtUtils::ParseXS::Node::pre_boot->new(); + $self->{pre_boot} = $pre_boot; + push @{$self->{kids}}, $pre_boot; + $pre_boot->parse($pxs) + or return; + 1; } @@ -1147,6 +1161,84 @@ sub boot_code { return @lines; } +# ====================================================================== + +package ExtUtils::ParseXS::Node::pre_boot; + +# AST node representing C code that is emitted after all user-defined +# XSUBs but before the boot XSUB. (This currently consists of +# 'Foo::Bar::()' XSUBs for any packages which have overloading.) +# +# This node's parse() method doesn't actually consume any lines; the node +# exists just for its as_code() method. + +BEGIN { $build_subclass->( +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no + 1; +} + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # For each package FOO which has had at least one overloaded method + # specified: + # - create a stub XSUB in that package called nil; + # - generate code to be added to the boot XSUB which links that XSUB + # to the symbol table entry *{"FOO::()"}. This mimics the action in + # overload::import() which creates the stub method as a quick way to + # check whether an object is overloaded (including via inheritance), + # by doing $self->can('()'). + # - Further down, we add a ${"FOO:()"} scalar containing the value of + # 'fallback' (or undef if not specified). + # + # XXX In 5.18.0, this arrangement was changed in overload.pm, but hasn't + # been updated here. The *() glob was being used for two different + # purposes: a sub to do a quick check of overloadability, and a scalar + # to indicate what 'fallback' value was specified (even if it wasn't + # specified). The commits: + # v5.16.0-87-g50853fa94f + # v5.16.0-190-g3866ea3be5 + # v5.17.1-219-g79c9643d87 + # changed this so that overloadability is checked by &((, while fallback + # is checked by $() (and not present unless specified by 'fallback' + # as opposed to the always being present, but sometimes undef). + # Except that, in the presence of fallback, &() is added too for + # backcompat reasons (which I don't fully understand - DAPM). + # See overload.pm's import() and OVERLOAD() methods for more detail. + # + # So this code needs updating to match. + + for my $package (sort keys %{$pxs->{map_overloaded_package_to_C_package}}) + { + # make them findable with fetchmethod + my $packid = $pxs->{map_overloaded_package_to_C_package}{$package}; + print ExtUtils::ParseXS::Q(<<"EOF"); + |XS_EUPXS(XS_${packid}_nil); /* prototype to pass -Wmissing-prototypes */ + |XS_EUPXS(XS_${packid}_nil) + |{ + | dXSARGS; + | PERL_UNUSED_VAR(items); + | XSRETURN_EMPTY; + |} + | +EOF + + unshift(@{ $pxs->{bootcode_early} }, ExtUtils::ParseXS::Q(<<"EOF")); + | /* Making a sub named "${package}::()" allows the package */ + | /* to be findable via fetchmethod(), and causes */ + | /* overload::Overloaded("$package") to return true. */ + | (void)newXS_deffile("${package}::()", XS_${packid}_nil); +EOF + } +} + # ====================================================================== From 4aa0301478cd00c3078137f244c32883ae04809e Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Thu, 23 Oct 2025 16:13:13 +0100 Subject: [PATCH 28/57] ParseXS: refactor: add Node::boot_xsub (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add a stub Node subclass which is responsible for emitting the C code for the boot XSUB. The code has just been moved from process_file() to the node's as_code method() and is otherwise unchanged. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 167 --------------- .../lib/ExtUtils/ParseXS/Node.pm | 200 ++++++++++++++++++ 2 files changed, 200 insertions(+), 167 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index aa5254a5dac5..825e2be6bb80 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -410,173 +410,6 @@ sub process_file { or $self->death("Failed to parse XS file\n"); $AST->as_code($self); - # ---------------------------------------------------------------- - # Emit the boot XSUB initialization routine - # ---------------------------------------------------------------- - - print Q(<<"EOF"); - |#ifdef __cplusplus - |extern "C" [[ - |#endif -EOF - - print Q(<<"EOF"); - |XS_EXTERNAL(boot_$self->{MODULE_cname}); /* prototype to pass -Wmissing-prototypes */ - |XS_EXTERNAL(boot_$self->{MODULE_cname}) - |[[ - |#if PERL_VERSION_LE(5, 21, 5) - | dVAR; dXSARGS; - |#else - | dVAR; ${\($self->{VERSIONCHECK_value} ? 'dXSBOOTARGSXSAPIVERCHK;' : 'dXSBOOTARGSAPIVERCHK;')} - |#endif -EOF - - # Declare a 'file' var for passing to newXS() and variants. - # - # If there is no $self->{seen_an_XSUB} then there are no xsubs - # in this .xs so 'file' is unused, so silence warnings. - # - # 'file' can also be unused in other circumstances: in particular, - # newXS_deffile() doesn't take a file parameter. So suppress any - # 'unused var' warning always. - # - # Give it the correct 'const'ness: Under 5.8.x and lower, newXS() is - # declared in proto.h as expecting a non-const file name argument. If - # the wrong qualifier is used, it causes breakage with C++ compilers and - # warnings with recent gcc. - - print Q(<<"EOF") if $self->{seen_an_XSUB}; - |#if PERL_VERSION_LE(5, 8, 999) /* PERL_VERSION_LT is 5.33+ */ - | char* file = __FILE__; - |#else - | const char* file = __FILE__; - |#endif - | - | PERL_UNUSED_VAR(file); -EOF - - # Emit assorted declarations - - print Q(<<"EOF"); - | - | PERL_UNUSED_VAR(cv); /* -W */ - | PERL_UNUSED_VAR(items); /* -W */ -EOF - - if ($self->{VERSIONCHECK_value}) { - print Q(<<"EOF") ; - |#if PERL_VERSION_LE(5, 21, 5) - | XS_VERSION_BOOTCHECK; - |# ifdef XS_APIVERSION_BOOTCHECK - | XS_APIVERSION_BOOTCHECK; - |# endif - |#endif - | -EOF - - } else { - print Q(<<"EOF") ; - |#if PERL_VERSION_LE(5, 21, 5) && defined(XS_APIVERSION_BOOTCHECK) - | XS_APIVERSION_BOOTCHECK; - |#endif - | -EOF - - } - - # Declare a 'cv' var within a scope small enough to be visible just to - # newXS() calls which need to do further processing of the cv: in - # particular, when emitting one of: - # XSANY.any_i32 = $value; - # XSINTERFACE_FUNC_SET(cv, $value); - - if ($self->{need_boot_cv}) { - print Q(<<"EOF"); - | [[ - | CV * cv; - | -EOF - } - - # More overload stuff - - if (keys %{ $self->{map_overloaded_package_to_C_package} }) { - # Emit just once if any overloads: - # Before 5.10, PL_amagic_generation used to need setting to at least a - # non-zero value to tell perl that any overloading was present. - print Q(<<"EOF"); - | /* register the overloading (type 'A') magic */ - |#if PERL_VERSION_LE(5, 8, 999) /* PERL_VERSION_LT is 5.33+ */ - | PL_amagic_generation++; - |#endif -EOF - - for my $package (sort keys %{ $self->{map_overloaded_package_to_C_package} }) { - # Emit once for each package with overloads: - # Set ${'Foo::()'} to the fallback value for each overloaded - # package 'Foo' (or undef if not specified). - # But see the 'XXX' comments above about fallback and $(). - - my $fallback = $self->{map_package_to_fallback_string}{$package}; - $fallback = 'UNDEF' unless defined $fallback; - $fallback = $fallback eq 'TRUE' ? '&PL_sv_yes' - : $fallback eq 'FALSE' ? '&PL_sv_no' - : '&PL_sv_undef'; - - print Q(<<"EOF"); - | /* The magic for overload gets a GV* via gv_fetchmeth as */ - | /* mentioned above, and looks in the SV* slot of it for */ - | /* the "fallback" status. */ - | sv_setsv( - | get_sv( "${package}::()", TRUE ), - | $fallback - | ); -EOF - - } - } - - # Emit any boot code associated with newXS(). - - print @{ $self->{bootcode_early} }; - - # Emit closing scope for the 'CV *cv' declaration - - if ($self->{need_boot_cv}) { - print Q(<<"EOF"); - | ]] -EOF - } - - # Emit any lines derived from BOOT: sections - - if (@{ $self->{bootcode_later} }) { - print "\n /* Initialisation Section */\n\n"; - print @{$self->{bootcode_later}}; - print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" - if $self->{config_WantLineNumbers}; - print "\n /* End of Initialisation Section */\n\n"; - } - - # Emit code to call any UNITCHECK blocks and return true. Since 5.22, - # this is been put into a separate function. - print Q(<<'EOF'); - |#if PERL_VERSION_LE(5, 21, 5) - |# if PERL_VERSION_GE(5, 9, 0) - | if (PL_unitcheckav) - | call_list(PL_scopestack_ix, PL_unitcheckav); - |# endif - | XSRETURN_YES; - |#else - | Perl_xs_boot_epilog(aTHX_ ax); - |#endif - |]] - | - |#ifdef __cplusplus - |]] - |#endif -EOF - warn("Please specify prototyping behavior for $self->{in_filename} (see perlxs manual)\n") unless $self->{proto_behaviour_specified}; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 376ace5c8747..f285b483d958 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -464,6 +464,7 @@ BEGIN { $build_subclass->( # boilerplate code following the C code 'cpp_scope', # node holding all the XS part of the main file 'pre_boot', # node holding code after user XSUBs but before boot XSUB + 'boot_xsub', # node holding code which generates the boot XSUB )}; sub parse { @@ -528,6 +529,14 @@ sub parse { $pre_boot->parse($pxs) or return; + # Emit the boot XSUB initialization routine + + my $boot_xsub = ExtUtils::ParseXS::Node::boot_xsub->new(); + $self->{boot_xsub} = $boot_xsub; + push @{$self->{kids}}, $boot_xsub; + $boot_xsub->parse($pxs) + or return; + 1; } @@ -1240,6 +1249,197 @@ EOF } +# ====================================================================== + +package ExtUtils::ParseXS::Node::boot_xsub; + +# AST node representing C code that is emitted to create the boo XSUB. +# +# This node's parse() method doesn't actually consume any lines; the node +# exists just for its as_code() method. + +BEGIN { $build_subclass->( +)}; + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no + 1; +} + +sub as_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # Emit the boot_Foo__Bar() C function / XSUB + + print ExtUtils::ParseXS::Q(<<"EOF"); + |#ifdef __cplusplus + |extern "C" [[ + |#endif +EOF + + print ExtUtils::ParseXS::Q(<<"EOF"); + |XS_EXTERNAL(boot_$pxs->{MODULE_cname}); /* prototype to pass -Wmissing-prototypes */ + |XS_EXTERNAL(boot_$pxs->{MODULE_cname}) + |[[ + |#if PERL_VERSION_LE(5, 21, 5) + | dVAR; dXSARGS; + |#else + | dVAR; ${\($pxs->{VERSIONCHECK_value} ? 'dXSBOOTARGSXSAPIVERCHK;' : 'dXSBOOTARGSAPIVERCHK;')} + |#endif +EOF + + # Declare a 'file' var for passing to newXS() and variants. + # + # If there is no $pxs->{seen_an_XSUB} then there are no xsubs + # in this .xs so 'file' is unused, so silence warnings. + # + # 'file' can also be unused in other circumstances: in particular, + # newXS_deffile() doesn't take a file parameter. So suppress any + # 'unused var' warning always. + # + # Give it the correct 'const'ness: Under 5.8.x and lower, newXS() is + # declared in proto.h as expecting a non-const file name argument. If + # the wrong qualifier is used, it causes breakage with C++ compilers and + # warnings with recent gcc. + + print ExtUtils::ParseXS::Q(<<"EOF") if $pxs->{seen_an_XSUB}; + |#if PERL_VERSION_LE(5, 8, 999) /* PERL_VERSION_LT is 5.33+ */ + | char* file = __FILE__; + |#else + | const char* file = __FILE__; + |#endif + | + | PERL_UNUSED_VAR(file); +EOF + + # Emit assorted declarations + + print ExtUtils::ParseXS::Q(<<"EOF"); + | + | PERL_UNUSED_VAR(cv); /* -W */ + | PERL_UNUSED_VAR(items); /* -W */ +EOF + + if ($pxs->{VERSIONCHECK_value}) { + print ExtUtils::ParseXS::Q(<<"EOF") ; + |#if PERL_VERSION_LE(5, 21, 5) + | XS_VERSION_BOOTCHECK; + |# ifdef XS_APIVERSION_BOOTCHECK + | XS_APIVERSION_BOOTCHECK; + |# endif + |#endif + | +EOF + + } else { + print ExtUtils::ParseXS::Q(<<"EOF") ; + |#if PERL_VERSION_LE(5, 21, 5) && defined(XS_APIVERSION_BOOTCHECK) + | XS_APIVERSION_BOOTCHECK; + |#endif + | +EOF + + } + + # Declare a 'cv' var within a scope small enough to be visible just to + # newXS() calls which need to do further processing of the cv: in + # particular, when emitting one of: + # XSANY.any_i32 = $value; + # XSINTERFACE_FUNC_SET(cv, $value); + + if ($pxs->{need_boot_cv}) { + print ExtUtils::ParseXS::Q(<<"EOF"); + | [[ + | CV * cv; + | +EOF + } + + # More overload stuff + + if (keys %{ $pxs->{map_overloaded_package_to_C_package} }) { + # Emit just once if any overloads: + # Before 5.10, PL_amagic_generation used to need setting to at least a + # non-zero value to tell perl that any overloading was present. + print ExtUtils::ParseXS::Q(<<"EOF"); + | /* register the overloading (type 'A') magic */ + |#if PERL_VERSION_LE(5, 8, 999) /* PERL_VERSION_LT is 5.33+ */ + | PL_amagic_generation++; + |#endif +EOF + + for my $package (sort keys %{ $pxs->{map_overloaded_package_to_C_package} }) { + # Emit once for each package with overloads: + # Set ${'Foo::()'} to the fallback value for each overloaded + # package 'Foo' (or undef if not specified). + # But see the 'XXX' comments above about fallback and $(). + + my $fallback = $pxs->{map_package_to_fallback_string}{$package}; + $fallback = 'UNDEF' unless defined $fallback; + $fallback = $fallback eq 'TRUE' ? '&PL_sv_yes' + : $fallback eq 'FALSE' ? '&PL_sv_no' + : '&PL_sv_undef'; + + print ExtUtils::ParseXS::Q(<<"EOF"); + | /* The magic for overload gets a GV* via gv_fetchmeth as */ + | /* mentioned above, and looks in the SV* slot of it for */ + | /* the "fallback" status. */ + | sv_setsv( + | get_sv( "${package}::()", TRUE ), + | $fallback + | ); +EOF + + } + } + + # Emit any boot code associated with newXS(). + + print @{ $pxs->{bootcode_early} }; + + # Emit closing scope for the 'CV *cv' declaration + + if ($pxs->{need_boot_cv}) { + print ExtUtils::ParseXS::Q(<<"EOF"); + | ]] +EOF + } + + # Emit any lines derived from BOOT: sections + + if (@{ $pxs->{bootcode_later} }) { + print "\n /* Initialisation Section */\n\n"; + print @{$pxs->{bootcode_later}}; + print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" + if $pxs->{config_WantLineNumbers}; + print "\n /* End of Initialisation Section */\n\n"; + } + + # Emit code to call any UNITCHECK blocks and return true. Since 5.22, + # this is been put into a separate function. + print ExtUtils::ParseXS::Q(<<'EOF'); + |#if PERL_VERSION_LE(5, 21, 5) + |# if PERL_VERSION_GE(5, 9, 0) + | if (PL_unitcheckav) + | call_list(PL_scopestack_ix, PL_unitcheckav); + |# endif + | XSRETURN_YES; + |#else + | Perl_xs_boot_epilog(aTHX_ ax); + |#endif + |]] + | + |#ifdef __cplusplus + |]] + |#endif +EOF +} + + # ====================================================================== package ExtUtils::ParseXS::Node::xsub; From 6b8284850e12e447526599e12e40aa8f75675a31 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Thu, 23 Oct 2025 16:37:53 +0100 Subject: [PATCH 29/57] ParseXS: refactor: tidy Node::boot_xsub::parse() (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) The previous commit moved code from process_file() into it's own method, with minimal changes. Now clean the code up a bit: - use $open_brace, $close_brace instead of the magic [[ ]]; - clean up comments, white space etc; - use Q to make some code strings more readable. --- .../lib/ExtUtils/ParseXS/Node.pm | 64 +++++++++++-------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index f285b483d958..ef2c5cc95b3a 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -1277,14 +1277,11 @@ sub as_code { print ExtUtils::ParseXS::Q(<<"EOF"); |#ifdef __cplusplus - |extern "C" [[ + |extern "C" $open_brace |#endif -EOF - - print ExtUtils::ParseXS::Q(<<"EOF"); |XS_EXTERNAL(boot_$pxs->{MODULE_cname}); /* prototype to pass -Wmissing-prototypes */ |XS_EXTERNAL(boot_$pxs->{MODULE_cname}) - |[[ + |$open_brace |#if PERL_VERSION_LE(5, 21, 5) | dVAR; dXSARGS; |#else @@ -1325,7 +1322,7 @@ EOF EOF if ($pxs->{VERSIONCHECK_value}) { - print ExtUtils::ParseXS::Q(<<"EOF") ; + print ExtUtils::ParseXS::Q(<<"EOF"); |#if PERL_VERSION_LE(5, 21, 5) | XS_VERSION_BOOTCHECK; |# ifdef XS_APIVERSION_BOOTCHECK @@ -1334,26 +1331,25 @@ EOF |#endif | EOF - - } else { + } + else { print ExtUtils::ParseXS::Q(<<"EOF") ; |#if PERL_VERSION_LE(5, 21, 5) && defined(XS_APIVERSION_BOOTCHECK) | XS_APIVERSION_BOOTCHECK; |#endif | EOF - } - # Declare a 'cv' var within a scope small enough to be visible just to - # newXS() calls which need to do further processing of the cv: in - # particular, when emitting one of: + # Declare a 'cv' variable within a scope small enough to be visible + # just to newXS() calls which need to do further processing of the cv: + # in particular, when emitting one of: # XSANY.any_i32 = $value; # XSINTERFACE_FUNC_SET(cv, $value); if ($pxs->{need_boot_cv}) { print ExtUtils::ParseXS::Q(<<"EOF"); - | [[ + | $open_brace | CV * cv; | EOF @@ -1363,8 +1359,9 @@ EOF if (keys %{ $pxs->{map_overloaded_package_to_C_package} }) { # Emit just once if any overloads: - # Before 5.10, PL_amagic_generation used to need setting to at least a - # non-zero value to tell perl that any overloading was present. + # Before 5.10, PL_amagic_generation used to need setting to at + # least a non-zero value to tell perl that any overloading was + # present. print ExtUtils::ParseXS::Q(<<"EOF"); | /* register the overloading (type 'A') magic */ |#if PERL_VERSION_LE(5, 8, 999) /* PERL_VERSION_LT is 5.33+ */ @@ -1372,7 +1369,9 @@ EOF |#endif EOF - for my $package (sort keys %{ $pxs->{map_overloaded_package_to_C_package} }) { + for my $package ( + sort keys %{ $pxs->{map_overloaded_package_to_C_package} }) + { # Emit once for each package with overloads: # Set ${'Foo::()'} to the fallback value for each overloaded # package 'Foo' (or undef if not specified). @@ -1380,7 +1379,7 @@ EOF my $fallback = $pxs->{map_package_to_fallback_string}{$package}; $fallback = 'UNDEF' unless defined $fallback; - $fallback = $fallback eq 'TRUE' ? '&PL_sv_yes' + $fallback = $fallback eq 'TRUE' ? '&PL_sv_yes' : $fallback eq 'FALSE' ? '&PL_sv_no' : '&PL_sv_undef'; @@ -1393,7 +1392,6 @@ EOF | $fallback | ); EOF - } } @@ -1405,23 +1403,35 @@ EOF if ($pxs->{need_boot_cv}) { print ExtUtils::ParseXS::Q(<<"EOF"); - | ]] + | $close_brace EOF } # Emit any lines derived from BOOT: sections - if (@{ $pxs->{bootcode_later} }) { - print "\n /* Initialisation Section */\n\n"; + if (@{$pxs->{bootcode_later}}) { + print ExtUtils::ParseXS::Q(<<"EOF"); + | + | /* Initialisation Section */ + | +EOF + print @{$pxs->{bootcode_later}}; + print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" if $pxs->{config_WantLineNumbers}; - print "\n /* End of Initialisation Section */\n\n"; + + print ExtUtils::ParseXS::Q(<<"EOF"); + | + | /* End of Initialisation Section */ + | +EOF } - # Emit code to call any UNITCHECK blocks and return true. Since 5.22, - # this is been put into a separate function. - print ExtUtils::ParseXS::Q(<<'EOF'); + # Emit code to call any UNITCHECK blocks and return true. + # Since 5.22, this is been put into a separate function. + + print ExtUtils::ParseXS::Q(<<"EOF"); |#if PERL_VERSION_LE(5, 21, 5) |# if PERL_VERSION_GE(5, 9, 0) | if (PL_unitcheckav) @@ -1431,10 +1441,10 @@ EOF |#else | Perl_xs_boot_epilog(aTHX_ ax); |#endif - |]] + |$close_brace | |#ifdef __cplusplus - |]] + |$close_brace |#endif EOF } From 39cf1455c57ae5661d62813f74c53a6bbb9f12a5 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Thu, 23 Oct 2025 17:08:49 +0100 Subject: [PATCH 30/57] ParseXS: refactor: move some code (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Move some code from process_file() to XS_file::parse() No change in functionality. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 17 ++--------------- .../lib/ExtUtils/ParseXS/Node.pm | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 825e2be6bb80..18eaa2452ffa 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -311,18 +311,8 @@ sub process_file { $ExtUtils::ParseXS::VMS_SymSet = ExtUtils::XSSymSet->new(28); } - $self->{bootcode_early} = []; - $self->{bootcode_later} = []; - - # hash of package name => package C name - $self->{map_overloaded_package_to_C_package} = {}; - # hashref of package name => fallback setting - $self->{map_package_to_fallback_string} = {}; - $self->{error_count} = 0; # count - - # Most of the 1500 lines below uses these globals. We'll have to - # clean this up sometime, probably. For now, we just pull them out - # of %Options. -Ken + # Most of the parser uses these globals. We'll have to clean this up + # sometime, probably. For now, we just pull them out of %Options. -Ken $self->{config_RetainCplusplusHierarchicalTypes} = $Options{hiertype}; $self->{PROTOTYPES_value} = $Options{prototypes}; @@ -410,9 +400,6 @@ sub process_file { or $self->death("Failed to parse XS file\n"); $AST->as_code($self); - warn("Please specify prototyping behavior for $self->{in_filename} (see perlxs manual)\n") - unless $self->{proto_behaviour_specified}; - chdir($orig_cwd); select($orig_fh); untie *PSEUDO_STDOUT if tied *PSEUDO_STDOUT; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index ef2c5cc95b3a..43260a626dcd 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -474,6 +474,17 @@ sub parse { $self->{line_no} = 1; $self->{file} = $pxs->{in_pathname}; + $pxs->{bootcode_early} = []; + $pxs->{bootcode_later} = []; + + # Hash of package name => package C name + $pxs->{map_overloaded_package_to_C_package} = {}; + + # Hashref of package name => fallback setting + $pxs->{map_package_to_fallback_string} = {}; + + $pxs->{error_count} = 0; + # Initialise the sequence of guard defines used by cpp_scope $pxs->{cpp_next_tmp_define} = 'XSubPPtmpAAAA'; @@ -537,6 +548,10 @@ sub parse { $boot_xsub->parse($pxs) or return; + warn( "Please specify prototyping behavior for " + . "$pxs->{in_filename} (see perlxs manual)\n") + unless $pxs->{proto_behaviour_specified}; + 1; } From be71c4d72329323abfbddde205ec202a5e6a9319 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 24 Oct 2025 01:49:36 +0100 Subject: [PATCH 31/57] ParseXS: emit package overload registrations later (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) In the boot XSUB, a bunch of newXS() calls are added, to register all the XSUBs in the perl namespace, plus all aliases, overload methods etc. In addition, for every package which contains at least one overload method, a special stub sub called Foo::Bar::() is registered. This is used at run-time to determine whether overloading is present for a particular class. This commit changes the order, by changing an unshift to a push. Previously the stub package subs were registered first, then the XSUBs; now its the other way round. e.g. /* mark these two packages as having overload methods */ newXS_deffile("X::Y::()", XS_X__Y_nil); newXS_deffile("P::Q::()", XS_P__Q_nil); /* foo XSUB used as an overload method for <=> etc */ newXS_deffile("P::Q::foo", XS_P__Q_foo); newXS_deffile("P::Q::(<=>", XS_P__Q_foo); ... /* bar XSUB used as an overload method for <=> etc */ newXS_deffile("X::Y::bar", XS_X__Y_bar); newXS_deffile("X::Y::(<=>", XS_X__Y_bar); ... becomes /* foo XSUB used as an overload method for <=> etc */ newXS_deffile("P::Q::foo", XS_P__Q_foo); newXS_deffile("P::Q::(<=>", XS_P__Q_foo); ... /* bar XSUB used as an overload method for <=> etc */ newXS_deffile("X::Y::bar", XS_X__Y_bar); newXS_deffile("X::Y::(<=>", XS_X__Y_bar); ... /* mark these two packages as having overload methods */ newXS_deffile("P::Q::()", XS_P__Q_nil); newXS_deffile("X::Y::()", XS_X__Y_nil); This should make no difference to the functionality, but will make a further refactoring of boot code generation easier. AFAIKT, using unshift rather than push in the original 2002 commit didn't have any particular intent. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 43260a626dcd..1daa18c4bb3a 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -1254,7 +1254,7 @@ sub as_code { | EOF - unshift(@{ $pxs->{bootcode_early} }, ExtUtils::ParseXS::Q(<<"EOF")); + push(@{ $pxs->{bootcode_early} }, ExtUtils::ParseXS::Q(<<"EOF")); | /* Making a sub named "${package}::()" allows the package */ | /* to be findable via fetchmethod(), and causes */ | /* overload::Overloaded("$package") to return true. */ From bcc6c051bba946b37de68f1e485b3ba97ca94501 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 24 Oct 2025 12:03:27 +0100 Subject: [PATCH 32/57] ParseXS: refactor: add as_boot_code() methods (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Before this commit, as_code() methods were responsible for generating both the general code that goes in the C file, and also code that specifically goes in the boot SUB. They achieved this by pushing lines into the arrays $pxs->{bootcode_early} and $pxs->{bootcode_later}. This commit adds as_boot_code() methods to the relevant node types and removes the bootcode_early and bootcode_later arrays. Now, the C code file is generated by walking the AST, calling as_code() methods. When it gets to the boot_xsub::as_code() method, which is responsible for emitting the XS(boot_Foo__Bar) {} boot XSUB, that sub itself then does a second treewalk, calling as_boot_code() methods, which accumulates lines to be added early and later on in the boot SUB. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 7 -- .../lib/ExtUtils/ParseXS/Node.pm | 106 ++++++++++++------ 2 files changed, 72 insertions(+), 41 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 18eaa2452ffa..2559d405db75 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -218,13 +218,6 @@ BEGIN { 'need_boot_cv', # must declare 'cv' within the boot function - 'bootcode_early', # Array of code lines to emit early in boot XSUB: - # typically newXS() calls - - 'bootcode_later', # Array of code lines to emit later on in boot XSUB: - # typically lines from a BOOT: XS file section - - # Per-XSUB parsing state: 'file_SCOPE_enabled', # Bool: the current state of the file-scope diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 1daa18c4bb3a..bf28b42a1556 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -66,6 +66,9 @@ create C or C nodes as appropriate, and so on. Secondly, as_code() descends its sub-tree, outputting the tree as C code. +Some nodes also have an as_boot_code() method for adding any code to +the boot XSUB. This returns two array refs, one containing a list of code lines to be inserted early into the boot XSUB, and a second for later lines. + Note that parsing and code-generation are done as two separate phases; parse() should only build a tree and never emit code. @@ -384,6 +387,29 @@ sub is_xs_module_line { sub as_code { } +# Most node types inherit this: just continue walking the tree +# looking for any nodes which provide some boot code. +# It returns two array refs; one for lines of code to be injected early +# into the boot XSUB, the second for later code. + +sub as_boot_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + my ($early, $later) = ([], []); + my $kids = $self->{kids}; + if ($kids) { + for (@$kids) { + my ($e, $l) = $_->as_boot_code($pxs); + push @$early, @$e; + push @$later, @$l; + } + } + return $early, $later; +} + + + # as_concise(): for debugging: # @@ -474,9 +500,6 @@ sub parse { $self->{line_no} = 1; $self->{file} = $pxs->{in_pathname}; - $pxs->{bootcode_early} = []; - $pxs->{bootcode_later} = []; - # Hash of package name => package C name $pxs->{map_overloaded_package_to_C_package} = {}; @@ -1040,25 +1063,34 @@ sub parse { 1; } + sub as_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; my $g = $self->{guard_name}; + print "#define $g 1\n\n" if defined $g; + $_->as_code($pxs, $self) for @{$self->{kids}}; +} - if (defined $g) { - print "#define $g 1\n\n" if defined $g; - push @{$pxs->{bootcode_early}}, "#if $g\n"; - push @{$pxs->{bootcode_later}}, "#if $g\n"; - } - $_->as_code($pxs, $self) for @{$self->{kids}}; +sub as_boot_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + # accumulate all the newXS()'s in $early and the BOOT blocks in $later, + my ($early, $later) = $self->SUPER::as_boot_code($pxs); + # then wrap them all within '#if XSubPPtmpAAAA' guards + my $g = $self->{guard_name}; if (defined $g) { - push @{$pxs->{bootcode_early}}, "#endif\n"; - push @{$pxs->{bootcode_later}}, "#endif\n"; + unshift @$early, "#if $g\n"; + unshift @$later, "#if $g\n"; + push @$early, "#endif\n"; + push @$later, "#endif\n"; } + return $early, $later; } @@ -1150,16 +1182,7 @@ sub parse { } -sub as_code { - my __PACKAGE__ $self = shift; - my ExtUtils::ParseXS $pxs = shift; - - # Save all the BOOT lines to be emitted later. - push @{$pxs->{bootcode_later}}, $self->boot_code($pxs); -} - - -sub boot_code { +sub as_boot_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; @@ -1182,7 +1205,7 @@ sub boot_code { # later. push @lines, "$_\n" for @{$self->{lines}}, ""; - return @lines; + return [], \@lines; } # ====================================================================== @@ -1237,7 +1260,7 @@ sub as_code { # backcompat reasons (which I don't fully understand - DAPM). # See overload.pm's import() and OVERLOAD() methods for more detail. # - # So this code needs updating to match. + # So this code (and the code in as_boot_code) needs updating to match. for my $package (sort keys %{$pxs->{map_overloaded_package_to_C_package}}) { @@ -1253,14 +1276,25 @@ sub as_code { |} | EOF + } +} - push(@{ $pxs->{bootcode_early} }, ExtUtils::ParseXS::Q(<<"EOF")); +sub as_boot_code { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + my @early; + for my $package (sort keys %{$pxs->{map_overloaded_package_to_C_package}}) + { + my $packid = $pxs->{map_overloaded_package_to_C_package}{$package}; + push @early, ExtUtils::ParseXS::Q(<<"EOF"); | /* Making a sub named "${package}::()" allows the package */ | /* to be findable via fetchmethod(), and causes */ | /* overload::Overloaded("$package") to return true. */ | (void)newXS_deffile("${package}::()", XS_${packid}_nil); EOF } + return \@early, []; } @@ -1288,6 +1322,10 @@ sub as_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; + # Walk the AST accumulating any boot code generated by + # the various nodes' as_boot_code() methods. + my ($early, $later) = $pxs->{AST}->as_boot_code($pxs); + # Emit the boot_Foo__Bar() C function / XSUB print ExtUtils::ParseXS::Q(<<"EOF"); @@ -1412,7 +1450,7 @@ EOF # Emit any boot code associated with newXS(). - print @{ $pxs->{bootcode_early} }; + print @$early; # Emit closing scope for the 'CV *cv' declaration @@ -1424,14 +1462,14 @@ EOF # Emit any lines derived from BOOT: sections - if (@{$pxs->{bootcode_later}}) { + if (@$later) { print ExtUtils::ParseXS::Q(<<"EOF"); | | /* Initialisation Section */ | EOF - print @{$pxs->{bootcode_later}}; + print @$later; print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" if $pxs->{config_WantLineNumbers}; @@ -1801,9 +1839,6 @@ EOF # Emit final closing bracket for the XSUB. print "$close_brace\n\n"; - - # generate all the 'newXS()' etc boot code needed for this XSUB - push @{$pxs->{bootcode_early}}, $self->boot_code($pxs); } @@ -1811,7 +1846,7 @@ EOF # call(s) plus any additional boot stuff like handling attributes or # storing an alias index in the XSUB's CV. -sub boot_code { +sub as_boot_code { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; @@ -1931,14 +1966,12 @@ EOF for my $operator (sort keys %{ $self->{overload_name_seen} }) { - $pxs->{map_overloaded_package_to_C_package}->{$self->{PACKAGE_name}} - = $self->{PACKAGE_C_name}; my $overload = "$self->{PACKAGE_name}\::($operator"; push(@code, " (void)$newXS(\"$overload\", XS_$cname$file_arg$proto_arg);\n"); } - return @code; + return \@code, []; } @@ -5224,6 +5257,11 @@ sub parse { $self->{ops}{$1} = 1; $xsub->{overload_name_seen}{$1} = 1; } + + # Mark the current package as being overloaded + $pxs->{map_overloaded_package_to_C_package}->{$xsub->{PACKAGE_name}} + = $xsub->{PACKAGE_C_name}; + 1; } From ae7ba0976a5d2f23fc060d23f2904752844e80b1 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 24 Oct 2025 14:09:18 +0100 Subject: [PATCH 33/57] ParseXS: refactor: inline and rm standard_XS_defs (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) The ExtUtils::ParseXS::Utilities::standard_XS_defs() function just returns a big string containing all the standard boilerplate code that gets added to the C file. Delete this function, and instead include the text directly within C_part_postamble::as_code(). --- MANIFEST | 1 - .../lib/ExtUtils/ParseXS/Node.pm | 155 ++++++++++++++- .../lib/ExtUtils/ParseXS/Utilities.pm | 182 ------------------ .../ExtUtils-ParseXS/t/109-standard_XS_defs.t | 27 --- 4 files changed, 154 insertions(+), 211 deletions(-) delete mode 100644 dist/ExtUtils-ParseXS/t/109-standard_XS_defs.t diff --git a/MANIFEST b/MANIFEST index 7be48425118e..430735c1e098 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4201,7 +4201,6 @@ dist/ExtUtils-ParseXS/t/104-map_type.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/105-valid_proto_string.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/106-process_typemaps.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/108-map_type.t ExtUtils::ParseXS tests -dist/ExtUtils-ParseXS/t/109-standard_XS_defs.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/112-set_cond.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/113-check_cond_preproc_statements.t ExtUtils::ParseXS tests dist/ExtUtils-ParseXS/t/114-blurt_death_Warn.t ExtUtils::ParseXS tests diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index bf28b42a1556..af4f4c82b179 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -825,7 +825,160 @@ sub as_code { # Emit boilerplate postamble following any code passed through from # the 'C' part of the XS file - ExtUtils::ParseXS::Utilities::standard_XS_defs(); + print ExtUtils::ParseXS::Q(<<'EOF'); + |#ifndef PERL_UNUSED_VAR + |# define PERL_UNUSED_VAR(var) if (0) var = var + |#endif + | + |#ifndef dVAR + |# define dVAR dNOOP + |#endif + | + | + |/* This stuff is not part of the API! You have been warned. */ + |#ifndef PERL_VERSION_DECIMAL + |# define PERL_VERSION_DECIMAL(r,v,s) (r*1000000 + v*1000 + s) + |#endif + |#ifndef PERL_DECIMAL_VERSION + |# define PERL_DECIMAL_VERSION \ + | PERL_VERSION_DECIMAL(PERL_REVISION,PERL_VERSION,PERL_SUBVERSION) + |#endif + |#ifndef PERL_VERSION_GE + |# define PERL_VERSION_GE(r,v,s) \ + | (PERL_DECIMAL_VERSION >= PERL_VERSION_DECIMAL(r,v,s)) + |#endif + |#ifndef PERL_VERSION_LE + |# define PERL_VERSION_LE(r,v,s) \ + | (PERL_DECIMAL_VERSION <= PERL_VERSION_DECIMAL(r,v,s)) + |#endif + | + |/* XS_INTERNAL is the explicit static-linkage variant of the default + | * XS macro. + | * + | * XS_EXTERNAL is the same as XS_INTERNAL except it does not include + | * "STATIC", ie. it exports XSUB symbols. You probably don't want that + | * for anything but the BOOT XSUB. + | * + | * See XSUB.h in core! + | */ + | + | + |/* TODO: This might be compatible further back than 5.10.0. */ + |#if PERL_VERSION_GE(5, 10, 0) && PERL_VERSION_LE(5, 15, 1) + |# undef XS_EXTERNAL + |# undef XS_INTERNAL + |# if defined(__CYGWIN__) && defined(USE_DYNAMIC_LOADING) + |# define XS_EXTERNAL(name) __declspec(dllexport) XSPROTO(name) + |# define XS_INTERNAL(name) STATIC XSPROTO(name) + |# endif + |# if defined(__SYMBIAN32__) + |# define XS_EXTERNAL(name) EXPORT_C XSPROTO(name) + |# define XS_INTERNAL(name) EXPORT_C STATIC XSPROTO(name) + |# endif + |# ifndef XS_EXTERNAL + |# if defined(HASATTRIBUTE_UNUSED) && !defined(__cplusplus) + |# define XS_EXTERNAL(name) void name(pTHX_ CV* cv __attribute__unused__) + |# define XS_INTERNAL(name) STATIC void name(pTHX_ CV* cv __attribute__unused__) + |# else + |# ifdef __cplusplus + |# define XS_EXTERNAL(name) extern "C" XSPROTO(name) + |# define XS_INTERNAL(name) static XSPROTO(name) + |# else + |# define XS_EXTERNAL(name) XSPROTO(name) + |# define XS_INTERNAL(name) STATIC XSPROTO(name) + |# endif + |# endif + |# endif + |#endif + | + |/* perl >= 5.10.0 && perl <= 5.15.1 */ + | + | + |/* The XS_EXTERNAL macro is used for functions that must not be static + | * like the boot XSUB of a module. If perl didn't have an XS_EXTERNAL + | * macro defined, the best we can do is assume XS is the same. + | * Dito for XS_INTERNAL. + | */ + |#ifndef XS_EXTERNAL + |# define XS_EXTERNAL(name) XS(name) + |#endif + |#ifndef XS_INTERNAL + |# define XS_INTERNAL(name) XS(name) + |#endif + | + |/* Now, finally, after all this mess, we want an ExtUtils::ParseXS + | * internal macro that we're free to redefine for varying linkage due + | * to the EXPORT_XSUB_SYMBOLS XS keyword. This is internal, use + | * XS_EXTERNAL(name) or XS_INTERNAL(name) in your code if you need to! + | */ + | + |#undef XS_EUPXS + |#if defined(PERL_EUPXS_ALWAYS_EXPORT) + |# define XS_EUPXS(name) XS_EXTERNAL(name) + |#else + | /* default to internal */ + |# define XS_EUPXS(name) XS_INTERNAL(name) + |#endif + | + |#ifndef PERL_ARGS_ASSERT_CROAK_XS_USAGE + |#define PERL_ARGS_ASSERT_CROAK_XS_USAGE assert(cv); assert(params) + | + |/* prototype to pass -Wmissing-prototypes */ + |STATIC void + |S_croak_xs_usage(const CV *const cv, const char *const params); + | + |STATIC void + |S_croak_xs_usage(const CV *const cv, const char *const params) + |{ + | const GV *const gv = CvGV(cv); + | + | PERL_ARGS_ASSERT_CROAK_XS_USAGE; + | + | if (gv) { + | const char *const gvname = GvNAME(gv); + | const HV *const stash = GvSTASH(gv); + | const char *const hvname = stash ? HvNAME(stash) : NULL; + | + | if (hvname) + | Perl_croak_nocontext("Usage: %s::%s(%s)", hvname, gvname, params); + | else + | Perl_croak_nocontext("Usage: %s(%s)", gvname, params); + | } else { + | /* Pants. I don't think that it should be possible to get here. */ + | Perl_croak_nocontext("Usage: CODE(0x%" UVxf ")(%s)", PTR2UV(cv), params); + | } + |} + |#undef PERL_ARGS_ASSERT_CROAK_XS_USAGE + | + |#define croak_xs_usage S_croak_xs_usage + | + |#endif + | + |/* NOTE: the prototype of newXSproto() is different in versions of perls, + | * so we define a portable version of newXSproto() + | */ + |#ifdef newXS_flags + |#define newXSproto_portable(name, c_impl, file, proto) newXS_flags(name, c_impl, file, proto, 0) + |#else + |#define newXSproto_portable(name, c_impl, file, proto) (PL_Sv=(SV*)newXS(name, c_impl, file), sv_setpv(PL_Sv, proto), (CV*)PL_Sv) + |#endif /* !defined(newXS_flags) */ + | + |#if PERL_VERSION_LE(5, 21, 5) + |# define newXS_deffile(a,b) Perl_newXS(aTHX_ a,b,file) + |#else + |# define newXS_deffile(a,b) Perl_newXS_deffile(aTHX_ a,b) + |#endif + | + |/* simple backcompat versions of the TARGx() macros with no optimisation */ + |#ifndef TARGi + |# define TARGi(iv, do_taint) sv_setiv_mg(TARG, iv) + |# define TARGu(uv, do_taint) sv_setuv_mg(TARG, uv) + |# define TARGn(nv, do_taint) sv_setnv_mg(TARG, nv) + |#endif + | +EOF + + # Fix up line number reckoning print 'ExtUtils::ParseXS::CountLines'->end_marker, "\n" if $pxs->{config_WantLineNumbers}; diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm index 86da930c48ca..f45a0b1d69eb 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm @@ -16,7 +16,6 @@ our (@ISA, @EXPORT_OK); valid_proto_string process_typemaps map_type - standard_XS_defs set_cond Warn WarnHint @@ -41,7 +40,6 @@ ExtUtils::ParseXS::Utilities - Subroutines used with ExtUtils::ParseXS valid_proto_string process_typemaps map_type - standard_XS_defs set_cond Warn blurt @@ -301,186 +299,6 @@ sub map_type { } -=head2 C - -=over 4 - -=item * Purpose - -Writes to the C<.c> output file certain preprocessor directives and function -headers needed in all such files. - -=item * Arguments - -None. - -=item * Return Value - -Returns true. - -=back - -=cut - -sub standard_XS_defs { - print <<"EOF"; -#ifndef PERL_UNUSED_VAR -# define PERL_UNUSED_VAR(var) if (0) var = var -#endif - -#ifndef dVAR -# define dVAR dNOOP -#endif - - -/* This stuff is not part of the API! You have been warned. */ -#ifndef PERL_VERSION_DECIMAL -# define PERL_VERSION_DECIMAL(r,v,s) (r*1000000 + v*1000 + s) -#endif -#ifndef PERL_DECIMAL_VERSION -# define PERL_DECIMAL_VERSION \\ - PERL_VERSION_DECIMAL(PERL_REVISION,PERL_VERSION,PERL_SUBVERSION) -#endif -#ifndef PERL_VERSION_GE -# define PERL_VERSION_GE(r,v,s) \\ - (PERL_DECIMAL_VERSION >= PERL_VERSION_DECIMAL(r,v,s)) -#endif -#ifndef PERL_VERSION_LE -# define PERL_VERSION_LE(r,v,s) \\ - (PERL_DECIMAL_VERSION <= PERL_VERSION_DECIMAL(r,v,s)) -#endif - -/* XS_INTERNAL is the explicit static-linkage variant of the default - * XS macro. - * - * XS_EXTERNAL is the same as XS_INTERNAL except it does not include - * "STATIC", ie. it exports XSUB symbols. You probably don't want that - * for anything but the BOOT XSUB. - * - * See XSUB.h in core! - */ - - -/* TODO: This might be compatible further back than 5.10.0. */ -#if PERL_VERSION_GE(5, 10, 0) && PERL_VERSION_LE(5, 15, 1) -# undef XS_EXTERNAL -# undef XS_INTERNAL -# if defined(__CYGWIN__) && defined(USE_DYNAMIC_LOADING) -# define XS_EXTERNAL(name) __declspec(dllexport) XSPROTO(name) -# define XS_INTERNAL(name) STATIC XSPROTO(name) -# endif -# if defined(__SYMBIAN32__) -# define XS_EXTERNAL(name) EXPORT_C XSPROTO(name) -# define XS_INTERNAL(name) EXPORT_C STATIC XSPROTO(name) -# endif -# ifndef XS_EXTERNAL -# if defined(HASATTRIBUTE_UNUSED) && !defined(__cplusplus) -# define XS_EXTERNAL(name) void name(pTHX_ CV* cv __attribute__unused__) -# define XS_INTERNAL(name) STATIC void name(pTHX_ CV* cv __attribute__unused__) -# else -# ifdef __cplusplus -# define XS_EXTERNAL(name) extern "C" XSPROTO(name) -# define XS_INTERNAL(name) static XSPROTO(name) -# else -# define XS_EXTERNAL(name) XSPROTO(name) -# define XS_INTERNAL(name) STATIC XSPROTO(name) -# endif -# endif -# endif -#endif - -/* perl >= 5.10.0 && perl <= 5.15.1 */ - - -/* The XS_EXTERNAL macro is used for functions that must not be static - * like the boot XSUB of a module. If perl didn't have an XS_EXTERNAL - * macro defined, the best we can do is assume XS is the same. - * Dito for XS_INTERNAL. - */ -#ifndef XS_EXTERNAL -# define XS_EXTERNAL(name) XS(name) -#endif -#ifndef XS_INTERNAL -# define XS_INTERNAL(name) XS(name) -#endif - -/* Now, finally, after all this mess, we want an ExtUtils::ParseXS - * internal macro that we're free to redefine for varying linkage due - * to the EXPORT_XSUB_SYMBOLS XS keyword. This is internal, use - * XS_EXTERNAL(name) or XS_INTERNAL(name) in your code if you need to! - */ - -#undef XS_EUPXS -#if defined(PERL_EUPXS_ALWAYS_EXPORT) -# define XS_EUPXS(name) XS_EXTERNAL(name) -#else - /* default to internal */ -# define XS_EUPXS(name) XS_INTERNAL(name) -#endif - -EOF - - print <<"EOF"; -#ifndef PERL_ARGS_ASSERT_CROAK_XS_USAGE -#define PERL_ARGS_ASSERT_CROAK_XS_USAGE assert(cv); assert(params) - -/* prototype to pass -Wmissing-prototypes */ -STATIC void -S_croak_xs_usage(const CV *const cv, const char *const params); - -STATIC void -S_croak_xs_usage(const CV *const cv, const char *const params) -{ - const GV *const gv = CvGV(cv); - - PERL_ARGS_ASSERT_CROAK_XS_USAGE; - - if (gv) { - const char *const gvname = GvNAME(gv); - const HV *const stash = GvSTASH(gv); - const char *const hvname = stash ? HvNAME(stash) : NULL; - - if (hvname) - Perl_croak_nocontext("Usage: %s::%s(%s)", hvname, gvname, params); - else - Perl_croak_nocontext("Usage: %s(%s)", gvname, params); - } else { - /* Pants. I don't think that it should be possible to get here. */ - Perl_croak_nocontext("Usage: CODE(0x%" UVxf ")(%s)", PTR2UV(cv), params); - } -} -#undef PERL_ARGS_ASSERT_CROAK_XS_USAGE - -#define croak_xs_usage S_croak_xs_usage - -#endif - -/* NOTE: the prototype of newXSproto() is different in versions of perls, - * so we define a portable version of newXSproto() - */ -#ifdef newXS_flags -#define newXSproto_portable(name, c_impl, file, proto) newXS_flags(name, c_impl, file, proto, 0) -#else -#define newXSproto_portable(name, c_impl, file, proto) (PL_Sv=(SV*)newXS(name, c_impl, file), sv_setpv(PL_Sv, proto), (CV*)PL_Sv) -#endif /* !defined(newXS_flags) */ - -#if PERL_VERSION_LE(5, 21, 5) -# define newXS_deffile(a,b) Perl_newXS(aTHX_ a,b,file) -#else -# define newXS_deffile(a,b) Perl_newXS_deffile(aTHX_ a,b) -#endif - -/* simple backcompat versions of the TARGx() macros with no optimisation */ -#ifndef TARGi -# define TARGi(iv, do_taint) sv_setiv_mg(TARG, iv) -# define TARGu(uv, do_taint) sv_setuv_mg(TARG, uv) -# define TARGn(nv, do_taint) sv_setnv_mg(TARG, nv) -#endif - -EOF - return 1; -} - =head2 C =over 4 diff --git a/dist/ExtUtils-ParseXS/t/109-standard_XS_defs.t b/dist/ExtUtils-ParseXS/t/109-standard_XS_defs.t deleted file mode 100644 index da039205e3d1..000000000000 --- a/dist/ExtUtils-ParseXS/t/109-standard_XS_defs.t +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; -$| = 1; -use Test::More tests => 4; -use File::Spec; -use lib (-d 't' ? File::Spec->catdir(qw(t lib)) : 'lib'); -use ExtUtils::ParseXS::Utilities qw( - standard_XS_defs -); -use PrimitiveCapture; - -my @statements = ( - '#ifndef PERL_UNUSED_VAR', - '#ifndef PERL_ARGS_ASSERT_CROAK_XS_USAGE', - '#ifdef newXS_flags', -); - -my $stdout = PrimitiveCapture::capture_stdout(sub { - standard_XS_defs(); -}); - -foreach my $s (@statements) { - like( $stdout, qr/$s/s, "Printed <$s>" ); -} - -pass("Passed all tests in $0"); From 7ab37e8e917e8f6744ce736e4f8ecdb42f7426ad Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 24 Oct 2025 16:00:40 +0100 Subject: [PATCH 34/57] ParseXS: refactor: don't use $_ for C part of file (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) When parsing the C part of the XS file (mainly looking for POD and C code), don't use $_ and $. to hold the state of the next line. This was ok when it was a simple loop, but now that the logic is spread over multiple parse() methods, store the next line to process in $pxs->{lastline} and $pxs->{lastline_no}. (The XS half of the file already uses those variables, along with @{$pxs->{line}} etc.) --- .../lib/ExtUtils/ParseXS/Node.pm | 67 ++++++++++--------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index af4f4c82b179..848a8a85d757 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -520,6 +520,7 @@ sub parse { or return; push @{$self->{kids}}, $preamble; + # Process the first (C language) half of the XS file, up until the first # MODULE: line @@ -538,11 +539,6 @@ sub parse { or return; push @{$self->{kids}}, $C_part_postamble; - # At this point, $_ should hold the first MODULE line - - $pxs->{lastline} = $_; - $pxs->{lastline_no} = $.; - # Parse the XS half of the file my $cpp_scope = ExtUtils::ParseXS::Node::cpp_scope->new({type => 'main'}); @@ -650,16 +646,18 @@ sub parse { # Read in lines until the first MODULE line, creating a list of # Node::C_part_code and Node::C_part_POD nodes as children. - # Returns with $_ holding the (unprocessed) next line (or undef for - # EOF) + # Returns with $pxs->{lastline} holding the (unprocessed) next line + # (or undef for EOF) - $_ = readline($pxs->{in_fh}); + $pxs->{lastline} = readline($pxs->{in_fh}); + $pxs->{lastline_no} = $.; - while (defined $_) { - return 1 if $self->is_xs_module_line($_); + while (defined $pxs->{lastline}) { + return 1 if $self->is_xs_module_line($pxs->{lastline}); my $node = - /^=/ ? ExtUtils::ParseXS::Node::C_part_POD->new() + $pxs->{lastline} =~ /^=/ + ? ExtUtils::ParseXS::Node::C_part_POD->new() : ExtUtils::ParseXS::Node::C_part_code->new(); # Read in next block of code or POD lines @@ -699,19 +697,22 @@ sub parse { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; - $self->{line_no} = $.; + $self->{line_no} = $pxs->{lastline_no}; $self->{file} = $pxs->{in_pathname}; - # This method is called with $_ holding the first line of POD - # and returns with $_ holding the (unprocessed) next line + # This method is called with $pxs->{lastline} holding the first line + # of POD and returns with $pxs->{lastline} holding the (unprocessed) + # next line following the =cut line - do { - push @{$self->{pod_lines}}, $_; - if (/^=cut\s*$/) { - $_ = readline($pxs->{in_fh}); - return 1; - } - } while (readline($pxs->{in_fh})); + my $cut; + while (1) { + push @{$self->{pod_lines}}, $pxs->{lastline}; + $pxs->{lastline} = readline($pxs->{in_fh}); + $pxs->{lastline_no} = $.; + return 1 if $cut; + last unless defined $pxs->{lastline}; + $cut = $pxs->{lastline} =~ /^=cut\s*$/; + } # At this point $. is at end of file so die won't state the start # of the problem, and as we haven't yet read any lines &death won't @@ -772,17 +773,23 @@ sub parse { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; - $self->{line_no} = $.; + $self->{line_no} = $pxs->{lastline_no}; $self->{file} = $pxs->{in_pathname}; - # This method is called with $_ holding the first line of C code - # and returns with $_ holding the (unprocessed) next line + # This method is called with $pxs->{lastline} holding the first line + # of (possibly) C code and returns with $pxs->{lastline} holding the + # first (unprocessed) line which isn't C code (i.e. its the start of + # POD or a MODULE line) - do { - return 1 if $self->is_xs_module_line($_); - return 1 if /^=/; - push @{$self->{code_lines}}, $_; - } while (readline($pxs->{in_fh})); + my $cut; + while (1) { + return 1 if $self->is_xs_module_line($pxs->{lastline}); + return 1 if $pxs->{lastline} =~ /^=/; + push @{$self->{code_lines}}, $pxs->{lastline}; + $pxs->{lastline} = readline($pxs->{in_fh}); + $pxs->{lastline_no} = $.; + last unless defined $pxs->{lastline}; + } 1; } @@ -813,7 +820,7 @@ sub parse { my __PACKAGE__ $self = shift; my ExtUtils::ParseXS $pxs = shift; - $self->{line_no} = 1; + $self->{line_no} = $pxs->{lastline_no}; $self->{file} = $pxs->{in_pathname}; 1; } From 3194b77c4f530c642a95adce388270c21631a58f Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 24 Oct 2025 20:40:54 +0100 Subject: [PATCH 35/57] ParseXS: refactor: remove $ExtUtils::ParseXS::END (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) The parser, when parsing an XSUB (but not any other parts of an XS file), pushes a special token onto the end of the list of lines in @{$pxs->{line}} which make up the current paragraph. This token, $ExtUtils::ParseXS::END = "!End!\n\n"; (along with a trailing ':') is designed to look like an impossible keyword which can't actually appear in the source code (due to the multiple newlines). It looks like it was originally added in perl5.002 to make the parsing code easier, but I don't really understand why. It just makes the parser harder to understand. So this commit removes it, and just relies on @{$pxs->{line}} being zero to detect the end of the paragraph. This change doesn't alter the C code generated from any of the XS files bundled with perl. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 4 +--- .../lib/ExtUtils/ParseXS/Node.pm | 24 +++++-------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 2559d405db75..57478fba9f77 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -103,10 +103,8 @@ our $AUTHOR_WARNINGS; $AUTHOR_WARNINGS = ($ENV{AUTHOR_WARNINGS} || 0) unless defined $AUTHOR_WARNINGS; -# "impossible" keyword (multiple newline) -our $END = "!End!\n\n"; # Match an XS Keyword -our $BLOCK_regexp = '\s*(' . $ExtUtils::ParseXS::Constants::XSKeywordsAlternation . "|$END)\\s*:"; +our $BLOCK_regexp = '\s*(' . $ExtUtils::ParseXS::Constants::XSKeywordsAlternation . ")\\s*:"; # All the valid fields of an ExtUtils::ParseXS hash object. The 'use diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 848a8a85d757..8f27dccfb6be 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -1210,12 +1210,8 @@ sub parse { - # should just be a fake END token left - die "Internal error: bad END token\n" - unless $pxs->{line} - && @{$pxs->{line}} == 1 - && $pxs->{line}[0] eq "$ExtUtils::ParseXS::END:"; - pop @{$pxs->{line}}; + die "Internal error: unexpectedly not at EOF\n" + if @{$pxs->{line}}; $pxs->{seen_an_XSUB} = 1; # encountered at least one XSUB } # END 'PARAGRAPH' 'while' loop @@ -1786,13 +1782,6 @@ sub parse { or return; push @{$self->{kids}}, $decl; - # Append a fake EOF-keyword line. This makes it easy to do "all lines - # until the next keyword" style loops, since the fake END line (which - # includes a \n so it can't appear in the wild) is also matched as a - # keyword. - push(@{ $pxs->{line} }, "$ExtUtils::ParseXS::END:"); - push(@{ $pxs->{line_no} }, $pxs->{line_no}->[-1]); - $_ = ''; # Check all the @{ $pxs->{line}} lines for balance: all the @@ -1810,7 +1799,7 @@ sub parse { my $case_had_cond; # the previous CASE had a condition # Repeatedly look for CASE or XSUB body. - while (@{ $pxs->{line} }) { + while (1) { # Parse a CASE statement if present. my ($case) = $self->parse_keywords( @@ -1830,10 +1819,10 @@ sub parse { else { $seen_bare_xbody = 1; if ($num++) { - my $l = $pxs->{line}[0]; # After the first CASE+body, we should only encounter # further CASE+bodies or end-of-paragraph - last if $l eq "$ExtUtils::ParseXS::END:"; + last unless @{$pxs->{line}}; + my $l = $pxs->{line}[0]; $pxs->death( $l =~ /^$ExtUtils::ParseXS::BLOCK_regexp/o ? "Error: misplaced '$1:'" @@ -5701,8 +5690,7 @@ sub parse { $self->SUPER::parse($pxs); # set file/line_no/lines $xsub->{seen_PPCODE} = 1; - # The only thing left should be the special "!End!\n\n" token. - $pxs->death("Error: PPCODE must be the last thing") if @{$pxs->{line}} > 1; + $pxs->death("Error: PPCODE must be the last thing") if @{$pxs->{line}}; 1; } From 88aa069f7601eb75b6c2b41b078b14f3408d4eb2 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 24 Oct 2025 21:16:29 +0100 Subject: [PATCH 36/57] ParseXS: refactor: remove stray $_ (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Remove this line: $_ = ''; which is a leftover from when $_ used to maintain the current line between calls to various parsing subs. It's redundant now. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm | 2 -- 1 file changed, 2 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 8f27dccfb6be..c4ebae9c542d 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -1782,8 +1782,6 @@ sub parse { or return; push @{$self->{kids}}, $decl; - $_ = ''; - # Check all the @{ $pxs->{line}} lines for balance: all the # #if, #else, #endif etc within the XSUB should balance out. ExtUtils::ParseXS::Utilities::check_conditional_preprocessor_statements(); From 75ea6d6119472824b90f017bf115de697f9906c2 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 25 Oct 2025 14:04:14 +0100 Subject: [PATCH 37/57] ParseXS: spot MODULE line syntax errors Previously, a line which was not a completely syntactically correct MODULE line was not treated as a module line; instead it got passed on uninterpreted until likely causing an error further along in parsing. This commit changes things so that anything that looks like the *start* of a module line is treated as a module line, and is *only then* examined for full syntactic correctness, giving an error if not ok. For example: previously, this line: MODULE = Foo XXXPACKAGE = Foo::Bar gave the weird error message: Error: Function definition too short 'MODULE = Foo XXXPACKAGE ... but now gives the error message: Error: unparseable MODULE line: 'MODULE = Foo XXXPACKAGE ... In particular, any line starting with /^MODULE\s*[=:]/ is now treated as an attempt to declare a module, including the syntactically incorrect 'MODULE:' form. This is in the same spirit that other keywords are already processed; for example PROTOTYPES: XXXENABLE is treated as as a badly-formed PROTOTYPES line rather than an otherwise unrecognised and unprocessed line. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 16 +++-- .../lib/ExtUtils/ParseXS/Node.pm | 25 ++----- .../lib/ExtUtils/ParseXS/Utilities.pm | 28 +++++++- dist/ExtUtils-ParseXS/t/001-basic.t | 66 +++++++++++++++++++ 4 files changed, 111 insertions(+), 24 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index 57478fba9f77..f556ce3f8550 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -523,8 +523,6 @@ sub _process_module_xs_line { $self->{PACKAGE_class} = $self->{PACKAGE_name}; $self->{PACKAGE_class} .= "::" if $self->{PACKAGE_class} ne ""; - - $self->{lastline} = ""; } @@ -674,14 +672,24 @@ sub fetch_para { my ExtUtils::ParseXS $self = shift; return 0 if not defined $self->{lastline}; # EOF + chomp $self->{lastline}; # may not already have been for first MODULE line @{ $self->{line} } = (); @{ $self->{line_no} } = (); - if ($self->{lastline} =~ - /^MODULE\s*=\s*([\w:]+)(?:\s+PACKAGE\s*=\s*([\w:]+))?(?:\s+PREFIX\s*=\s*(\S+))?\s*$/) + if (ExtUtils::ParseXS::Utilities::looks_like_MODULE_line($self->{lastline})) { + $self->{lastline} =~ + /^ + MODULE \s* = \s* ([\w:]+) + (?: \s+ PACKAGE \s* = \s* ([\w:]+))? + (?: \s+ PREFIX \s* = \s* (\S+))? + \s* + $/x + or $self->death("Error: unparseable MODULE line: '$self->{lastline}'"); + $self->_process_module_xs_line($1, $2, $3); + $self->{lastline} = ""; } # count how many #ifdef levels we see in this paragraph diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index c4ebae9c542d..9de349e35fd9 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -370,21 +370,6 @@ sub parse_keywords { return @kids; } -# return (module, package, prefix) values if the line -# is a valid 'MODULE = ...' line - -sub is_xs_module_line { - my __PACKAGE__ $self = shift; - my $line = shift; - - $line =~ - /^ MODULE \s* = \s* [\w:]+ - (?: \s+ PACKAGE \s* = \s* ( [\w:]+ ) )? - (?: \s+ PREFIX \s* = \s* ( \S+ ) )? - \s* $/x; -} - - sub as_code { } # Most node types inherit this: just continue walking the tree @@ -646,14 +631,15 @@ sub parse { # Read in lines until the first MODULE line, creating a list of # Node::C_part_code and Node::C_part_POD nodes as children. - # Returns with $pxs->{lastline} holding the (unprocessed) next line - # (or undef for EOF) + # Returns with $pxs->{lastline} holding the next line (i.e. the MODULE + # line) or errors out if not found $pxs->{lastline} = readline($pxs->{in_fh}); $pxs->{lastline_no} = $.; while (defined $pxs->{lastline}) { - return 1 if $self->is_xs_module_line($pxs->{lastline}); + return 1 if ExtUtils::ParseXS::Utilities::looks_like_MODULE_line( + $pxs->{lastline}); my $node = $pxs->{lastline} =~ /^=/ @@ -783,7 +769,8 @@ sub parse { my $cut; while (1) { - return 1 if $self->is_xs_module_line($pxs->{lastline}); + return 1 if ExtUtils::ParseXS::Utilities::looks_like_MODULE_line( + $pxs->{lastline}); return 1 if $pxs->{lastline} =~ /^=/; push @{$self->{code_lines}}, $pxs->{lastline}; $pxs->{lastline} = readline($pxs->{in_fh}); diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm index f45a0b1d69eb..a3be93d43cc9 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Utilities.pm @@ -25,6 +25,7 @@ our (@ISA, @EXPORT_OK); check_conditional_preprocessor_statements escape_file_for_line_directive report_typemap_failure + looks_like_MODULE_line ); =head1 NAME @@ -362,7 +363,11 @@ The current line number. sub current_line_number { my ExtUtils::ParseXS $self = shift; - my $line_number = $self->{line_no}->[@{ $self->{line_no} } - @{ $self->{line} } -1]; + # NB: until the first MODULE line is encountered, $self->{line_no} etc + # won't have been populated + my $line_number = @{$self->{line_no}} + ? $self->{line_no}->[@{ $self->{line_no} } - @{ $self->{line} } -1] + : $self->{lastline_no}; return $line_number; } @@ -588,6 +593,27 @@ sub report_typemap_failure { return(); } +=head2 C + +Returns true if the passed line looks like an attempt to be a MODULE line. +Note that it doesn't check for valid syntax. This allows the caller to do +its own parsing of the line, providing some sort of 'invalid MODULE line' +check. As compared with thinking that its not a MODULE line if its syntax +is slightly off, leading instead to some weird error about a bad start to +an XSUB or something. + +In particular, a line starting C returns true, because it's +likely to be an attempt by the programmer to write a MODULE line, even +though it's invalid syntax. + +=cut + +sub looks_like_MODULE_line { + my $line = shift; + $line =~ /^MODULE\s*[=:]/; +} + + 1; diff --git a/dist/ExtUtils-ParseXS/t/001-basic.t b/dist/ExtUtils-ParseXS/t/001-basic.t index 09b49bff2b6f..9f38c335eb91 100644 --- a/dist/ExtUtils-ParseXS/t/001-basic.t +++ b/dist/ExtUtils-ParseXS/t/001-basic.t @@ -5701,4 +5701,70 @@ EOF test_many($preamble, 'boot_Foo', \@test_fns); } +{ + # Test reporting of bad syntax on MODULE lines. + + my $preamble = Q(<<'EOF'); +EOF + + my @test_fns = ( + [ + '1st MODULE PKG', + [ Q(<<'EOF') ], + |MODULE = X PKG = Y + | + |PROTOTYPES: DISABLE + | +EOF + + [ 1, 0, qr{Error: unparseable MODULE line: 'MODULE = X PKG = Y'}, + "got expected err msg" + ], + ], + [ + '1st MODULE colon', + [ Q(<<'EOF') ], + |MODULE: X PACKAGE = Y + | + |PROTOTYPES: DISABLE + | +EOF + + [ 1, 0, qr{Error: unparseable MODULE line: 'MODULE: X PACKAGE = Y'}, + "got expected err msg" + ], + ], + [ + '2nd MODULE PKG', + [ Q(<<'EOF') ], + |MODULE = Foo PACKAGE = Foo + | + |PROTOTYPES: DISABLE + | + |MODULE = X PKG = Y +EOF + + [ 1, 0, qr{Error: unparseable MODULE line: 'MODULE = X PKG = Y'}, + "got expected err msg" + ], + ], + [ + '2nd MODULE colon', + [ Q(<<'EOF') ], + |MODULE = Foo PACKAGE = Foo + | + |PROTOTYPES: DISABLE + | + |MODULE: X PACKAGE = Y +EOF + + [ 1, 0, qr{Error: unparseable MODULE line: 'MODULE: X PACKAGE = Y'}, + "got expected err msg" + ], + ], + ); + + test_many($preamble, undef, \@test_fns); +} + done_testing; From 7eb5946f880a70a56c109de45fd3ed044767e8e4 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 25 Oct 2025 15:38:44 +0100 Subject: [PATCH 38/57] ParseXS: refactor: add Node::MODULE (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add a new node type, ExtUtils::ParseXS::Node::MODULE. this (mostly) regularises the treatment of a MODULE line, now handled in the usual way by parse_keywords(), rather than being a special magical snowflake that got its own handling in fetch_para(). This commit also changes one parameter of parse_keywords() from being a boolean to being a bit flag, now that there are now *two* slightly special cases to flag up: MODULE, in addition to NOT_IMPLEMENTED_YET. This commit is supposed to have no changes in behaviour, but there *might* be some edge cases that I haven't thought of. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 33 ------- .../lib/ExtUtils/ParseXS/Node.pm | 89 +++++++++++++++++-- 2 files changed, 81 insertions(+), 41 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index f556ce3f8550..da1a75d53507 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -508,24 +508,6 @@ sub Q { } -# Process "MODULE = Foo ..." lines and update global state accordingly - -sub _process_module_xs_line { - my ExtUtils::ParseXS $self = shift; - my ($module, $pkg, $prefix) = @_; - - ($self->{MODULE_cname} = $module) =~ s/\W/_/g; - - $self->{PACKAGE_name} = defined($pkg) ? $pkg : ''; - $self->{PREFIX_pattern} = quotemeta( defined($prefix) ? $prefix : '' ); - - ($self->{PACKAGE_C_name} = $self->{PACKAGE_name}) =~ tr/:/_/; - - $self->{PACKAGE_class} = $self->{PACKAGE_name}; - $self->{PACKAGE_class} .= "::" if $self->{PACKAGE_class} ne ""; -} - - # Skip any embedded POD sections, reading in lines from {in_fh} as necessary. sub _maybe_skip_pod { @@ -677,21 +659,6 @@ sub fetch_para { @{ $self->{line} } = (); @{ $self->{line_no} } = (); - if (ExtUtils::ParseXS::Utilities::looks_like_MODULE_line($self->{lastline})) - { - $self->{lastline} =~ - /^ - MODULE \s* = \s* ([\w:]+) - (?: \s+ PACKAGE \s* = \s* ([\w:]+))? - (?: \s+ PREFIX \s* = \s* (\S+))? - \s* - $/x - or $self->death("Error: unparseable MODULE line: '$self->{lastline}'"); - - $self->_process_module_xs_line($1, $2, $3); - $self->{lastline} = ""; - } - # count how many #ifdef levels we see in this paragraph # decrementing when we see an endif. if we see an elsif # or endif without a corresponding #ifdef then we don't diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm index 9de349e35fd9..7cc52e22a2a9 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS/Node.pm @@ -208,6 +208,11 @@ shown above. my $open_brace = '{'; my $close_brace = '}'; +# values for parse_keywords() flags +# (Can't assume 'constant.pm' is present yet) + +my $keywords_flag_MODULE = 1; +my $keywords_flag_NOT_IMPLEMENTED_YET = 2; # Utility sub to handle all the boilerplate of declaring a Node subclass, # including setting up @INC and @FIELDS. Intended to be called from within @@ -327,6 +332,10 @@ sub parse { # If $max is defined, it specifies the maximum number of keywords to # process. This value is typically passed as undef (unlimited) or 1 # (just grab the next keyword). +# $flags can contain $keywords_flag_MODULE or +# keywords_flag_NOT_IMPLEMENTED_YET to indicate to match one of those +# keywords too (whose syntax is slightly different from 'KEY:' and +# so need special handling sub parse_keywords { my __PACKAGE__ $self = shift; @@ -335,24 +344,39 @@ sub parse_keywords { my $xbody = shift; my $max = shift; # max number of keywords to process my $pat = shift; - my $do_notimplemented = shift; + my $flags = shift; + + $flags = 0 unless defined $flags; my $n = 0; my @kids; while (@{$pxs->{line}}) { my $line = shift @{$pxs->{line}}; next unless $line =~ /\S/; - # extract/delete recognised keyword and any following comment - unless ( $line =~ s/^(\s*)($pat)\s*:\s*(?:#.*)?/$1/s - or ( $do_notimplemented + + # extract/delete recognised keyword and any following text + my $keyword; + + if ( ($flags & $keywords_flag_MODULE) + && ExtUtils::ParseXS::Utilities::looks_like_MODULE_line($line) + ) + { + $keyword = 'MODULE'; + } + elsif ( $line =~ s/^(\s*)($pat)\s*:\s*(?:#.*)?/$1/s + or ( ($flags & $keywords_flag_NOT_IMPLEMENTED_YET) && $line =~ s/^(\s*)(NOT_IMPLEMENTED_YET)/$1/ ) - ) { + ) + { + $keyword = $2 + } + else { # stop at unrecognised line unshift @{$pxs->{line}}, $line; last; } - my $keyword = $2; + unshift @{$pxs->{line}}, $line; # create a node for the keyword and parse any lines associated # with it. @@ -1167,7 +1191,8 @@ sub parse { undef, undef, # xsub and xbody: not needed for non XSUB keywords undef, # implies process as many keywords as possible "BOOT|REQUIRE|PROTOTYPES|EXPORT_XSUB_SYMBOLS|FALLBACK" - . "|VERSIONCHECK|INCLUDE|INCLUDE_COMMAND|SCOPE" + . "|VERSIONCHECK|INCLUDE|INCLUDE_COMMAND|SCOPE", + $keywords_flag_MODULE, ); # skip blank lines @@ -4252,7 +4277,7 @@ sub parse { $pxs, $xsub, $xbody, 1, # match at most one keyword "CODE|PPCODE", - 1, # also match NOT_IMPLEMENTED_YET + $keywords_flag_NOT_IMPLEMENTED_YET, ); # Didn't find a CODE keyword or similar, so auto-generate a call @@ -4529,6 +4554,54 @@ sub parse { } +# ====================================================================== + +package ExtUtils::ParseXS::Node::MODULE; + +# Process a MODULE keyword, e.g. +# +# MODULE = Foo PACKAGE = Foo::Bar PREFIX = foo_ + +BEGIN { $build_subclass->(-parent => 'oneline', + 'module', # Str + 'package', # Str: may be '' + 'prefix', # Str: may be '' +)}; + + +sub parse { + my __PACKAGE__ $self = shift; + my ExtUtils::ParseXS $pxs = shift; + + $self->SUPER::parse($pxs); # set file/line_no + + my $line = $self->{text}; + my ($module, $pkg, $prefix) = $line =~ + /^ + MODULE \s* = \s* ([\w:]+) + (?: \s+ PACKAGE \s* = \s* ([\w:]+))? + (?: \s+ PREFIX \s* = \s* (\S+))? + \s* + $/x + or $pxs->death("Error: unparseable MODULE line: '$line'"); + + $self->{module} = $module; + ($pxs->{MODULE_cname} = $module) =~ s/\W/_/g; + + $self->{package} = $pxs->{PACKAGE_name} = defined($pkg) ? $pkg : ''; + + $self->{prefix} = $prefix = defined($prefix) ? $prefix : ''; + $pxs->{PREFIX_pattern} = quotemeta($prefix); + + ($pxs->{PACKAGE_C_name} = $pxs->{PACKAGE_name}) =~ tr/:/_/; + + $pxs->{PACKAGE_class} = $pxs->{PACKAGE_name}; + $pxs->{PACKAGE_class} .= "::" if $pxs->{PACKAGE_class} ne ""; + + 1; +} + + # ====================================================================== package ExtUtils::ParseXS::Node::NOT_IMPLEMENTED_YET; From 4eb804fa2e82bfa8b59b7e7b89e861c753aaa523 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 25 Oct 2025 16:37:51 +0100 Subject: [PATCH 39/57] ParseXS: spot TYPEMAP: line syntax errors Previously, a line which was not a completely syntactically correct TYPEMAP: line was not treated as a TYPEMAP line; instead it got passed on uninterpreted until likely causing an error further along in parsing. This commit changes things so that anything starting with /^TYPEMAP\s*:/ is treated as a TYPEMAP line, and is *only then* examined for full syntactic correctness, giving an error if not ok. This is similar to two commits ago which did the same for the MODULE keyword. --- dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm | 10 +++++-- dist/ExtUtils-ParseXS/t/001-basic.t | 28 +++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm index da1a75d53507..a45cecdfefc9 100644 --- a/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm +++ b/dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm @@ -533,8 +533,13 @@ sub _maybe_parse_typemap_block { # This is special cased from the usual paragraph-handler logic # due to the HEREdoc-ish syntax. - if ($self->{lastline} =~ /^TYPEMAP\s*:\s*<<\s*(?:(["'])(.+?)\1|([^\s'"]+?))\s*;?\s*$/) - { + return unless $self->{lastline} =~ /^TYPEMAP\s*:/; + + $self->{lastline} =~ /^TYPEMAP\s*:\s*<<\s*(?:(["'])(.+?)\1|([^\s'"]+?))\s*;?\s*$/ + or $self->death("Error: unparseable TYPEMAP line: '$self->{lastline}'"); + + + my $end_marker = quotemeta(defined($1) ? $2 : $3); # Scan until we find $end_marker alone on a line. @@ -554,7 +559,6 @@ sub _maybe_parse_typemap_block { $self->{typemaps_object}->merge(typemap => $tmap, replace => 1); $self->{lastline} = ""; - } } diff --git a/dist/ExtUtils-ParseXS/t/001-basic.t b/dist/ExtUtils-ParseXS/t/001-basic.t index 9f38c335eb91..26bf4ecffdd9 100644 --- a/dist/ExtUtils-ParseXS/t/001-basic.t +++ b/dist/ExtUtils-ParseXS/t/001-basic.t @@ -5767,4 +5767,32 @@ EOF test_many($preamble, undef, \@test_fns); } + +{ + # Test reporting of bad syntax on TYPEMAP lines. + + my $preamble = Q(<<'EOF'); + |MODULE = Foo PACKAGE = Foo + | + |PROTOTYPES: DISABLE + | +EOF + + my @test_fns = ( + [ + 'TYPEMAP syntax err', + [ Q(<<'EOF') ], + |TYPEMAP: Date: Sat, 25 Oct 2025 19:02:23 +0100 Subject: [PATCH 40/57] ParseXS: refactor: add Node::TYPEMAP (This commit is part of a series which will extend the AST parse tree from just representing individual XSUBs to representing the whole XS file.) Add a new node type, ExtUtils::ParseXS::Node::TYPEMAP. this (mostly) regularises the treatment of a TYPEMAP line, now handled in the usual way by parse_keywords(), rather than being processed solely within fetch_para(). fetch_para() does still need to do *some* processing: it has to read the TYPEMAP line, extract out the <