From ddf7ca735d9c85874de96be7990917707cf9c760 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 04:05:27 +0000 Subject: [PATCH 1/3] Initial plan From 55b89e634adbbef0f5a8cacc1831569030f168b2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 04:23:01 +0000 Subject: [PATCH 2/3] Add test case for anchor name bug (issue #15) Co-authored-by: oalders <96205+oalders@users.noreply.github.com> --- "[0], attrs: \", join(\", \", map { \"$_=-" | 0 t/anchor_name_bug.html | 11 +++++ t/anchor_name_bug.t | 45 +++++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 "[0], attrs: \", join(\", \", map { \"$_=-" create mode 100644 t/anchor_name_bug.html create mode 100644 t/anchor_name_bug.t diff --git "a/[0], attrs: \", join(\", \", map { \"$_=-" "b/[0], attrs: \", join(\", \", map { \"$_=-" new file mode 100644 index 00000000..e69de29b diff --git a/t/anchor_name_bug.html b/t/anchor_name_bug.html new file mode 100644 index 00000000..dc439ae8 --- /dev/null +++ b/t/anchor_name_bug.html @@ -0,0 +1,11 @@ + + + Test for anchor name bug + + +

hello world

+ +

test1

+

test2

+ + diff --git a/t/anchor_name_bug.t b/t/anchor_name_bug.t new file mode 100644 index 00000000..67af3747 --- /dev/null +++ b/t/anchor_name_bug.t @@ -0,0 +1,45 @@ +#!perl + +use warnings; +use strict; + +use Test::More; +use URI::file (); + +BEGIN { + use_ok('WWW::Mechanize'); +} + +# Test for issue #15: Mechanize seemed to discard the first URL after +# an <a name="..."/> tag in an HTML page. +# See: http://code.google.com/p/www-mechanize/issues/detail?id=15 + +my $mech = WWW::Mechanize->new( cookie_jar => undef, max_redirect => 0 ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs('t/anchor_name_bug.html')->as_string; + +$mech->get($uri); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +# The bug was that the first link after <a name="..."/> was being discarded +my @links = $mech->find_all_links(); + +# We should find exactly 2 links (test1 and test2), NOT just 1 +is( scalar(@links), 2, 'Should find 2 links, not just 1' ); + +# Verify first link is test1 +my $link1 = $mech->find_link( text => 'test1' ); +isa_ok( $link1, 'WWW::Mechanize::Link', 'First link (test1) should exist' ); +is( $link1->url, 'http://www.url1.com/gi1?a=1', 'First link URL is correct' ); + +# Verify second link is test2 +my $link2 = $mech->find_link( text => 'test2' ); +isa_ok( $link2, 'WWW::Mechanize::Link', 'Second link (test2) should exist' ); +is( $link2->url, 'http://www.url2.com/gi2?a=2', 'Second link URL is correct' ); + +# Verify links are in correct order +is( $links[0]->url, 'http://www.url1.com/gi1?a=1', 'First link in order' ); +is( $links[1]->url, 'http://www.url2.com/gi2?a=2', 'Second link in order' ); + +done_testing(); From 1d2354a9473fe8ef5f82eb88e5e5cc80aa154ce6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 04:26:59 +0000 Subject: [PATCH 3/3] Fix anchor name bug: prevent self-closing tags from consuming next link Co-authored-by: oalders <96205+oalders@users.noreply.github.com> --- "[0], 
attrs: \", join(\", \", map { \"$_=-" | 0 lib/WWW/Mechanize.pm | 6 +++++- 2 files changed, 5 insertions(+), 1 deletion(-) delete mode 100644 "[0], attrs: \", join(\", \", map { \"$_=-" diff --git "a/[0], attrs: \", join(\", \", map { \"$_=-" "b/[0], attrs: \", join(\", \", map { \"$_=-" deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm index 83a7f304..fa0dbbfa 100644 --- a/lib/WWW/Mechanize.pm +++ b/lib/WWW/Mechanize.pm @@ -3500,7 +3500,11 @@ sub _link_from_token { my $text; my $name; if ( $tag eq 'a' ) { - $text = $parser->get_trimmed_text("/$tag"); + # Only get text content if this is not a self-closing tag + # Self-closing tags (e.g., <a name="foo"/>) have a '/' key in attrs + if ( !$attrs->{'/'} ) { + $text = $parser->get_trimmed_text("/$tag"); + } $text = q{} unless defined $text; my $onClick = $attrs->{onclick};