Fixed parsing errors for forum posts that contained a copy of the signature separator in the signature

Galarzaa90 · Galarzaa90 · commit 3aa4a12a522b · 2021-04-27T09:50:51.000-07:00
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,12 @@ Changelog
     Due to this library relying on external content, older versions are not guaranteed to work.
     Try to always use the latest version.
 
+.. v4.1.2
+
+4.1.2 (2021-04-27)
+==================
+- Fixed parsing errors for forum posts that contained a copy of the signature separator in the signature.
+
 .. v4.1.1
 
 4.1.1 (2021-04-19)
diff --git a/tests/tests_client.py b/tests/tests_client.py
@@ -1,4 +1,5 @@
 import datetime
+import sys
 import unittest.mock
 
 import aiohttp
@@ -278,6 +279,7 @@ async def test_client_fetch_event_calendar_invalid_params(self):
             await self.client.fetch_event_schedule(3)
 
     @unittest.mock.patch("tibiapy.bazaar.AuctionDetails._parse_page_items")
+    @unittest.skipIf(sys.version_info < (3, 8, 0), "AsyncMock was implemented in 3.8")
     async def test_client__fetch_all_pages_success(self, parse_page_items):
         """Testing internal method to fetch all pages of an auction item collection."""
         paginator = tibiapy.ItemSummary(page=1, total_pages=5)
diff --git a/tibiapy/__init__.py b/tibiapy/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '4.1.1'
+__version__ = '4.1.2'
 __author__ = 'Allan Galarza'
 
 import logging
diff --git a/tibiapy/forum.py b/tibiapy/forum.py
@@ -1071,13 +1071,22 @@ def _parse_post_table(cls, post_table, offset=1):
         character_info_container = post_table.find("div", attrs={"class": "PostCharacterText"})
         post_author = ForumAuthor._parse_author_table(character_info_container)
         content_container = post_table.find("div", attrs={"class": "PostText"})
-        content = content_container.encode_contents().decode()
         title = None
         signature = None
-        if signature_separator in content:
-            content, _ = content.split(signature_separator)
-        title_raw, content = content.split("<br/><br/>", 1)
         emoticon = None
+        signature_container = post_table.find("td", attrs={"class": "ff_pagetext"})
+        if signature_container:
+            # Remove the signature's markup from the content container
+            signature_container.extract()
+            signature = signature_container.encode_contents().decode()
+        content = content_container.encode_contents().decode()
+        if signature_container:
+            # The signature separator will still be part of the content container, so we remove it
+            parts = content.split(signature_separator)
+            # The post body itself may also contain the signature separator, so there can be more than two pieces.
+            # Only the text after the final separator is dropped; every earlier piece is joined back together
+            content = signature_separator.join(parts[:-1])
+        title_raw, content = content.split("<br/><br/>", 1)
         if title_raw:
             title_html = bs4.BeautifulSoup(title_raw, 'lxml')
             emoticon_img = title_html.find("img")
@@ -1086,9 +1095,6 @@ def _parse_post_table(cls, post_table, offset=1):
             title_tag = title_html.find("b")
             if title_tag:
                 title = title_tag.text
-        signature_container = post_table.find("td", attrs={"class": "ff_pagetext"})
-        if signature_container:
-            signature = signature_container.encode_contents().decode()
         post_details = post_table.find('div', attrs={"class": "PostDetails"})
         dates = post_dates_regex.findall(post_details.text)
         edited_date = None

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = '4.1.1'`
	`1`	`+__version__ = '4.1.2'`
`2`	`2`	`__author__ = 'Allan Galarza'`
`3`	`3`
`4`	`4`	`import logging`