@@ -1071,13 +1071,22 @@ def _parse_post_table(cls, post_table, offset=1):
10711071 character_info_container = post_table .find ("div" , attrs = {"class" : "PostCharacterText" })
10721072 post_author = ForumAuthor ._parse_author_table (character_info_container )
10731073 content_container = post_table .find ("div" , attrs = {"class" : "PostText" })
1074- content = content_container .encode_contents ().decode ()
10751074 title = None
10761075 signature = None
1077- if signature_separator in content :
1078- content , _ = content .split (signature_separator )
1079- title_raw , content = content .split ("<br/><br/>" , 1 )
10801076 emoticon = None
1077+ signature_container = post_table .find ("td" , attrs = {"class" : "ff_pagetext" })
1078+ if signature_container :
1079+ # Remove the signature's content from content container
1080+ signature_container .extract ()
1081+ signature = signature_container .encode_contents ().decode ()
1082+ content = content_container .encode_contents ().decode ()
1083+ if signature_container :
1084+ # The signature separator will still be part of the content container, so we remove it
1085+ parts = content .split (signature_separator )
1086+ # This will handle the post containing another signature separator within the content
1087+ # We join back all the pieces except for the last one
1088+ content = signature_separator .join (parts [:- 1 ])
1089+ title_raw , content = content .split ("<br/><br/>" , 1 )
10811090 if title_raw :
10821091 title_html = bs4 .BeautifulSoup (title_raw , 'lxml' )
10831092 emoticon_img = title_html .find ("img" )
@@ -1086,9 +1095,6 @@ def _parse_post_table(cls, post_table, offset=1):
10861095 title_tag = title_html .find ("b" )
10871096 if title_tag :
10881097 title = title_tag .text
1089- signature_container = post_table .find ("td" , attrs = {"class" : "ff_pagetext" })
1090- if signature_container :
1091- signature = signature_container .encode_contents ().decode ()
10921098 post_details = post_table .find ('div' , attrs = {"class" : "PostDetails" })
10931099 dates = post_dates_regex .findall (post_details .text )
10941100 edited_date = None
0 commit comments