Skip to content

Commit 9e6335e

Browse files
committed
rss-bot: Add option to convert body to Markdown
1 parent d8cfa66 commit 9e6335e

File tree

3 files changed

+23
-1
lines changed

3 files changed

+23
-1
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ module = [
6767
"google_auth_oauthlib.*",
6868
"googleapiclient.*",
6969
"irc.*",
70+
"markdownify.*",
7071
"mercurial.*",
7172
"nio.*",
7273
"oauth2client.*",
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
feedparser>=6.0.10
2+
markdownify>=0.11.6

zulip/integrations/rss/rss-bot

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ import re
1313
import sys
1414
import time
1515
import urllib.parse
16+
from collections.abc import Callable
1617
from html.parser import HTMLParser
1718
from typing import Any, Dict, List, Optional, Tuple
1819

1920
import feedparser
21+
from markdownify import markdownify
2022
from typing_extensions import override
2123

2224
import zulip
@@ -92,6 +94,19 @@ parser.add_argument(
9294
help="Convert $ to $$ (for KaTeX processing)",
9395
default=False,
9496
)
97+
# Body-rendering mode: at most one of --strip / --markdownify may be given.
# Both flags write to the same destination, ``opts.strip``, which the sender
# reads to choose between stripping HTML tags and converting to Markdown.
#
# argparse seeds a shared destination from whichever action was registered
# first, so relying on the implicit store_true/store_false defaults would make
# the default depend on declaration order.  Pin it explicitly on both flags:
# with neither flag given, ``opts.strip`` is False (convert to Markdown).
body = parser.add_mutually_exclusive_group()
body.add_argument(
    "--strip",
    dest="strip",
    action="store_true",
    default=False,
    help="Strip HTML tags from body",
)
body.add_argument(
    "--markdownify",
    dest="strip",
    action="store_false",
    default=False,
    help="Convert body from HTML to Markdown (default)",
)
95110

96111
opts = parser.parse_args()
97112

@@ -178,7 +193,12 @@ def send_zulip(entry: Any, feed_name: str) -> Dict[str, Any]:
178193
body = unwrap_text(body)
179194

180195
title = f"**[{entry.title}]({entry.link})**\n" if hasattr(entry, "title") else ""
181-
content = f"{title}{strip_tags(body)}\n{entry.link}"
196+
197+
def md(html: str) -> str:
198+
return markdownify(html, escape_underscores=False)
199+
200+
convert: Callable[[str], str] = strip_tags if opts.strip else md
201+
content = f"{title}{convert(body)}\n{entry.link}"
182202

183203
if opts.math:
184204
content = content.replace("$", "$$")

0 commit comments

Comments
 (0)