Skip to content

Commit 80931b3

Browse files
TheTripleVDalton Smith
andauthored
Add site_name Support and Enhance Description (#4)
* Move html tag formatting to a function * Add support for og:site_name tag * Enhance description * Get title by parsing context instead of from doctree * Add tests to new description creator * Add newline * Add newline Co-authored-by: Dalton Smith <gamefollower26@gmail.com>
1 parent 0e85f65 commit 80931b3

File tree

15 files changed

+293
-17
lines changed

15 files changed

+293
-17
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ These values are placed in the conf.py of your sphinx project.
1212
* This config option is very important, set it to the URL the site is being hosted on.
1313
* `ogp_description_length`
1414
* Configure the number of characters taken from a page. The default of 200 is probably good for most people. If something other than a number is used, it defaults back to 200.
15+
* `ogp_site_name`
16+
* This is not required. Name of the site. This is displayed above the title.
1517
* `ogp_image`
1618
* This is not required. Link to image to show.
1719
* `ogp_type`

sphinxext/opengraph.py

Lines changed: 165 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,182 @@
11
from urllib.parse import urljoin
2+
import docutils.nodes as nodes
3+
import string
4+
from html.parser import HTMLParser
5+
26

37
DEFAULT_DESCRIPTION_LENGTH = 200
48

9+
class HTMLTextParser(HTMLParser):
    """
    Parse HTML into text.

    After markup has been fed to the parser, ``text`` holds every piece of
    character data that was found and ``text_outside_tags`` holds only the
    character data seen while no element was open.
    """

    # Void elements never get a closing tag. Counting them as "open" would
    # make all text after e.g. ``<br>`` look like it is nested inside a tag.
    VOID_ELEMENTS = frozenset(
        {
            "area",
            "base",
            "br",
            "col",
            "embed",
            "hr",
            "img",
            "input",
            "link",
            "meta",
            "param",
            "source",
            "track",
            "wbr",
        }
    )

    def __init__(self):
        super().__init__()
        # All text found
        self.text = ""
        # Only text outside of html tags
        self.text_outside_tags = ""
        # Number of currently open (non-void) elements
        self.level = 0

    def handle_starttag(self, tag, attrs):
        """Record that a non-void element has been opened."""
        if tag not in self.VOID_ELEMENTS:
            self.level += 1

    def handle_endtag(self, tag):
        """Record that an element has been closed."""
        # Void and stray end tags are ignored so the depth never goes
        # negative, which would hide all following top-level text.
        if tag not in self.VOID_ELEMENTS and self.level > 0:
            self.level -= 1

    def handle_data(self, data):
        """Collect character data, tracking top-level text separately."""
        self.text += data
        if self.level == 0:
            self.text_outside_tags += data
31+
32+
class OGMetadataCreatorVisitor(nodes.NodeVisitor):
    """
    Finds the title and creates a description from a doctree.

    Intended to be passed to ``doctree.walkabout(...)``. While walking, the
    text of leaf nodes is appended to ``description`` until it exceeds
    ``desc_len`` characters, at which point it is truncated and the
    traversal is stopped.
    """

    def __init__(self, desc_len, known_titles=None, document=None):
        """
        :param desc_len: maximum length of the generated description
        :param known_titles: strings regarded as the page title; if the first
            title node's text is one of them, that node is skipped
        :param document: forwarded to ``NodeVisitor.__init__``; a minimal
            stand-in is created when omitted
        """
        # Hack to prevent requirement for the doctree to be passed in.
        # It's only used by doctree.walk(...) to print debug messages.
        if document is None:

            class document_cls:
                class reporter:
                    @staticmethod
                    def debug(*args, **kwargs):
                        pass

            document = document_cls()

        if known_titles is None:
            known_titles = []

        super().__init__(document)
        self.description = ""
        self.desc_len = desc_len
        self.list_level = 0
        self.known_titles = known_titles
        self.first_title_found = False

        # Exceptions can't be raised from dispatch_departure()
        # This is used to loop the stop call back to the next dispatch_visit()
        self.stop = False

    def dispatch_visit(self, node: nodes.Element) -> None:
        """Append the text of ``node`` to the description if it is a leaf."""
        if self.stop:
            raise nodes.StopTraversal

        # Skip all admonitions
        if isinstance(node, nodes.Admonition):
            raise nodes.SkipNode

        # Mark start of nested lists
        if isinstance(node, nodes.Sequential):
            self.list_level += 1
            if self.list_level > 1:
                self.description += "-"

        # Skip the first title if it's the title of the page
        if not self.first_title_found and isinstance(node, nodes.title):
            self.first_title_found = True
            if node.astext() in self.known_titles:
                raise nodes.SkipNode

        # Only include leaf nodes in the description
        if len(node.children) == 0:
            text = node.astext().replace("\r", "").replace("\n", " ").strip()

            # Remove double spaces. The pattern is built explicitly so it
            # cannot degrade into a single space, which would loop forever.
            double_space = " " * 2
            while double_space in text:
                text = text.replace(double_space, " ")

            # Put a space between elements if one does not already exist.
            if (
                len(self.description) > 0
                and len(text) > 0
                and self.description[-1] not in string.whitespace
                and text[0] not in string.whitespace + string.punctuation
            ):
                self.description += " "

            self.description += text

    def dispatch_departure(self, node: nodes.Element) -> None:
        """Add separators after ``node`` and enforce the length limit."""
        # Separate title from text
        if isinstance(node, nodes.title):
            self.description += ":"

        # Separate list elements
        if isinstance(node, nodes.Part):
            self.description += ","

        # Separate end of list from text
        if isinstance(node, nodes.Sequential):
            # endswith() is safe even when the description is still empty
            if self.description.endswith(","):
                self.description = self.description[:-1]
            self.description += "."
            self.list_level -= 1

        # Check for length
        if len(self.description) > self.desc_len:
            self.description = self.description[: self.desc_len]
            if self.desc_len >= 3:
                self.description = self.description[:-3] + "..."

            self.stop = True
128+
129+
130+
def make_tag(property: str, content: str) -> str:
    """
    Format a single ``<meta>`` tag.

    :param property: value of the ``property`` attribute, e.g. ``og:title``
    :param content: value of the ``content`` attribute
    :return: the tag followed by a newline and the separator used between tags
    """
    # Local import: only ``html.parser`` is imported at module level.
    from html import escape

    # Escape both values so quotes, ampersands or angle brackets coming from
    # titles/descriptions cannot terminate the attribute or inject markup.
    safe_property = escape(str(property))
    safe_content = escape(str(content))
    return f'<meta property="{safe_property}" content="{safe_content}" />\n '
132+
133+
134+
def get_tags(context, doctree, config):
    """
    Build the Open Graph ``<meta>`` tags for one page.

    :param context: mapping read for the ``title``, ``pagename`` and
        ``file_suffix`` keys
    :param doctree: object whose ``walkabout()`` is called with the
        description visitor
    :param config: mapping read for the ``ogp_description_length``,
        ``ogp_type``, ``ogp_site_url``, ``ogp_site_name`` and ``ogp_image`` keys
    :return: a string containing one ``<meta>`` tag per line
    """
    # Set length of description
    try:
        desc_len = int(config["ogp_description_length"])
    except (ValueError, TypeError):
        # int() raises TypeError (not ValueError) for values such as None or
        # a list; those must fall back to the default as well.
        desc_len = DEFAULT_DESCRIPTION_LENGTH

    # Get the title and parse any html in it
    htp = HTMLTextParser()
    htp.feed(context["title"])
    htp.close()

    # Parse/walk doctree for metadata (tag/description)
    mcv = OGMetadataCreatorVisitor(desc_len, [htp.text, htp.text_outside_tags])
    doctree.walkabout(mcv)

    tags = "\n "

    # title tag
    tags += make_tag("og:title", htp.text)

    # type tag
    tags += make_tag("og:type", config["ogp_type"])

    # url tag
    # Get the URL of the specific page
    page_url = urljoin(
        config["ogp_site_url"],
        context["pagename"] + context["file_suffix"],
    )
    tags += make_tag("og:url", page_url)

    # site name tag (optional)
    site_name = config["ogp_site_name"]
    if site_name:
        tags += make_tag("og:site_name", site_name)

    # description tag
    tags += make_tag("og:description", mcv.description)

    # image tag (optional)
    # Get the image from the config
    image_url = config["ogp_image"]
    if image_url:
        tags += make_tag("og:image", image_url)

    return tags
35182

@@ -44,6 +191,7 @@ def setup(app):
44191
app.add_config_value("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH, "html")
45192
app.add_config_value("ogp_image", None, "html")
46193
app.add_config_value("ogp_type", "website", "html")
194+
app.add_config_value("ogp_site_name", None, "html")
47195

48196
app.connect('html-page-context', html_page_context)
49197

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
extensions = ["sphinxext.opengraph"]
2+
3+
master_doc = "index"
4+
exclude_patterns = ["_build"]
5+
6+
html_theme = "basic"
7+
8+
ogp_site_url = "http://example.org/"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Example sentence 1. Example sentence 2.

tests/roots/test-list/conf.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
extensions = ["sphinxext.opengraph"]
2+
3+
master_doc = "index"
4+
exclude_patterns = ["_build"]
5+
6+
html_theme = "basic"
7+
8+
ogp_site_url = "http://example.org/"

tests/roots/test-list/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
* Item 1
2+
* Item 2
3+
* Item 3
4+
* Item 4
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
extensions = ["sphinxext.opengraph"]
2+
3+
master_doc = "index"
4+
exclude_patterns = ["_build"]
5+
6+
html_theme = "basic"
7+
8+
ogp_site_url = "http://example.org/"
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
* Item 1
2+
* Item 2
3+
4+
* Nested Item 1
5+
* Nested Item 2
6+
7+
* Item 3
8+
* Item 4

tests/roots/test-sitename/conf.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
extensions = ["sphinxext.opengraph"]
2+
3+
master_doc = "index"
4+
exclude_patterns = ["_build"]
5+
6+
html_theme = "basic"
7+
8+
ogp_site_url = "http://example.org/"
9+
ogp_site_name = "Example's Docs!"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse at lorem ornare, fringilla massa nec, venenatis mi. Donec erat sapien, tincidunt nec rhoncus nec, scelerisque id diam. Orci varius natoque penatibus et magnis dis parturient mauris.

0 commit comments

Comments
 (0)