From c9e918078a8587e639b69ac0be2c3c65ed9c2b58 Mon Sep 17 00:00:00 2001 From: celsius narhwal Date: Wed, 25 Jan 2023 14:43:01 -0500 Subject: [PATCH 1/7] improvement: Implement support for NumPy-style docstrings --- .changelog/_unreleased.toml | 8 + pyproject.toml | 1 + readme.md | 4 +- .../contrib/processors/numpy.py | 251 ++++++++++++++++++ .../contrib/processors/smart.py | 8 +- test/processors/test_numpy.py | 118 ++++++++ 6 files changed, 386 insertions(+), 4 deletions(-) create mode 100644 .changelog/_unreleased.toml create mode 100644 src/pydoc_markdown/contrib/processors/numpy.py create mode 100644 test/processors/test_numpy.py diff --git a/.changelog/_unreleased.toml b/.changelog/_unreleased.toml new file mode 100644 index 00000000..69b7d074 --- /dev/null +++ b/.changelog/_unreleased.toml @@ -0,0 +1,8 @@ +[[entries]] +id = "5be79248-7b86-465d-953c-d0c69ab64e8a" +type = "improvement" +description = "Implement support for NumPy-style docstrings" +author = "celsiusnarhwal" +issues = [ + "https://github.com/celsiusnarhwal/pydoc-markdown/issues/251", +] diff --git a/pyproject.toml b/pyproject.toml index db413183..ded6ec4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ tomli = "^2.0.0" tomli_w = "^1.0.0" yapf = ">=0.30.0" watchdog = "*" +numpydoc = "^1.5.0" [tool.poetry.urls] Homepage = "https://github.com/NiklasRosenstein/pydoc-markdown" diff --git a/readme.md b/readme.md index 9cee2b97..b2f4a6b8 100644 --- a/readme.md +++ b/readme.md @@ -35,8 +35,8 @@ YAML configuration, you should install the package directly through Pipx. ### Features -* Understands multiple documentation styles (Sphinx, Google, Pydoc-Markdown specific) and converts them to properly - formatted Markdown +* Understands multiple documentation styles (Sphinx, Google, NumPy, Pydoc-Markdown specific) and converts them to + properly formatted Markdown * Can parse docstrings for variables thanks to [docspec][] (`#:` block before or string literal after the statement) * Generates links to other API objects per the documentation syntax (e.g. `#OtherClass` for the Pydoc-Markdown style) diff --git a/src/pydoc_markdown/contrib/processors/numpy.py b/src/pydoc_markdown/contrib/processors/numpy.py new file mode 100644 index 00000000..f813f494 --- /dev/null +++ b/src/pydoc_markdown/contrib/processors/numpy.py @@ -0,0 +1,251 @@ +# -*- coding: utf8 -*- +# Copyright (c) 2019 Niklas Rosenstein +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +import dataclasses +import itertools +import re +import typing as t +import warnings +from contextlib import contextmanager + +import docspec +from numpydoc.docscrape import NumpyDocString, Parameter +from numpydoc.validate import validate + +from pydoc_markdown.interfaces import Processor, Resolver + + +@contextmanager +def _filter_numpydoc_warnings(action: str): + warnings.filterwarnings(action, module="numpydoc.docscrape") + yield + warnings.resetwarnings() + + +class _DocstringWrapper: + # Wraps docstrings for use with numpydoc.validate.validate(). + __qualname__ = "pydoc_markdown.contrib.processors.numpy._DocstringWrapper" + + +@dataclasses.dataclass +class NumpyProcessor(Processor): + # numpydoc doesn't like when a heading appears twice in the same docstring so we have to use tags to + # keep numpydoc from recognizing the example headings. This also means the example code block has to be + # delineated with HTML tags instead of Markdown syntax. + """ + This processor parses NumPy-style docstrings and converts them to Markdown syntax. + + References + ---------- + - https://numpydoc.readthedocs.io/en/latest/format.html + + Examples + -------- +
+    
+    Parameters
+    ----------
+    arg: str
+        This argument should be a string.
+
+    Raises
+    ------
+    ValueError
+        If *arg* is not a string.
+
+    Returns
+    -------
+    int
+        The length of the string.
+    
+    
+ + Renders as: + + Parameters + ---------- + arg : str + This argument should be a string. + + Raises + ------ + ValueError + If *arg* is not a string. + + Returns + ------- + int + The length of the string. + + @doc:fmt:numpy + """ + + _SECTION_MAP = { + "Summary": ["Summary", "Extended Summary"], + "Arguments": ["Parameters", "Other Parameters"], + "Returns": ["Returns"], + "Yields": ["Yields"], + "Receives": ["Receives"], + "Attributes": ["Attributes"], + "Methods": ["Methods"], + "Raises": ["Raises"], + "Warns": ["Warns"], + "Warnings": ["Warnings"], + "See Also": ["See Also"], + "Notes": ["Notes"], + "References": ["References"], + "Examples": ["Examples"], + } + + @staticmethod + def check_docstring_format(docstring: str) -> bool: + _DocstringWrapper.__doc__ = docstring + + with _filter_numpydoc_warnings("error"): + try: + return not validate(_DocstringWrapper.__qualname__).get("Errors") + except Warning: + return False + + def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None: + docspec.visit(modules, self._process) + + def _process(self, node: docspec.ApiObject): + if not node.docstring: + return + + docstring = NumpyDocString(node.docstring.content) + lines = [] + + # Filter self._SECTION_MAP to only include sections used in the docstring + active_sections = {k: v for k, v in self._SECTION_MAP.items() if any(docstring.get(sec) for sec in v)} + + # numpydoc is opinionated when it comes to section order so we have to preserve the order of the original + # docstring ourselves + + # First, we create a regex pattern to match all section headings in the docstring + keyword_regex = re.compile( + "|".join( + [rf"{keyword}(?:\r?\n)-{{{len(keyword)}}}" for keyword in itertools.chain(*active_sections.values())] + ) + ) + + # Second, we strip each patten match of hyphens and whitespace + keyword_matches = [match.replace("-", "").strip() for match in keyword_regex.findall(node.docstring.content)] + + # Third, we determine the section order in the eventual output based on the order of the headings in the + # original docstring (but always starting with the summary) + section_order = [ + "Summary", + *[next(key for key, value in active_sections.items() if keyword in value) for keyword in keyword_matches], + ] + + # Finally, we sort active_sections according to the section order we just determined + sorted_sections = sorted(active_sections.items(), key=lambda x: section_order.index(x[0])) + + for section, keywords in sorted_sections: + lines.extend(self._get_section_contents(docstring, section, keywords)) + + node.docstring.content = "\n".join(lines) + + def _get_section_contents(self, docstring: NumpyDocString, section: str, keywords: list) -> list[str]: + contents = list(itertools.chain([docstring.get(sec) for sec in keywords])) + + if section == "Summary": + return self._parse_summary(contents) + else: + # contents needs to be flattened for all sections aside from Summary + contents = list(itertools.chain(*contents)) + if section in ["Notes", "References"]: + return self._parse_notes_and_references(section, contents) + elif section == "Examples": + return self._parse_examples(contents) + elif section == "See Also": + return self._parse_see_also(contents) + elif any(isinstance(item, Parameter) for item in contents): + return self._parse_parameters(section, contents) + else: + return [f"\n**{section}**\n", *contents] if contents else [] + + @staticmethod + def _parse_summary(contents: list[str]) -> list[str]: + summary, extended = contents + return [*summary, "", *extended] if extended else [*summary] + + @staticmethod + def _parse_parameters(section: str, parameters: list[Parameter]) -> list[str]: + lines = [] + + for param in parameters: + name, cls, desc = param + desc = "\n".join(desc) + + if name and cls and desc: + lines.append(f"* **{name}** (`{cls}`): {desc}") + elif name and cls: + lines.append(f"* **{name}** (`{cls}`)") + elif name and desc: + lines.append(f"* **{name}**: {desc}") + elif cls and desc: + lines.append(f"* `{cls}`: {desc}") + elif name: + lines.append(f"* **{name}**") + elif cls: + lines.append(f"* `{cls}`") + elif desc: + lines.append(f"* {desc}") + + return [f"\n**{section}**\n", *lines] if lines else [] + + @staticmethod + def _parse_notes_and_references(section: str, contents: list[str]) -> list[str]: + contents = "\n".join(contents) + citations = re.compile("(\.\. )?\[(?P\w+)][_ ]?") + + replacements = {"Notes": "{ref_id}", "References": "{ref_id}. "} + + for match in citations.finditer(contents): + ref_id = match.group("ref_id") + contents = contents.replace(match.group(0), replacements[section].format(ref_id=ref_id)) + + return [f"\n**{section}**\n", *contents.splitlines()] + + @staticmethod + def _parse_examples(contents: list[str]) -> list[str]: + # Wraps doctests in Python codeblocks and leaves all other content as is + doctests = re.compile(r"(>>>(?:.+(?:\r?\n|$))+)", flags=re.MULTILINE) + return ["\n**Examples**\n", *doctests.sub("```python\n\g<0>\n```", "\n".join(contents)).splitlines()] + + @staticmethod + def _parse_see_also(contents: list[tuple]) -> list[str]: + lines = [] + + for group in contents: + sublines = [] + objs, desc = group + + sublines.append("* " + ", ".join([f":{obj[1]}:`{obj[0]}`" if obj[1] else f"{obj[0]}" for obj in objs])) + + if desc: + sublines[-1] += ": " + "\n".join(desc) + lines.extend(sublines) + + return [f"\n**See Also**\n", *lines] diff --git a/src/pydoc_markdown/contrib/processors/smart.py b/src/pydoc_markdown/contrib/processors/smart.py index b42bf577..786b27b0 100644 --- a/src/pydoc_markdown/contrib/processors/smart.py +++ b/src/pydoc_markdown/contrib/processors/smart.py @@ -25,6 +25,7 @@ import docspec from pydoc_markdown.contrib.processors.google import GoogleProcessor +from pydoc_markdown.contrib.processors.numpy import NumpyProcessor from pydoc_markdown.contrib.processors.pydocmd import PydocmdProcessor from pydoc_markdown.contrib.processors.sphinx import SphinxProcessor from pydoc_markdown.interfaces import Processor, Resolver @@ -33,13 +34,14 @@ @dataclasses.dataclass class SmartProcessor(Processor): """ - This processor picks the #GoogleProcessor, #SphinxProcessor or #PydocmdProcessor after + This processor picks the #GoogleProcessor, #SphinxProcessor, #PydocmdProcessor, or #NumpyProcessor after guessing which is appropriate from the syntax it finds in the docstring. """ google: GoogleProcessor = dataclasses.field(default_factory=GoogleProcessor) pydocmd: PydocmdProcessor = dataclasses.field(default_factory=PydocmdProcessor) sphinx: SphinxProcessor = dataclasses.field(default_factory=SphinxProcessor) + numpy: NumpyProcessor = dataclasses.field(default_factory=NumpyProcessor) def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None: docspec.visit(modules, self._process) @@ -48,7 +50,7 @@ def _process(self, obj: docspec.ApiObject): if not obj.docstring: return None - for name in ("google", "pydocmd", "sphinx"): + for name in ("google", "pydocmd", "sphinx", "numpy"): indicator = "@doc:fmt:" + name if indicator in obj.docstring.content: obj.docstring.content = obj.docstring.content.replace(indicator, "") @@ -58,4 +60,6 @@ def _process(self, obj: docspec.ApiObject): return self.sphinx._process(obj) if self.google.check_docstring_format(obj.docstring.content): return self.google._process(obj) + if self.numpy.check_docstring_format(obj.docstring.content): + return self.numpy._process(obj) return self.pydocmd._process(obj) diff --git a/test/processors/test_numpy.py b/test/processors/test_numpy.py new file mode 100644 index 00000000..008709df --- /dev/null +++ b/test/processors/test_numpy.py @@ -0,0 +1,118 @@ +import pytest + +from pydoc_markdown.contrib.processors.numpy import NumpyProcessor +from . import assert_processor_result + +docstring_a = """ + Generate ordinary dialogue. + + Extended Summary + ---------------- + This function generates ordinary dialogue so that users can fully enjoy how efficient the code is. + + Parameters + ---------- + lines : int + The number of lines of dialogue to generate. + + Returns + ------- + list[str] + The generated lines of dialogue. + + Raises + ------ + ValueError + If *lines* is not a positive integer. + + Examples + -------- + >>> ordinary_dialogue(5) + ["You should just read this manga as is.", + "Why would anyone want to make an anime adaptation?", + "This is a dialogue-heavy piece with hardly any action.", + "Not to mention most of it takes place in a dressing room.", + "So why would anyone turn a manga like this into an anime?"] + """ + +md_docstring_a = """ + Generate ordinary dialogue. + + This function generates ordinary dialogue so that users can fully enjoy how efficient the code is. + + **Arguments** + + * **lines** (`int`): The number of lines of dialogue to generate. + + **Returns** + + * `list[str]`: The generated lines of dialogue. + + **Raises** + + * `ValueError`: If *lines* is not a positive integer. + + **Examples** + + ```python + >>> ordinary_dialogue(5) + ["You should just read this manga as is.", + "Why would anyone want to make an anime adaptation?", + "This is a dialogue-heavy piece with hardly any action.", + "Not to mention most of it takes place in a dressing room.", + "So why would anyone turn a manga like this into an anime?"] + ``` + """ + +docstring_b = """ + Shout "You fool!". + + Notes + ----- + The average "You fool!" travels at 340 m/s[1]_. + + References + ---------- + .. [1] Tsutomu Mizushima (Director). (2012, July 5). Normal Dialogue / Different Clothes / Shouting Instructions + (No. 1). In Joshiraku. Mainichi Broadcasting System. + + Examples + -------- + >>> you_fool() + "You fool!" + + See Also + -------- + :func:`bakayarou` + The same function but in Japanese for no reason in particular. + """ + +md_docstring_b = """ + Shout "You fool!". + + **Notes** + + The average "You fool!" travels at 340 m/s1. + + **References** + + 1. Tsutomu Mizushima (Director). (2012, July 5). Normal Dialogue / Different Clothes / Shouting Instructions + (No. 1). In Joshiraku. Mainichi Broadcasting System. + + **Examples** + + ```python + >>> you_fool() + "You fool!" + ``` + + **See Also** + + * :func:`bakayarou`: The same function but in Japanese for no reason in particular. + """ + + +@pytest.mark.parametrize("processor", [NumpyProcessor()]) +def test_numpy_processor(processor): + assert_processor_result(processor or NumpyProcessor(), docstring_a, md_docstring_a) + assert_processor_result(processor or NumpyProcessor(), docstring_b, md_docstring_b) From f874940aa790bd637f85de2835a43bac920ca24c Mon Sep 17 00:00:00 2001 From: celsius narhwal Date: Wed, 25 Jan 2023 18:50:49 -0500 Subject: [PATCH 2/7] Formatting adjustments --- .../contrib/processors/numpy.py | 52 +++++++++++++++---- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/src/pydoc_markdown/contrib/processors/numpy.py b/src/pydoc_markdown/contrib/processors/numpy.py index f813f494..7ac1efba 100644 --- a/src/pydoc_markdown/contrib/processors/numpy.py +++ b/src/pydoc_markdown/contrib/processors/numpy.py @@ -125,7 +125,9 @@ def check_docstring_format(docstring: str) -> bool: except Warning: return False - def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None: + def process( + self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver] + ) -> None: docspec.visit(modules, self._process) def _process(self, node: docspec.ApiObject): @@ -136,7 +138,11 @@ def _process(self, node: docspec.ApiObject): lines = [] # Filter self._SECTION_MAP to only include sections used in the docstring - active_sections = {k: v for k, v in self._SECTION_MAP.items() if any(docstring.get(sec) for sec in v)} + active_sections = { + k: v + for k, v in self._SECTION_MAP.items() + if any(docstring.get(sec) for sec in v) + } # numpydoc is opinionated when it comes to section order so we have to preserve the order of the original # docstring ourselves @@ -144,29 +150,42 @@ def _process(self, node: docspec.ApiObject): # First, we create a regex pattern to match all section headings in the docstring keyword_regex = re.compile( "|".join( - [rf"{keyword}(?:\r?\n)-{{{len(keyword)}}}" for keyword in itertools.chain(*active_sections.values())] + [ + rf"{keyword}(?:\r?\n)-{{{len(keyword)}}}" + for keyword in itertools.chain(*active_sections.values()) + ] ) ) # Second, we strip each patten match of hyphens and whitespace - keyword_matches = [match.replace("-", "").strip() for match in keyword_regex.findall(node.docstring.content)] + keyword_matches = [ + match.replace("-", "").strip() + for match in keyword_regex.findall(node.docstring.content) + ] # Third, we determine the section order in the eventual output based on the order of the headings in the # original docstring (but always starting with the summary) section_order = [ "Summary", - *[next(key for key, value in active_sections.items() if keyword in value) for keyword in keyword_matches], + *[ + next(key for key, value in active_sections.items() if keyword in value) + for keyword in keyword_matches + ], ] # Finally, we sort active_sections according to the section order we just determined - sorted_sections = sorted(active_sections.items(), key=lambda x: section_order.index(x[0])) + active_sections = sorted( + active_sections.items(), key=lambda x: section_order.index(x[0]) + ) - for section, keywords in sorted_sections: + for section, keywords in active_sections: lines.extend(self._get_section_contents(docstring, section, keywords)) node.docstring.content = "\n".join(lines) - def _get_section_contents(self, docstring: NumpyDocString, section: str, keywords: list) -> list[str]: + def _get_section_contents( + self, docstring: NumpyDocString, section: str, keywords: list + ) -> list[str]: contents = list(itertools.chain([docstring.get(sec) for sec in keywords])) if section == "Summary": @@ -224,7 +243,9 @@ def _parse_notes_and_references(section: str, contents: list[str]) -> list[str]: for match in citations.finditer(contents): ref_id = match.group("ref_id") - contents = contents.replace(match.group(0), replacements[section].format(ref_id=ref_id)) + contents = contents.replace( + match.group(0), replacements[section].format(ref_id=ref_id) + ) return [f"\n**{section}**\n", *contents.splitlines()] @@ -232,7 +253,10 @@ def _parse_notes_and_references(section: str, contents: list[str]) -> list[str]: def _parse_examples(contents: list[str]) -> list[str]: # Wraps doctests in Python codeblocks and leaves all other content as is doctests = re.compile(r"(>>>(?:.+(?:\r?\n|$))+)", flags=re.MULTILINE) - return ["\n**Examples**\n", *doctests.sub("```python\n\g<0>\n```", "\n".join(contents)).splitlines()] + return [ + "\n**Examples**\n", + *doctests.sub("```python\n\g<0>\n```", "\n".join(contents)).splitlines(), + ] @staticmethod def _parse_see_also(contents: list[tuple]) -> list[str]: @@ -242,10 +266,16 @@ def _parse_see_also(contents: list[tuple]) -> list[str]: sublines = [] objs, desc = group - sublines.append("* " + ", ".join([f":{obj[1]}:`{obj[0]}`" if obj[1] else f"{obj[0]}" for obj in objs])) + sublines.append( + "* " + + ", ".join( + [f":{obj[1]}:`{obj[0]}`" if obj[1] else f"{obj[0]}" for obj in objs] + ) + ) if desc: sublines[-1] += ": " + "\n".join(desc) + lines.extend(sublines) return [f"\n**See Also**\n", *lines] From c4fd4d146eda5d2e8995a941a6eb91c9079c7f8b Mon Sep 17 00:00:00 2001 From: celsius narhwal Date: Wed, 25 Jan 2023 19:03:07 -0500 Subject: [PATCH 3/7] Formatting adjustments --- .../contrib/processors/numpy.py | 44 ++++--------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/src/pydoc_markdown/contrib/processors/numpy.py b/src/pydoc_markdown/contrib/processors/numpy.py index 7ac1efba..fba7aa96 100644 --- a/src/pydoc_markdown/contrib/processors/numpy.py +++ b/src/pydoc_markdown/contrib/processors/numpy.py @@ -125,9 +125,7 @@ def check_docstring_format(docstring: str) -> bool: except Warning: return False - def process( - self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver] - ) -> None: + def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None: docspec.visit(modules, self._process) def _process(self, node: docspec.ApiObject): @@ -138,11 +136,7 @@ def _process(self, node: docspec.ApiObject): lines = [] # Filter self._SECTION_MAP to only include sections used in the docstring - active_sections = { - k: v - for k, v in self._SECTION_MAP.items() - if any(docstring.get(sec) for sec in v) - } + active_sections = {k: v for k, v in self._SECTION_MAP.items() if any(docstring.get(sec) for sec in v)} # numpydoc is opinionated when it comes to section order so we have to preserve the order of the original # docstring ourselves @@ -150,42 +144,29 @@ def _process(self, node: docspec.ApiObject): # First, we create a regex pattern to match all section headings in the docstring keyword_regex = re.compile( "|".join( - [ - rf"{keyword}(?:\r?\n)-{{{len(keyword)}}}" - for keyword in itertools.chain(*active_sections.values()) - ] + [rf"{keyword}(?:\r?\n)-{{{len(keyword)}}}" for keyword in itertools.chain(*active_sections.values())] ) ) # Second, we strip each patten match of hyphens and whitespace - keyword_matches = [ - match.replace("-", "").strip() - for match in keyword_regex.findall(node.docstring.content) - ] + keyword_matches = [match.replace("-", "").strip() for match in keyword_regex.findall(node.docstring.content)] # Third, we determine the section order in the eventual output based on the order of the headings in the # original docstring (but always starting with the summary) section_order = [ "Summary", - *[ - next(key for key, value in active_sections.items() if keyword in value) - for keyword in keyword_matches - ], + *[next(key for key, value in active_sections.items() if keyword in value) for keyword in keyword_matches], ] # Finally, we sort active_sections according to the section order we just determined - active_sections = sorted( - active_sections.items(), key=lambda x: section_order.index(x[0]) - ) + active_sections = sorted(active_sections.items(), key=lambda x: section_order.index(x[0])) for section, keywords in active_sections: lines.extend(self._get_section_contents(docstring, section, keywords)) node.docstring.content = "\n".join(lines) - def _get_section_contents( - self, docstring: NumpyDocString, section: str, keywords: list - ) -> list[str]: + def _get_section_contents(self, docstring: NumpyDocString, section: str, keywords: list) -> list[str]: contents = list(itertools.chain([docstring.get(sec) for sec in keywords])) if section == "Summary": @@ -243,9 +224,7 @@ def _parse_notes_and_references(section: str, contents: list[str]) -> list[str]: for match in citations.finditer(contents): ref_id = match.group("ref_id") - contents = contents.replace( - match.group(0), replacements[section].format(ref_id=ref_id) - ) + contents = contents.replace(match.group(0), replacements[section].format(ref_id=ref_id)) return [f"\n**{section}**\n", *contents.splitlines()] @@ -266,12 +245,7 @@ def _parse_see_also(contents: list[tuple]) -> list[str]: sublines = [] objs, desc = group - sublines.append( - "* " - + ", ".join( - [f":{obj[1]}:`{obj[0]}`" if obj[1] else f"{obj[0]}" for obj in objs] - ) - ) + sublines.append("* " + ", ".join([f":{obj[1]}:`{obj[0]}`" if obj[1] else f"{obj[0]}" for obj in objs])) if desc: sublines[-1] += ": " + "\n".join(desc) From 1de1dbf785e9921daacdfb5ad0d8ed9e3c29af84 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 27 May 2023 21:07:33 +0000 Subject: [PATCH 4/7] Updated PR references in 1 changelogs. skip-checks: true --- .changelog/_unreleased.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/.changelog/_unreleased.toml b/.changelog/_unreleased.toml index 5ff6ec66..d0da0b3b 100644 --- a/.changelog/_unreleased.toml +++ b/.changelog/_unreleased.toml @@ -3,6 +3,7 @@ id = "5be79248-7b86-465d-953c-d0c69ab64e8a" type = "improvement" description = "Implement support for NumPy-style docstrings" author = "celsiusnarhwal" +pr = "https://github.com/NiklasRosenstein/pydoc-markdown/pull/279" issues = [ "https://github.com/celsiusnarhwal/pydoc-markdown/issues/251", ] From 0156c47136f94a766ff99be98a0b7257e134ba73 Mon Sep 17 00:00:00 2001 From: Niklas Rosenstein Date: Sat, 27 May 2023 21:21:33 +0000 Subject: [PATCH 5/7] fmt, import __future__.annotations and fix mypy lints --- .../contrib/processors/numpy.py | 20 +++++++++---------- test/processors/test_numpy.py | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/pydoc_markdown/contrib/processors/numpy.py b/src/pydoc_markdown/contrib/processors/numpy.py index fba7aa96..0da96419 100644 --- a/src/pydoc_markdown/contrib/processors/numpy.py +++ b/src/pydoc_markdown/contrib/processors/numpy.py @@ -19,6 +19,8 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. +from __future__ import annotations + import dataclasses import itertools import re @@ -27,14 +29,14 @@ from contextlib import contextmanager import docspec -from numpydoc.docscrape import NumpyDocString, Parameter -from numpydoc.validate import validate +from numpydoc.docscrape import NumpyDocString, Parameter # type: ignore[import] +from numpydoc.validate import validate # type: ignore[import] from pydoc_markdown.interfaces import Processor, Resolver @contextmanager -def _filter_numpydoc_warnings(action: str): +def _filter_numpydoc_warnings(action: warnings._ActionKind): warnings.filterwarnings(action, module="numpydoc.docscrape") yield warnings.resetwarnings() @@ -159,9 +161,7 @@ def _process(self, node: docspec.ApiObject): ] # Finally, we sort active_sections according to the section order we just determined - active_sections = sorted(active_sections.items(), key=lambda x: section_order.index(x[0])) - - for section, keywords in active_sections: + for section, keywords in sorted(active_sections.items(), key=lambda x: section_order.index(x[0])): lines.extend(self._get_section_contents(docstring, section, keywords)) node.docstring.content = "\n".join(lines) @@ -217,16 +217,16 @@ def _parse_parameters(section: str, parameters: list[Parameter]) -> list[str]: @staticmethod def _parse_notes_and_references(section: str, contents: list[str]) -> list[str]: - contents = "\n".join(contents) + content_string = "\n".join(contents) citations = re.compile("(\.\. )?\[(?P\w+)][_ ]?") replacements = {"Notes": "{ref_id}", "References": "{ref_id}. "} - for match in citations.finditer(contents): + for match in citations.finditer(content_string): ref_id = match.group("ref_id") - contents = contents.replace(match.group(0), replacements[section].format(ref_id=ref_id)) + content_string = content_string.replace(match.group(0), replacements[section].format(ref_id=ref_id)) - return [f"\n**{section}**\n", *contents.splitlines()] + return [f"\n**{section}**\n", *content_string.splitlines()] @staticmethod def _parse_examples(contents: list[str]) -> list[str]: diff --git a/test/processors/test_numpy.py b/test/processors/test_numpy.py index 008709df..b564a26e 100644 --- a/test/processors/test_numpy.py +++ b/test/processors/test_numpy.py @@ -1,6 +1,7 @@ import pytest from pydoc_markdown.contrib.processors.numpy import NumpyProcessor + from . import assert_processor_result docstring_a = """ From 11cbbe079df4e510dfaa54171f8233d1acd2b391 Mon Sep 17 00:00:00 2001 From: Niklas Rosenstein Date: Sat, 27 May 2023 21:35:36 +0000 Subject: [PATCH 6/7] Streamline SmartProcessor implementation and log which processor it picks for each ApiObject --- .../contrib/processors/pydocmd.py | 2 +- .../contrib/processors/smart.py | 49 +++++++++++++++---- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/pydoc_markdown/contrib/processors/pydocmd.py b/src/pydoc_markdown/contrib/processors/pydocmd.py index 676e85b4..ee36e40c 100644 --- a/src/pydoc_markdown/contrib/processors/pydocmd.py +++ b/src/pydoc_markdown/contrib/processors/pydocmd.py @@ -73,7 +73,7 @@ class PydocmdProcessor(Processor): def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None: docspec.visit(modules, self._process) - def _process(self, node: docspec.ApiObject): + def _process(self, node: docspec.ApiObject) -> None: if not node.docstring: return lines = [] diff --git a/src/pydoc_markdown/contrib/processors/smart.py b/src/pydoc_markdown/contrib/processors/smart.py index 786b27b0..865e52a1 100644 --- a/src/pydoc_markdown/contrib/processors/smart.py +++ b/src/pydoc_markdown/contrib/processors/smart.py @@ -20,9 +20,11 @@ # IN THE SOFTWARE. import dataclasses +import logging import typing as t import docspec +from typing_extensions import Protocol from pydoc_markdown.contrib.processors.google import GoogleProcessor from pydoc_markdown.contrib.processors.numpy import NumpyProcessor @@ -30,6 +32,18 @@ from pydoc_markdown.contrib.processors.sphinx import SphinxProcessor from pydoc_markdown.interfaces import Processor, Resolver +logger = logging.getLogger(__name__) + + +class DelegatableProcessor(Protocol): + def _process(self, node: docspec.ApiObject) -> None: + ... + + +class CheckCapableProcessor(DelegatableProcessor, Protocol): + def check_docstring_format(self, docstring: str) -> bool: + ... + @dataclasses.dataclass class SmartProcessor(Processor): @@ -50,16 +64,33 @@ def _process(self, obj: docspec.ApiObject): if not obj.docstring: return None - for name in ("google", "pydocmd", "sphinx", "numpy"): + object_name = ".".join(x.name for x in obj.path) + object_type = type(obj).__name__ + + processors: t.List[t.Tuple[str, DelegatableProcessor]] = [ + ("sphinx", self.sphinx), + ("google", self.google), + ("numpy", self.numpy), + ("pydocmd", self.pydocmd), + ] + + checkable_processors: t.List[t.Tuple[str, CheckCapableProcessor]] = [ + ("sphinx", self.sphinx), + ("google", self.google), + ("numpy", self.numpy), + ] + + for name, processor in processors: indicator = "@doc:fmt:" + name if indicator in obj.docstring.content: + logger.info("Using `%s` processor for %s `%s` (explicit)", name, object_type, object_name) obj.docstring.content = obj.docstring.content.replace(indicator, "") - return getattr(self, name)._process(obj) - - if self.sphinx.check_docstring_format(obj.docstring.content): - return self.sphinx._process(obj) - if self.google.check_docstring_format(obj.docstring.content): - return self.google._process(obj) - if self.numpy.check_docstring_format(obj.docstring.content): - return self.numpy._process(obj) + return processor._process(obj) + + for name, processor in checkable_processors: + if processor.check_docstring_format(obj.docstring.content): + logger.info("Using `%s` processor for %s `%s` (detected)", name, object_type, object_name) + return processor._process(obj) + + logger.info("Using `pydocmd` processor for %s `%s` (default)", name, object_type, object_name) return self.pydocmd._process(obj) From a98a68fd7214e15af8f609b34f8fe7623810a456 Mon Sep 17 00:00:00 2001 From: Niklas Rosenstein Date: Sat, 27 May 2023 21:46:02 +0000 Subject: [PATCH 7/7] add print to assert_processor_result() which makes it easier to understand what is actually produced --- test/processors/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/processors/__init__.py b/test/processors/__init__.py index 9117b3b1..f02b1996 100644 --- a/test/processors/__init__.py +++ b/test/processors/__init__.py @@ -11,4 +11,5 @@ def assert_processor_result(processor, docstring, expected_output): ) processor.process([module], None) assert module.docstring + print(module.docstring.content) assert_text_equals(module.docstring.content, textwrap.dedent(expected_output))