diff --git a/.changelog/_unreleased.toml b/.changelog/_unreleased.toml index 177b402a..d0da0b3b 100644 --- a/.changelog/_unreleased.toml +++ b/.changelog/_unreleased.toml @@ -1,3 +1,13 @@ +[[entries]] +id = "5be79248-7b86-465d-953c-d0c69ab64e8a" +type = "improvement" +description = "Implement support for NumPy-style docstrings" +author = "celsiusnarhwal" +pr = "https://github.com/NiklasRosenstein/pydoc-markdown/pull/279" +issues = [ + "https://github.com/celsiusnarhwal/pydoc-markdown/issues/251", +] + [[entries]] id = "4409675c-ea67-4c56-be5a-a7310f779c15" type = "improvement" diff --git a/pyproject.toml b/pyproject.toml index ca9dcc20..bb968ead 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ tomli = "^2.0.0" tomli_w = "^1.0.0" yapf = ">=0.30.0" watchdog = "*" +numpydoc = "^1.5.0" [tool.poetry.dev-dependencies] pytest = "*" diff --git a/readme.md b/readme.md index 2d4a02be..b2fc53b5 100644 --- a/readme.md +++ b/readme.md @@ -31,8 +31,8 @@ If you plan on using the [Novella][] integration, you may want to install it as: ### Features 🌟 -* Understands multiple documentation styles (Sphinx, Google, Pydoc-Markdown specific) and converts them to properly - formatted Markdown +* Understands multiple documentation styles (Sphinx, Google, NumPy, Pydoc-Markdown specific) and converts them to + properly formatted Markdown * Can parse docstrings for variables thanks to [docspec][] (`#:` block before or string literal after the statement) * Generates links to other API objects per the documentation syntax (e.g. 
`#OtherClass` for the Pydoc-Markdown style) * Configure the output using a YAML file or `pyProject.toml`, then you're only one command away to generate the diff --git a/src/pydoc_markdown/contrib/processors/numpy.py b/src/pydoc_markdown/contrib/processors/numpy.py new file mode 100644 index 00000000..0da96419 --- /dev/null +++ b/src/pydoc_markdown/contrib/processors/numpy.py @@ -0,0 +1,255 @@ +# -*- coding: utf8 -*- +# Copyright (c) 2019 Niklas Rosenstein +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +from __future__ import annotations + +import dataclasses +import itertools +import re +import typing as t +import warnings +from contextlib import contextmanager + +import docspec +from numpydoc.docscrape import NumpyDocString, Parameter # type: ignore[import] +from numpydoc.validate import validate # type: ignore[import] + +from pydoc_markdown.interfaces import Processor, Resolver + + +@contextmanager +def _filter_numpydoc_warnings(action: warnings._ActionKind): + warnings.filterwarnings(action, module="numpydoc.docscrape") + yield + warnings.resetwarnings() + + +class _DocstringWrapper: + # Wraps docstrings for use with numpydoc.validate.validate(). + __qualname__ = "pydoc_markdown.contrib.processors.numpy._DocstringWrapper" + + +@dataclasses.dataclass +class NumpyProcessor(Processor): + # numpydoc doesn't like when a heading appears twice in the same docstring so we have to use tags to + # keep numpydoc from recognizing the example headings. This also means the example code block has to be + # delineated with HTML tags instead of Markdown syntax. + """ + This processor parses NumPy-style docstrings and converts them to Markdown syntax. + + References + ---------- + - https://numpydoc.readthedocs.io/en/latest/format.html + + Examples + -------- +
+    
+    Parameters
+    ----------
+    arg: str
+        This argument should be a string.
+
+    Raises
+    ------
+    ValueError
+        If *arg* is not a string.
+
+    Returns
+    -------
+    int
+        The length of the string.
+    
+    
+ + Renders as: + + Parameters + ---------- + arg : str + This argument should be a string. + + Raises + ------ + ValueError + If *arg* is not a string. + + Returns + ------- + int + The length of the string. + + @doc:fmt:numpy + """ + + _SECTION_MAP = { + "Summary": ["Summary", "Extended Summary"], + "Arguments": ["Parameters", "Other Parameters"], + "Returns": ["Returns"], + "Yields": ["Yields"], + "Receives": ["Receives"], + "Attributes": ["Attributes"], + "Methods": ["Methods"], + "Raises": ["Raises"], + "Warns": ["Warns"], + "Warnings": ["Warnings"], + "See Also": ["See Also"], + "Notes": ["Notes"], + "References": ["References"], + "Examples": ["Examples"], + } + + @staticmethod + def check_docstring_format(docstring: str) -> bool: + _DocstringWrapper.__doc__ = docstring + + with _filter_numpydoc_warnings("error"): + try: + return not validate(_DocstringWrapper.__qualname__).get("Errors") + except Warning: + return False + + def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None: + docspec.visit(modules, self._process) + + def _process(self, node: docspec.ApiObject): + if not node.docstring: + return + + docstring = NumpyDocString(node.docstring.content) + lines = [] + + # Filter self._SECTION_MAP to only include sections used in the docstring + active_sections = {k: v for k, v in self._SECTION_MAP.items() if any(docstring.get(sec) for sec in v)} + + # numpydoc is opinionated when it comes to section order so we have to preserve the order of the original + # docstring ourselves + + # First, we create a regex pattern to match all section headings in the docstring + keyword_regex = re.compile( + "|".join( + [rf"{keyword}(?:\r?\n)-{{{len(keyword)}}}" for keyword in itertools.chain(*active_sections.values())] + ) + ) + + # Second, we strip each patten match of hyphens and whitespace + keyword_matches = [match.replace("-", "").strip() for match in keyword_regex.findall(node.docstring.content)] + + # Third, we determine the 
section order in the eventual output based on the order of the headings in the + # original docstring (but always starting with the summary) + section_order = [ + "Summary", + *[next(key for key, value in active_sections.items() if keyword in value) for keyword in keyword_matches], + ] + + # Finally, we sort active_sections according to the section order we just determined + for section, keywords in sorted(active_sections.items(), key=lambda x: section_order.index(x[0])): + lines.extend(self._get_section_contents(docstring, section, keywords)) + + node.docstring.content = "\n".join(lines) + + def _get_section_contents(self, docstring: NumpyDocString, section: str, keywords: list) -> list[str]: + contents = list(itertools.chain([docstring.get(sec) for sec in keywords])) + + if section == "Summary": + return self._parse_summary(contents) + else: + # contents needs to be flattened for all sections aside from Summary + contents = list(itertools.chain(*contents)) + if section in ["Notes", "References"]: + return self._parse_notes_and_references(section, contents) + elif section == "Examples": + return self._parse_examples(contents) + elif section == "See Also": + return self._parse_see_also(contents) + elif any(isinstance(item, Parameter) for item in contents): + return self._parse_parameters(section, contents) + else: + return [f"\n**{section}**\n", *contents] if contents else [] + + @staticmethod + def _parse_summary(contents: list[str]) -> list[str]: + summary, extended = contents + return [*summary, "", *extended] if extended else [*summary] + + @staticmethod + def _parse_parameters(section: str, parameters: list[Parameter]) -> list[str]: + lines = [] + + for param in parameters: + name, cls, desc = param + desc = "\n".join(desc) + + if name and cls and desc: + lines.append(f"* **{name}** (`{cls}`): {desc}") + elif name and cls: + lines.append(f"* **{name}** (`{cls}`)") + elif name and desc: + lines.append(f"* **{name}**: {desc}") + elif cls and desc: + 
                lines.append(f"* `{cls}`: {desc}")
+            elif name:
+                lines.append(f"* **{name}**")
+            elif cls:
+                lines.append(f"* `{cls}`")
+            elif desc:
+                lines.append(f"* {desc}")
+
+        return [f"\n**{section}**\n", *lines] if lines else []
+
+    @staticmethod
+    def _parse_notes_and_references(section: str, contents: list[str]) -> list[str]:
+        content_string = "\n".join(contents)
+        # Matches numpydoc citation markers such as ".. [1]" and "[1]_"; the named
+        # group captures the citation id (restored: the "<ref_id>" name had been
+        # stripped by HTML mangling, leaving an invalid "(?P\w+)" pattern).
+        citations = re.compile(r"(\.\. )?\[(?P<ref_id>\w+)][_ ]?")
+
+        replacements = {"Notes": "{ref_id}", "References": "{ref_id}. "}
+
+        for match in citations.finditer(content_string):
+            ref_id = match.group("ref_id")
+            content_string = content_string.replace(match.group(0), replacements[section].format(ref_id=ref_id))
+
+        return [f"\n**{section}**\n", *content_string.splitlines()]
+
+    @staticmethod
+    def _parse_examples(contents: list[str]) -> list[str]:
+        # Wraps doctests in Python codeblocks and leaves all other content as is
+        doctests = re.compile(r"(>>>(?:.+(?:\r?\n|$))+)", flags=re.MULTILINE)
+        return [
+            "\n**Examples**\n",
+            *doctests.sub("```python\n\g<0>\n```", "\n".join(contents)).splitlines(),
+        ]
+
+    @staticmethod
+    def _parse_see_also(contents: list[tuple]) -> list[str]:
+        lines = []
+
+        for group in contents:
+            sublines = []
+            objs, desc = group
+
+            sublines.append("* " + ", ".join([f":{obj[1]}:`{obj[0]}`" if obj[1] else f"{obj[0]}" for obj in objs]))
+
+            if desc:
+                sublines[-1] += ": " + "\n".join(desc)
+
+            lines.extend(sublines)
+
+        return [f"\n**See Also**\n", *lines]
diff --git a/src/pydoc_markdown/contrib/processors/pydocmd.py b/src/pydoc_markdown/contrib/processors/pydocmd.py
index 676e85b4..ee36e40c 100644
--- a/src/pydoc_markdown/contrib/processors/pydocmd.py
+++ b/src/pydoc_markdown/contrib/processors/pydocmd.py
@@ -73,7 +73,7 @@ class PydocmdProcessor(Processor):
     def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
         docspec.visit(modules, self._process)
 
-    def _process(self, node: docspec.ApiObject):
+    def _process(self, node: docspec.ApiObject) -> None:
         if
not node.docstring: return lines = [] diff --git a/src/pydoc_markdown/contrib/processors/smart.py b/src/pydoc_markdown/contrib/processors/smart.py index b42bf577..865e52a1 100644 --- a/src/pydoc_markdown/contrib/processors/smart.py +++ b/src/pydoc_markdown/contrib/processors/smart.py @@ -20,26 +20,42 @@ # IN THE SOFTWARE. import dataclasses +import logging import typing as t import docspec +from typing_extensions import Protocol from pydoc_markdown.contrib.processors.google import GoogleProcessor +from pydoc_markdown.contrib.processors.numpy import NumpyProcessor from pydoc_markdown.contrib.processors.pydocmd import PydocmdProcessor from pydoc_markdown.contrib.processors.sphinx import SphinxProcessor from pydoc_markdown.interfaces import Processor, Resolver +logger = logging.getLogger(__name__) + + +class DelegatableProcessor(Protocol): + def _process(self, node: docspec.ApiObject) -> None: + ... + + +class CheckCapableProcessor(DelegatableProcessor, Protocol): + def check_docstring_format(self, docstring: str) -> bool: + ... + @dataclasses.dataclass class SmartProcessor(Processor): """ - This processor picks the #GoogleProcessor, #SphinxProcessor or #PydocmdProcessor after + This processor picks the #GoogleProcessor, #SphinxProcessor, #PydocmdProcessor, or #NumpyProcessor after guessing which is appropriate from the syntax it finds in the docstring. 
""" google: GoogleProcessor = dataclasses.field(default_factory=GoogleProcessor) pydocmd: PydocmdProcessor = dataclasses.field(default_factory=PydocmdProcessor) sphinx: SphinxProcessor = dataclasses.field(default_factory=SphinxProcessor) + numpy: NumpyProcessor = dataclasses.field(default_factory=NumpyProcessor) def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None: docspec.visit(modules, self._process) @@ -48,14 +64,33 @@ def _process(self, obj: docspec.ApiObject): if not obj.docstring: return None - for name in ("google", "pydocmd", "sphinx"): + object_name = ".".join(x.name for x in obj.path) + object_type = type(obj).__name__ + + processors: t.List[t.Tuple[str, DelegatableProcessor]] = [ + ("sphinx", self.sphinx), + ("google", self.google), + ("numpy", self.numpy), + ("pydocmd", self.pydocmd), + ] + + checkable_processors: t.List[t.Tuple[str, CheckCapableProcessor]] = [ + ("sphinx", self.sphinx), + ("google", self.google), + ("numpy", self.numpy), + ] + + for name, processor in processors: indicator = "@doc:fmt:" + name if indicator in obj.docstring.content: + logger.info("Using `%s` processor for %s `%s` (explicit)", name, object_type, object_name) obj.docstring.content = obj.docstring.content.replace(indicator, "") - return getattr(self, name)._process(obj) + return processor._process(obj) + + for name, processor in checkable_processors: + if processor.check_docstring_format(obj.docstring.content): + logger.info("Using `%s` processor for %s `%s` (detected)", name, object_type, object_name) + return processor._process(obj) - if self.sphinx.check_docstring_format(obj.docstring.content): - return self.sphinx._process(obj) - if self.google.check_docstring_format(obj.docstring.content): - return self.google._process(obj) + logger.info("Using `pydocmd` processor for %s `%s` (default)", name, object_type, object_name) return self.pydocmd._process(obj) diff --git a/test/processors/__init__.py b/test/processors/__init__.py index 
9117b3b1..f02b1996 100644 --- a/test/processors/__init__.py +++ b/test/processors/__init__.py @@ -11,4 +11,5 @@ def assert_processor_result(processor, docstring, expected_output): ) processor.process([module], None) assert module.docstring + print(module.docstring.content) assert_text_equals(module.docstring.content, textwrap.dedent(expected_output)) diff --git a/test/processors/test_numpy.py b/test/processors/test_numpy.py new file mode 100644 index 00000000..b564a26e --- /dev/null +++ b/test/processors/test_numpy.py @@ -0,0 +1,119 @@ +import pytest + +from pydoc_markdown.contrib.processors.numpy import NumpyProcessor + +from . import assert_processor_result + +docstring_a = """ + Generate ordinary dialogue. + + Extended Summary + ---------------- + This function generates ordinary dialogue so that users can fully enjoy how efficient the code is. + + Parameters + ---------- + lines : int + The number of lines of dialogue to generate. + + Returns + ------- + list[str] + The generated lines of dialogue. + + Raises + ------ + ValueError + If *lines* is not a positive integer. + + Examples + -------- + >>> ordinary_dialogue(5) + ["You should just read this manga as is.", + "Why would anyone want to make an anime adaptation?", + "This is a dialogue-heavy piece with hardly any action.", + "Not to mention most of it takes place in a dressing room.", + "So why would anyone turn a manga like this into an anime?"] + """ + +md_docstring_a = """ + Generate ordinary dialogue. + + This function generates ordinary dialogue so that users can fully enjoy how efficient the code is. + + **Arguments** + + * **lines** (`int`): The number of lines of dialogue to generate. + + **Returns** + + * `list[str]`: The generated lines of dialogue. + + **Raises** + + * `ValueError`: If *lines* is not a positive integer. 
+ + **Examples** + + ```python + >>> ordinary_dialogue(5) + ["You should just read this manga as is.", + "Why would anyone want to make an anime adaptation?", + "This is a dialogue-heavy piece with hardly any action.", + "Not to mention most of it takes place in a dressing room.", + "So why would anyone turn a manga like this into an anime?"] + ``` + """ + +docstring_b = """ + Shout "You fool!". + + Notes + ----- + The average "You fool!" travels at 340 m/s[1]_. + + References + ---------- + .. [1] Tsutomu Mizushima (Director). (2012, July 5). Normal Dialogue / Different Clothes / Shouting Instructions + (No. 1). In Joshiraku. Mainichi Broadcasting System. + + Examples + -------- + >>> you_fool() + "You fool!" + + See Also + -------- + :func:`bakayarou` + The same function but in Japanese for no reason in particular. + """ + +md_docstring_b = """ + Shout "You fool!". + + **Notes** + + The average "You fool!" travels at 340 m/s1. + + **References** + + 1. Tsutomu Mizushima (Director). (2012, July 5). Normal Dialogue / Different Clothes / Shouting Instructions + (No. 1). In Joshiraku. Mainichi Broadcasting System. + + **Examples** + + ```python + >>> you_fool() + "You fool!" + ``` + + **See Also** + + * :func:`bakayarou`: The same function but in Japanese for no reason in particular. + """ + + +@pytest.mark.parametrize("processor", [NumpyProcessor()]) +def test_numpy_processor(processor): + assert_processor_result(processor or NumpyProcessor(), docstring_a, md_docstring_a) + assert_processor_result(processor or NumpyProcessor(), docstring_b, md_docstring_b)