⚡️ Speed up function markdown_to_marimo by 85%
#578
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 85% (0.85x) speedup for `markdown_to_marimo` in `marimo/_convert/utils.py`
⏱️ Runtime: 198 microseconds → 107 microseconds (best of 231 runs)
📝 Explanation and details
The optimization achieves an 85% speedup by eliminating a performance bottleneck in the `indent_text` function and removing unnecessary function calls for static strings.

Key Optimizations:
- Replaced `textwrap.indent` with a custom implementation: The original code used `textwrap.indent(text, INDENT)`, which is a general-purpose function with significant overhead. The optimized version implements a simple, direct approach using string replacement: `INDENT + text.replace('\n', '\n' + INDENT)`. This avoids the internal complexity and abstraction layers of `textwrap.indent`. (Note: unlike `textwrap.indent`, which by default skips lines consisting solely of whitespace, the replacement form also indents empty and whitespace-only lines.)
- Inlined static string indentation: Instead of calling `codegen.indent_text('r"""')` to indent the constant string `'r"""'`, the optimized code directly uses the pre-computed result `' r"""'`. This eliminates 31 function calls (as shown in the profiler data).

Performance Impact:
- The `codegen.indent_text('r"""')` call consumed 55.8% of total execution time (338,180 ns out of 605,754 ns).

Test Case Performance:
The optimization particularly excels with multiline markdown inputs, showing 175-349% speedups for cases like `test_large_multiline` (175% faster) and `test_string_with_only_newlines` (349% faster).
Single-line cases show minimal impact since they don't trigger the indentation logic, which is expected behavior.
This optimization is especially valuable since `markdown_to_marimo` appears to be a utility function that could be called frequently during markdown processing workflows, making the elimination of the `textwrap.indent` overhead particularly beneficial.

✅ Correctness verification report:
⚙️ Existing Unit Tests and Runtime
`_convert/test_convert_utils.py::test_markdown_to_marimo`
🌀 Generated Regression Tests and Runtime
import textwrap
imports
import pytest
from marimo._convert.utils import markdown_to_marimo
unit tests
--------------------
1. BASIC TEST CASES
--------------------
def test_simple_single_line():
# Basic: single line markdown, no special chars
codeflash_output = markdown_to_marimo("hello world") # 834ns -> 835ns (0.120% slower)
def test_single_line_with_quotes():
# Basic: single line with double quotes (not triple)
codeflash_output = markdown_to_marimo('hello "world"') # 786ns -> 802ns (2.00% slower)
def test_single_line_with_single_quotes():
# Basic: single line with single quotes
codeflash_output = markdown_to_marimo("hello 'world'") # 766ns -> 773ns (0.906% slower)
def test_single_line_with_triple_quotes():
# Basic: single line with triple quotes, should be escaped
codeflash_output = markdown_to_marimo('hello """world"""') # 1.36μs -> 1.38μs (1.16% slower)
def test_multiline_markdown():
# Basic: multi-line markdown
input_md = "hello\nworld"
expected = "\n".join([
"mo.md(",
" r"""",
"hello\nworld",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 4.22μs -> 1.00μs (320% faster)
def test_multiline_with_triple_quotes():
# Basic: multi-line with triple quotes
input_md = "hello\n"""\nworld"
expected = "\n".join([
"mo.md(",
" r"""",
"hello\n\"\"\"\nworld",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 4.14μs -> 1.35μs (207% faster)
def test_single_line_with_backslash():
# Basic: single line with backslash (should not be escaped in r-string)
codeflash_output = markdown_to_marimo(r"hello \ world") # 778ns -> 735ns (5.85% faster)
def test_multiline_with_backslash():
# Basic: multi-line with backslash
input_md = r"hello \n world\nfoo"
expected = "\n".join([
"mo.md(",
" r"""",
r"hello \n world\nfoo",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 728ns -> 793ns (8.20% slower)
def test_single_line_with_unicode():
# Basic: single line with unicode
codeflash_output = markdown_to_marimo("café") # 1.14μs -> 1.18μs (3.15% slower)
def test_multiline_with_unicode():
# Basic: multi-line with unicode
input_md = "café\nnaïve"
expected = "\n".join([
"mo.md(",
" r"""",
"café\nnaïve",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 4.18μs -> 1.05μs (297% faster)
--------------------
2. EDGE TEST CASES
--------------------
def test_empty_string():
# Edge: empty string input
codeflash_output = markdown_to_marimo("") # 725ns -> 700ns (3.57% faster)
def test_string_with_only_spaces():
# Edge: string of only spaces
codeflash_output = markdown_to_marimo(" ") # 755ns -> 783ns (3.58% slower)
def test_string_with_only_newlines():
# Edge: string of only newlines
input_md = "\n\n"
expected = "\n".join([
"mo.md(",
" r"""",
"\n\n",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 3.98μs -> 886ns (349% faster)
def test_string_with_triple_quotes_only():
# Edge: string is only triple quotes
codeflash_output = markdown_to_marimo('"""') # 1.11μs -> 1.10μs (1.27% faster)
def test_string_with_six_quotes():
# Edge: string with six double quotes in a row
input_md = '""""""'
expected = 'mo.md(r"""\"\"\"\"\"\"""")'
codeflash_output = markdown_to_marimo(input_md) # 1.06μs -> 1.11μs (4.15% slower)
def test_multiline_with_mixed_newlines():
# Edge: string with mixed \r, \n, \r\n
input_md = "a\r\nb\nc\rd"
expected = "\n".join([
"mo.md(",
" r"""",
"a\r\nb\nc\rd",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 3.86μs -> 973ns (297% faster)
def test_multiline_with_leading_and_trailing_newlines():
# Edge: multiline with leading/trailing newlines
input_md = "\nhello\nworld\n"
expected = "\n".join([
"mo.md(",
" r"""",
"\nhello\nworld\n",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 3.68μs -> 930ns (296% faster)
def test_multiline_with_only_whitespace_lines():
# Edge: multiline with lines that are only whitespace
input_md = "foo\n \nbar"
expected = "\n".join([
"mo.md(",
" r"""",
"foo\n \nbar",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 3.60μs -> 882ns (308% faster)
def test_string_with_tabs():
# Edge: string with tabs
input_md = "foo\tbar"
codeflash_output = markdown_to_marimo(input_md) # 768ns -> 767ns (0.130% faster)
def test_multiline_with_tabs():
# Edge: multiline with tabs
input_md = "foo\tbar\nbaz\tquux"
expected = "\n".join([
"mo.md(",
" r"""",
"foo\tbar\nbaz\tquux",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 3.87μs -> 984ns (294% faster)
def test_string_with_escaped_quotes():
# Edge: string with escaped quotes
input_md = 'foo \" bar'
codeflash_output = markdown_to_marimo(input_md) # 800ns -> 830ns (3.61% slower)
def test_string_with_mixed_quotes():
# Edge: string with both single and double quotes
input_md = ""'""
codeflash_output = markdown_to_marimo(input_md) # 780ns -> 791ns (1.39% slower)
def test_string_with_trailing_and_leading_spaces():
# Edge: string with leading and trailing spaces
input_md = " hello world "
codeflash_output = markdown_to_marimo(input_md) # 784ns -> 806ns (2.73% slower)
def test_string_with_special_characters():
# Edge: string with special markdown characters
input_md = "# Heading\n* Bullet\n> Quote"
expected = "\n".join([
"mo.md(",
" r"""",
"# Heading\n* Bullet\n> Quote",
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 4.03μs -> 942ns (328% faster)
--------------------
3. LARGE SCALE TEST CASES
--------------------
def test_large_single_line():
# Large: very long single line
s = "a" * 999
codeflash_output = markdown_to_marimo(s) # 1.32μs -> 1.29μs (2.56% faster)
def test_large_multiline():
# Large: 1000 lines of 'foo'
lines = ["foo"] * 1000
input_md = "\n".join(lines)
expected = "\n".join([
"mo.md(",
" r"""",
input_md,
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 4.89μs -> 1.78μs (175% faster)
def test_large_multiline_with_triple_quotes():
# Large: 1000 lines, every 100th line contains triple quotes
lines = []
for i in range(1000):
if i % 100 == 0:
lines.append('"""')
else:
lines.append("foo")
input_md = "\n".join(lines)
# All triple quotes should be escaped
expected_lines = []
for i in range(1000):
if i % 100 == 0:
expected_lines.append('\"\"\"')
else:
expected_lines.append("foo")
expected = "\n".join([
"mo.md(",
" r"""",
"\n".join(['\"\"\"' if i % 100 == 0 else "foo" for i in range(1000)]),
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 6.33μs -> 3.37μs (87.9% faster)
def test_large_single_line_with_triple_quotes():
# Large: single line with many triple quotes
s = ' '.join(['"""'] * 100)
expected = 'mo.md(r"""' + ' '.join(['\"\"\"'] * 100) + '""")'
codeflash_output = markdown_to_marimo(s) # 3.25μs -> 3.20μs (1.72% faster)
def test_large_multiline_with_varied_content():
# Large: 1000 lines, alternating content
lines = []
for i in range(1000):
if i % 3 == 0:
lines.append("foo")
elif i % 3 == 1:
lines.append('"""')
else:
lines.append("# Heading")
input_md = "\n".join(lines)
expected = "\n".join([
"mo.md(",
" r"""",
"\n".join([
"foo" if i % 3 == 0 else
'\"\"\"' if i % 3 == 1 else
"# Heading"
for i in range(1000)
]),
""""",
")"
])
codeflash_output = markdown_to_marimo(input_md) # 13.4μs -> 10.3μs (29.6% faster)
--------------------
4. PROPERTY-BASED TESTS (OPTIONAL, for robustness)
--------------------
@pytest.mark.parametrize("input_md", [
"foo",
"foo\nbar",
'foo """ bar',
'foo\n"""',
"",
" ",
"\n\n",
"foo\nbar\nbaz",
"foo\tbar",
"foo\nbar\nbaz\nqux",
"foo\n" * 999,
])
def test_idempotency(input_md):
# The function should always return a string
codeflash_output = markdown_to_marimo(input_md); result = codeflash_output # 29.5μs -> 11.9μs (148% faster)
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from __future__ import annotations
import textwrap
imports
import pytest # used for our unit tests
from marimo._convert.utils import markdown_to_marimo
unit tests
-------------------
Basic Test Cases
-------------------
def test_single_line_markdown():
# Simple markdown, no newlines
input_str = "Hello, world!"
expected = 'mo.md(r"""Hello, world!""")'
codeflash_output = markdown_to_marimo(input_str) # 747ns -> 853ns (12.4% slower)
def test_single_line_with_special_chars():$x$ "$x$ """)'
# Markdown with special characters
input_str = "Hello, world!
expected = 'mo.md(r"""Hello, world!
codeflash_output = markdown_to_marimo(input_str) # 757ns -> 818ns (7.46% slower)
def test_single_line_with_triple_quotes():
# Markdown containing triple quotes
input_str = 'Here is a """triple quote"""'
expected = 'mo.md(r"""Here is a \"\"\"triple quote\"\"\"""")'
codeflash_output = markdown_to_marimo(input_str) # 1.38μs -> 1.40μs (1.29% slower)
def test_single_line_empty_string():
# Empty string input
input_str = ""
expected = 'mo.md(r""" """)'
codeflash_output = markdown_to_marimo(input_str) # 710ns -> 703ns (0.996% faster)
def test_single_line_space_string():
# String with a single space
input_str = " "
expected = 'mo.md(r""" """)'
codeflash_output = markdown_to_marimo(input_str) # 689ns -> 696ns (1.01% slower)
def test_multi_line_markdown():
# Multi-line markdown input
input_str = "Hello,\nworld!"
expected = (
"mo.md(\n"
" r"""\n"
"Hello,\n"
"world!\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 4.41μs -> 1.15μs (284% faster)
def test_multi_line_with_triple_quotes():
# Multi-line markdown with triple quotes
input_str = 'First line\n"""Second line"""\nThird line'
expected = (
"mo.md(\n"
" r"""\n"
"First line\n"
'\"\"\"Second line\"\"\"' + "\n"
"Third line\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 4.56μs -> 1.75μs (160% faster)
def test_multi_line_with_special_chars():$x$ "$x$ \n"
# Multi-line markdown with special characters
input_str = "Line 1\nLine 2 bold\nLine 3
expected = (
"mo.md(\n"
" r"""\n"
"Line 1\n"
"Line 2 bold\n"
"Line 3
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 3.81μs -> 1.09μs (251% faster)
-------------------
Edge Test Cases
-------------------
def test_only_triple_quotes():
# Input is only triple quotes
input_str = '"""'
expected = 'mo.md(r"""\"\"\"""")'
codeflash_output = markdown_to_marimo(input_str) # 1.05μs -> 1.04μs (0.192% faster)
def test_six_quotes_in_a_row():
# Input is six quotes in a row
input_str = '""""""'
expected = 'mo.md(r"""\"\"\"\"\"\"""")'
codeflash_output = markdown_to_marimo(input_str) # 1.06μs -> 1.10μs (3.90% slower)
def test_triple_quotes_multiline():
# Triple quotes on multiple lines
input_str = '"""\n"""\n"""\n'
expected = (
"mo.md(\n"
" r"""\n"
'\"\"\"\n'
'\"\"\"\n'
'\"\"\"\n'
"\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 4.52μs -> 1.46μs (211% faster)
def test_input_with_backslashes():
# Input containing backslashes
input_str = r"This is a backslash: \"
expected = 'mo.md(r"""This is a backslash: \\""")'
codeflash_output = markdown_to_marimo(input_str) # 818ns -> 794ns (3.02% faster)
def test_input_with_newline_and_triple_quotes():
# Input with newline and triple quotes
input_str = 'First line\n"""Second line"""'
expected = (
"mo.md(\n"
" r"""\n"
"First line\n"
'\"\"\"Second line\"\"\"' + "\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 4.63μs -> 1.58μs (193% faster)
def test_input_only_newlines():
# Input is only newlines
input_str = "\n\n"
expected = (
"mo.md(\n"
" r"""\n"
"\n"
"\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 3.77μs -> 1.03μs (267% faster)
def test_input_with_leading_and_trailing_whitespace():
# Input with leading and trailing whitespace
input_str = " Leading\nTrailing "
expected = (
"mo.md(\n"
" r"""\n"
" Leading\n"
"Trailing \n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 3.68μs -> 1.09μs (236% faster)
def test_input_with_unicode_characters():
# Input with unicode characters
input_str = "Hello, 世界\nПривет, мир"
expected = (
"mo.md(\n"
" r"""\n"
"Hello, 世界\n"
"Привет, мир\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 4.41μs -> 1.75μs (153% faster)
def test_input_with_tabs():
# Input with tabs
input_str = "Tab\tseparated\tvalues"
expected = 'mo.md(r"""Tab\tseparated\tvalues""")'
codeflash_output = markdown_to_marimo(input_str) # 800ns -> 815ns (1.84% slower)
def test_input_with_carriage_return():
# Input with carriage return
input_str = "Line1\rLine2"
expected = 'mo.md(r"""Line1\rLine2""")'
codeflash_output = markdown_to_marimo(input_str) # 779ns -> 788ns (1.14% slower)
def test_input_with_mixed_newlines():
# Input with mixed newlines (\r\n and \n)
input_str = "Line1\r\nLine2\nLine3"
expected = (
"mo.md(\n"
" r"""\n"
"Line1\r\nLine2\nLine3\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 4.14μs -> 1.11μs (272% faster)
-------------------
Large Scale Test Cases
-------------------
def test_large_single_line():
# Large single line input
input_str = "a" * 1000
expected = f'mo.md(r"""{"a"*1000}""")'
codeflash_output = markdown_to_marimo(input_str) # 1.17μs -> 1.20μs (2.42% slower)
def test_large_multi_line():
# Large multi-line input, 1000 lines
input_str = "\n".join([f"Line {i}" for i in range(1000)])
expected = (
"mo.md(\n"
" r"""\n"
+ "\n".join([f"Line {i}" for i in range(1000)]) + "\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 6.00μs -> 2.90μs (107% faster)
def test_large_multi_line_with_triple_quotes():
# Large multi-line input with triple quotes in each line
input_str = "\n".join([f'Line {i} """' for i in range(1000)])
expected = (
"mo.md(\n"
" r"""\n"
+ "\n".join([f'Line {i} \"\"\"' for i in range(1000)]) + "\n"
""""\n"
")"
)
codeflash_output = markdown_to_marimo(input_str) # 27.1μs -> 23.5μs (15.2% faster)
def test_large_single_line_with_triple_quotes():
# Large single line with many triple quotes
input_str = '"""'.join(['x'] * 333) # 332 triple quotes between 333 x's
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from marimo._convert.utils import markdown_to_marimo
def test_markdown_to_marimo():
markdown_to_marimo('\n')
def test_markdown_to_marimo_2():
markdown_to_marimo('')
🔎 Concolic Coverage Tests and Runtime
`codeflash_concolic_k_oa4bjc/tmpz315i79u/test_concolic_coverage.py::test_markdown_to_marimo`
`codeflash_concolic_k_oa4bjc/tmpz315i79u/test_concolic_coverage.py::test_markdown_to_marimo_2`
To edit these changes, run `git checkout codeflash/optimize-markdown_to_marimo-mhtxuhj7` and push.