1313# limitations under the License.
1414
1515import re
16- from typing import Optional
16+ from typing import Optional , List , Dict
1717
1818import pypandoc # type: ignore
1919
2020from gapic .utils .lines import wrap
2121
# --- PERFORMANCE CACHE ---
# Module-level memo mapping an original docstring to its converted RST text
# (stripped, not yet wrapped or indented). It is filled by
# ``batch_convert_docstrings`` and by ``rst`` and is never evicted.
_RAW_RST_CACHE: Dict[str, str] = {}
24+
25+
26+ def _aggressive_fast_convert (text : str ) -> Optional [str ]:
27+ """
28+ Converts common Markdown (Code, Links, Lists) to RST using pure Python.
29+ Only gives up (returns None) for complex structures like Tables.
30+ """
31+ # 1. TABLE CHECK (The only thing we strictly need Pandoc for)
32+ # If we see a pipe surrounded by spaces, it's likely a table.
33+ if re .search (r" \| " , text ) or re .search (r"\|\n" , text ):
34+ return None
35+
36+ # 2. CODE BLOCKS: `code` -> ``code``
37+ # RST requires double backticks. Markdown uses one.
38+ # We look for backticks that aren't already double.
39+ # Regex: Negative lookbehind/lookahead to ensure we don't match ``already rst``.
40+ converted = re .sub (r"(?<!`)`([^`]+)`(?!`)" , r"``\1``" , text )
41+
42+ # 3. LINKS: [Text](URL) -> `Text <URL>`__
43+ # We use anonymous links (__) to avoid collision issues.
44+ converted = re .sub (r"\[([^\]]+)\]\(([^)]+)\)" , r"`\1 <\2>`__" , converted )
45+
46+ # 4. BOLD: **text** -> **text** (Compatible, no change needed)
47+
48+ # 5. HEADINGS: # Heading -> Heading\n=======
49+ # (Simple fix for H1/H2, mostly sufficient for docstrings)
50+ converted = re .sub (r"^# (.*)$" , r"\1\n" + "=" * 10 , converted , flags = re .MULTILINE )
51+ converted = re .sub (r"^## (.*)$" , r"\1\n" + "-" * 10 , converted , flags = re .MULTILINE )
52+
53+ # 6. LISTS: Markdown lists (- item) work in RST mostly fine.
54+ # We just ensure there's a newline before a list starts to satisfy RST strictness.
55+ converted = re .sub (r"(\n[^-*].*)\n\s*[-*] " , r"\1\n\n- " , converted )
56+
57+ return converted
58+
59+
def batch_convert_docstrings(docstrings: List[str]) -> None:
    """Pre-populate the RST cache for many docstrings at once.

    Each distinct, not-yet-cached docstring that contains formatting
    characters is first offered to the pure-Python converter. Whatever that
    converter declines is joined into a single payload and converted with
    one pandoc invocation instead of one invocation per docstring.

    This is a best-effort warm-up: any docstring that does not end up in
    the cache is simply converted later by ``rst`` on demand.

    Args:
        docstrings (List[str]): Raw Markdown docstrings. Duplicates and
            empty entries are ignored.
    """
    # ``dict.fromkeys`` de-duplicates while keeping first-seen order, so the
    # payload sent to pandoc is deterministic from run to run.
    candidates = [
        doc
        for doc in dict.fromkeys(docstrings)
        if doc
        and doc not in _RAW_RST_CACHE
        and re.search(r"[|*`_[\]#]", doc)  # Only text with formatting chars.
    ]
    if not candidates:
        return

    # 1. Try the pure-Python conversion; collect what it declines.
    pandoc_batch: List[str] = []
    for doc in candidates:
        fast_result = _aggressive_fast_convert(doc)
        if fast_result is not None:
            _RAW_RST_CACHE[doc] = fast_result.strip()
        else:
            pandoc_batch.append(doc)

    if not pandoc_batch:
        return

    # 2. Convert the remainder with a single pandoc call.
    #
    # The marker must survive the Markdown -> RST round trip unchanged, so
    # it is purely alphanumeric. A marker wrapped in double underscores is
    # CommonMark strong emphasis: pandoc would re-emit it as ``**...**`` and
    # the split below would never find it.
    split_marker = "GAPICDOCSTRINGBATCHSPLIT"
    separator = f"\n\n{split_marker}\n\n"
    giant_payload = separator.join(pandoc_batch)

    try:
        converted_payload = pypandoc.convert_text(
            giant_payload,
            "rst",
            format="commonmark",
            extra_args=["--columns=1000"],
        )
    except Exception:
        # Deliberately best-effort: a failed warm-up only means these
        # docstrings are converted one by one when ``rst`` is called.
        return

    results = converted_payload.split(split_marker)

    # A count mismatch means the chunks can no longer be paired with their
    # sources (e.g. a docstring contained the marker, or an unterminated
    # construct swallowed it). Cache nothing rather than cache wrong text.
    if len(results) != len(pandoc_batch):
        return

    for original, converted in zip(pandoc_batch, results):
        _RAW_RST_CACHE[original] = converted.strip()
114+
22115
23116def rst (
24117 text : str ,
@@ -27,59 +120,53 @@ def rst(
27120 nl : Optional [bool ] = None ,
28121 source_format : str = "commonmark" ,
29122):
30- """Convert the given text to ReStructured Text.
31-
32- Args:
33- text (str): The text to convert.
34- width (int): The number of columns.
35- indent (int): The number of columns to indent each line of text
36- (except the first).
37- nl (bool): Whether to append a trailing newline.
38- Defaults to appending a newline if the result is more than
39- one line long.
40- source_format (str): The source format. This is ``commonmark`` by
41- default, which is what is used by convention in protocol buffers.
42-
43- Returns:
44- str: The same text, in RST format.
45- """
46- # Quick check: If the text block does not appear to have any formatting,
47- # do not convert it.
48- # (This makes code generation significantly faster; calling out to pandoc
49- # is by far the most expensive thing we do.)
50- if not re .search (r"[|*`_[\]]" , text ):
123+ """Convert the given text to ReStructured Text."""
124+
125+ # 1. Super Fast Path: No special chars? Just wrap.
126+ if not re .search (r"[|*`_[\]#]" , text ):
51127 answer = wrap (
52128 text ,
53129 indent = indent ,
54130 offset = indent + 3 ,
55131 width = width - indent ,
56132 )
133+ return _finalize (answer , nl , indent )
134+
135+ # 2. Check Cache
136+ if text in _RAW_RST_CACHE :
137+ raw_rst = _RAW_RST_CACHE [text ]
57138 else :
58- # Convert from CommonMark to ReStructured Text.
59- answer = (
60- pypandoc .convert_text (
139+ # Slow Path: Missed by batch or new string.
140+ # TRY PYTHON CONVERT FIRST.
141+ # This prevents the 'Slow Path' from actually being slow.
142+ fast_result = _aggressive_fast_convert (text )
143+
144+ if fast_result is not None :
145+ raw_rst = fast_result .strip ()
146+ else :
147+ # The absolute last resort: Shell out to Pandoc
148+ raw_rst = pypandoc .convert_text (
61149 text ,
62150 "rst" ,
63151 format = source_format ,
64- extra_args = ["--columns=%d" % (width - indent )],
65- )
66- .strip ()
67- .replace ("\n " , f"\n { ' ' * indent } " )
68- )
152+ extra_args = ["--columns=1000" ]
153+ ).strip ()
154+
155+ _RAW_RST_CACHE [text ] = raw_rst
156+
157+ # 3. Python Formatting
158+ if "::" in raw_rst or ".. code" in raw_rst :
159+ answer = raw_rst .replace ("\n " , f"\n { ' ' * indent } " )
160+ else :
161+ answer = wrap (raw_rst , indent = indent , offset = indent , width = width - indent )
162+
163+ return _finalize (answer , nl , indent )
69164
70- # Add a newline to the end of the document if any line breaks are
71- # already present.
72- #
73- # This causes the closing """ to be on the subsequent line only when
74- # appropriate.
165+
166+ def _finalize (answer , nl , indent ):
167+ """Helper to handle trailing newlines and quotes."""
75168 if nl or ("\n " in answer and nl is None ):
76169 answer += "\n " + " " * indent
77-
78- # If the text ends in a double-quote, append a period.
79- # This ensures that we do not get a parse error when this output is
80- # followed by triple-quotes.
81170 if answer .endswith ('"' ):
82171 answer += "."
83-
84- # Done; return the answer.
85- return answer
172+ return answer
0 commit comments