Coverage for src/markdown_katex/extension.py: 97%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

206 statements  

1# This file is part of the markdown-katex project 

2# https://github.com/mbarkhau/markdown-katex 

3# 

4# Copyright (c) 2019-2021 Manuel Barkhau (mbarkhau@gmail.com) - MIT License 

5# SPDX-License-Identifier: MIT 

6import re 

7import json 

8import base64 

9import typing as typ 

10import hashlib 

11import logging 

12 

13from markdown.extensions import Extension 

14from markdown.preprocessors import Preprocessor 

15from markdown.postprocessors import Postprocessor 

16 

17from markdown_katex import wrapper 

18from markdown_katex.html import KATEX_STYLES 

19 

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Matches one complete <svg>...</svg> element; DOTALL lets the
# element span several lines. The match is non-greedy, so adjacent
# elements are matched one at a time.
SVG_ELEM_RE = re.compile(r"<svg.*?</svg>", flags=re.DOTALL | re.MULTILINE)

# Namespace attributes inserted into <svg> elements that lack "xmlns"
# (note the trailing space, the text is spliced in front of other attributes).
SVG_XMLNS = (
    'xmlns="http://www.w3.org/2000/svg" '
    'xmlns:xlink="http://www.w3.org/1999/xlink" '
)

# Template for an <img> tag carrying base64 encoded svg data.
B64IMG_TMPL = '<img src="data:image/svg+xml;base64,{img_text}"/>'


# group(1): leading whitespace, group(2): the fence itself
# (three or more backticks or tildes).
FENCE_RE = re.compile(r"^(\s*)(`{3,}|~{3,})")
# Same as FENCE_RE, but the fence must be directly followed by "math".
BLOCK_START_RE = re.compile(r"^(\s*)(`{3,}|~{3,})math")
# A whole math block: group(3) is everything between "math" and the
# closing fence, which must repeat the opening fence (backreference \2).
BLOCK_CLEAN_RE = re.compile(r"^(\s*)(`{3,}|~{3,})math(.*)(\2)$", flags=re.DOTALL)

33 

34 

def _clean_block_text(block_text: str) -> str:
    """Strip the opening ``math`` fence and the closing fence from a block.

    Returns the text captured between "math" and the closing fence when
    block_text matches BLOCK_CLEAN_RE; any other text is returned unchanged.
    """
    fenced = BLOCK_CLEAN_RE.match(block_text)
    return fenced.group(3) if fenced else block_text

41 

42 

def make_marker_id(text: str) -> str:
    """Return the hexadecimal md5 digest of the utf-8 encoded text."""
    digest = hashlib.md5(text.encode("utf-8"))
    return digest.hexdigest()

46 

47 

def svg2img(html: str) -> str:
    """Converts inline svg elements to images.

    Every <svg> element in html is replaced by an <img> tag whose src
    is a base64 encoded data url of that element.

    This is done as a work around for #75 of WeasyPrint
    https://github.com/Kozea/WeasyPrint/issues/75
    """

    def _as_img_tag(svg_match: typ.Match[str]) -> str:
        svg_markup = svg_match.group(0)
        if "xmlns" not in svg_markup:
            # Add the namespace declarations when the element has none.
            svg_markup = svg_markup.replace("<svg ", "<svg " + SVG_XMLNS)

        encoded: bytes = base64.standard_b64encode(svg_markup.encode("utf-8"))
        return B64IMG_TMPL.format(img_text=encoded.decode("utf-8"))

    # The generated <img> tags never contain "<svg", so one left to right
    # pass gives the same result as searching again after each replacement.
    return SVG_ELEM_RE.sub(_as_img_tag, html)

70 

71 

def tex2html(tex: str, options: wrapper.Options = None) -> str:
    """Render tex to html via wrapper.tex2html.

    The keys 'no_inline_svg' and 'insert_fonts_css' are options of the
    extension, not of the katex-cli program, so they are removed before
    the options are passed on. If 'no_inline_svg' is truthy, inline
    <svg> elements in the result are converted to <img> tags.

    The options argument itself is never modified.
    """
    no_inline_svg = False
    katex_options = options

    if options:
        # Work on a copy: popping from the caller's dict would make a
        # second call with the same dict lose its 'no_inline_svg' setting.
        katex_options = options.copy()
        no_inline_svg = katex_options.pop('no_inline_svg', False)
        katex_options.pop('insert_fonts_css', None)

    result = wrapper.tex2html(tex, katex_options)
    if no_inline_svg:
        result = svg2img(result)
    return result

87 

88 

def md_block2html(block_text: str, default_options: wrapper.Options = None) -> str:
    """Render a math block to html.

    Options start out as {'display-mode': True} and are updated first
    with default_options, then with a JSON object found on the first line
    of the block (the remainder of the opening fence line), if any.

    Raises whatever json.loads raises if that first line contains braces
    but is not valid JSON.
    """
    options: wrapper.Options = {'display-mode': True}

    if default_options:
        options.update(default_options)

    block_text = _clean_block_text(block_text)

    # partition (rather than split + unpacking) so that text without any
    # newline does not raise; such text has no header line and is passed
    # on as is.
    header, newline, rest = block_text.partition("\n")
    if newline and "{" in header and "}" in header:
        options.update(json.loads(header))
        block_text = rest

    return tex2html(block_text, options)

102 

103 

104def _clean_inline_text(inline_text: str) -> str: 

105 if inline_text.startswith("$``"): 

106 inline_text = inline_text[len("$``") :] 

107 if inline_text.startswith("$`"): 

108 inline_text = inline_text[len("$`") :] 

109 if inline_text.endswith("``$"): 

110 inline_text = inline_text[: -len("``$")] 

111 if inline_text.endswith("`$"): 

112 inline_text = inline_text[: -len("`$")] 

113 return inline_text 

114 

115 

def md_inline2html(inline_text: str, default_options: wrapper.Options = None) -> str:
    """Render inline math to html.

    The delimiters are stripped from inline_text and the remainder is
    rendered with a copy of default_options (or with empty options if
    default_options is empty or None).
    """
    if default_options:
        options = default_options.copy()
    else:
        options = {}

    tex = _clean_inline_text(inline_text)
    return tex2html(tex, options)

120 

121 

# An inline code delimiter: one or two backticks.
INLINE_DELIM_RE = re.compile(r"`{1,2}")


class InlineCodeItem(typ.NamedTuple):
    """An inline math span found in a line.

    inline_text is line[start:end], i.e. it includes the surrounding
    "$" characters and the backtick delimiters.
    """

    inline_text: str
    start      : int
    end        : int


def iter_inline_katex(line: str) -> typ.Iterable[InlineCodeItem]:
    """Yield the inline math spans of line, from left to right.

    A span is a backtick delimited section (one or two backticks on each
    side) that is directly preceded and directly followed by a "$".
    Delimited sections without both "$" characters are skipped.
    """
    pos = 0
    while True:
        inline_match_start = INLINE_DELIM_RE.search(line, pos)
        if inline_match_start is None:
            break

        pos   = inline_match_start.end()
        start = inline_match_start.start()
        delim = inline_match_start.group()

        try:
            # index of the last character of the closing delimiter
            end = line.index(delim, start + len(delim)) + (len(delim) - 1)
        except ValueError:
            # no closing delimiter
            continue

        pos = end

        # Explicit bounds checks: line[start - 1] would wrap around to the
        # last character for start == 0 and line[end + 1] would raise an
        # IndexError if the closing delimiter ends the line.
        if start == 0 or line[start - 1] != "$":
            continue
        if end + 1 >= len(line) or line[end + 1] != "$":
            continue

        inline_text = line[start - 1 : end + 2]
        pos = end + len(delim)

        yield InlineCodeItem(inline_text, start - 1, end + 2)

159 

160 

class KatexExtension(Extension):
    """Markdown extension that registers the katex pre- and postprocessor.

    Attributes:
        options: the non-empty option values, taken from kwargs or from
            the extension config.
        math_html: rendered html by marker tag; filled by the
            preprocessor and read by the postprocessor.
    """

    def __init__(self, **kwargs) -> None:
        self.config = {
            'no_inline_svg'   : ["", "Replace inline <svg> with <img> tags."],
            'insert_fonts_css': ["", "Insert font loading stylesheet."],
        }
        # One config entry (with an empty default) per option reported
        # by wrapper.parse_options().
        self.config.update(
            (opt_name, ["", help_text]) for opt_name, help_text in wrapper.parse_options().items()
        )

        # A value passed via kwargs takes precedence over the configured
        # one; empty strings count as "not set" and are left out.
        candidates = (
            (opt_name, kwargs.get(opt_name, self.getConfig(opt_name, "")))
            for opt_name in self.config
        )
        self.options: wrapper.Options = {
            opt_name: value for opt_name, value in candidates if value != ""
        }

        self.math_html: typ.Dict[str, str] = {}
        super().__init__(**kwargs)

    def reset(self) -> None:
        """Forget all rendered html."""
        self.math_html.clear()

    def extendMarkdown(self, md) -> None:
        """Register the processors and this extension with md."""
        md.preprocessors.register(
            KatexPreprocessor(md, self), name='katex_fenced_code_block', priority=50
        )
        md.postprocessors.register(
            KatexPostprocessor(md, self), name='katex_fenced_code_block', priority=0
        )
        md.registerExtension(self)

191 

192 

class KatexPreprocessor(Preprocessor):
    """Replaces math blocks and inline math with marker tags.

    The rendered html for each marker tag is stored in ext.math_html.
    """

    def __init__(self, md, ext: KatexExtension) -> None:
        super().__init__(md)
        self.ext: KatexExtension = ext

    def _make_tag_for_block(self, block_lines: typ.List[str]) -> str:
        """Render a math block and return its (indented) marker tag."""
        first_line  = block_lines[0]
        indent_len  = len(first_line) - len(first_line.lstrip())
        indent_text = first_line[:indent_len]

        # Remove the indentation of the opening fence from every line.
        dedented   = [block_line[indent_len:] for block_line in block_lines]
        block_text = "\n".join(dedented).rstrip()

        marker_id  = make_marker_id("block" + block_text)
        marker_tag = f"tmp_block_md_katex_{marker_id}"

        math_html = md_block2html(block_text, self.ext.options)
        self.ext.math_html[marker_tag] = f"<p>{math_html}</p>"
        return indent_text + marker_tag

    def _make_tag_for_inline(self, inline_text: str) -> str:
        """Render inline math and return its marker tag."""
        marker_id  = make_marker_id("inline" + inline_text)
        marker_tag = f"tmp_inline_md_katex_{marker_id}"

        self.ext.math_html[marker_tag] = md_inline2html(inline_text, self.ext.options)
        return marker_tag

    def _iter_out_lines(self, lines: typ.List[str]) -> typ.Iterable[str]:
        """Yield the output lines for lines.

        Lines of ordinary fenced blocks are passed through untouched, a
        closed math block is replaced by a single marker tag line and
        inline math in all other lines is replaced by marker tags.
        """
        is_in_math_fence     = False
        is_in_fence          = False
        expected_close_fence = "```"

        block_lines: typ.List[str] = []

        for line in lines:
            if is_in_fence:
                yield line
                if line.rstrip() == expected_close_fence:
                    is_in_fence = False
                continue

            if is_in_math_fence:
                block_lines.append(line)
                if line.rstrip() == expected_close_fence:
                    is_in_math_fence = False
                    marker_tag = self._make_tag_for_block(block_lines)
                    del block_lines[:]
                    yield marker_tag
                continue

            # Not inside any fence: the math fence is checked first.
            math_fence_match = BLOCK_START_RE.match(line)
            if math_fence_match:
                is_in_math_fence     = True
                expected_close_fence = math_fence_match.group(1) + math_fence_match.group(2)
                block_lines.append(line)
                continue

            fence_match = FENCE_RE.match(line)
            if fence_match:
                is_in_fence          = True
                expected_close_fence = fence_match.group(1) + fence_match.group(2)
                yield line
                continue

            # iterate in reverse, so that start and end indexes
            # remain valid after replacements
            for code in reversed(list(iter_inline_katex(line))):
                marker_tag = self._make_tag_for_inline(code.inline_text)
                line = line[: code.start] + marker_tag + line[code.end :]

            yield line

        # unclosed block: emit its lines unchanged
        yield from block_lines

    def run(self, lines: typ.List[str]) -> typ.List[str]:
        """Return the processed lines as a list."""
        return list(self._iter_out_lines(lines))

269 

270 

271# NOTE (mb): 

272# Q: Why this business with the Postprocessor? Why 

273# not just do `yield tag_text` and save the hassle 

274# of `self.ext.math_html[marker_tag] = tag_text` ? 

275# A: Maybe there are other processors that can't be 

276# trusted to leave the inserted markup alone. Maybe 

277# the inserted markup could be incorrectly parsed as 

278# valid markdown. 

279 

280 

class KatexPostprocessor(Postprocessor):
    """Replaces the marker tags stored in ext.math_html with their html."""

    def __init__(self, md, ext: KatexExtension) -> None:
        super().__init__(md)
        self.ext: KatexExtension = ext

    def run(self, text: str) -> str:
        """Return text with all known marker tags replaced.

        Text that contains none of the markers is returned unchanged.
        Otherwise KATEX_STYLES is prepended (unless it is already part of
        the text or the 'insert_fonts_css' option is falsy) and a warning
        is logged for every marker that is not found.
        """
        html_by_marker = self.ext.math_html
        if not any(marker in text for marker in html_by_marker):
            return text

        ext_options = self.ext.options
        insert_fonts_css = ext_options.get("insert_fonts_css", True) if ext_options else True

        if insert_fonts_css and KATEX_STYLES not in text:
            text = KATEX_STYLES + text

        for marker, html in html_by_marker.items():
            is_block = marker.startswith("tmp_block_md_katex_")
            assert is_block or marker.startswith("tmp_inline_md_katex_")

            if marker not in text:
                logger.warning(f"KatexPostprocessor couldn't find: {marker}")
                continue

            # A block marker is replaced together with the <p> element
            # around it where there is one; bare occurrences are
            # replaced afterwards.
            wrapped_marker = "<p>" + marker + "</p>" if is_block else marker

            while marker in text:
                target = wrapped_marker if wrapped_marker in text else marker
                text = text.replace(target, html)

        return text