import asyncio import re import html as html_module import markdown import bleach from nonebot import logger async def markdown_to_image(markdown_text: str, output_path: str, browser=None): """将 Markdown 转换为 HTML 并使用 Puppeteer 截图。""" page = None should_close_browser = False try: # Convert markdown to HTML. The markdown library handles special chars safely. # Note: do NOT html.escape() before markdown.markdown() - it breaks markdown syntax. html_content = markdown.markdown(markdown_text, extensions=["fenced_code", "tables"]) # Sanitize to prevent XSS from malicious AI responses allowed_tags = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'hr', 'ul', 'ol', 'li', 'blockquote', 'pre', 'code', 'span', 'table', 'thead', 'tbody', 'tr', 'th', 'td', 'strong', 'em', 'b', 'i', 'u', 'a', 'img', 'div', ] allowed_attrs = {'a': ['href', 'title'], 'img': ['src', 'alt', 'title']} html_content = bleach.clean(html_content, tags=allowed_tags, attributes=allowed_attrs) # 使用传入的浏览器实例或创建新的 if browser is None: from pyppeteer import launch browser = await launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox']) should_close_browser = True page = await browser.newPage() page.setDefaultNavigationTimeout(15000) # 设置页面样式,使内容更美观 await page.setContent(f"""