microproduct/dem-sentiral/ISCEApp/site-packages/recommonmark/parser.py

296 lines
9.9 KiB
Python
Raw Permalink Normal View History

2023-08-28 10:17:29 +00:00
"""Docutils CommonMark parser"""
import sys
from os.path import splitext
from docutils import parsers, nodes
from sphinx import addnodes
from commonmark import Parser
from warnings import warn
# The URL helpers live in ``urlparse`` on Python 2 and in ``urllib.parse`` on
# Python 3; import whichever matches the running interpreter.
if sys.version_info < (3, 0):
    from urlparse import urlparse, unquote
else:
    from urllib.parse import urlparse, unquote
__all__ = ['CommonMarkParser']
class CommonMarkParser(parsers.Parser):
    """Docutils parser for CommonMark.

    The input string is parsed with ``commonmark.Parser`` and the resulting
    AST is walked node by node.  Every AST node is dispatched to a
    ``visit_<type>`` method when it is entered and to a ``depart_<type>``
    method when it is left; those handlers build the docutils node tree
    underneath ``self.document``.

    ``self.current_node`` always points at the docutils node that new
    children are appended to.  Container handlers move it down on visit and
    it is moved back up on depart.
    """

    # Extensions (without the dot) that ``visit_link`` strips from
    # scheme-less link targets.
    supported = ('md', 'markdown')

    # Optional callable ``text -> text`` applied to a heading's text before
    # it is normalized into a section name (see ``depart_heading``).
    translate_section_name = None

    # Baseline configuration; ``parse`` copies it and overlays the
    # ``recommonmark_config`` value when one is reachable from the document.
    default_config = {
        'known_url_schemes': None,
    }

    def __init__(self):
        # Maps heading level -> most recent docutils element at that level
        # (level 0 is the document itself once ``setup_sections`` has run).
        self._level_to_elem = {}
        # Defined up front so handlers invoked before ``parse`` fail with a
        # clear ``None`` rather than a missing attribute.
        self.document = None
        self.current_node = None
        self.config = self.default_config.copy()

    def parse(self, inputstring, document):
        """Parse ``inputstring`` as CommonMark and populate ``document``.

        :param inputstring: Markdown source text.
        :param document: docutils document that receives the generated nodes.
        """
        self.document = document
        self.current_node = document
        self.config = self.default_config.copy()
        try:
            new_cfg = self.document.settings.env.config.recommonmark_config
        except AttributeError:
            # Best effort: any link in the settings -> env -> config chain
            # may be absent, in which case the defaults are used unchanged.
            pass
        else:
            self.config.update(new_cfg)
        # setup_parse / finish_parse are not defined in this class; they are
        # presumably inherited from ``parsers.Parser``.
        self.setup_parse(inputstring, document)
        self.setup_sections()
        parser = Parser()
        ast = parser.parse(inputstring + '\n')
        self.convert_ast(ast)
        self.finish_parse()

    def convert_ast(self, ast):
        """Walk ``ast`` and dispatch every node to its visit/depart handler.

        A handler named ``visit_<type>`` / ``depart_<type>`` (type
        lower-cased) is used when this class defines one; otherwise
        ``default_visit`` / ``default_depart`` is called.
        """
        for node, entering in ast.walker():
            prefix = "visit" if entering else "depart"
            handler_name = "{0}_{1}".format(prefix, node.t.lower())
            handler = getattr(self, handler_name, None)
            if handler is None:
                handler = getattr(self, "default_{0}".format(prefix))
            handler(node)

    # Node type enter/exit handlers

    def default_visit(self, mdnode):
        """Fallback visit handler: node types without a handler are ignored."""
        pass

    def default_depart(self, mdnode):
        """Fallback depart handler.

        If there is a matching ``visit_<type>`` method for a container node,
        that method moved ``current_node`` down into a new element, so back
        up to its parent when the node is exited.  A container without a
        visit handler was skipped entirely and only produces a warning.
        """
        if not mdnode.is_container():
            return
        # Lower-case the type so the lookup matches the one in convert_ast.
        visit_name = 'visit_{0}'.format(mdnode.t.lower())
        if hasattr(self, visit_name):
            self.current_node = self.current_node.parent
        else:
            warn("Container node skipped: type={0}".format(mdnode.t))

    def visit_document(self, _):
        """Enter the AST root.

        ``parse`` already made the docutils document the current node, so
        there is nothing to create here.  Defining this handler (together
        with ``depart_document``) keeps the root from being routed to
        ``default_depart``, which would report it as a skipped container.
        """
        pass

    def depart_document(self, _):
        """Leave the AST root; ``current_node`` stays on the document."""
        pass

    def visit_heading(self, mdnode):
        """Open a new section and start collecting its title."""
        # Test if we're replacing a section level first: a heading at the
        # level of the section we are inside closes that section.
        if isinstance(self.current_node, nodes.section):
            if self.is_section_level(mdnode.level, self.current_node):
                self.current_node = self.current_node.parent

        line = mdnode.sourcepos[0][0]
        title_node = nodes.title()
        title_node.line = line

        new_section = nodes.section()
        new_section.line = line
        new_section.append(title_node)
        self.add_section(new_section, mdnode.level)

        # Set the current node to the title node to accumulate text
        # children/etc for the heading.
        self.current_node = title_node

    def depart_heading(self, _):
        """Finish establishing the section.

        Wrap up the title node, but stay in the section node.  The section
        name is derived from all the text nodes added to the title.
        """
        assert isinstance(self.current_node, nodes.title)
        # The title node has a tree of text nodes; use the whole thing to
        # determine the section id and names.
        text = self.current_node.astext()
        if self.translate_section_name:
            text = self.translate_section_name(text)
        name = nodes.fully_normalize_name(text)
        section = self.current_node.parent
        section['names'].append(name)
        self.document.note_implicit_target(section, section)
        self.current_node = section

    def visit_text(self, mdnode):
        """Append the node's literal as a docutils ``Text`` node."""
        self.current_node.append(nodes.Text(mdnode.literal, mdnode.literal))

    def visit_softbreak(self, _):
        """Represent a soft line break as a newline text node."""
        self.current_node.append(nodes.Text('\n'))

    def visit_linebreak(self, _):
        """Represent a hard line break as a raw HTML ``<br />``."""
        self.current_node.append(nodes.raw('', '<br />', format='html'))

    def visit_paragraph(self, mdnode):
        """Open a paragraph and make it the current node."""
        p = nodes.paragraph(mdnode.literal)
        p.line = mdnode.sourcepos[0][0]
        self.current_node.append(p)
        self.current_node = p

    def visit_emph(self, _):
        """Open an emphasis node and make it the current node."""
        n = nodes.emphasis()
        self.current_node.append(n)
        self.current_node = n

    def visit_strong(self, _):
        """Open a strong node and make it the current node."""
        n = nodes.strong()
        self.current_node.append(n)
        self.current_node = n

    def visit_code(self, mdnode):
        """Append an inline literal; ``current_node`` is left unchanged."""
        n = nodes.literal(mdnode.literal, mdnode.literal)
        self.current_node.append(n)

    def visit_link(self, mdnode):
        """Open a reference node for a link.

        Targets with a recognised URL scheme, or with a fragment, become a
        plain ``reference``.  Anything else is additionally wrapped in a
        Sphinx ``pending_xref`` of type ``any``.  ``current_node`` ends up on
        the reference node so the link text is collected inside it.
        """
        ref_node = nodes.reference()
        # Check destination is supported for cross-linking and remove
        # extension
        destination = mdnode.destination
        _, ext = splitext(destination)

        # Check if the destination starts with a url scheme, since internal
        # and external links need to be handled differently.
        url_check = urlparse(destination)
        known_url_schemes = self.config.get('known_url_schemes')
        if known_url_schemes:
            scheme_known = url_check.scheme in known_url_schemes
        else:
            scheme_known = bool(url_check.scheme)

        # TODO check for other supported extensions, such as those specified
        # in the Sphinx conf.py file but how to access this information?
        if not scheme_known and ext[1:] in self.supported:
            # Drop only the trailing extension; a blanket str.replace would
            # also eat the same text elsewhere in the path
            # (e.g. 'docs.md/index.md').
            destination = destination[:-len(ext)]
        ref_node['refuri'] = destination

        # TODO okay, so this is actually not always the right line number,
        # but these mdnodes won't have sourcepos on them for whatever reason.
        # This is better than 0 though.
        ref_node.line = self._get_line(mdnode)
        if mdnode.title:
            ref_node['title'] = mdnode.title
        next_node = ref_node

        # If there's not a url scheme (e.g. 'https' for 'https:...' links),
        # or there is a scheme but it's not in the list of known_url_schemes,
        # then assume it's a cross-reference and pass it to Sphinx as an
        # `:any:` ref.
        if not url_check.fragment and not scheme_known:
            wrap_node = addnodes.pending_xref(
                reftarget=unquote(destination),
                reftype='any',
                refdomain=None,  # Added to enable cross-linking
                refexplicit=True,
                refwarn=True
            )
            # TODO also not correct sourcepos
            wrap_node.line = self._get_line(mdnode)
            if mdnode.title:
                wrap_node['title'] = mdnode.title
            wrap_node.append(ref_node)
            next_node = wrap_node

        self.current_node.append(next_node)
        self.current_node = ref_node

    def depart_link(self, mdnode):
        """Step back out of the reference (and its ``pending_xref`` wrapper)."""
        parent = self.current_node.parent
        if isinstance(parent, addnodes.pending_xref):
            # The reference was wrapped, so climb past the wrapper as well.
            self.current_node = parent.parent
        else:
            self.current_node = parent

    @staticmethod
    def _plain_text(mdnode):
        """Return the literal text of ``mdnode`` and all of its descendants.

        Nodes whose ``literal`` is ``None`` contribute nothing.
        """
        return ''.join(
            sub.literal or ''
            for sub, entering in mdnode.walker()
            if entering
        )

    def visit_image(self, mdnode):
        """Append an image node, folding the description into ``alt``.

        When the first child carries literal text, the text of all children
        is joined into the ``alt`` attribute and the children are detached
        from the AST so they are not also emitted as docutils nodes.
        """
        img_node = nodes.image()
        img_node['uri'] = mdnode.destination

        first = mdnode.first_child
        if first and first.literal:
            alt_parts = [first.literal]
            # Blank the first child's text and cut every sibling link so the
            # remaining walk over these children adds nothing visible.
            first.literal = ''
            mdnode.first_child = mdnode.last_child = None
            sibling = first.nxt
            first.nxt = None
            while sibling is not None:
                # Siblings such as emphasis have ``literal`` set to None;
                # collect their descendants' text instead of failing in join.
                alt_parts.append(self._plain_text(sibling))
                following = sibling.nxt
                sibling.nxt = None
                sibling = following
            img_node['alt'] = ''.join(alt_parts)

        self.current_node.append(img_node)
        self.current_node = img_node

    def visit_list(self, mdnode):
        """Open a bullet or enumerated list and make it the current node."""
        if mdnode.list_data['type'] == "bullet":
            list_node_cls = nodes.bullet_list
        else:
            list_node_cls = nodes.enumerated_list
        list_node = list_node_cls()
        list_node.line = mdnode.sourcepos[0][0]
        self.current_node.append(list_node)
        self.current_node = list_node

    def visit_item(self, mdnode):
        """Open a list item and make it the current node."""
        node = nodes.list_item()
        node.line = mdnode.sourcepos[0][0]
        self.current_node.append(node)
        self.current_node = node

    def visit_code_block(self, mdnode):
        """Append a literal block; fenced blocks keep their info string.

        The info string of a fenced block is stored as the ``language``
        attribute.  A single trailing newline is removed from the code text.
        """
        kwargs = {}
        if mdnode.is_fenced and mdnode.info:
            kwargs['language'] = mdnode.info
        text = mdnode.literal or ''
        if text.endswith('\n'):
            text = text[:-1]
        node = nodes.literal_block(text, text, **kwargs)
        self.current_node.append(node)

    def visit_block_quote(self, mdnode):
        """Open a block quote and make it the current node."""
        q = nodes.block_quote()
        q.line = mdnode.sourcepos[0][0]
        self.current_node.append(q)
        self.current_node = q

    def visit_html(self, mdnode):
        """Append the node's literal as raw HTML."""
        raw_node = nodes.raw(mdnode.literal,
                             mdnode.literal, format='html')
        # The source position can be missing on these nodes, so guard it.
        if mdnode.sourcepos is not None:
            raw_node.line = mdnode.sourcepos[0][0]
        self.current_node.append(raw_node)

    def visit_html_inline(self, mdnode):
        """Inline HTML is emitted the same way as any other raw HTML."""
        self.visit_html(mdnode)

    def visit_html_block(self, mdnode):
        """HTML blocks are emitted the same way as any other raw HTML."""
        self.visit_html(mdnode)

    def visit_thematic_break(self, _):
        """Append a docutils transition for a thematic break."""
        self.current_node.append(nodes.transition())

    # Section handling

    def setup_sections(self):
        """Reset section tracking so that level 0 is the document."""
        self._level_to_elem = {0: self.document}

    def add_section(self, section, level):
        """Attach ``section`` under the nearest shallower tracked level.

        :param section: the new section element.
        :param level: heading level of the section.
        :raises ValueError: if no tracked level is smaller than ``level``
            (``max`` over an empty sequence).
        """
        parent_level = max(
            known for known in self._level_to_elem if known < level
        )
        self._level_to_elem[parent_level].append(section)
        self._level_to_elem[level] = section

        # Prune level to limit: deeper levels belonged to the previous
        # branch and must not be picked as parents any more.
        self._level_to_elem = {
            known: elem
            for known, elem in self._level_to_elem.items()
            if known <= level
        }

    def is_section_level(self, level, section):
        """Return True if ``section`` is the element tracked at ``level``."""
        return self._level_to_elem.get(level, None) is section

    def _get_line(self, mdnode):
        """Return the start line of ``mdnode`` or of its nearest ancestor.

        Walks up the parent chain until a node with a source position is
        found; returns 0 when none has one.
        """
        while mdnode:
            if mdnode.sourcepos:
                return mdnode.sourcepos[0][0]
            mdnode = mdnode.parent
        return 0