296 lines
9.9 KiB
Python
296 lines
9.9 KiB
Python
"""Docutils CommonMark parser"""

# Standard library
import sys
from os.path import splitext
from warnings import warn

# Third-party
from docutils import parsers, nodes
from sphinx import addnodes
from commonmark import Parser

# ``urlparse`` and ``unquote`` are imported from ``urllib.parse`` on
# Python 3 and from the ``urlparse`` module on Python 2.
if sys.version_info >= (3, 0):
    from urllib.parse import urlparse, unquote
else:
    from urlparse import urlparse, unquote

__all__ = ['CommonMarkParser']
|
|
|
|
|
|
class CommonMarkParser(parsers.Parser):
    """Docutils parser for CommonMark

    The input is parsed with ``commonmark.Parser`` and the resulting AST is
    walked node by node.  Each node is dispatched to a ``visit_<type>``
    (entering) or ``depart_<type>`` (leaving) method, which builds the
    matching docutils nodes under ``self.document``.  ``self.current_node``
    is the docutils node that new children are appended to.
    """

    supported = ('md', 'markdown')

    # Optional callable applied to a heading's text before it is normalized
    # into the section name (see ``depart_heading``).
    translate_section_name = None

    default_config = {
        # When falsy, any link destination that has a URL scheme is treated
        # as an external link; otherwise only the listed schemes are
        # (see ``visit_link``).
        'known_url_schemes': None,
    }

    def __init__(self):
        # Maps heading level -> the docutils element currently open at that
        # level; reset for each document by ``setup_sections``.
        self._level_to_elem = {}

    def parse(self, inputstring, document):
        """Parse ``inputstring`` as CommonMark and populate ``document``."""
        self.document = document
        self.current_node = document
        self.config = self.default_config.copy()
        try:
            new_cfg = self.document.settings.env.config.recommonmark_config
            self.config.update(new_cfg)
        except AttributeError:
            # Some link of settings.env.config.recommonmark_config is
            # missing: keep the defaults.
            pass
        self.setup_parse(inputstring, document)
        self.setup_sections()
        parser = Parser()
        ast = parser.parse(inputstring + '\n')
        self.convert_ast(ast)
        self.finish_parse()

    def convert_ast(self, ast):
        """Walk ``ast`` and dispatch every node to its visit/depart handler.

        The handler name is ``visit_<type>`` or ``depart_<type>`` with the
        lower-cased node type; ``default_visit`` / ``default_depart`` are
        used when no specific handler exists.
        """
        for (node, entering) in ast.walker():
            fn_prefix = "visit" if entering else "depart"
            fn_name = "{0}_{1}".format(fn_prefix, node.t.lower())
            fn_default = "default_{0}".format(fn_prefix)
            fn = getattr(self, fn_name, None)
            if fn is None:
                fn = getattr(self, fn_default)
            fn(node)

    # Node type enter/exit handlers
    def default_visit(self, mdnode):
        """Fallback visit handler: ignore the node."""
        pass

    def default_depart(self, mdnode):
        """Default node depart handler

        If there is a matching ``visit_<type>`` method for a container node,
        then we should make sure to back up to its parent element when the
        node is exited.
        """
        if mdnode.is_container():
            # Lower-case the type so the lookup agrees with ``convert_ast``.
            fn_name = 'visit_{0}'.format(mdnode.t.lower())
            if not hasattr(self, fn_name):
                warn("Container node skipped: type={0}".format(mdnode.t))
            else:
                self.current_node = self.current_node.parent

    def visit_document(self, _):
        """Enter the AST root; ``parse`` already set up the document."""
        pass

    def depart_document(self, _):
        """Leave the AST root.

        Defined explicitly so the root container does not fall through to
        ``default_depart`` and emit a "Container node skipped" warning.
        """
        pass

    def visit_heading(self, mdnode):
        """Open a new section (with an empty title) for a heading."""
        # Test if we're replacing a section level first
        if isinstance(self.current_node, nodes.section):
            if self.is_section_level(mdnode.level, self.current_node):
                self.current_node = self.current_node.parent

        title_node = nodes.title()
        title_node.line = mdnode.sourcepos[0][0]

        new_section = nodes.section()
        new_section.line = mdnode.sourcepos[0][0]
        new_section.append(title_node)

        self.add_section(new_section, mdnode.level)

        # Set the current node to the title node to accumulate text
        # children/etc for heading.
        self.current_node = title_node

    def depart_heading(self, _):
        """Finish establishing section

        Wrap up title node, but stick in the section node. Add the section
        names based on all the text nodes added to the title.
        """
        assert isinstance(self.current_node, nodes.title)
        # The title node has a tree of text nodes, use the whole thing to
        # determine the section id and names
        text = self.current_node.astext()
        if self.translate_section_name:
            text = self.translate_section_name(text)
        name = nodes.fully_normalize_name(text)
        section = self.current_node.parent
        section['names'].append(name)
        self.document.note_implicit_target(section, section)
        self.current_node = section

    def visit_text(self, mdnode):
        """Append the node's literal text to the current node."""
        # Single-argument form, as in ``visit_softbreak``: the ``rawsource``
        # argument of ``nodes.Text`` is deprecated in recent docutils.
        self.current_node.append(nodes.Text(mdnode.literal))

    def visit_softbreak(self, _):
        """Append a newline text node for a soft line break."""
        self.current_node.append(nodes.Text('\n'))

    def visit_linebreak(self, _):
        """Append a raw ``<br />`` node (format ``html``) for a hard break."""
        self.current_node.append(nodes.raw('', '<br />', format='html'))

    def visit_paragraph(self, mdnode):
        """Open a paragraph and make it the current node."""
        p = nodes.paragraph(mdnode.literal)
        p.line = mdnode.sourcepos[0][0]
        self.current_node.append(p)
        self.current_node = p

    def visit_emph(self, _):
        """Open an emphasis node and make it the current node."""
        n = nodes.emphasis()
        self.current_node.append(n)
        self.current_node = n

    def visit_strong(self, _):
        """Open a strong node and make it the current node."""
        n = nodes.strong()
        self.current_node.append(n)
        self.current_node = n

    def visit_code(self, mdnode):
        """Append an inline literal holding the code span's text."""
        n = nodes.literal(mdnode.literal, mdnode.literal)
        self.current_node.append(n)

    def visit_link(self, mdnode):
        """Open a reference for a link, wrapped in a Sphinx xref if internal.

        The reference node becomes the current node so the link text is
        collected inside it; ``depart_link`` restores the previous node.
        """
        ref_node = nodes.reference()
        # Check destination is supported for cross-linking and remove
        # extension
        destination = mdnode.destination
        root, ext = splitext(destination)

        # Check if the destination starts with a url scheme, since internal
        # and external links need to be handled differently.
        url_check = urlparse(destination)
        known_url_schemes = self.config.get('known_url_schemes')
        if known_url_schemes:
            scheme_known = url_check.scheme in known_url_schemes
        else:
            scheme_known = bool(url_check.scheme)

        # TODO check for other supported extensions, such as those specified
        # in the Sphinx conf.py file but how to access this information?
        if not scheme_known and ext[1:] in self.supported:
            # Drop only the trailing extension; ``str.replace`` would also
            # remove the same text anywhere else in the destination.
            destination = root
        ref_node['refuri'] = destination
        # TODO okay, so this is actually not always the right line number,
        # but these mdnodes won't have sourcepos on them for whatever reason.
        # This is better than 0 though.
        ref_node.line = self._get_line(mdnode)
        if mdnode.title:
            ref_node['title'] = mdnode.title
        next_node = ref_node

        # If there's not a url scheme (e.g. 'https' for 'https:...' links),
        # or there is a scheme but it's not in the list of known_url_schemes,
        # then assume it's a cross-reference and pass it to Sphinx as an
        # `:any:` ref.
        if not url_check.fragment and not scheme_known:
            wrap_node = addnodes.pending_xref(
                reftarget=unquote(destination),
                reftype='any',
                refdomain=None,  # Added to enable cross-linking
                refexplicit=True,
                refwarn=True
            )
            # TODO also not correct sourcepos
            wrap_node.line = self._get_line(mdnode)
            if mdnode.title:
                wrap_node['title'] = mdnode.title
            wrap_node.append(ref_node)
            next_node = wrap_node

        self.current_node.append(next_node)
        self.current_node = ref_node

    def depart_link(self, mdnode):
        """Leave a link, stepping over the ``pending_xref`` wrapper if any."""
        if isinstance(self.current_node.parent, addnodes.pending_xref):
            self.current_node = self.current_node.parent.parent
        else:
            self.current_node = self.current_node.parent

    def visit_image(self, mdnode):
        """Append an image node, using the literal child text as ``alt``."""
        img_node = nodes.image()
        img_node['uri'] = mdnode.destination

        first = mdnode.first_child
        if first and first.literal:
            content = [first.literal]
            n = first
            # Blank the first child and detach the children so the alt text
            # is not also emitted as ordinary text.
            # NOTE(review): presumably the first child is blanked rather
            # than just detached because the walker has already advanced to
            # it -- confirm against the commonmark walker.
            first.literal = ''
            mdnode.first_child = mdnode.last_child = None
            while n.nxt:
                following = n.nxt
                n.nxt = None
                n = following
                # Siblings without a literal (e.g. emphasis) would make
                # ``join`` raise TypeError; they contribute no alt text.
                if n.literal:
                    content.append(n.literal)
            img_node['alt'] = ''.join(content)

        self.current_node.append(img_node)
        self.current_node = img_node

    def visit_list(self, mdnode):
        """Open a bullet or enumerated list and make it the current node."""
        if mdnode.list_data['type'] == "bullet":
            list_node_cls = nodes.bullet_list
        else:
            list_node_cls = nodes.enumerated_list
        list_node = list_node_cls()
        list_node.line = mdnode.sourcepos[0][0]

        self.current_node.append(list_node)
        self.current_node = list_node

    def visit_item(self, mdnode):
        """Open a list item and make it the current node."""
        node = nodes.list_item()
        node.line = mdnode.sourcepos[0][0]
        self.current_node.append(node)
        self.current_node = node

    def visit_code_block(self, mdnode):
        """Append a literal block; fenced blocks carry their info string."""
        kwargs = {}
        if mdnode.is_fenced and mdnode.info:
            kwargs['language'] = mdnode.info
        text = ''.join(mdnode.literal)
        # Drop a single trailing newline from the block's text.
        if text.endswith('\n'):
            text = text[:-1]
        node = nodes.literal_block(text, text, **kwargs)
        self.current_node.append(node)

    def visit_block_quote(self, mdnode):
        """Open a block quote and make it the current node."""
        q = nodes.block_quote()
        q.line = mdnode.sourcepos[0][0]
        self.current_node.append(q)
        self.current_node = q

    def visit_html(self, mdnode):
        """Append the node's literal as a raw node with format ``html``."""
        raw_node = nodes.raw(mdnode.literal,
                             mdnode.literal, format='html')
        if mdnode.sourcepos is not None:
            raw_node.line = mdnode.sourcepos[0][0]
        self.current_node.append(raw_node)

    def visit_html_inline(self, mdnode):
        """Handle inline HTML exactly like ``visit_html``."""
        self.visit_html(mdnode)

    def visit_html_block(self, mdnode):
        """Handle an HTML block exactly like ``visit_html``."""
        self.visit_html(mdnode)

    def visit_thematic_break(self, _):
        """Append a transition node for a thematic break."""
        self.current_node.append(nodes.transition())

    # Section handling
    def setup_sections(self):
        """Reset the level map so that level 0 is the document itself."""
        self._level_to_elem = {0: self.document}

    def add_section(self, section, level):
        """Attach ``section`` under the nearest shallower open level.

        The section is recorded as the open element for ``level`` and every
        deeper level is forgotten.
        """
        parent_level = max(
            section_level for section_level in self._level_to_elem
            if level > section_level
        )
        parent = self._level_to_elem[parent_level]
        parent.append(section)
        self._level_to_elem[level] = section

        # Prune level to limit
        self._level_to_elem = {
            section_level: elem
            for section_level, elem in self._level_to_elem.items()
            if section_level <= level
        }

    def is_section_level(self, level, section):
        """Return whether ``section`` is the element open at ``level``."""
        return self._level_to_elem.get(level, None) == section

    def _get_line(self, mdnode):
        """Return the start line of ``mdnode`` or its nearest ancestor.

        Walks up the parent chain to the first node with a truthy
        ``sourcepos``; returns 0 if none is found.
        """
        while mdnode:
            if mdnode.sourcepos:
                return mdnode.sourcepos[0][0]
            mdnode = mdnode.parent
        return 0
|