"""Docutils CommonMark parser""" import sys from os.path import splitext from docutils import parsers, nodes from sphinx import addnodes from commonmark import Parser from warnings import warn if sys.version_info < (3, 0): from urlparse import urlparse, unquote else: from urllib.parse import urlparse, unquote __all__ = ['CommonMarkParser'] class CommonMarkParser(parsers.Parser): """Docutils parser for CommonMark""" supported = ('md', 'markdown') translate_section_name = None default_config = { 'known_url_schemes': None, } def __init__(self): self._level_to_elem = {} def parse(self, inputstring, document): self.document = document self.current_node = document self.config = self.default_config.copy() try: new_cfg = self.document.settings.env.config.recommonmark_config self.config.update(new_cfg) except AttributeError: pass self.setup_parse(inputstring, document) self.setup_sections() parser = Parser() ast = parser.parse(inputstring + '\n') self.convert_ast(ast) self.finish_parse() def convert_ast(self, ast): for (node, entering) in ast.walker(): fn_prefix = "visit" if entering else "depart" fn_name = "{0}_{1}".format(fn_prefix, node.t.lower()) fn_default = "default_{0}".format(fn_prefix) fn = getattr(self, fn_name, None) if fn is None: fn = getattr(self, fn_default) fn(node) # Node type enter/exit handlers def default_visit(self, mdnode): pass def default_depart(self, mdnode): """Default node depart handler If there is a matching ``visit_`` method for a container node, then we should make sure to back up to it's parent element when the node is exited. """ if mdnode.is_container(): fn_name = 'visit_{0}'.format(mdnode.t) if not hasattr(self, fn_name): warn("Container node skipped: type={0}".format(mdnode.t)) else: self.current_node = self.current_node.parent def visit_heading(self, mdnode): # Test if we're replacing a section level first if isinstance(self.current_node, nodes.section): if self.is_section_level(mdnode.level, self.current_node): self.current_node = self.current_node.parent title_node = nodes.title() title_node.line = mdnode.sourcepos[0][0] new_section = nodes.section() new_section.line = mdnode.sourcepos[0][0] new_section.append(title_node) self.add_section(new_section, mdnode.level) # Set the current node to the title node to accumulate text children/etc # for heading. self.current_node = title_node def depart_heading(self, _): """Finish establishing section Wrap up title node, but stick in the section node. Add the section names based on all the text nodes added to the title. """ assert isinstance(self.current_node, nodes.title) # The title node has a tree of text nodes, use the whole thing to # determine the section id and names text = self.current_node.astext() if self.translate_section_name: text = self.translate_section_name(text) name = nodes.fully_normalize_name(text) section = self.current_node.parent section['names'].append(name) self.document.note_implicit_target(section, section) self.current_node = section def visit_text(self, mdnode): self.current_node.append(nodes.Text(mdnode.literal, mdnode.literal)) def visit_softbreak(self, _): self.current_node.append(nodes.Text('\n')) def visit_linebreak(self, _): self.current_node.append(nodes.raw('', '
', format='html')) def visit_paragraph(self, mdnode): p = nodes.paragraph(mdnode.literal) p.line = mdnode.sourcepos[0][0] self.current_node.append(p) self.current_node = p def visit_emph(self, _): n = nodes.emphasis() self.current_node.append(n) self.current_node = n def visit_strong(self, _): n = nodes.strong() self.current_node.append(n) self.current_node = n def visit_code(self, mdnode): n = nodes.literal(mdnode.literal, mdnode.literal) self.current_node.append(n) def visit_link(self, mdnode): ref_node = nodes.reference() # Check destination is supported for cross-linking and remove extension destination = mdnode.destination _, ext = splitext(destination) # Check if the destination starts with a url scheme, since internal and # external links need to be handled differently. url_check = urlparse(destination) known_url_schemes = self.config.get('known_url_schemes') if known_url_schemes: scheme_known = url_check.scheme in known_url_schemes else: scheme_known = bool(url_check.scheme) # TODO check for other supported extensions, such as those specified in # the Sphinx conf.py file but how to access this information? if not scheme_known and ext.replace('.', '') in self.supported: destination = destination.replace(ext, '') ref_node['refuri'] = destination # TODO okay, so this is acutally not always the right line number, but # these mdnodes won't have sourcepos on them for whatever reason. This # is better than 0 though. ref_node.line = self._get_line(mdnode) if mdnode.title: ref_node['title'] = mdnode.title next_node = ref_node # If there's not a url scheme (e.g. 'https' for 'https:...' links), # or there is a scheme but it's not in the list of known_url_schemes, # then assume it's a cross-reference and pass it to Sphinx as an `:any:` ref. if not url_check.fragment and not scheme_known: wrap_node = addnodes.pending_xref( reftarget=unquote(destination), reftype='any', refdomain=None, # Added to enable cross-linking refexplicit=True, refwarn=True ) # TODO also not correct sourcepos wrap_node.line = self._get_line(mdnode) if mdnode.title: wrap_node['title'] = mdnode.title wrap_node.append(ref_node) next_node = wrap_node self.current_node.append(next_node) self.current_node = ref_node def depart_link(self, mdnode): if isinstance(self.current_node.parent, addnodes.pending_xref): self.current_node = self.current_node.parent.parent else: self.current_node = self.current_node.parent def visit_image(self, mdnode): img_node = nodes.image() img_node['uri'] = mdnode.destination if mdnode.first_child and mdnode.first_child.literal: content = [mdnode.first_child.literal] n = mdnode.first_child mdnode.first_child.literal = '' mdnode.first_child = mdnode.last_child = None while getattr(n, 'nxt'): n.nxt, n = None, n.nxt content.append(n.literal) img_node['alt'] = ''.join(content) self.current_node.append(img_node) self.current_node = img_node def visit_list(self, mdnode): list_node = None if (mdnode.list_data['type'] == "bullet"): list_node_cls = nodes.bullet_list else: list_node_cls = nodes.enumerated_list list_node = list_node_cls() list_node.line = mdnode.sourcepos[0][0] self.current_node.append(list_node) self.current_node = list_node def visit_item(self, mdnode): node = nodes.list_item() node.line = mdnode.sourcepos[0][0] self.current_node.append(node) self.current_node = node def visit_code_block(self, mdnode): kwargs = {} if mdnode.is_fenced and mdnode.info: kwargs['language'] = mdnode.info text = ''.join(mdnode.literal) if text.endswith('\n'): text = text[:-1] node = nodes.literal_block(text, text, **kwargs) self.current_node.append(node) def visit_block_quote(self, mdnode): q = nodes.block_quote() q.line = mdnode.sourcepos[0][0] self.current_node.append(q) self.current_node = q def visit_html(self, mdnode): raw_node = nodes.raw(mdnode.literal, mdnode.literal, format='html') if mdnode.sourcepos is not None: raw_node.line = mdnode.sourcepos[0][0] self.current_node.append(raw_node) def visit_html_inline(self, mdnode): self.visit_html(mdnode) def visit_html_block(self, mdnode): self.visit_html(mdnode) def visit_thematic_break(self, _): self.current_node.append(nodes.transition()) # Section handling def setup_sections(self): self._level_to_elem = {0: self.document} def add_section(self, section, level): parent_level = max( section_level for section_level in self._level_to_elem if level > section_level ) parent = self._level_to_elem[parent_level] parent.append(section) self._level_to_elem[level] = section # Prune level to limit self._level_to_elem = dict( (section_level, section) for section_level, section in self._level_to_elem.items() if section_level <= level ) def is_section_level(self, level, section): return self._level_to_elem.get(level, None) == section def _get_line(self, mdnode): while mdnode: if mdnode.sourcepos: return mdnode.sourcepos[0][0] mdnode = mdnode.parent return 0