Source code for httk.httkweb.render_httk

#
#    The high-throughput toolkit (httk)
#    Copyright (C) 2012-2018 Rickard Armiento
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function
import os, sys, re, pprint, unicodedata, codecs

# Retain python2 compatibility without a dependency on httk.core
if sys.version[0] == "2":
    # Note:
    # The "html" module is not a builtin in Python 2.
    # If it happens to be installed, we still do not
    # want to use it since it is old (last updated in 2011,
    # version 1.16). Use the builtin cgi module to get the
    # escape function instead.

    from cgi import escape
    unicode_type=unicode

    from StringIO import StringIO
    import ConfigParser as configparser
else:
    from html import escape
    unicode_type=str

    from io import StringIO
    import configparser


class RenderHttk(object):
    """Renderer for ``.httk`` pages written in a small reStructuredText subset.

    On construction the source file is read, an optional leading metadata
    block (introduced by ``---``) is split off, and the remaining text is
    parsed and rendered to HTML.  The results are available through
    ``content()`` and ``metadata()``.
    """

    # Characters allowed directly before (left) and after (right) inline
    # markup; both strings are spliced verbatim into regex character classes.
    left_punctuation_chars = "'[({<:\"; -"
    right_punctuation_chars = "]')}>:,!.?\"; -"

    #left_punctuation_chars_quotes = "\\'\\(\\[\\{\\<:-\\\"; "
    #right_punctuation_chars_quotes = "'\\)\\]\\}\\>:,-\\!\\.\\?\\\"; "

    # Characters accepted as section title adornment (over/underlines).
    adornment_chars = ['!','"','#','$','%','&',"'",'(',')','*','+',',','-','.','/',':',';','<','=','>','?','@','[',"\\",']','^','_','`','{','|','}','~']
    # Two-character prefixes that start a bullet list item.
    bullet_item_markers = ['- ', '* ', '+ ']
    # Not referenced by the code visible in this file (option lists are
    # listed as unsupported by the parser).
    option_list_characters = ['-','/']

    def __init__(self, render_dir, render_filename, global_data):

        self.render_dir = render_dir
        self.render_filename = render_filename
        self.filename = os.path.join(render_dir, render_filename)

        with codecs.open(self.filename, 'r', encoding='utf-8') as f:
            source = f.read()

        self.global_data = global_data

        if render_dir != '':
            owd = os.getcwd()
            os.chdir(render_dir)
        try:
            self.split_content(source)
        finally:
            if render_dir != '':
                os.chdir(owd)

[docs]    def make_id(self, s):
        s = unicodedata.normalize('NFKD', s).encode('ascii','ignore').decode('utf-8')
        s = s.lower()
        s = s.replace(' ', '_')
        #s = re.sub('[^0-9a-zA-Z_]', '', s)
        #s = re.sub('^[^a-zA-Z_]+', '', s)
        return s

[docs]    def rst_light_html_renderer(self, content):
        outstr = ''
        for entry in content:
            if entry['type'] == 'section':
                modifiers = [x['name'] for x in entry['modifiers']]
                modifiers += ['section']
                outstr += '<div id="'+self.make_id(entry['title'])+'" class="'+(' '.join(modifiers))+'">'
                end_div_tag = '</div>\n'
                outstr += '<h'+str(entry['level']+1)+'>'+entry['title']+'</h'+str(entry['level']+1)+">\n"
                outstr += self.rst_light_html_renderer(entry['content'])
                outstr += end_div_tag
            elif entry['type'] == 'transition':
                outstr += '<hr class="docutils"/>'
            elif entry['type'] == 'textblock':
                modifiers = [x['name'] for x in entry['modifiers']]
                if len(entry['modifiers'])>0:
                    outstr += '<p class="'+(' '.join(modifiers))+'">'
                    end_p_tag = '</p>\n'
                else:
                    outstr += '<p>'
                    end_p_tag = '</p>\n'
                for segment in entry['content']:
                    endtag = '\n'
                    modifiers = [x['name'] for x in segment['modifiers']]
                    if 'literal' in modifiers:
                        modifiers.remove('literal')
                        outstr += '<tt class="docutils literal">'
                        outstr +=  segment['content']
                        outstr += '</tt>'
                        continue
                    if 'strong' in modifiers:
                        modifiers.remove('strong')
                        outstr += '<strong>'
                        endtag = '</strong>' + endtag
                    if 'emphasis' in modifiers:
                        modifiers.remove('emphasis')
                        outstr += '<em>'
                        endtag = '</em>' + endtag
                    if 'link' in modifiers:
                        modifiers.remove('link')
                        outstr += '<a class="reference external" href="'+segment['url']+'">'
                        endtag = '</a>'+endtag
                    if 'anchor' in modifiers:
                        modifiers.remove('anchor')
                        outstr += '<a class="reference internal" href="#'+segment['anchor']+'">'
                        endtag = '</a>'+endtag
                    if len(modifiers)>0:
                        if 'role' in modifiers:
                            modifiers.remove('role')
                            outstr += '<span class="'+(' '.join(modifiers + [segment['role']]))+'">'
                        else:
                            outstr += '<span class="'+(' '.join(modifiers))+'">'
                        endtag = '</span>' + endtag
                    outstr +=  escape(segment['content']).encode('ascii',
                            'xmlcharrefreplace').decode('utf-8')
                    outstr += endtag
                outstr += end_p_tag
        return outstr

[docs]    def rst_light_parse_textstyle(self, content, start_marker, end_marker, style, allow_nested = False, unescape=True, handle_roles = False, handle_hyperlinks = False):

        # Quote
        start_marker = re.escape(start_marker)
        end_marker = re.escape(end_marker)

        outcontent = []
        for segment in content:
            segment_text = segment['content']
            if len(segment['modifiers'])==0 or allow_nested:
                role = False
                link = False

                #try:
                #print("REGEX",segment_text)
                found = re.finditer('(?P<start>['+self.left_punctuation_chars+']|^)(?P<role>:[^:]+:)?'+start_marker+'(?P<content>.+?)(?P<url> +<[^>]+>)?'+end_marker+'(?P<link>_?)(?P<end>['+self.right_punctuation_chars+']|$)',segment_text)
                #except Exception:
                #    print("RE ERROR",repr('(['+self.left_punctuation_chars+']|^)(:[^:]+:)?'+start_marker+'(.+?)(
                #    +<[^>]+>)?'+end_marker+'(_?)(['+self.right_punctuation_chars+']|$)'))
                #    print("STRING",segment_text)
                #    raise
                end_idx = 0
                for m in found:
                    start = m.group('start') if m.group('start') is not None else ''
                    role = m.group('role')[1:-1] if m.group('role') is not None else ''
                    content = m.group('content') if m.group('content') is not None else ''
                    url = m.group('url').lstrip(" <").rstrip(">") if m.group('url') is not None else ''
                    link = m.group('link') if m.group('link') is not None else ''
                    end = m.group('end') if m.group('end') is not None else ''

                    #print("MATCH:",start,'|',role,'|',content,'|',url,'|',link,'|',end)
                    start_idx = m.start()
                    # Text role
                    if handle_roles and role != '' and link == '':
                        #print("TEXT ROLE")
                        before_text = segment_text[end_idx:start_idx]+start
                        outcontent += [{'content':before_text,'modifiers':segment['modifiers'],'unescape':segment['unescape']}]

                        outcontent += [{'content':content+url,'role':role,'modifiers':segment['modifiers'] + [{'name':'role'}], 'unescape':unescape and segment['unescape']}]
                    # Hyperlink
                    elif handle_hyperlinks and role == '' and link != '':
                        #print("HYPERLINK")
                        before_text = segment_text[end_idx:start_idx]+start+role
                        outcontent += [{'content':before_text,'modifiers':segment['modifiers'],'unescape':segment['unescape']}]
                        if url == '':
                            outcontent += [{'content':content,'anchor':self.make_id(content),'modifiers':segment['modifiers'] + [{'name':'anchor'}], 'unescape':unescape and segment['unescape']}]
                        else:
                            outcontent += [{'content':content,'url':url,'modifiers':segment['modifiers'] + [{'name':'link'}], 'unescape':unescape and segment['unescape']}]
                    # Error condition, if it isn't a hyperlink but ends with _, reject the m
                    elif handle_hyperlinks and role != '' and link != '':
                        #print("ERROR")
                        continue
                    # Other markup
                    else:
                        #print("OTHER MARKUP", m.start(), start, start,role)
                        before_text = segment_text[end_idx:start_idx]+start+role
                        #print("BEFORE TEXT", before_text)
                        outcontent += [{'content':before_text,'modifiers':segment['modifiers'],'unescape':segment['unescape']}]
                        outcontent += [{'content':content + url,'modifiers':segment['modifiers'] + [style], 'unescape':unescape and segment['unescape']}]

                    end_idx = m.end()-len(end)

                after_text = segment_text[end_idx:]
                if len(after_text) > 0:
                    outcontent += [{'content':after_text,'modifiers':segment['modifiers'],'unescape':segment['unescape']}]

            else:
                outcontent += [{'content':segment['content'], 'modifiers':segment['modifiers'], 'unescape':segment['unescape']}]

        return outcontent

[docs]    def rst_light_parser(self, source):
        adornment_levels = []
        fifo = []
        content = []
        section_hierarcy = []
        context = content
        block_modifiers = []

        align_hierarcy = []
        last_align = 0
        align = 0
        last_list_index = None

        # Divide text into sections, add an empty line to make sure last textblock is terminated
        for line in source.splitlines() + [""]:
            fifo += [line]
            section_title = None
            adornment = None

            # Detect section header with over and underline
            if len(fifo) == 3:
                line1, line2, line3 = fifo[0].rstrip(), fifo[1].rstrip(), fifo[2].rstrip()
                if len(line1) == len(line3) and len(line1) > 0 and line1 == len(line1) * line1[0] and line3 == len(line1) * line1[0] and line1[0] in self.adornment_chars:
                    section_title = line2.strip()
                    adornment = line1[0] + line1[0]

            # Detect section header with over and underline
            elif len(fifo) == 2:
                line1, line2 = fifo[-2].rstrip(), fifo[-1].rstrip()
                if len(line1) == len(line2) and len(line2) > 0 and line2 == len(line2) * line2[0]:
                    section_title = line1.rstrip()
                    adornment = line2[0]

            # Handle titles
            if section_title is not None:
                if adornment not in adornment_levels:
                    adornment_levels += [adornment]
                level = adornment_levels.index(adornment)
                #section_hierarcy = section_hierarcy[:-(len(section_hierarcy)-level)]
                context = []
                section = {'type':'section','level':level, 'title':section_title, 'content':context, 'modifiers':block_modifiers}
                block_modifiers = []
                while len(section_hierarcy) > 0 and section_hierarcy[-1]['level'] >= level:
                    section_hierarcy = section_hierarcy[:-1]
                if len(section_hierarcy) > 0:
                    section_hierarcy[-1]['content'] += [section]
                else:
                    content += [section]
                section_hierarcy += [section]
                fifo = []
                continue

            # Handle other blocks
            last_align = align
            if len(fifo[-1].strip()) > 0:
                align = len(fifo[-1]) - len(fifo[-1].lstrip(' '))

            if align > last_align:
                skip = False
                # Class
                if fifo[0][:3] == ".. ":
                    skip = True
                # Bullet item
                if fifo[0][:2] in self.bullet_item_markers:
                    skip = True
                # Numbered list item
                if fifo[0][:2] == '#. ' or (fifo[0][0].isdigit() and fifo[0].lstrip("0123456789") == '. '):
                    list_index = int(fifo[0].partition('. ')[0])
                    if last_list_index != None:
                        if list_index == last_list_index+1:
                            skip = True
                    else:
                        skip = True
                if len(fifo) == 2:
                    # Field list
                    if fifo[0][0] == ':' and fifo[0][-1] == ':':
                        skip = True
                    # Definition list
                    else:
                        skip = True
                # Option lists not yet supported
                # Literal block not yet supported
                # Line blocks not yet supported
                # Doctest blocks not supported
                # Tables not supported
                # Footnotes not supported
                # Citations not supported
                # External separated hyperlinks not supported
                if skip:
                    continue

            if len(fifo) >= 2 and len(fifo[-1].strip())==0 or align < last_align:
                # Transition
                first_line_strp = fifo[0].strip()
                if len(fifo) == 2 and len(first_line_strp) >= 4 and first_line_strp == first_line_strp[0]*len(first_line_strp) and not first_line_strp.isalnum():
                    element = {'type':'transition','modifiers':block_modifiers,'align':align}
                    block_modifiers = []
                    context += [element]
                    fifo = []
                    continue
                # Class
                if fifo[0][:3] == ".. ":
                    modparts = fifo[0][3:].split("::")
                    modname = modparts[0]
                    if len(modparts) > 1:
                        modargs = modparts[1].split()
                    else:
                        modargs = []
                    modcontent = fifo[1:-1]
                    if modname != 'class':
                        block_modifiers += [{'name':modname,'args':modargs,'content':modcontent, 'align':align}]
                    else:
                        block_modifiers += [{'name':modargs[0],'args':modargs[1:],'content':modcontent, 'align':align}]
                    fifo = []
                    continue
                #elif fifo[0][0:2] in self.bullet_item_markers:
                #
                #    list_fifo += {}

                allcontent = (" ".join([x.strip() for x in fifo[:-1]])).strip()
                allcontent = [{'content':allcontent,'modifiers':[],'unescape':True}]
                allcontent = self.rst_light_parse_textstyle(allcontent,'``','``',{'name':'literal'},unescape=False)
                allcontent = self.rst_light_parse_textstyle(allcontent,'**','**',{'name':'strong'})
                allcontent = self.rst_light_parse_textstyle(allcontent,'*','*',{'name':'emphasis'})
                # Roles and hyperlinks
                allcontent = self.rst_light_parse_textstyle(allcontent,'`','`',{'name':'literal'}, handle_roles=True, handle_hyperlinks=True)

                # Unescape
                for segment in allcontent:
                    if segment['unescape']:
                        segment['content'] = "\\".join(["".join(y) for y in [x.replace("\\ ","").split("\\") for x in segment['content'].split("\\\\")]])
                element = {'type':'textblock','content':allcontent,'modifiers':block_modifiers,'align':align}
                block_modifiers = []
                context += [element]
                fifo = []
                continue

            # Empty block, just ignore
            if len(fifo) == 1 and len(fifo[-1].strip())==0:
                fifo = []
                continue

        #print("== RESULT")
        #pprint.pprint(content)
        #print("==")
        return content

[docs]    def split_content(self,source):
        if source.startswith("---"):
            alt1 = source[3:].find("---")
            alt2 = source[3:].find("...")
            if alt1 >= 0:
                if alt2 >= 0:
                    pos = min(alt1,alt2)
                else:
                    pos = alt1
            elif alt2 > 0:
                pos = alt2
            else:
                self.metadata_block = ''
                self.content_block = source
                return

            self.metadata_block = source[3:pos+3]
            self.content_block = self.rst_light_html_renderer(self.rst_light_parser(source[pos+6:]))

        else:
            self.metadata_block = ''
            self.content_block = self.rst_light_html_renderer(self.rst_light(source))

        return

[docs]    def content(self):
        return self.content_block

[docs]    def metadata(self):
        md = self.metadata_block
        buf = StringIO("[main]\n"+md)
        config = configparser.ConfigParser()
        config.readfp(buf)
        d = dict(config.items('main'))
        # In Python 3 d.keys() is not a list but an iterator,
        # which means that when we do the deleting of keys and values
        # below, something gets messed up with the iterator.
        # Making sure that d.keys() is a list seems to fix the problem.
        for i in list(d.keys()):
            if i.endswith("-list"):
                l = d[i]
                del d[i]
                newkey = i[:-len("-list")]
                d[newkey] = [x.strip() for x in l.split(",")]
        return d
Source code for httk.httkweb.render_httk

Navigation

Python API