sitescripts/web/converters.py - Issue 17817001: Simple CMS as Anwiki replacement

Side by Side Diff: sitescripts/web/converters.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)

Patch Set: Completed functionality Created Oct. 24, 2013, 9:32 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # coding: utf-8

	2

	3 # This file is part of the Adblock Plus web scripts,

	4 # Copyright (C) 2006-2013 Eyeo GmbH

	5 #

	6 # Adblock Plus is free software: you can redistribute it and/or modify

	7 # it under the terms of the GNU General Public License version 3 as

	8 # published by the Free Software Foundation.

	9 #

	10 # Adblock Plus is distributed in the hope that it will be useful,

	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

	13 # GNU General Public License for more details.

	14 #

	15 # You should have received a copy of the GNU General Public License

	16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

	17

	18 import re, jinja2, markdown

	19 from ..utils import get_custom_template_environment

	20

	21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are

	22 # inserted into the <head> tag

	23 orig_isBlockLevel = markdown.util.isBlockLevel

	24 def isBlockLevel(tag):

	25 if tag == "head":

	26 return True

	27 else:

	28 return orig_isBlockLevel(tag)

	29 markdown.util.isBlockLevel = isBlockLevel

	30

	31 html_escapes = {

	32 "<": "<",

	33 ">": ">",

	34 "&": "&",

	35 "\"": """,

	36 "'": "'",

	37 }

	38

	39 class Converter:

	40 def __init__(self, params, key="pagedata"):

	41 self._params = params

	42 self._key = key

	43

	44 # Read in any parameters specified at the beginning of the file

	45 lines = params[key].splitlines(True)

	46 while len(lines) and re.search(r"^\s[\w\-]+\s=", lines[0]):

	47 name, value = lines.pop(0).split("=", 1)

	48 params[name.strip()] = value.strip()

	49 params[key] = "".join(lines)

	50

	51 def insert_localized_strings(self, text, escapes):

	52 def escape(s):

	53 return re.sub(r".",

	54 lambda match: escapes.get(match.group(0), match.group(0)),

	55 s, flags=re.S)

	56

	57 def lookup_string(match):

	58 name, links = match.groups()

	59 try:

	60 result = self._params["localedata"][name].strip()

	61 except KeyError:

	62 raise Exception("Lookup failed for string %s used on page %s" % (name, s elf._params["page"]))

	63

	64 result = escape(result)

	65 if links:

	66 links = map(unicode.strip, links.strip("()").split(","))

	67 while len(links):
	Sebastian Noack 2013/10/29 11:04:17 len() isn't needed here. Lists evaluate to True wh len() isn't needed here. Lists evaluate to True while non empty. So you can just use "while links:"
	68 result = re.sub(

	69 r"%s(.*?)%s" % (escape("<a>"), escape("</a>")),

	70 r'<a href="%s">\1</a>' % links.pop(0),

	71 result, 1, flags=re.S

	72 )

	73 return result

	74

	75 return re.sub(

	76 r"\$([\w\-]+)($[^()$]+$)?\$",

	77 lookup_string,

	78 text

	79 )

	80

	81 def process_links(self, text):

	82 def process_link(match):

	83 pre, attr, url, post = match.groups()

	84 url = jinja2.Markup(url).unescape()

	85

	86 locale, new_url = self._params["source"].resolve_link(url, self._params["l ocale"])

	87 if new_url != None:

	88 url = new_url

	89 if attr == "href":

	90 post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

	91

	92 return "".join((pre, jinja2.Markup.escape(url), post))

	93

	94 text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)

	95 text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)

	96 return text

	97

	98 def resolve_includes(self, text):

	99 def resolve_include(match):

	100 global converters

	101 name = match.group(1)

	102 for format, converter_class in converters.iteritems():

	103 if self._params["source"].has_include(name, format):

	104 self._params["includedata"] = self._params["source"].read_include(name , format)

	105 converter = converter_class(self._params, key="includedata")

	106 return converter()

	107 raise Exception("Failed to resolve include %s in page %s" % (name, self._p arams["page"]))

	108

	109 return re.sub(r'<\?\sinclude\s+([^\s<>"]+)\s\?>', resolve_include, text)

	110

	111 def __call__(self):

	112 result = self.get_html(self._params[self._key])

	113 result = self.resolve_includes(result)

	114 if self._key == "pagedata":

	115 head = []

	116 def add_to_head(match):

	117 head.append(match.group(1))

	118 return ""

	119 body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)

	120 return "".join(head), body

	121 else:

	122 return result

	123

	124 class RawConverter(Converter):

	125 def get_html(self, source):

	126 result = self.insert_localized_strings(source, html_escapes)

	127 result = self.process_links(result)

	128 return result

	129

	130 class MarkdownConverter(Converter):

	131 def get_html(self, source):

	132 def remove_unnecessary_entities(match):

	133 char = chr(int(match.group(1)))

	134 if char in html_escapes:

	135 return match.group(0)

	136 else:

	137 return char

	138

	139 escapes = {}

	140 for char in markdown.Markdown.ESCAPED_CHARS:

	141 escapes[char] = "&#" + str(ord(char)) + ";"

	142 for key, value in html_escapes.iteritems():

	143 escapes[key] = value

	144

	145 result = self.insert_localized_strings(source, escapes)

	146 result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert (result)

	147 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)

	148 result = self.process_links(result)

	149 return result

	150

	151 class TemplateConverter(Converter):

	152 def __init__(self, args, *kwargs):

	153 Converter.__init__(self, args, *kwargs)

	154

	155 filters = {

	156 "translate": self.translate,

	157 "linkify": self.linkify,

	158 "toclist": self.toclist,

	159 }

	160 self._env = get_custom_template_environment(filters)

	161

	162 def get_html(self, source):

	163 template = self._env.from_string(source)

	164 return template.render(self._params)

	165

	166 def translate(self, name, page=None):

	167 if page == None:

	168 localedata = self._params["localedata"]

	169 else:

	170 localedata = self._params["source"].read_locale(self._params["locale"], pa ge)

	171

	172 try:

	173 return localedata[name]

	174 except KeyError:

	175 raise Exception("Lookup failed for string %s used on page %s" % (name, sel f._params["page"]))

	176

	177 def linkify(self, page, locale=None):

	178 if locale == None:

	179 locale = self._params["locale"]

	180

	181 locale, url = self._params["source"].resolve_link(page, locale)

	182 return jinja2.Markup('<a href="%s" hreflang="%s">' % (

	183 jinja2.Markup.escape(url),

	184 jinja2.Markup.escape(locale)

	185 ))

	186

	187 def toclist(self, content):

	188 flat = []

	189 for match in re.finditer(r'<h(\d) [^<>]\bid="([^<>"]+)"[^<>]>(.*?)</h\1>', content, re.S):
	Sebastian Noack 2013/10/29 11:04:17 I would use \s instead of a whitespace, after the I would use \s instead of a whitespace, after the tag name.
	190 flat.append({

	191 "level": int(match.group(1)),

	192 "anchor": jinja2.Markup(match.group(2)).unescape(),

	193 "title": jinja2.Markup(match.group(3)).unescape(),

	194 "subitems": [],

	195 })

	196

	197 structured = []

	198 stack = [{"level": 0, "subitems": structured}]

	199 for item in flat:

	200 while stack[-1]["level"] >= item["level"]:

	201 stack.pop()

	202 stack[-1]["subitems"].append(item)

	203 stack.append(item)

	204 return structured

	205

	206 converters = {

	207 "raw": RawConverter,

	208 "md": MarkdownConverter,

	209 "tmpl": TemplateConverter,

	210 }

OLD	NEW

« sitescripts/web/bin/test_server.py ('K') | « sitescripts/web/bin/test_server.py ('k') | sitescripts/web/sources.py » ('j') | sitescripts/web/sources.py » ('J')