sitescripts/web/converters.py - Issue 17817001: Simple CMS as Anwiki replacement

Side by Side Diff: sitescripts/web/converters.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)

Patch Set: Addressed comments and improved localization Created Nov. 4, 2013, 9:43 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # coding: utf-8

	2

	3 # This file is part of the Adblock Plus web scripts,

	4 # Copyright (C) 2006-2013 Eyeo GmbH

	5 #

	6 # Adblock Plus is free software: you can redistribute it and/or modify

	7 # it under the terms of the GNU General Public License version 3 as

	8 # published by the Free Software Foundation.

	9 #

	10 # Adblock Plus is distributed in the hope that it will be useful,

	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

	13 # GNU General Public License for more details.

	14 #

	15 # You should have received a copy of the GNU General Public License

	16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

	17

	18 import re, jinja2, markdown

	19 from ..utils import get_custom_template_environment

	20

	# Monkey-patch Markdown's isBlockLevel function so that <head> is treated as
	# a block-level tag; this ensures no paragraphs are inserted into <head>.
	orig_isBlockLevel = markdown.util.isBlockLevel


	def isBlockLevel(tag):
	    """Return True for "head", otherwise defer to Markdown's original check."""
	    return tag == "head" or orig_isBlockLevel(tag)


	markdown.util.isBlockLevel = isBlockLevel

	30

	# Characters that must be replaced by entities when text is inserted into
	# HTML, mapped to their entity form.
	html_escapes = {
	    "<": "&lt;",
	    ">": "&gt;",
	    "&": "&amp;",
	    "\"": "&quot;",
	    "'": "&#39;",
	}

	38

	class Converter(object):
	    """Base class for converters that turn page source into HTML.

	    Subclasses implement ``get_html(source)``. Calling an instance converts
	    ``params[key]`` and resolves ``<? include name ?>`` directives in the
	    result.
	    """

	    def __init__(self, params, key="pagedata"):
	        """Store *params* and extract leading ``name = value`` lines.

	        Lines at the start of ``params[key]`` that look like
	        ``name = value`` are removed from the text and stored in *params*
	        under ``name`` (both sides stripped of whitespace). *params* is
	        modified in place.
	        """
	        self._params = params
	        self._key = key

	        # Read in any parameters specified at the beginning of the file
	        lines = params[key].splitlines(True)
	        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
	            name, value = lines.pop(0).split("=", 1)
	            params[name.strip()] = value.strip()
	        params[key] = "".join(lines)

	    def get_html(self, source):
	        """Convert *source* to HTML; must be provided by subclasses."""
	        raise NotImplementedError("Subclasses must implement get_html()")

	    def localize_string(self, name, localedata, escapes, links=None):
	        """Return the escaped localized string *name* with allowed markup.

	        The string is looked up in *localedata*, stripped, and every
	        character found in *escapes* is replaced by its mapped value.
	        Afterwards ``<a>...</a>`` pairs are turned into links (one per
	        entry of *links*, in order) and ``<strong>``/``<em>`` pairs are
	        restored as real tags.

	        *links* is only read; the caller's sequence is not modified.

	        Raises:
	            Exception: if *name* is missing from *localedata*.
	        """
	        def escape(s):
	            return re.sub(
	                r".",
	                lambda match: escapes.get(match.group(0), match.group(0)),
	                s, flags=re.S
	            )

	        def re_escape(s):
	            # The tags appear in escaped form in the text at this point, so
	            # the pattern has to be built from the escaped tag.
	            return re.escape(escape(s))

	        try:
	            result = localedata[name].strip()
	        except KeyError:
	            raise Exception("Lookup failed for string %s used on page %s" % (
	                name, self._params["page"]))

	        # Insert links: each link replaces the first remaining <a>...</a>
	        result = escape(result)
	        link_pattern = r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>"))
	        for link in list(links or ()):
	            result = re.sub(
	                link_pattern,
	                r'<a href="%s">\1</a>' % link,
	                result, count=1, flags=re.S
	            )

	        # <strong> and <em> tags are allowed
	        result = re.sub(
	            r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")),
	            r"<strong>\1</strong>",
	            result, flags=re.S
	        )
	        result = re.sub(
	            r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")),
	            r"<em>\1</em>",
	            result, flags=re.S
	        )
	        return result

	    def insert_localized_strings(self, text, escapes):
	        """Replace ``$name$`` / ``$name(link, ...)$`` in *text*.

	        Each placeholder is replaced by the result of localize_string()
	        using ``self._params["localedata"]``; the optional parenthesized,
	        comma-separated list supplies the links.
	        """
	        def lookup_string(match):
	            name, links = match.groups()
	            if links:
	                links = [link.strip() for link in links.strip("()").split(",")]
	            else:
	                links = []
	            return self.localize_string(
	                name, self._params["localedata"], escapes, links)

	        return re.sub(
	            r"\$([\w\-]+)(\([^()$]+\))?\$",
	            lookup_string,
	            text
	        )

	    def process_links(self, text):
	        """Resolve ``href`` of <a> tags and ``src`` of <img> tags in *text*.

	        Each URL is passed to ``self._params["source"].resolve_link``; when
	        that returns a new URL it replaces the original one.
	        """
	        def process_link(match):
	            pre, attr, url, post = match.groups()
	            url = jinja2.Markup(url).unescape()

	            locale, new_url = self._params["source"].resolve_link(
	                url, self._params["locale"])
	            if new_url is not None:
	                url = new_url
	                # NOTE(review): nesting reconstructed; the extracted source
	                # lost its indentation. hreflang is assumed to apply only to
	                # resolved links - confirm against the repository.
	                if attr == "href":
	                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

	            return "".join((pre, jinja2.Markup.escape(url), post))

	        text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
	        text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
	        return text

	    def resolve_includes(self, text):
	        """Replace ``<? include name ?>`` directives by the converted include.

	        The first format in the module-level ``converters`` mapping for
	        which the source reports an include named *name* is used.

	        Raises:
	            Exception: if no format provides the include.
	        """
	        def resolve_include(match):
	            name = match.group(1)
	            source = self._params["source"]
	            for fmt, converter_class in converters.items():
	                if source.has_include(name, fmt):
	                    self._params["includedata"] = source.read_include(name, fmt)
	                    converter = converter_class(self._params, key="includedata")
	                    return converter()
	            raise Exception("Failed to resolve include %s in page %s" % (
	                name, self._params["page"]))

	        return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text)

	    def __call__(self):
	        """Run the conversion.

	        Returns a ``(head, body)`` tuple when converting ``"pagedata"``:
	        the contents of all ``<head>...</head>`` sections joined together,
	        and the remaining text with those sections removed. For any other
	        key the converted text is returned as a single string.
	        """
	        result = self.get_html(self._params[self._key])
	        result = self.resolve_includes(result)
	        if self._key != "pagedata":
	            return result

	        head = []

	        def add_to_head(match):
	            head.append(match.group(1))
	            return ""

	        body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
	        return "".join(head), body

	143

	class RawConverter(Converter):
	    """Converter that applies no format conversion to the source.

	    Only localized strings are inserted (escaped with ``html_escapes``) and
	    links are processed.
	    """

	    def get_html(self, source):
	        """Return *source* with localized strings inserted and links processed."""
	        result = self.insert_localized_strings(source, html_escapes)
	        result = self.process_links(result)
	        return result

	149

	class MarkdownConverter(Converter):
	    """Converter that renders the source with Markdown."""

	    def get_html(self, source):
	        """Return *source* rendered by Markdown.

	        Localized strings are inserted before rendering, with Markdown's
	        special characters written as numeric entities so that they are not
	        interpreted as markup. After rendering, numeric entities are turned
	        back into plain characters unless the character is one of
	        ``html_escapes``. Finally links are processed.
	        """
	        try:
	            # Python 2: chr() only covers code points below 256
	            to_char = unichr
	        except NameError:
	            to_char = chr

	        def remove_unnecessary_entities(match):
	            char = to_char(int(match.group(1)))
	            if char in html_escapes:
	                return match.group(0)
	            return char

	        # HTML escapes take precedence over the generic numeric entities
	        escapes = {}
	        for char in markdown.Markdown.ESCAPED_CHARS:
	            escapes[char] = "&#" + str(ord(char)) + ";"
	        escapes.update(html_escapes)

	        result = self.insert_localized_strings(source, escapes)
	        # NOTE(review): Python-Markdown documents this option as
	        # "output_format"; kept as "output" to preserve behavior - confirm.
	        result = markdown.Markdown(
	            output="html5", extensions=["attr_list"]).convert(result)
	        result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
	        result = self.process_links(result)
	        return result

	170

	class TemplateConverter(Converter):
	    """Converter that renders the source as a template.

	    The template environment comes from get_custom_template_environment()
	    and gets the filters ``translate``, ``linkify`` and ``toclist``.
	    """

	    def __init__(self, *args, **kwargs):
	        """Initialize the base converter and set up the template environment."""
	        Converter.__init__(self, *args, **kwargs)

	        filters = {
	            "translate": self.translate,
	            "linkify": self.linkify,
	            "toclist": self.toclist,
	        }
	        self._env = get_custom_template_environment(filters)

	    def get_html(self, source):
	        """Render *source* as a template with the converter's parameters."""
	        template = self._env.from_string(source)
	        return template.render(self._params)

	    def translate(self, name, page=None, links=None):
	        """Return the localized string *name* as markup.

	        The string is taken from the current page's locale data, or from
	        the locale data of *page* (read for the current locale) when given.
	        *links* supplies the targets for ``<a>`` tags in the string.
	        """
	        if page is None:
	            localedata = self._params["localedata"]
	        else:
	            localedata = self._params["source"].read_locale(
	                self._params["locale"], page)
	        # Pass a copy so the caller's list is never consumed
	        links = list(links) if links else []
	        return jinja2.Markup(
	            self.localize_string(name, localedata, html_escapes, links=links))

	    def linkify(self, page, locale=None):
	        """Return an opening ``<a>`` tag pointing to *page*.

	        *locale* defaults to the current locale; the tag uses the locale
	        and URL returned by the source's resolve_link().
	        """
	        if locale is None:
	            locale = self._params["locale"]

	        locale, url = self._params["source"].resolve_link(page, locale)
	        return jinja2.Markup('<a href="%s" hreflang="%s">' % (
	            jinja2.Markup.escape(url),
	            jinja2.Markup.escape(locale)
	        ))

	    def toclist(self, content):
	        """Build a nested table of contents from headings in *content*.

	        Only headings carrying an ``id`` attribute are considered. Returns a
	        list of dicts with the keys ``level``, ``anchor``, ``title`` and
	        ``subitems``; a heading becomes a subitem of the closest preceding
	        heading with a lower level.
	        """
	        flat = []
	        heading = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'
	        for match in re.finditer(heading, content, re.S):
	            flat.append({
	                "level": int(match.group(1)),
	                "anchor": jinja2.Markup(match.group(2)).unescape(),
	                "title": jinja2.Markup(match.group(3)).unescape(),
	                "subitems": [],
	            })

	        structured = []
	        stack = [{"level": 0, "subitems": structured}]
	        for item in flat:
	            # Never pop the root entry, even for a (bogus) <h0> heading
	            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
	                stack.pop()
	            stack[-1]["subitems"].append(item)
	            stack.append(item)
	        return structured

	221

	# Maps a source format name to the converter class handling it.
	converters = {
	    "raw": RawConverter,
	    "md": MarkdownConverter,
	    "tmpl": TemplateConverter,
	}

OLD	NEW

« no previous file with comments | « sitescripts/web/bin/test_server.py ('k') | sitescripts/web/sources.py » ('j') | no next file with comments »