Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: sitescripts/web/converters.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)
Patch Set: Addressed comments and improved localization Created Nov. 4, 2013, 9:43 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « sitescripts/web/bin/test_server.py ('k') | sitescripts/web/sources.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sitescripts/web/converters.py
new file mode 100644
--- /dev/null
+++ b/sitescripts/web/converters.py
@@ -0,0 +1,226 @@
+# coding: utf-8
+# This file is part of the Adblock Plus web scripts,
+# Copyright (C) 2006-2013 Eyeo GmbH
+# Adblock Plus is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3 as
+# published by the Free Software Foundation.
+# Adblock Plus is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+import re, jinja2, markdown
+from ..utils import get_custom_template_environment
# Markdown's isBlockLevel function is monkey-patched so that <head> counts as
# a block-level tag; this ensures that no paragraphs are inserted into it.
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Treat "head" as block-level, deferring to Markdown's check otherwise."""
    return True if tag == "head" else orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel
# Characters with special meaning in HTML, mapped to the entity that replaces
# each of them when a string is escaped for insertion into a page.
html_escapes = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "\"": "&quot;",
    "'": "&#39;",
}
class Converter(object):
    """Base class turning the source text of a page or include into HTML.

    Subclasses implement ``get_html``. This class supplies the shared steps:
    reading ``name = value`` parameter lines from the top of the text,
    replacing ``$name$`` placeholders with localized strings, rewriting link
    targets and expanding ``<? include name ?>`` directives.
    """

    def __init__(self, params, key="pagedata"):
        """Remember *params* and consume the parameter lines of ``params[key]``.

        Leading lines of the form ``name = value`` are removed from
        ``params[key]`` and stored as ``params[name]`` (both sides stripped).
        Note that *params* itself is modified.
        """
        self._params = params
        self._key = key

        # Read in any parameters specified at the beginning of the file
        lines = params[key].splitlines(True)
        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
            name, value = lines.pop(0).split("=", 1)
            params[name.strip()] = value.strip()
        params[key] = "".join(lines)

    def get_html(self, source):
        """Convert *source* to HTML; concrete converters must override this."""
        raise NotImplementedError(
            "%s does not implement get_html()" % type(self).__name__)

    def localize_string(self, name, localedata, escapes, links=None):
        """Return the localized string *name*, escaped and with markup applied.

        ``localedata[name]`` is stripped and every character found in the
        *escapes* mapping is replaced. Afterwards each ``<a>...</a>`` pair is
        turned into a link, consuming one entry of *links* per pair in order,
        and ``<strong>``/``<em>`` pairs are restored as real tags.

        *links* is only read, never modified. Raises ``Exception`` if *name*
        is missing from *localedata*.
        """
        def escape(s):
            return "".join(escapes.get(char, char) for char in s)

        def tag_pattern(tag):
            # The string is escaped before tags are processed, so the tags
            # have to be searched for in their escaped form.
            return re.compile(
                r"%s([^<>]*?)%s" % (
                    re.escape(escape("<%s>" % tag)),
                    re.escape(escape("</%s>" % tag)),
                ),
                re.S
            )

        try:
            result = localedata[name].strip()
        except KeyError:
            raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

        result = escape(result)

        # Insert links: one <a>...</a> pair per link, first come first served.
        # A function is used as replacement so that backslashes in a link are
        # inserted literally instead of being parsed as regexp escapes.
        link_pattern = tag_pattern("a")
        for link in list(links or []):
            result = link_pattern.sub(
                lambda match, link=link: '<a href="%s">%s</a>' % (link, match.group(1)),
                result, 1
            )

        # <strong> and <em> tags are allowed
        for tag in ("strong", "em"):
            result = tag_pattern(tag).sub(r"<%s>\1</%s>" % (tag, tag), result)
        return result

    def insert_localized_strings(self, text, escapes):
        """Replace ``$name$`` and ``$name(link, ...)$`` placeholders in *text*.

        Each placeholder is replaced by ``localize_string`` applied to the
        page's ``localedata`` parameter; the optional comma-separated list in
        parentheses supplies the link targets.
        """
        def lookup_string(match):
            name, links = match.groups()
            if links:
                links = [link.strip() for link in links.strip("()").split(",")]
            else:
                links = []
            return self.localize_string(name, self._params["localedata"], escapes, links)

        return re.sub(
            r"\$([\w\-]+)(\([^()$]+\))?\$",
            lookup_string,
            text
        )

    def process_links(self, text):
        """Rewrite ``href`` of ``<a>`` and ``src`` of ``<img>`` tags in *text*.

        Every URL is passed to ``resolve_link`` of the ``source`` parameter
        together with the current locale. If a new URL is returned it replaces
        the original one and, for ``href``, a ``hreflang`` attribute is added.
        """
        def process_link(match):
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            # NOTE(review): resolve_link appears to return (locale, url) with
            # url being None for links it cannot resolve - confirm in sources.
            locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
            if new_url is not None:
                url = new_url
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
        text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
        return text

    def resolve_includes(self, text):
        """Expand every ``<? include name ?>`` directive in *text*.

        The first format in the module-level ``converters`` mapping for which
        the source has an include called *name* is used to convert it. Raises
        ``Exception`` if no format provides the include.
        """
        def resolve_include(match):
            name = match.group(1)
            for fmt, converter_class in converters.items():
                if self._params["source"].has_include(name, fmt):
                    self._params["includedata"] = self._params["source"].read_include(name, fmt)
                    converter = converter_class(self._params, key="includedata")
                    return converter()
            raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))

        return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text)

    def __call__(self):
        """Run the conversion.

        For page data a ``(head, body)`` tuple is returned where *head* is the
        joined content of all ``<head>`` elements and *body* is the remaining
        HTML. For any other key (includes) the HTML string is returned as is.
        """
        result = self.get_html(self._params[self._key])
        result = self.resolve_includes(result)
        if self._key != "pagedata":
            return result

        head = []

        def add_to_head(match):
            head.append(match.group(1))
            return ""

        body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
        return "".join(head), body
class RawConverter(Converter):
    """Converter for sources that are used as HTML without further markup."""

    def get_html(self, source):
        """Insert localized strings (HTML-escaped) and rewrite link targets."""
        localized = self.insert_localized_strings(source, html_escapes)
        return self.process_links(localized)
class MarkdownConverter(Converter):
    """Converter for sources written in Markdown."""

    def get_html(self, source):
        """Render Markdown *source* to HTML.

        Localized strings are inserted before rendering. In addition to the
        HTML escapes, every character listed in Markdown's ``ESCAPED_CHARS``
        is replaced by a numeric entity in them, so that translations are not
        interpreted as Markdown syntax. After rendering, numeric entities are
        turned back into characters unless the character is one of the HTML
        escapes, and link targets are rewritten.
        """
        def remove_unnecessary_entities(match):
            try:
                # unichr rather than chr: chr only accepts values below 256
                # and would raise for entities such as &#8212;
                char = unichr(int(match.group(1)))
            except (ValueError, OverflowError):
                # Not a representable code point, leave the entity untouched
                return match.group(0)
            if char in html_escapes:
                return match.group(0)
            return char

        escapes = {char: "&#%d;" % ord(char) for char in markdown.Markdown.ESCAPED_CHARS}
        # HTML escapes take precedence over the numeric entities
        escapes.update(html_escapes)

        result = self.insert_localized_strings(source, escapes)
        # NOTE(review): Python-Markdown documents this option as
        # "output_format"; confirm that "output" has the intended effect.
        result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert(result)
        result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
        result = self.process_links(result)
        return result
class TemplateConverter(Converter):
    """Converter for sources that are templates rendered with the page params.

    The ``translate``, ``linkify`` and ``toclist`` methods are made available
    to templates as filters.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the base converter and create the template environment."""
        Converter.__init__(self, *args, **kwargs)

        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }
        # NOTE(review): presumably a Jinja2 environment with these filters
        # registered - confirm in ..utils.
        self._env = get_custom_template_environment(filters)

    def get_html(self, source):
        """Render *source* as a template, passing the page parameters to it."""
        template = self._env.from_string(source)
        return template.render(self._params)

    def translate(self, name, page=None, links=None):
        """Return the localized string *name* as markup safe for output.

        The string is looked up in the current page's locale data, or in the
        locale data of *page* if given. *links* supplies the targets for the
        ``<a>`` tags of the string and is left unmodified.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(self._params["locale"], page)
        # Hand over a private copy so the caller's list cannot be modified
        return jinja2.Markup(
            self.localize_string(name, localedata, html_escapes, links=list(links or []))
        )

    def linkify(self, page, locale=None):
        """Return an opening ``<a>`` tag linking to *page*.

        The link is resolved for *locale*, defaulting to the current locale;
        the closing ``</a>`` has to be supplied by the template.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        return jinja2.Markup('<a href="%s" hreflang="%s">' % (
            jinja2.Markup.escape(url),
            jinja2.Markup.escape(locale)
        ))

    def toclist(self, content):
        """Build a nested table of contents from the headings in *content*.

        Only headings carrying an ``id`` attribute are considered. Each entry
        is a dict with the keys ``level``, ``anchor``, ``title`` and
        ``subitems``, the latter holding the entries of deeper headings that
        follow it. The list of top-most entries is returned.
        """
        flat = []
        for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        # The sentinel at the bottom of the stack collects the top-most entries
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            # Never pop the sentinel, not even for a (bogus) level 0 heading
            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured
# Maps the name of each supported source format to the converter class
# handling it.
converters = {
    "raw": RawConverter,
    "md": MarkdownConverter,
    "tmpl": TemplateConverter,
}
« no previous file with comments | « sitescripts/web/bin/test_server.py ('k') | sitescripts/web/sources.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld