sitescripts/web/bin/generate_static_pages.py - Issue 17817001: Simple CMS as Anwiki replacement

Delta Between Two Patch Sets: sitescripts/web/bin/generate_static_pages.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)

Left Patch Set: Completed functionality Created Oct. 24, 2013, 9:32 p.m.

Right Patch Set: Fixed MIME type Created Nov. 4, 2013, 4:11 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 # coding: utf-8	1 # coding: utf-8

2	2

3 # This file is part of the Adblock Plus web scripts,	3 # This file is part of the Adblock Plus web scripts,

4 # Copyright (C) 2006-2013 Eyeo GmbH	4 # Copyright (C) 2006-2013 Eyeo GmbH

5 #	5 #

6 # Adblock Plus is free software: you can redistribute it and/or modify	6 # Adblock Plus is free software: you can redistribute it and/or modify

7 # it under the terms of the GNU General Public License version 3 as	7 # it under the terms of the GNU General Public License version 3 as

8 # published by the Free Software Foundation.	8 # published by the Free Software Foundation.

9 #	9 #

10 # Adblock Plus is distributed in the hope that it will be useful,	10 # Adblock Plus is distributed in the hope that it will be useful,

11 # but WITHOUT ANY WARRANTY; without even the implied warranty of	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13 # GNU General Public License for more details.	13 # GNU General Public License for more details.

14 #	14 #

15 # You should have received a copy of the GNU General Public License	15 # You should have received a copy of the GNU General Public License

16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

17	17

18 import sys, os, re, codecs	18 import sys, os, re, errno, codecs

19 from ...utils import setupStderr	19 from ...utils import setupStderr, cached

20 from ..utils import process_page	20 from ..utils import process_page

21 from ..sources import MercurialSource	21 from ..sources import MercurialSource

22	22

23 def generate_pages(repo, output_dir):	23 def generate_pages(repo, output_dir):

24 known_files = set()	24 known_files = set()

25	25

26 def write_file(path_parts, contents, binary=False):	26 def write_file(path_parts, contents, binary=False):

27 encoding = None if binary else "utf-8"	27 encoding = None if binary else "utf-8"

28 outfile = os.path.join(output_dir, *path_parts)	28 outfile = os.path.join(output_dir, *path_parts)

29 if outfile in known_files:	29 if outfile in known_files:

30 print >>sys.stderr, "Warning: File %s has multiple sources" % outfile	30 print >>sys.stderr, "Warning: File %s has multiple sources" % outfile

31 return	31 return

32 known_files.add(outfile)	32 known_files.add(outfile)

33	33

34 if os.path.exists(outfile):	34 if os.path.exists(outfile):

35 with codecs.open(outfile, "rb", encoding=encoding) as handle:	35 with codecs.open(outfile, "rb", encoding=encoding) as handle:

36 if handle.read() == contents:	36 if handle.read() == contents:

37 return	37 return

38	38

39 try:	39 try:

40 os.makedirs(os.path.dirname(outfile))	40 os.makedirs(os.path.dirname(outfile))

41 except OSError:	41 except OSError, e:

42 pass	42 if e.errno != errno.EEXIST:
Sebastian Noack 2013/10/29 11:04:17 I would check whether we got EEXIST here and only then fail silently; any other error should be re-raised. Otherwise, for example, when you don't have permission to create the directory, the code below will fail because the directory doesn't exist, which is misleading.
    try:
        os.makedirs(os.path.dirname(outfile))
    except OSError, e:
        if e.errno != errno.EEXIST:
            raise
	43 raise

43	44

44 with codecs.open(outfile, "wb", encoding=encoding) as handle:	45 with codecs.open(outfile, "wb", encoding=encoding) as handle:

45 handle.write(contents)	46 handle.write(contents)

46	47

47 with MercurialSource(repo) as source:	48 with MercurialSource(repo) as source:

	49 # Cache the result for some functions - we can assume here that the data

	50 # never changes

	51 source.resolve_link = cached(float("Infinity"))(source.resolve_link)

	52 source.read_config = cached(float("Infinity"))(source.read_config)

	53 source.read_template = cached(float("Infinity"))(source.read_template)

	54 source.read_locale = cached(float("Infinity"))(source.read_locale)

	55 source.read_include = cached(float("Infinity"))(source.read_include)

	56

48 locales = list(source.list_locales())	57 locales = list(source.list_locales())

49 for page, format in source.list_pages():	58 for page, format in source.list_pages():

50 for locale in locales:	59 for locale in locales:

51 if source.has_locale(locale, page):	60 if source.has_locale(locale, page):

52 pagedata = process_page(source, locale, page, format)	61 pagedata = process_page(source, locale, page, format)

53	62

54 # Make sure links to static files are versioned	63 # Make sure links to static files are versioned

55 pagedata = re.sub(r'(<script\s[^<>]*\bsrc="/[^"<>]+)', r"\1?%s" % sour ce.version, pagedata)	64 pagedata = re.sub(r'(<script\s[^<>]*\bsrc="/[^"<>]+)', r"\1?%s" % sour ce.version, pagedata)
Sebastian Noack 2013/10/29 11:04:17 When you inject text into regular expressions, always use re.escape().
Wladimir Palant 2013/11/04 09:49:21 On 2013/10/29 11:04:17, sebastian wrote:
> When you inject text into regular expressions always use re.escape().
I'm not injecting into a regular expression here, it's the replacement term. Using re.escape() can be fatal here, consider the following:
    re.sub("(a)", r"\1%s" % re.escape("$"), "abcd")
Given that source.version is known to be alphanumerical, we can skip escaping here. I've fixed this in some other places, however.
Sebastian Noack 2013/11/04 13:28:18 On 2013/11/04 09:49:21, Wladimir Palant wrote:
> On 2013/10/29 11:04:17, sebastian wrote:
> > When you inject text into regular expressions always use re.escape().
>
> I'm not injecting into a regular expression here, it's the replacement term.
> Using re.escape() can be fatal here, consider the following:
>
> re.sub("(a)", r"\1%s" % re.escape("$"), "abcd")
>
> Given that source.version is known to be alphanumerical we can skip escaping
> here. I've fixed this in a some other places however.
Yes, you are right.
56 pagedata = re.sub(r'(<link\s[^<>]*\bhref="/[^"<>]+)', r"\1?%s" % sourc e.version, pagedata)	65 pagedata = re.sub(r'(<link\s[^<>]*\bhref="/[^"<>]+)', r"\1?%s" % sourc e.version, pagedata)

57 pagedata = re.sub(r'(<img\s[^<>]*\bsrc="/[^"<>]+)', r"\1?%s" % source. version, pagedata)	66 pagedata = re.sub(r'(<img\s[^<>]*\bsrc="/[^"<>]+)', r"\1?%s" % source. version, pagedata)

58	67

59 write_file([locale] + page.split("/"), pagedata)	68 write_file([locale] + page.split("/"), pagedata)

60	69

61 for filename in source.list_localizable_files():	70 for filename in source.list_localizable_files():

62 for locale in locales:	71 for locale in locales:

63 if source.has_localizable_file(locale, filename):	72 if source.has_localizable_file(locale, filename):

64 filedata = source.read_localizable_file(locale, filename)	73 filedata = source.read_localizable_file(locale, filename)

65 write_file([locale] + filename.split("/"), filedata, binary=True)	74 write_file([locale] + filename.split("/"), filedata, binary=True)

66	75

67 for filename in source.list_static():	76 for filename in source.list_static():

68 write_file(filename.split("/"), source.read_static(filename), binary=True)	77 write_file(filename.split("/"), source.read_static(filename), binary=True)

69	78

70 def remove_unknown(dir):	79 def remove_unknown(dir):

71 files = os.listdir(dir)	80 files = os.listdir(dir)

72 for filename in files:	81 for filename in files:

73 path = os.path.join(dir, filename)	82 path = os.path.join(dir, filename)

74 if os.path.isfile(path) and path not in known_files:	83 if os.path.isfile(path) and path not in known_files:

75 os.remove(path)	84 os.remove(path)

76 elif os.path.isdir(path):	85 elif os.path.isdir(path):

77 remove_unknown(path)	86 remove_unknown(path)

78 if not os.listdir(path):	87 if not os.listdir(path):

79 os.rmdir(path)	88 os.rmdir(path)
Sebastian Noack 2013/10/29 11:04:17 You might want to remove directories recursively, so that when a/b/c.html is removed and there aren't any other files within a, a will be removed too, instead of removing only a/b.
Wladimir Palant 2013/11/04 09:49:21 We are already removing directories recursively. remove_unknown("foo/a/b") will remove foo/a/b/c.html and then return to remove_unknown("foo/a"), which will remove foo/a/b as an empty directory. It will then return to remove_unknown("foo"), which will remove foo/a. The top-level directory is never removed, which is intended.
Sebastian Noack 2013/11/04 13:28:18 On 2013/11/04 09:49:21, Wladimir Palant wrote:
> We are already removing directories recursively. remove_unknown("foo/a/b") will
> remove foo/a/b/c.html and then return to remove_unknown("foo/a") that will
> remove foo/a/b as an empty directory. It will then return to
> remove_unknown("foo") which will remove foo/a. The top-level directory is never
> removed which is intended.
You are right.
80 remove_unknown(output_dir)	89 remove_unknown(output_dir)

81	90

82 if __name__ == "__main__":	91 if __name__ == "__main__":

83 setupStderr()	92 setupStderr()

84 if len(sys.argv) < 3:	93 if len(sys.argv) < 3:

85 print >>sys.stderr, "Usage: %s source_repository output_dir" % sys.argv[0]	94 print >>sys.stderr, "Usage: %s source_repository output_dir" % sys.argv[0]

86 sys.exit(1)	95 sys.exit(1)

87	96

88 repo, output_dir = sys.argv[1:3]	97 repo, output_dir = sys.argv[1:3]

89 generate_pages(repo, output_dir)	98 generate_pages(repo, output_dir)

LEFT	RIGHT