Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/filterhits/geometrical_mean.py

Issue 4615801646612480: Issue 395 - Filter hits statistics backend (Closed)
Left Patch Set: Display friendly message if processing script can't connect to DB. Created March 2, 2015, 11:14 a.m.
Right Patch Set: Addressed further comments from Sebastian. Created April 2, 2015, 10:13 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/filterhits/db.py ('k') | sitescripts/filterhits/schema.sql » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
LEFT | RIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH 4 # Copyright (C) 2006-2015 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import itertools 18 import itertools
19
19 from sitescripts.filterhits import db 20 from sitescripts.filterhits import db
20 21
21 def filter_hits(data): 22 def update(interval, data):
22 """ 23 """
23 Generator that provides all filter hits for the given data, 24 Generator that provides all the SQL and parameters needed to update the
24 in tuples like (filter, domain, hits, latest). 25 aggregations for the given data + interval in the database.
25 """ 26 """
26 for filter, filter_data in data['filters'].iteritems(): 27 for filter, filter_data in data["filters"].iteritems():
28 yield ("""INSERT IGNORE INTO `filters`
29 (filter, sha1) VALUES (%s, UNHEX(SHA1(filter)))""", filter)
30
27 domains = itertools.chain(filter_data.get("thirdParty", {}).iteritems(), 31 domains = itertools.chain(filter_data.get("thirdParty", {}).iteritems(),
28 filter_data.get("firstParty", {}).iteritems()) 32 filter_data.get("firstParty", {}).iteritems())
29 for domain, domain_data in domains: 33 for domain, domain_data in domains:
30 yield (filter, domain, domain_data["hits"], domain_data["latest"] / 1000) 34 yield ("""INSERT INTO `frequencies`
31 35 (filter_sha1, domain, frequency, timestamp)
32 def update_query(interval, filter, domain, hits, latest): 36 VALUES (UNHEX(SHA1(%s)), %s, %s, FROM_UNIXTIME(%s))
33 """ 37 ON DUPLICATE KEY UPDATE
34 Function that takes the fields for a filter hit and returns them arranged 38 frequency = (
35 as the update SQL requires along with the SQL itself. 39 POW(frequency, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) -
36 """ 40 UNIX_TIMESTAMP(timestamp)) / %s) *
37 return (("""INSERT IGNORE INTO `filters` 41 POW(VALUES(frequency), (UNIX_TIMESTAMP(VALUES(timestamp)) -
38 (filter, sha1) VALUES (%s, UNHEX(SHA1(filter)))""", 42 UNIX_TIMESTAMP(timestamp)) / %s)),
39 filter), 43 timestamp = VALUES(timestamp)""",
40 ("""INSERT INTO `geometrical_mean` 44 filter, domain, domain_data["hits"],
41 (filter_sha1, domain, hits, timestamp) 45 int(domain_data["latest"] / 1000), interval, interval)
42 VALUES (UNHEX(SHA1(%s)), %s, %s, FROM_UNIXTIME(%s))
43 ON DUPLICATE KEY UPDATE
44 hits = (
45 POW(hits, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) -
46 UNIX_TIMESTAMP(timestamp)) / %s) *
47 POW(VALUES(hits), (UNIX_TIMESTAMP(VALUES(timestamp)) -
48 UNIX_TIMESTAMP(timestamp)) / %s)),
49 timestamp = VALUES(timestamp)""",
50 filter, domain, hits, int(latest), interval, interval))
51
52 def update(interval, data):
53 """
54 Returns an iterator of all the SQL and parameters needed to
55 update the aggregations for the given data + interval in the database.
56 """
57 for fields in filter_hits(data):
58 for query in update_query(interval, *fields):
59 yield query
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld