Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 # coding: utf-8 | |
2 | |
3 # This Source Code is subject to the terms of the Mozilla Public License | |
4 # version 2.0 (the "License"). You can obtain a copy of the License at | |
5 # http://mozilla.org/MPL/2.0/. | |
6 | |
7 # import MySQLdb, os, simplejson, sys, re | |
8 # from sitescripts.utils import cached, get_config | |
9 # from sitescripts.web import url_handler, basic_auth | |
Wladimir Palant
2012/10/09 11:51:14
Any reason why the lines above are required? ;)
| |
10 | |
11 import os, MySQLdb, simplejson as json | |
12 from urlparse import parse_qs | |
13 from math import sqrt | |
Wladimir Palant
2012/10/09 11:51:14
I would prefer to just import math and use the fun
| |
14 from sitescripts.web import url_handler | |
15 from sitescripts.utils import get_config, setupStderr | |
16 | |
17 STATUS_TYPED = 1 | |
18 STATUS_TYPO = 2 | |
19 STATUS_CORRECTION = 3 | |
20 STATUS_FALSE_POSITIVE = 4 | |
21 | |
22 @url_handler("/submitData") | |
23 def submit_data(environ, start_response): | |
24 setupStderr(environ['wsgi.errors']) | |
25 | |
26 if environ['REQUEST_METHOD'].upper() != 'POST': | |
27 return showError('Unsupported request method', start_response) | |
28 | |
29 output = "" | |
30 | |
31 params = parse_qs(environ.get("QUERY_STRING", "")) | |
32 requestVersion = params.get("version", ["0"])[0] | |
33 data = "{}" | |
34 try: | |
35 data_length = int(environ.get("CONTENT_LENGTH", "0")) | |
36 except ValueError: | |
37 data_length = 0 | |
38 if data_length != 0: | |
39 data = environ["wsgi.input"].read(data_length) | |
40 data = json.loads(data, ["{}"]) | |
Wladimir Palant
2012/10/09 11:51:14
Not sure I understand the significance of the seco
| |
41 | |
42 try: | |
43 database = get_config().get("urlfixer", "database") | |
44 dbuser = get_config().get("urlfixer", "dbuser") | |
45 dbpasswd = get_config().get("urlfixer", "dbpassword") | |
46 except Exception: | |
47 return showError("Error while reading config.", start_response) | |
Wladimir Palant
2012/10/09 11:51:14
That's a server configuration issue, not something
| |
48 | |
49 if os.name == "nt": | |
50 db = MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, | |
51 use_unicode=True, charset="utf8", named_pipe=True) | |
52 else: | |
53 db = MySQLdb.connect(user=dbuser, passwd=dbpasswd, db=database, | |
54 use_unicode=True, charset="utf8") | |
Wladimir Palant
2012/10/09 11:51:14
I suggest caching the database connection similarl
| |
55 | |
56 for domain in data: | |
Wladimir Palant
2012/10/09 11:51:14
I suggest:
for domain, status in data.iteritems
| |
57 process_domain(db, domain, data[domain]) | |
58 | |
59 db.commit() | |
60 db.close() | |
Wladimir Palant
2012/10/09 11:51:14
Database shouldn't be closed if you share the data
| |
61 | |
62 response_headers = [('Content-type', 'text/plain'), | |
63 ('Content-Length', str(len(output)))] | |
Wladimir Palant
2012/10/09 11:51:14
Output is empty and this won't change (nobody can
| |
64 start_response("200 OK", response_headers) | |
65 return [output] | |
66 | |
67 def process_domain(db, domain, status): | |
68 domain_id = _get_domain_id(db, domain) | |
69 | |
70 _increment_entry(db, domain_id, status) | |
71 | |
72 typed = _get_entry(db, domain_id, STATUS_TYPED) | |
73 typo = _get_entry(db, domain_id, STATUS_TYPO) | |
74 correction = _get_entry(db, domain_id, STATUS_CORRECTION) | |
75 false_positive = _get_entry(db, domain_id, STATUS_FALSE_POSITIVE) | |
76 | |
77 # Determine the occurences of the domain being correct as is | |
78 typed_count = get_weighted_count(typed) | |
79 # Determine the occurences of the domain as a typo | |
80 typo_count = get_weighted_count(typo) | |
81 # Determine the occurences of the domain as a correction | |
82 correction_count = get_weighted_count(correction) | |
83 # Determine the occurences of the domain as a false positive | |
84 false_positive_count = get_weighted_count(false_positive) | |
85 | |
86 # Determine the correctness of the domain and of the corrections (with a confi dence level of 0.95) | |
87 # http://www.evanmiller.org/how-not-to-sort-by-average-rating.html | |
88 domain_correctness_score = calculateCorrectness(correction_count + typed_count , false_positive_count + typo_count) | |
89 correction_correctness_score = calculateCorrectness(correction_count, false_po sitive_count) | |
90 | |
91 _update_scores(db, domain_id, domain_correctness_score, correction_correctness _score) | |
92 | |
93 def calculateCorrectness(positive, negative): | |
94 score = 0 | |
95 | |
96 if positive + negative > 0: | |
97 score = ((positive + 1.9208) / (positive + negative) - | |
98 1.96 * sqrt((positive * negative) / (positive + negative) + 0.9604) / | |
99 (positive + negative)) / (1 + 3.8416 / (positive + negative)) | |
100 | |
101 return score | |
102 | |
103 def get_weighted_count(result): | |
104 if result == None: | |
105 return 0 | |
106 | |
107 return result["curr_month"] * 0.4 + result["prev_month"] * 0.3 + result["curr_ year"] * 0.2 + result["prev_year"] * 0.1 | |
108 | |
109 def showError(message, start_response): | |
110 start_response('400 Processing Error', [('Content-Type', 'text/plain; charset= utf-8')]) | |
111 return [message.encode('utf-8')] | |
112 | |
113 def _get_domain_id(db, domain): | |
114 cursor = db.cursor(MySQLdb.cursors.DictCursor) | |
115 cursor.execute("SELECT id FROM domains WHERE domain='%s'" % (domain)) | |
Wladimir Palant
2012/10/09 11:51:14
Please don't ever generate queries dynamically, us
Thomas Greiner
2012/10/09 13:47:02
Actually, I thought that this was the syntax for p
| |
116 result = cursor.fetchone() | |
117 if result == None: | |
118 cursor.execute("INSERT INTO domains(domain) VALUES ('%s')" % (domain)) | |
119 return _get_domain_id(db, domain) | |
Wladimir Palant
2012/10/09 11:51:14
Should be:
return db.insert_id()
| |
120 else: | |
121 return result["id"] | |
122 | |
123 def _get_entry(db, domain_id, status): | |
124 cursor = db.cursor(MySQLdb.cursors.DictCursor) | |
125 cursor.execute("SELECT * FROM corrections WHERE domain=%d AND status=%d" % (do main_id, status)) | |
126 return cursor.fetchone() | |
127 | |
128 def _increment_entry(db, domain_id, status): | |
129 cursor = db.cursor(MySQLdb.cursors.DictCursor) | |
130 cursor.execute("SELECT COUNT(*) AS count FROM corrections WHERE domain=%d AND status=%d" % (domain_id, status)) | |
131 | |
132 if cursor.fetchone()["count"] < 1: | |
133 # Create and increment current period | |
134 cursor.execute("INSERT INTO corrections(domain, status, curr_month, prev_mon th, curr_year, prev_year) VALUES (%d, %d, %d, %d, %d, %d)" % (domain_id, status, 1, 0, 1, 0)) | |
135 else: | |
136 # Increment current period | |
137 cursor.execute("UPDATE corrections SET curr_month=curr_month+1, curr_year=cu rr_year+1 WHERE domain=%d AND status=%d" % (domain_id, status)) | |
Wladimir Palant
2012/10/09 11:51:14
http://dev.mysql.com/doc/refman/5.0/en/insert-on-d
Thomas Greiner
2012/10/09 13:47:02
Thanks for the tip!
| |
138 | |
139 def _update_scores(db, domain_id, domain_correctness, correction_correctness): | |
140 cursor = db.cursor() | |
141 cursor.execute("UPDATE domains SET domain_correct=%f, correction_correct=%f WH ERE id=%d" % (domain_correctness, correction_correctness, domain_id)) | |
OLD | NEW |