Source
countCaps.py
#!/usr/bin/python -OO
# -*- coding: utf-8 -*-
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re # Regular expressions
import wikitools # Wiki API
import datetime # Date manipulation and comparison
import MySQLdb # MySQL databases
import MySQLdb.cursors # Additional cursor classes for convenience
from capsConfig import config
# Mutable run-time state kept alongside the imported settings so that every
# helper in this module can reach it through the single ``config`` dictionary.
config['runtime'] = {
    'wiki': None,   # wikitools Wiki object; created on first use by wiki()
    'regexes': {}   # compiled-pattern cache filled by compileRegex()
}
config['rules'] = None  # scoring rules; loaded from the wiki by run()


def u(s):
    """Return ``s`` as a ``unicode`` object.

    page(), editPage() and compileRegex() all pass their text through
    ``u()``, but this module neither defined nor imported such a name, so
    the first call raised NameError.

    ``unicode`` input is returned unchanged.  Byte strings are decoded as
    UTF-8 (the encoding declared at the top of this file), with undecodable
    bytes replaced rather than raising.  Any other object is converted with
    ``unicode()``.

    NOTE(review): the intended decoding rules are inferred from the call
    sites only - confirm against the original helper if one exists.
    """
    if isinstance(s, unicode):
        return s
    if isinstance(s, str):
        return s.decode('utf-8', 'replace')
    return unicode(s)
def wiki():
global config
if config['runtime']['wiki'] is None:
config['runtime']['wiki'] = wikitools.wiki.Wiki(config['api'])
print 'Logging in as', config['username'], '...'
config['runtime']['wiki'].login(config['username'], config['password'])
print 'Logged in.'
return config['runtime']['wiki']
def page(p):
    """Return a page object for ``p``.

    p -- either a page title (``str`` or ``unicode``, including subclasses)
         or an object that is already a page.

    Titles are passed through u() and wrapped in a wikitools Page bound to
    the shared wiki() connection, with followRedir=False.  Anything that is
    not a string is returned untouched.
    """
    # isinstance() rather than an exact type() comparison, so that
    # str/unicode subclasses are also treated as titles.
    if isinstance(p, basestring):
        return wikitools.page.Page(wiki(), u(p), followRedir=False)
    return p
def editPage(p, content, summary=u'', minor=True, bot=True, nocreate=True):
    """Save ``content`` to page ``p`` and return the result of the edit call.

    p        -- page title or page object (resolved through page()).
    content  -- new page text; passed through u() before saving.
    summary  -- edit summary; passed through u() and capped at 250
                characters, the last three being '...' when it was cut.
    minor, bot, nocreate -- forwarded unchanged to the page's edit() method.
    """
    summary = u(summary)
    if len(summary) > 250:
        # Prefer to cut at the last space that still leaves room for the
        # ellipsis.  The previous loop had this test inverted: a summary
        # without any space was rewritten as summary[:-1] + '...', which
        # grows on every pass and never terminates.
        cut = summary.rfind(u' ', 0, 248)
        if cut <= 0:
            # No usable word boundary: hard cut.
            cut = 247
        summary = summary[:cut] + u'...'
    return page(p).edit(u(content), summary=summary, minor=minor, bot=bot, nocreate=nocreate)
def compileRegex(regex, flags=re.IGNORECASE):
    """Return a compiled pattern for ``regex``, reusing earlier compilations.

    regex -- pattern text; passed through u() first.
    flags -- ``re`` flags, case-insensitive by default.

    Compiled patterns are kept in config['runtime']['regexes'].  The cache
    key includes the flags: keyed on the pattern text alone, a second call
    with different flags would have received the pattern compiled for the
    first call.
    """
    regex = u(regex)
    cache = config['runtime']['regexes']
    key = (regex, flags)
    if key not in cache:
        cache[key] = re.compile(regex, flags)
    return cache[key]
def userSort(x, y):
    """cmp-style comparator for two user records.

    The fields to compare come, in order, from config['rules']['sortKeys'].
    A key written with a leading '!' is compared with its operands swapped,
    i.e. in the opposite direction.  The first key on which the records
    differ decides the result; 0 is returned when they agree on every key.
    """
    for sortKey in config['rules']['sortKeys']:
        if sortKey[0] == '!':
            # Reversed key: strip the marker and swap the operands.
            sortKey = sortKey[1:]
            left, right = y, x
        else:
            left, right = x, y
        outcome = cmp(left[sortKey], right[sortKey])
        if outcome != 0:
            return outcome
    return 0
def createUser(username):
    """Return a fresh statistics record for ``username``.

    A wikitools User is created on the shared wiki() connection and stored
    under 'user'.  Its ``registration`` attribute seeds both 'firstEdit' and
    'creationTime'; all counters start at zero, 'lastEdit' at None and the
    boolean flags at False.

    NOTE(review): run() subtracts 'creationTime' and 'firstEdit' from an
    aware datetime, so ``registration`` is presumably an aware datetime as
    well - confirm against the wikitools version in use.
    """
    account = wikitools.user.User(wiki(), username)
    return dict(
        name=username,
        user=account,
        score=0.0,
        firstEdit=account.registration,
        lastEdit=None,
        daysOff=0,
        hasTalked=False,
        isBot=False,
        edits=0,
        undone=0,
        redirects=0,
        newPages=0,
        creationTime=account.registration,
    )
def run():
global config
config['rules'] = eval(page(config['pages']['rules']).getWikiText())
rules = config['rules'] # Convenience
undoRegex = compileRegex(r'^(?:Undo edit by|Reverted edits by).*?\[\[Special:Contributions/([^]|]+)(?:.*?\((\d+)\))?')
mysql = MySQLdb.connect(host=config['mysql']['host'], user=config['mysql']['username'], passwd=config['mysql']['password'], db=config['mysql']['database'], cursorclass=MySQLdb.cursors.DictCursor)
cursor = mysql.cursor()
cursor.execute('SELECT * FROM tfwikirc')
stats = {}
editScores = {}
now = datetime.datetime.utcnow().replace(tzinfo=wikitools.utc.utc)
while True:
row = cursor.fetchone()
if row == None:
break
row['timestamp'] = row['timestamp'].replace(tzinfo=wikitools.utc.utc)
print row
if not row['user'] in stats:
stats[row['user']] = createUser(row['user'])
rowScore = 0.0
for n in rules['namespaces']:
if rules['namespaces'][n]['id'] == row['namespace']:
if rules['namespaces'][n]['points'] >= 0.0:
# First, check if edit is not too old
if (now - row['timestamp']).days >= rules['inspectDuration']:
stats[row['user']]['firstEdit'] = row['timestamp']
break
# Take care of daysOff
if stats[row['user']]['lastEdit'] is None:
stats[row['user']]['lastEdit'] = row['timestamp']
else:
stats[row['user']]['daysOff'] += max(0, (row['timestamp'] - stats[row['user']]['lastEdit']).days - 1)
stats[row['user']]['lastEdit'] = row['timestamp']
# Now compute points
rowScore += rules['namespaces'][n]['points']
scoreBonus = rules['namespaces'][n]['points']
if row['flags'].find('N') != -1:
scoreBonus *= rules['newPageBonus']
stats[row['user']]['newPages'] += 1
if row['flags'].find('R') != -1:
scoreBonus *= rules['redirect']
stats[row['user']]['redirects'] += 1
editScores[str(row['rcid'])] = scoreBonus
stats[row['user']]['score'] += scoreBonus
stats[row['user']]['edits'] += 1
if row['flags'].find('b') != -1:
stats[row['user']]['isBot'] = True
# Handle undo's
undo = undoRegex.search(row['comment'])
if undo:
if undo.group(2) and undo.group(2) in editScores:
penalty = editScores[undo.group(2)] * rules['undoMultiplier']
else:
penalty = rules['undone']
if undo.group(1) not in stats:
stats[undo.group(1)] = createUser(undo.group(1))
stats[undo.group(1)]['score'] += penalty
stats[undo.group(1)]['undone'] += 1
if rules['namespaces'][n]['isTalk']:
stats[row['user']]['hasTalked'] = True
break
users = []
for u in stats:
stats[u]['daysOff'] += max(0, (now - stats[u]['lastEdit']).days - 1) # Days from last edit to moment of counting
print 'Now', now
print 'creation', stats[u]['creationTime']
print 'first', stats[u]['firstEdit']
if stats[u]['isBot']: # Skip bots
print 'Dropping', u, '- Bot'
continue
if u in rules['owners']:
print 'Dropping', u, '- Already has cap'
continue
if stats[u]['score'] < rules['minEdits']: # Skip too low scores
print 'Dropping', u, '- Score:', stats[u]['score'], '<', rules['minEdits']
continue
if stats[u]['daysOff'] > rules['maxDaysOff']: # Skip too many days off
print 'Dropping', u, '- Days off:', stats[u]['daysOff'], '>', rules['maxDaysOff']
continue
if (now - stats[u]['creationTime']).days < rules['minAge']: # Skip too young accounts
print 'Dropping', u, '- Age:', stats[u]['creationTime'], '<', rules['minAge']
continue
if (now - stats[u]['firstEdit']).days < rules['inspectDuration']: # Skip no edits in inspection duration
print 'Dropping', u, '- First edit:', stats[u]['firstEdit'], '<', rules['inspectDuration']
continue
print 'Accepting candidate:', u
users.append(stats[u])
config['users'] = users
users = sorted(users, cmp=userSort, reverse=True)
s = """{| class="wikitable grid sortable" align="center" style="text-align:center"
! class="header" style="font-size:90%;" | User
! class="header" style="font-size:90%;" | Score
! class="header" style="font-size:90%;" | Inactive days
! class="header" style="font-size:90%;" | Edits
! class="header" style="font-size:90%;" | Undone edits
! class="header" style="font-size:90%;" | Valid edits ratio
! class="header" style="font-size:90%;" | New pages
! class="header" style="font-size:90%;" | Redirects
! class="header" style="font-size:90%;" | Age
! class="header" style="font-size:90%;" | Has talked?
! class="header unsortable" style="font-size:90%;" | Contribs"""
i = 0
for u in users:
i += 1
if i > rules['maxCandidates']:
break
print u['name'], u['score']
hasTalked = 'Yes'
if not u['hasTalked']:
hasTalked = 'No'
s += '\n\
|-\n\
! {{subst:ul|' + u['name'] + '}}\n\
| ' + str(round(u['score'], 2)) + '\n\
| ' + str(u['daysOff']) + '\n\
| ' + str(u['edits']) + '\n\
| ' + str(u['undone']) + '\n\
| ' + str(round((1.0-float(u['undone'])/float(u['edits']))*100.0, 2)) + '%\n\
| ' + str(u['newPages']) + '\n\
| ' + str(u['redirects']) + '\n\
| ' + str((now - u['creationTime']).days) + ' days\n\
| ' + hasTalked + '\n\
| [[Special:Contributions/' + u['name'] + '|' + u['name'] + '\'s Contribs]]'
s+='\n\
|}'
editPage(config['pages']['results'], rules['template'].replace('%table%', s), summary=u'Updated Wiki Cap candidates list.', minor=False)
# Run the candidate count only when executed as a script, not on import.
if __name__ == '__main__':
    run()