Roundup Tracker

Attachment 'detector_spambayes.py'

Download

   1 import xmlrpclib
   2 import socket
   3 import time
   4 import math
   5 import re
   6 
   7 from roundup.exceptions import Reject
   8 
   9 REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
  10 
  11 def extract_classinfo(db, klass, nodeid, newvalues):
  12     if None == nodeid:
  13         node = newvalues
  14         content = newvalues['content']
  15     else:
  16         node = db.getnode(klass.classname, nodeid)
  17         content = klass.get(nodeid, 'content')
  18 
  19     if node.has_key('creation') or node.has_key('date'):
  20         nodets = node.get('creation', node.get('date')).timestamp()
  21     else:
  22         nodets = time.time()
  23 
  24     if node.has_key('author') or node.has_key('creator'):
  25         authorid = node.get('author', node.get('creator'))
  26     else:
  27         authorid = db.getuid()
  28 
  29     authorage = nodets - db.getnode('user', authorid)['creation'].timestamp()
  30 
  31     tokens = ["klass:%s" % klass.classname,
  32               "author:%s" % authorid,
  33               "authorage:%d" % int(math.log(authorage)),
  34               "hasrev:%s" % (REVPAT.search(content) is not None)]
  35 
  36 
  37     return (content, tokens)
  38 
  39 def check_spambayes(db, content, tokens):
  40     try:
  41         spambayes_uri = db.config.detectors['SPAMBAYES_URI']
  42     except KeyError, e:
  43         return (False, str(e))
  44 
  45     try:
  46         server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
  47     except IOError, e:
  48         return (False, str(e))
  49 
  50 
  51     try:
  52         prob = server.score({'content':content}, tokens, {})
  53         return (True, prob)
  54     except (socket.error, xmlrpclib.Error), e:
  55         return (False, str(e))
  56 
  57 
  58 def check_spam(db, klass, nodeid, newvalues):
  59     """Auditor to score a website submission."""
  60 
  61 
  62     if newvalues.has_key('spambayes_score'):
  63         if not db.security.hasPermission('SB: May Classify', db.getuid()):
  64             raise ValueError, "You don't have permission to spamclassify messages"
  65         # Don't do anything if we're explicitly setting the score
  66         return
  67 
  68     if not newvalues.has_key('content'):
  69         # No need to invoke spambayes if the content of the message
  70         # is unchanged.
  71         return
  72 
  73     (content, tokens) = extract_classinfo(db, klass, nodeid, newvalues)
  74     (success, other) = check_spambayes(db, content, tokens)
  75     if success:
  76         newvalues['spambayes_score'] = other
  77         newvalues['spambayes_misclassified'] = False
  78     else:
  79         newvalues['spambayes_score'] = -1
  80         newvalues['spambayes_misclassified'] = True
  81 
  82 def init(database):
  83     """Initialize auditor."""
  84     database.msg.audit('create', check_spam)
  85     database.msg.audit('set', check_spam)
  86     database.file.audit('create', check_spam)
  87     database.file.audit('set', check_spam)

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2021-09-03 20:00:31, 0.8 KB) [[attachment:config.ini.template]]
  • [get | view] (2021-09-03 20:00:09, 2.6 KB) [[attachment:detector_spambayes.py]]
  • [get | view] (2021-09-03 20:00:18, 2.7 KB) [[attachment:extensions_spambayes.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.