Roundup Tracker

Attachment 'extensions_spambayes.py'

Download

   1 import re, math
   2 from roundup.cgi.actions import Action
   3 from roundup.cgi.exceptions import *
   4 
   5 import xmlrpclib, socket
   6 
   7 REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
   8 
   9 def extract_classinfo(db, classname, nodeid):
  10     node = db.getnode(classname, nodeid)
  11 
  12     authorage = node['creation'].timestamp() - \
  13                 db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()
  14 
  15     authorid = node.get('author', node.get('creator'))
  16 
  17     content = db.getclass(classname).get(nodeid, 'content')
  18 
  19     tokens = ["klass:%s" % classname,
  20               "author:%s" % authorid,
  21               "authorage:%d" % int(math.log(authorage)),
  22               "hasrev:%s" % (REVPAT.search(content) is not None)]
  23 
  24     return (content, tokens)
  25 
  26 def train_spambayes(db, content, tokens, is_spam):
  27     # spambayes training is now disabled; only leave
  28     # spam classification UI
  29     return True, None
  30     spambayes_uri = db.config.detectors['SPAMBAYES_URI']
  31 
  32     server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
  33     try:
  34         server.train({'content':content}, tokens, {}, is_spam)
  35         return (True, None)
  36     except (socket.error, xmlrpclib.Error), e:
  37         return (False, str(e))
  38 
  39 
  40 class SpambayesClassify(Action):
  41     permissionType = 'SB: May Classify'
  42 
  43     def handle(self):
  44         (content, tokens) = extract_classinfo(self.db,
  45                                               self.classname, self.nodeid)
  46 
  47         if self.form.has_key("trainspam"):
  48             is_spam = True
  49         elif self.form.has_key("trainham"):
  50             is_spam = False
  51 
  52         (status, errmsg) = train_spambayes(self.db, content, tokens,
  53                                            is_spam)
  54 
  55         node = self.db.getnode(self.classname, self.nodeid)
  56         props = {}
  57 
  58         if status:
  59             if node.get('spambayes_misclassified', False):
  60                 props['spambayes_misclassified'] = True
  61 
  62             props['spambayes_score'] = 1.0
  63 
  64             s = " SPAM"
  65             if not is_spam:
  66                 props['spambayes_score'] = 0.0
  67                 s = " HAM"
  68             self.client.add_ok_message(self._('Message classified as') + s)
  69         else:
  70             self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg)
  71 
  72         klass = self.db.getclass(self.classname)
  73         klass.set(self.nodeid, **props)
  74         self.db.commit()
  75 
  76 def sb_is_spam(obj):
  77     cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
  78     try:
  79         score = obj['spambayes_score']
  80     except KeyError:
  81         return False
  82     return score >= cutoff_score
  83 
  84 def init(instance):
  85     instance.registerAction("spambayes_classify", SpambayesClassify)
  86     instance.registerUtil('sb_is_spam', sb_is_spam)

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2021-09-03 20:00:31, 0.8 KB) [[attachment:config.ini.template]]
  • [get | view] (2021-09-03 20:00:09, 2.6 KB) [[attachment:detector_spambayes.py]]
  • [get | view] (2021-09-03 20:00:18, 2.7 KB) [[attachment:extensions_spambayes.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.