- attachment:detector_spambayes.py of SpamBayesIntegration
Attachment 'detector_spambayes.py'
Download 1 import xmlrpclib
2 import socket
3 import time
4 import math
5 import re
6
7 from roundup.exceptions import Reject
8
# Matches a revision reference in free text: either "r" immediately followed
# by digits (e.g. "r1234"), or "rev"/"revision" followed by a single space
# and digits (e.g. "rev 12", "revision 12").
REVPAT = re.compile(
    r'(r[0-9]+\b|rev(ision)? [0-9]+\b)'
)
10
def extract_classinfo(db, klass, nodeid, newvalues):
    """Collect the text and the extra tokens used to score a node.

    db        -- the tracker database; used to look up nodes and the
                 current user id.
    klass     -- the class of the node being scored.
    nodeid    -- id of an existing node, or None when the node is being
                 created.
    newvalues -- mapping of the property values being submitted.

    Returns a ``(content, tokens)`` tuple.  ``tokens`` is a list of four
    strings describing the class name, the author id, the logarithm of the
    author's account age in seconds (truncated to an int) and whether the
    content matches REVPAT.
    """
    if nodeid is None:
        # New node: everything we know about it is in newvalues.
        node = newvalues
        content = newvalues['content']
    else:
        node = db.getnode(klass.classname, nodeid)
        # Prefer the content being submitted; klass.get() returns what is
        # already stored, which is only right when no new content is given.
        try:
            content = newvalues['content']
        except KeyError:
            content = klass.get(nodeid, 'content')

    if node.has_key('creation') or node.has_key('date'):
        nodets = node.get('creation', node.get('date')).timestamp()
    else:
        nodets = time.time()

    if node.has_key('author') or node.has_key('creator'):
        authorid = node.get('author', node.get('creator'))
    else:
        authorid = db.getuid()

    authorage = nodets - db.getnode('user', authorid)['creation'].timestamp()

    # math.log() raises ValueError for zero or negative input, which happens
    # when the node is not newer than its author's account.  Use bucket 0
    # for that case instead of crashing.
    if authorage > 0:
        agebucket = int(math.log(authorage))
    else:
        agebucket = 0

    tokens = ["klass:%s" % klass.classname,
              "author:%s" % authorid,
              "authorage:%d" % agebucket,
              "hasrev:%s" % (REVPAT.search(content) is not None)]

    return (content, tokens)
38
def check_spambayes(db, content, tokens):
    """Ask the SpamBayes XML-RPC server to score a submission.

    db      -- the tracker database; the server address is read from
               ``db.config.detectors['SPAMBAYES_URI']``.
    content -- the text to score.
    tokens  -- list of extra token strings passed along to the server.

    Returns ``(True, score)`` when the server answered, otherwise
    ``(False, message)`` where ``message`` is the string form of the
    exception: a missing SPAMBAYES_URI setting, an IOError while creating
    the proxy, or a socket/XML-RPC error during the call.
    """
    try:
        spambayes_uri = db.config.detectors['SPAMBAYES_URI']
    except KeyError as e:
        return (False, str(e))

    try:
        server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
    except IOError as e:
        return (False, str(e))

    # Only the remote call itself is guarded; building the result tuple
    # cannot raise.
    try:
        prob = server.score({'content': content}, tokens, {})
    except (socket.error, xmlrpclib.Error) as e:
        return (False, str(e))
    return (True, prob)
56
57
def check_spam(db, klass, nodeid, newvalues):
    """Auditor to score a website submission.

    When ``newvalues`` explicitly sets ``spambayes_score`` the change is
    left alone, provided the current user holds the 'SB: May Classify'
    permission; otherwise ValueError is raised.  When ``newvalues`` carries
    no ``content`` nothing is done.  In every other case the content is
    scored and ``spambayes_score`` / ``spambayes_misclassified`` are written
    into ``newvalues``; a failed scoring attempt is recorded as score -1
    with the misclassified flag set.
    """
    if 'spambayes_score' in newvalues:
        if not db.security.hasPermission('SB: May Classify', db.getuid()):
            raise ValueError(
                "You don't have permission to spamclassify messages")
        # Don't do anything if we're explicitly setting the score
        return

    if 'content' not in newvalues:
        # No need to invoke spambayes if the content of the message
        # is unchanged.
        return

    (content, tokens) = extract_classinfo(db, klass, nodeid, newvalues)
    (success, other) = check_spambayes(db, content, tokens)
    if success:
        newvalues['spambayes_score'] = other
        newvalues['spambayes_misclassified'] = False
    else:
        # Scoring failed: store a sentinel score and flag the node.
        newvalues['spambayes_score'] = -1
        newvalues['spambayes_misclassified'] = True
81
def init(database):
    """Register check_spam as an auditor on the msg and file classes.

    The auditor is attached to both the 'create' and the 'set' events of
    each class, msg first, then file.
    """
    for classname in ('msg', 'file'):
        for event in ('create', 'set'):
            getattr(database, classname).audit(event, check_spam)
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.