Mercurial > feed-push
annotate feed-push @ 15:68a9b24a182a draft
encode log messages in UTF8 before passing them to syslog
Signed-off-by: Changaco <changaco ατ changaco δοτ net>
author | Changaco <changaco ατ changaco δοτ net> |
---|---|
date | Tue, 24 Apr 2012 00:30:31 +0200 |
parents | 5c26fc1adbac |
children | 183f4f544987 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python2 |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # This program is free software: you can redistribute it and/or modify | |
5 # it under the terms of the GNU General Public License as published by | |
6 # the Free Software Foundation, either version 3 of the License, or | |
7 # (at your option) any later version. | |
8 # | |
9 # This program is distributed in the hope that it will be useful, | |
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 # GNU General Public License for more details. | |
13 # | |
14 # You should have received a copy of the GNU General Public License | |
15 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | |
17 import argparse | |
18 import calendar | |
19 from functools import partial, reduce | |
20 from glob import glob | |
21 import json | |
22 import os | |
7 | 23 from os.path import abspath, dirname, isdir |
1 | 24 import shlex |
25 from subprocess import Popen, PIPE, STDOUT | |
26 import sys | |
27 from syslog import * | |
28 import time | |
29 | |
30 import feedparser | |
31 import gamin | |
32 | |
33 | |
34 # Constants | |
35 | |
# Level names ordered from most to least verbose; log() indexes this list
# with (7 - syslog priority).
log_levels = [
    'DEBUG', 'INFO', 'NOTICE', 'WARNING',
    'ERR', 'CRIT', 'ALERT', 'EMERG',
]

# Readable names for the numeric gamin event codes (used in log messages).
gamin_events = {
    1: 'GAMChanged',
    2: 'GAMDeleted',
    3: 'GAMStartExecuting',
    4: 'GAMStopExecuting',
    5: 'GAMCreated',
    6: 'GAMMoved',
    7: 'GAMAcknowledge',
    8: 'GAMExists',
    9: 'GAMEndExist',
}
42 | |
43 | |
44 # Generic utils | |
45 | |
def concat(l):
    """Flatten one level of nesting.

    Returns a new list holding, in order, the items of every iterable
    found in l.  Runs in linear time and accepts any iterables (lists,
    tuples, dict key views, ...), not only lists.
    """
    return [item for sub in l for item in sub]
47 | |
def dict_append(d, k, v):
    """Append v to the list stored under d[k], creating that list if absent."""
    bucket = d.setdefault(k, [])
    bucket.append(v)
51 | |
52 | |
53 # Logging | |
54 | |
def log(*args):
    """Write a message to syslog or stderr, honouring --log-level.

    Call as log(msg), which uses LOG_INFO, or as log(priority, msg) where
    priority is one of the syslog LOG_* constants.  Calls with any other
    number of arguments are silently ignored.
    """
    if len(args) == 1:
        priority, msg = LOG_INFO, args[0]
    elif len(args) == 2:
        priority, msg = args
    else:
        return
    # log_levels (and --log-level) are ordered from DEBUG to EMERG, which is
    # the reverse of the numeric syslog priorities, hence the inversion.
    level = 7 - priority
    if level < global_args.log_level:
        return
    if isinstance(msg, unicode):
        msg = msg.encode('utf8')
    if global_args.syslog:
        # syslog() needs the original LOG_* priority; passing the inverted
        # index would record e.g. INFO messages as ALERT.
        syslog(priority, msg)
    else:
        sys.stderr.write(log_levels[level]+': '+msg+'\n')
71 | |
def ignore_event(path, event):
    """Record at debug level that a gamin event on path is not handled."""
    event_name = gamin_events.get(event, str(event))
    log(LOG_DEBUG, 'ignoring event '+event_name+' on '+path)
74 | |
75 | |
76 # Config parsing | |
77 | |
def parse_config_file(config_fd):
    """(Re)load the actions of a config file and watch the feeds it names.

    Blank lines and lines starting with '#' are skipped.  Lines starting
    with '%' are shell commands; consecutive command lines are joined with
    '; '.  Any other line is a glob pattern: every matching feed path gets
    the command that precedes it, and is watched if it is not open yet.
    Parsing stops with an error message at the first pattern that has no
    command before it.
    """
    feeds_paths = config_to_feed_paths_to_commands[config_fd.name] = {}
    cmd = []
    log('parsing config file '+config_fd.name)
    config_fd.seek(0)
    # Count from 1 so the error message reports a real line number.
    for i, line in enumerate(config_fd, 1):
        line = line.strip()
        if len(line) == 0 or line[0] == '#':
            continue
        if line[0] == '%':
            # A command line after a pattern line starts a new command group.
            if isinstance(cmd, str):
                cmd = []
            cmd.append(line[1:].rstrip(';'))
        elif not cmd:
            log(LOG_ERR, 'missing command in file '+config_fd.name+' before line '+str(i))
            return
        else:
            # Join only once: when several pattern lines share a command,
            # cmd is already a string and joining it again would insert
            # '; ' between each of its characters.
            if not isinstance(cmd, str):
                cmd = '; '.join(cmd)
            for feed_path in glob(line):
                feed_path = abspath(feed_path)
                dict_append(feeds_paths, feed_path, cmd)
                if feed_path not in path_to_feed_fd:
                    monitor.watch_file(feed_path, handle_feed_change)
                    log('now watching '+feed_path)
1 | 102 |
103 | |
104 # Gamin callbacks | |
105 | |
def handle_config_change(path, event):
    """Gamin callback for config files: dispatch the event to a handler.

    Directories are ignored, paths that are not open yet go to
    open_config(), and changes or deletions of open ones go to
    update_config().  Every other event is ignored.
    """
    path = abspath(path)
    if os.path.isdir(path):
        ignore_event(path, event)
        return
    if path not in path_to_config_fd:
        open_config(path, event)
        return
    if event in (gamin.GAMChanged, gamin.GAMDeleted):
        update_config(path, event)
        return
    ignore_event(path, event)
116 | |
def open_config(path, event):
    """Open and parse a config file that has just appeared.

    Only GAMCreated and GAMExists events are acted upon.  When a directory
    is being watched (global_args.config is not a file object), files whose
    name does not end in '.conf' or that are hidden are skipped.
    """
    if event not in (gamin.GAMCreated, gamin.GAMExists):
        ignore_event(path, event)
        return
    watching_directory = not hasattr(global_args.config, 'read')
    # The path is absolute, so the hidden-file test must look at the file
    # name rather than at the first character of the whole path.
    name = os.path.basename(path)
    if watching_directory and (not name.endswith('.conf') or name.startswith('.')):
        return log('ignoring '+path+' (not a valid config file name)')
    try:
        config_fd = open(path)
    except IOError as e:
        return log('failed to open "'+path+'": '+str(e))
    path_to_config_fd[path] = config_fd
    parse_config_file(config_fd)
129 | |
def update_config(path, event):
    """Apply the change or deletion of an already-open config file.

    A changed file is parsed again; a deleted one has its actions dropped
    and its file object closed.  Feeds that no remaining config refers to
    afterwards are unwatched and closed.
    """
    feeds_paths = set(concat(d.keys() for d in config_to_feed_paths_to_commands.values()))
    if event == gamin.GAMChanged:
        parse_config_file(path_to_config_fd[path])
    elif event == gamin.GAMDeleted:
        # Actions are keyed by the name the file was opened with, i.e. path.
        log('removing actions from deleted config file '+path)
        config_to_feed_paths_to_commands.pop(path)
        path_to_config_fd.pop(path).close()
    new_feeds_paths = set(concat(d.keys() for d in config_to_feed_paths_to_commands.values()))
    for feed_path in feeds_paths.difference(new_feeds_paths):
        monitor.stop_watch(feed_path)
        log('stopped watching '+feed_path)
        if feed_path in path_to_feed_fd:
            path_to_feed_fd.pop(feed_path).close()
144 | |
def handle_feed_change(path, event):
    """Gamin callback for feed files: run the commands for new entries.

    A feed that is not open yet is opened and then processed as if it had
    changed.  On a change, every entry whose id is not in the id cache
    (and, unless --flood is set, that was published less than 24h ago) is
    piped to each command configured for this feed; the id cache is then
    replaced by the ids currently in the feed and saved.  A deleted feed
    is closed.
    """
    if path not in path_to_feed_fd:
        if event not in (gamin.GAMCreated, gamin.GAMExists, gamin.GAMChanged):
            ignore_event(path, event)
            return
        try:
            path_to_feed_fd[path] = open(path)
        except IOError as e:
            return log('failed to open "'+path+'": '+str(e))
        # The recursive call reads and parses the file itself, so there is
        # no need to parse it here as well.
        handle_feed_change(path, gamin.GAMChanged)
    elif event == gamin.GAMChanged:
        feed_fd = path_to_feed_fd[path]
        feed_fd.seek(0)
        feed = feedparser.parse(feed_fd.read())
        # The cache is only rewritten after the loop, so build the lookup
        # set once instead of scanning a list for every entry.
        known_ids = set(state['id_cache'].get(feed_fd.name, []))
        # Oldest entries first.
        for entry in reversed(feed.entries):
            if entry.id in known_ids:
                continue
            if not global_args.flood and calendar.timegm(entry.published_parsed) < time.time() - 86400:
                continue
            for feed_path_to_commands in config_to_feed_paths_to_commands.values():
                for cmd in feed_path_to_commands.get(path, []):
                    run_command(format_cmd(cmd, feed=feed.feed, entry=entry), entry.content[0].value)
        state['id_cache'][feed_fd.name] = [entry.id for entry in feed.entries]
        save_state()
    elif event == gamin.GAMDeleted:
        path_to_feed_fd.pop(path).close()
    else:
        ignore_event(path, event)
173 | |
def save_state():
    """Replace the content of the state file with state serialized as JSON."""
    state_file = global_args.state_file
    state_file.truncate(0)
    json.dump(state, state_file)
    state_file.flush()
178 | |
179 | |
180 # Commands utils | |
181 | |
def format_cmd(cmd, **kwargs):
    """The safe equivalent of str.format() for shell commands, meaning interpolated variables can't do shell injections (I hope).

    cmd is split into shell words.  Each word is formatted with kwargs, and
    a word changed by the formatting is wrapped in single quotes (embedded
    single quotes are escaped).  Unchanged words are emitted as they were
    split.  cmd may be a byte string (assumed to be UTF-8) or a unicode
    string; the result is always unicode.
    """
    # Decode explicitly: calling .encode() on a non-ASCII byte string, or
    # mixing it with unicode, would trigger an implicit ASCII decode.
    if isinstance(cmd, bytes):
        cmd = cmd.decode('utf8')
    if sys.version_info[0] < 3:
        # shlex did not support unicode input before Python 2.7.3, so split
        # the UTF-8 bytes and decode each word afterwards.
        words = [w.decode('utf8') for w in shlex.split(cmd.encode('utf8'))]
    else:
        words = shlex.split(cmd)
    parts = []
    for word in words:
        formatted = word.format(**kwargs)
        if formatted != word:
            # Close the quote, emit a double-quoted ', then reopen the quote.
            parts.append(u"'" + formatted.replace(u"'", u'\'"\'"\'') + u"'")
        else:
            parts.append(word)
    return u' '.join(parts).lstrip()
193 | |
def run_command(cmd, input):
    """Run cmd through the shell, feeding it input, and log the outcome.

    input is sent UTF-8 encoded on the command's stdin; stdout and stderr
    are captured together.  A non-zero exit status is logged as an error
    with the output, otherwise the output is only logged at debug level.
    """
    p = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT, shell=True)
    raw_output = p.communicate(input.encode('utf8'))[0]
    # Commands may print anything; undecodable bytes must not raise here.
    output = raw_output.decode('utf8', 'replace')
    if p.returncode != 0:
        log(LOG_ERR, 'command failed: '+cmd+'\n'+output)
    else:
        log(LOG_INFO, 'successfully executed '+cmd)
        log(LOG_DEBUG, '===== output:\n'+output)
202 | |
203 | |
204 # Argparse utils | |
205 | |
def AbsPath(next_type=None):
    """Build an argparse type that turns its argument into an absolute path.

    When next_type is given, the absolute path is passed to it and its
    result is returned instead.
    """
    def convert(s):
        absolute = abspath(s)
        return absolute if next_type is None else next_type(absolute)
    return convert
214 | |
class Apply(argparse.Action):
    """argparse action storing f(first value) under the option's dest.

    Meant to be used as action=partial(Apply, f) together with nargs=1.
    """

    def __init__(self, f, *args, **kwargs):
        # Name the class explicitly: super(self.__class__, self) resolves
        # to the wrong class as soon as Apply is subclassed.
        # Extra positional arguments are accepted but not forwarded.
        super(Apply, self).__init__(**kwargs)
        self.f = f

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, self.f(values[0]))
221 | |
6
455cd8c78862
create state dir if it doesn't exist
Changaco <changaco ατ changaco δοτ net>
parents:
1
diff
changeset
|
def MakeDirs(next_type=None):
    """Build an argparse type that creates the argument's parent directory.

    The directory part of the path is created (with any missing parents)
    when it is non-empty and does not exist; a failure is reported as an
    argparse.ArgumentTypeError.  The path is then passed to next_type if
    one was given, otherwise it is returned unchanged.
    """
    def convert(s):
        parent = dirname(s)
        if parent and not isdir(parent):
            try:
                os.makedirs(parent)
            except OSError as e:
                raise argparse.ArgumentTypeError(str(e))
        return s if next_type is None else next_type(s)
    return convert
455cd8c78862
create state dir if it doesn't exist
Changaco <changaco ατ changaco δοτ net>
parents:
1
diff
changeset
|
235 |
def Directory(s):
    """argparse type accepting only paths that can be listed as directories.

    Returns s unchanged, or raises argparse.ArgumentTypeError carrying the
    OS error message when os.listdir() fails.
    """
    try:
        os.listdir(s)
    except OSError as e:
        raise argparse.ArgumentTypeError(str(e))
    return s
242 | |
def File(flags):
    """Build an argparse type that opens its argument with os.open(flags).

    The returned converter yields a file object opened for writing, or
    raises argparse.ArgumentTypeError carrying the OS error message.
    """
    def opener(s):
        try:
            fd = os.open(s, flags)
            return os.fdopen(fd, 'w')
        except OSError as e:
            raise argparse.ArgumentTypeError(str(e))
    return opener
250 | |
class First(argparse.Action):
    """argparse action that stores only the first of the parsed values."""

    def __call__(self, parser, namespace, values, option_string=None):
        first = values[0]
        setattr(namespace, self.dest, first)
254 | |
def FirstOf(*types, **kwargs):
    """Build an argparse type that tries several converters in order.

    The returned converter gives the result of the first of types that
    accepts the argument.  If all of them fail it raises
    argparse.ArgumentTypeError with the 'error' keyword argument (a format
    string receiving the argument; a generic message by default).
    """
    # Read the template from kwargs; there is no local name 'error' otherwise.
    error = kwargs.get('error', 'argument "{}" is not valid')
    def f(s):
        for t in types:
            try:
                return t(s)
            except Exception:
                # A failing converter only means "try the next one".
                # KeyboardInterrupt and SystemExit still propagate.
                pass
        raise argparse.ArgumentTypeError(error.format(s))
    return f
265 | |
266 | |
267 # Main | |
268 | |
if __name__ == '__main__':

    # Command line
    p = argparse.ArgumentParser()
    p.add_argument('config', type=FirstOf(AbsPath(argparse.FileType('r')), AbsPath(Directory), error='"{}" is neither a file nor a directory'), help='either a file or a directory')
    p.add_argument('state_file', type=MakeDirs(argparse.FileType('a+')), help='e.g. /var/lib/feed-push/state')
    p.add_argument('--flood', default=False, action='store_true', help='push all articles on startup instead of ignoring the ones older than 24h (useful for debugging)')
    p.add_argument('--fork', metavar='pid-file', nargs=1, type=File(os.O_WRONLY|os.O_CREAT|os.O_EXCL), action=First, help='useful in init scripts')
    p.add_argument('--log-level', nargs=1, default=1, choices=log_levels, action=partial(Apply, log_levels.index), help='default is INFO')
    p.add_argument('--syslog', default=False, action='store_true', help='log to syslog instead of stderr')
    global_args = p.parse_args()

    # Daemonize: the parent records the child's pid and leaves.
    if global_args.fork:
        pid = os.fork()
        if pid != 0:
            global_args.fork.write(str(pid))
            global_args.fork.close()
            # sys.exit() rather than exit(), which only exists when the
            # site module has been loaded.
            sys.exit(0)

    if global_args.syslog:
        openlog(facility=LOG_DAEMON)

    # Restore the saved state, if any.
    state = {'id_cache': {}}
    # The file is opened in 'a+' mode, whose initial read position is not
    # the same everywhere, so rewind before reading.
    global_args.state_file.seek(0)
    saved_state = global_args.state_file.read().strip()
    if len(saved_state) > 0:
        state.update(json.loads(saved_state))
    del saved_state

    # Watch either the single config file or the whole config directory;
    # the working directory is moved next to the configuration.
    monitor = gamin.WatchMonitor()
    path_to_feed_fd = {}
    path_to_config_fd = {}
    config_to_feed_paths_to_commands = {}
    if hasattr(global_args.config, 'read'):
        os.chdir(os.path.dirname(global_args.config.name))
        monitor.watch_file(global_args.config.name, handle_config_change)
    else:
        os.chdir(global_args.config)
        monitor.watch_directory(global_args.config, handle_config_change)

    # Event loop: the callbacks registered above are invoked from here.
    while True:
        monitor.handle_one_event()