0
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
1 #!/usr/bin/env python
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
2 # -*- encoding: UTF-8 -*-
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
3 ##
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
4 ## Copyright © 2012 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
5 ##
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
6 ## This program is free software; you can redistribute it and/or modify
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
7 ## it under the terms of the GNU General Public License as published
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
8 ## by the Free Software Foundation; version 3 only.
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
9 ##
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
10 ## This program is distributed in the hope that it will be useful,
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
11 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
13 ## GNU General Public License for more details.
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
14 ##
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
15
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
16
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
17 from __future__ import unicode_literals
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
18
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
19 from lxml import etree
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
20 from lxml.builder import E
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
21 from time import strptime, strftime
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
22 import epub
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
23 from argparse import ArgumentParser
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
24
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
25 try:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
26 from urllib.request import urlopen
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
27 from urllib.error import HTTPError
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
28 except ImportError:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
29 from urllib2 import urlopen
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
30 from urllib2 import HTTPError
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
31
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
32
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
33 images_list = []
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
34 mime_type = {
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
35 '.jpg': 'image/jpeg',
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
36 'jpeg': 'image/jpeg',
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
37 '.png': 'image/png',
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
38 '.gif': 'image/gif',
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
39 '.svg': 'image/svg+xml'
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
40 }
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
41
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
42
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
43 class Thread(object):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
44 def __init__(self, op, replies):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
45 self.op = op
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
46 self.replies = replies
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
47 self.title = op.title
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
48 self.author = op.author
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
49
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
50 def render(self, only_op):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
51 html = E.html(
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
52 E.head(
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
53 E.title(self.title),
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
54 E.link(href='story.css', type='text/css', rel='stylesheet') #TODO: convert it to a PI.
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
55 ),
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
56 E.body(
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
57 E.h1(self.title)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
58 ),
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
59 xmlns='http://www.w3.org/1999/xhtml'
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
60 )
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
61
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
62 body = html.find('body')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
63 body.append(self.op.render(display_title=False))
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
64
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
65 for reply in self.replies:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
66 # Remove user answers if not wanted.
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
67 if only_op and not reply.is_op(self.op):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
68 continue
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
69
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
70 body.append(reply.render())
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
71
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
72 return html
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
73
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
74
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
75 class Post(object):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
76 def __init__(self, title, author, date, image, content):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
77 self.title = title
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
78 self.author = author
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
79 self.date = date
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
80 self.image = image
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
81 self.content = content
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
82
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
83 def is_op(self, op):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
84 return self.author.trip == op.author.trip
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
85
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
86 def render(self, display_title=True):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
87 if display_title:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
88 title = E.h2(self.title) if self.title and display_title else E.h2('⁂')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
89 else:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
90 title = ''
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
91
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
92 img = self.image.render() if self.image else ''
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
93
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
94 p = E.p()
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
95 for item in self.content:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
96 #TODO: remove useless attributes like onclick.
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
97 p.append(item)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
98
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
99 article = E.article(
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
100 title,
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
101 E.footer(
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
102 E.cite(self.author.render()),
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
103 ' ',
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
104 E.time(strftime('%y/%m/%d(%a)%H:%M', self.date), time=strftime('%y-%m-%dT%H:%M:%SZ', self.date))
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
105 ),
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
106 img,
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
107 p
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
108 )
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
109 return article
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
110
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
111
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
112 class Image(object):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
113 def __init__(self, name, filesize, size, url):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
114 self.name = name
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
115 self.filesize = filesize
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
116 self.size = size
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
117 self.url = url
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
118
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
119 def render(self):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
120 try:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
121 url_file = urlopen(self.url)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
122 except HTTPError:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
123 return ''
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
124 with open(self.name, 'wb') as out:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
125 out.write(url_file.read())
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
126 images_list.append(self.name)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
127 return E.img(src=self.name, alt=self.name)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
128
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
129
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
130 class Author(object):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
131 def __init__(self, name, trip, mail):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
132 self.name = name
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
133 self.trip = trip
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
134 self.mail = mail
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
135
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
136 def render(self):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
137 return '{}{}'.format(self.name, self.trip)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
138
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
139
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
140 def parse_post(root):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
141 # We use the filesize element because it contains the image name.
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
142 label = root.find('label')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
143 filesize = root.find('span[@class="filesize"]')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
144 if filesize is not None:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
145 a = filesize.getnext().getnext()
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
146 filesize = etree.tostring(filesize, method='text', encoding='UTF-8')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
147 filesize = filesize.split()
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
148 name = filesize[7:-2]
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
149 name = b' '.join(name)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
150 try:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
151 name = name.decode('UTF-8')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
152 except UnicodeDecodeError:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
153 for i in range(-5, -42, -1):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
154 char = name[i] if type(name[i]) is int else ord(name[i])
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
155 if char & 0xc0 == 0xc0:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
156 name = name[:i-1] + b'\xef\xbf\xbd' + name[-4:]
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
157 break
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
158 name = name.decode('UTF-8')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
159 filesize, size = filesize[3][1:], filesize[5]
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
160 if a.tag == 'a':
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
161 url = a.get('href')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
162 else:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
163 url = a.find('img').get('src')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
164
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
165 image = Image(name, filesize, size, url)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
166 else:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
167 image = None
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
168
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
169 label = root.find('label')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
170 title = label.find('span[@class="filetitle"]')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
171 title = title.text.strip() if title is not None else None
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
172 name = label.find('span[@class="postername"]')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
173 if name is not None:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
174 last = name
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
175 a = name.find('a')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
176 if a is not None:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
177 mail = a.get('href')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
178 name = a.text
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
179 else:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
180 mail = ''
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
181 name = name.text
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
182 else:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
183 mail = ''
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
184 name = ''
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
185
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
186 postertrip = label.find('span[@class="postertrip"]')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
187 if postertrip is not None:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
188 trip = postertrip.text
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
189 last = postertrip
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
190 else:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
191 trip = ''
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
192
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
193 author = Author(name, trip, mail)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
194
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
195 date = strptime(last.tail.strip(), '%y/%m/%d(%a)%H:%M')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
196
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
197 blockquote = root.find('blockquote')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
198 content = []
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
199 for item in blockquote:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
200 if item is str and item.strip() == '':
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
201 continue
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
202 content.append(item)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
203
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
204 return Post(title, author, date, image, content)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
205
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
206
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
207 def parse_thread(url):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
208 tree = etree.parse(url, etree.HTMLParser())
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
209
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
210 root = tree.find('//form[@id="delform"]')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
211 if root is None:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
212 root = tree.find('//body')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
213 op = parse_post(root)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
214
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
215 replies = []
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
216 td = root.findall('.//td[@class="reply"]')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
217 for reply in td:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
218 replies.append(parse_post(reply))
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
219
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
220 return Thread(op, replies)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
221
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
222
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
223 def main(url, forum, only_op, threads):
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
224 threads_list = []
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
225 for thread in threads:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
226 print('Rendering of thread №{}…'.format(thread))
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
227 t = parse_thread(url.format(forum, thread))
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
228 threads_list.append(t)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
229
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
230 html = t.render(only_op)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
231
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
232 # Use b mode as it allows us to directly dump UTF-8 data.
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
233 with open('{}.xhtml'.format(thread), 'wb') as f:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
234 f.write(etree.tostring(html, pretty_print=True, xml_declaration=True, doctype='<!DOCTYPE html SYSTEM "/tmp/test.dtd">', encoding='UTF-8'))
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
235
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
236 with epub.open('story.epub', 'w') as book:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
237 t = threads_list[0]
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
238
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
239 book.opf.metadata.add_title(t.title)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
240 book.opf.metadata.add_creator(t.author.render())
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
241 book.opf.metadata.add_date(strftime('%y-%m-%dT%H:%M:%SZ'))
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
242 book.opf.metadata.add_language('en')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
243
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
244 for thread in threads:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
245 filename = '{}.xhtml'.format(thread)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
246 manifest_item = epub.opf.ManifestItem(identifier='thread_{}'.format(thread),
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
247 href=filename,
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
248 media_type='application/xhtml+xml')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
249 book.add_item(filename, manifest_item, True)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
250
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
251 for image in images_list:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
252 extension = image[-4:]
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
253 manifest_item = epub.opf.ManifestItem(identifier='image_{}'.format(image),
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
254 href=image,
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
255 media_type=mime_type[extension])
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
256 book.add_item(image, manifest_item, True)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
257
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
258 manifest_item = epub.opf.ManifestItem(identifier='style',
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
259 href='story.css',
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
260 media_type='text/css')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
261 book.add_item('story.css', manifest_item)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
262
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
263 book.toc.title = t.title
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
264 nav_map = book.toc.nav_map
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
265 for thread in threads:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
266 nav_point = epub.ncx.NavPoint()
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
267 nav_point.identifier = 'thread_%d' % thread
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
268 nav_point.add_label('Thread №%d' % thread)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
269 nav_point.src = '%d.xhtml' % thread
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
270 nav_map.nav_point.append(nav_point)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
271
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
272
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
273 if __name__ == '__main__':
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
274 parser = ArgumentParser(description='Download and convert THP stories.')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
275
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
276 parser.add_argument('threads', metavar='THREADS', nargs='+', type=int, help='List of the threads of the story.')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
277 parser.add_argument('-u', '--url', metavar='URL', default='http://www.touhou-project.com/{}/res/{}.html', help='URL pattern from which the story will be downloaded, with the first {} as the forum, and the second as the thread.')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
278 parser.add_argument('-f', '--forum', metavar='FORUM', default='sdm', help='The name of the forum (example: sdm, th, etc.).')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
279 parser.add_argument('-o', '--only-op', action='store_true', help='Include only posts made by the original poster.')
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
280
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
281 args = parser.parse_args()
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
282
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
283 main(args.url, args.forum, args.only_op, args.threads)
|