1 | """ |
---|
2 | PyRSS2Gen - A Python library for generating RSS 2.0 feeds. |
---|
3 | |
---|
4 | (This is the BSD license, based on the template at |
---|
5 | http://www.opensource.org/licenses/bsd-license.php ) |
---|
6 | |
---|
7 | Copyright (c) 2003, Dalke Scientific Software, LLC |
---|
8 | |
---|
9 | All rights reserved. |
---|
10 | |
---|
11 | Redistribution and use in source and binary forms, with or without |
---|
12 | modification, are permitted provided that the following conditions are |
---|
13 | met: |
---|
14 | |
---|
15 | * Redistributions of source code must retain the above copyright |
---|
16 | notice, this list of conditions and the following disclaimer. |
---|
17 | |
---|
18 | * Redistributions in binary form must reproduce the above copyright |
---|
19 | notice, this list of conditions and the following disclaimer in |
---|
20 | the documentation and/or other materials provided with the |
---|
21 | distribution. |
---|
22 | |
---|
23 | * Neither the name of the Dalke Scientific Software, LLC, Andrew |
---|
24 | Dalke, nor the names of its contributors may be used to endorse or |
---|
25 | promote products derived from this software without specific prior |
---|
26 | written permission. |
---|
27 | |
---|
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
---|
29 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
---|
30 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
---|
31 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
---|
32 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
33 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
---|
34 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
---|
35 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
---|
36 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
---|
37 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
---|
38 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
39 | """ |
---|
40 | |
---|
# Package metadata.
# NOTE(review): assigning __name__ overrides the module's own __name__ at
# import time -- confirm nothing depends on the interpreter-supplied value.
__name__ = "PyRSS2Gen"
__version__ = (1, 1, 0)
__author__ = "Andrew Dalke <dalke@dalkescientific.com>"

# "<name>-<dotted version>", i.e. "PyRSS2Gen-1.1.0"; this is the default
# value of the RSS2 'generator' argument.
_generator_name = __name__ + "-" + ".".join(map(str, __version__))
---|
46 | |
---|
47 | import datetime |
---|
48 | |
---|
49 | import sys |
---|
50 | |
---|
51 | if sys.version_info[0] == 3: |
---|
52 | # Python 3 |
---|
53 | basestring = str |
---|
54 | from io import StringIO |
---|
55 | else: |
---|
56 | # Python 2 |
---|
57 | try: |
---|
58 | from cStringIO import StringIO |
---|
59 | except ImportError: |
---|
60 | # Very old (or memory constrained) systems might |
---|
61 | # have left out the compiled C version. Fall back |
---|
62 | # to the pure Python one. Haven't seen this sort |
---|
63 | # of system since the early 2000s. |
---|
64 | from StringIO import StringIO |
---|
65 | |
---|
66 | # Could make this the base class; will need to add 'publish' |
---|
67 | |
---|
68 | |
---|
class WriteXmlMixin:
    """Mixin that serializes any object exposing ``publish(handler)``.

    The host class must define ``publish``, which receives a SAX-style
    XML generator and emits the object's content on it.
    """

    def write_xml(self, outfile, encoding="iso-8859-1"):
        """Write the whole XML document to the file-like ``outfile``."""
        from xml.sax.saxutils import XMLGenerator

        generator = XMLGenerator(outfile, encoding)
        generator.startDocument()
        self.publish(generator)
        generator.endDocument()

    def to_xml(self, encoding="iso-8859-1"):
        """Return the whole XML document as the contents of a StringIO."""
        buf = StringIO()
        self.write_xml(buf, encoding)
        return buf.getvalue()
---|
81 | |
---|
82 | |
---|
def _element(handler, name, obj, d=None):
    """Emit ``obj`` on the SAX ``handler`` as the element ``name``.

    handler -- object providing startElement / characters / endElement
    name    -- tag name (used only for the string / None case)
    obj     -- a string, None, or an object with a ``publish(handler)``
               method
    d       -- optional attribute mapping for the string / None case;
               omitted or None means "no attributes"

    A string is written as ``<name ...>text</name>`` and None as an empty
    element.  Any other object is asked to publish itself, in which case
    ``name`` and ``d`` are not used.
    """
    if obj is None or isinstance(obj, basestring):
        # special-case handling to make the API easier
        # to use for the common case.
        # A fresh dict per call replaces the former shared mutable
        # default argument.
        attrs = {} if d is None else d
        handler.startElement(name, attrs)
        if obj is not None:
            handler.characters(obj)
        handler.endElement(name)
    else:
        # It better know how to emit the correct XML.
        obj.publish(handler)
---|
94 | |
---|
95 | |
---|
def _opt_element(handler, name, obj):
    """Emit ``obj`` via ``_element`` unless it is None (then do nothing)."""
    if obj is not None:
        _element(handler, name, obj)
---|
100 | |
---|
101 | |
---|
def _format_date(dt):
    """Convert a datetime into an RFC 822 formatted date string.

    A naive datetime must already be in GMT and is formatted as-is.
    A timezone-aware datetime is shifted to GMT first, so the trailing
    "GMT" label always matches the time that is printed.
    """
    # Looks like:
    #   Sat, 07 Sep 2002 00:00:01 GMT
    # Can't use strftime because its day and month names are locale
    # dependent.  The layout below is based on the old rfc822 module.
    offset = dt.utcoffset()
    if offset is not None:
        # Aware datetime: drop the tzinfo and subtract its UTC offset to
        # obtain the equivalent GMT wall-clock time.
        dt = dt.replace(tzinfo=None) - offset

    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_names[dt.weekday()],
        dt.day,
        month_names[dt.month - 1],
        dt.year, dt.hour, dt.minute, dt.second)
---|
120 | |
---|
121 | |
---|
122 | ## |
---|
123 | # A couple simple wrapper objects for the fields which |
---|
124 | # take a simple value other than a string. |
---|
class IntElement:
    """Publishable wrapper around an integer value.

    Constructed from a tag name and the value to emit.  The value is
    converted with str(), so any object with a suitable string form can
    be published this way.
    """
    # Attributes placed on the emitted element.
    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        """Emit ``<name>str(val)</name>`` on ``handler``."""
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        handler.characters(str(self.val))
        handler.endElement(tag)
---|
143 | |
---|
144 | |
---|
class DateElement:
    """Publishable wrapper around a datetime.datetime.

    Constructed from a tag name and the datetime to emit.  On publish
    the datetime is rendered as an RFC 2822 timestamp (4-digit year).
    """

    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        """Emit the formatted timestamp as the text of element ``name``."""
        tag = self.name
        stamp = _format_date(self.dt)
        _element(handler, tag, stamp)
---|
158 | #### |
---|
159 | |
---|
160 | |
---|
class Category:
    """A <category> element with an optional ``domain`` attribute."""

    def __init__(self, category, domain=None):
        self.category, self.domain = category, domain

    def publish(self, handler):
        """Emit the category; ``domain`` is written only when it is set."""
        if self.domain is None:
            attrs = {}
        else:
            attrs = {"domain": self.domain}
        _element(handler, "category", self.category, attrs)
---|
172 | |
---|
173 | |
---|
class Cloud:
    """A <cloud> element: empty, with all data carried in attributes."""

    def __init__(self, domain, port, path,
                 registerProcedure, protocol):
        self.domain, self.port, self.path = domain, port, path
        self.registerProcedure = registerProcedure
        self.protocol = protocol

    def publish(self, handler):
        """Emit the empty cloud element; ``port`` is converted with str()."""
        attrs = {}
        attrs["domain"] = self.domain
        attrs["port"] = str(self.port)
        attrs["path"] = self.path
        attrs["registerProcedure"] = self.registerProcedure
        attrs["protocol"] = self.protocol
        _element(handler, "cloud", None, attrs)
---|
191 | |
---|
192 | |
---|
class Image:
    """A channel <image> element.

    ``url``, ``title`` and ``link`` are always emitted; ``width``,
    ``height`` and ``description`` are emitted only when not None.
    """
    # Attributes placed on the <image> element itself.
    element_attrs = {}

    def __init__(self, url, title, link,
                 width=None, height=None, description=None):
        self.url, self.title, self.link = url, title, link
        self.width, self.height = width, height
        self.description = description

    def publish(self, handler):
        """Emit the image element and its children on ``handler``."""
        handler.startElement("image", self.element_attrs)

        required = (("url", self.url),
                    ("title", self.title),
                    ("link", self.link))
        for tag, text in required:
            _element(handler, tag, text)

        # Integer sizes are wrapped so they can publish themselves; any
        # other non-None value is handed on unchanged.
        for tag, size in (("width", self.width), ("height", self.height)):
            if isinstance(size, int):
                size = IntElement(tag, size)
            _opt_element(handler, tag, size)

        _opt_element(handler, "description", self.description)

        handler.endElement("image")
---|
226 | |
---|
227 | |
---|
class Guid:
    """A <guid> element with an explicit ``isPermaLink`` attribute.

    ``isPermaLink`` defaults to true, which is also what readers assume
    when the attribute is omitted.  Hence strings are always permalinks.
    """

    def __init__(self, guid, isPermaLink=1):
        self.guid, self.isPermaLink = guid, isPermaLink

    def publish(self, handler):
        """Emit the guid with isPermaLink written as "true" or "false"."""
        flag = "true" if self.isPermaLink else "false"
        _element(handler, "guid", self.guid, {"isPermaLink": flag})
---|
245 | |
---|
246 | |
---|
class TextInput:
    """A <textInput> element.

    Apparently this is rarely used.
    """
    # Attributes placed on the <textInput> element itself.
    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title, self.description = title, description
        self.name, self.link = name, link

    def publish(self, handler):
        """Emit title, description, name and link, in that order."""
        handler.startElement("textInput", self.element_attrs)
        children = (("title", self.title),
                    ("description", self.description),
                    ("name", self.name),
                    ("link", self.link))
        for tag, text in children:
            _element(handler, tag, text)
        handler.endElement("textInput")
---|
267 | |
---|
268 | |
---|
class Enclosure:
    """An <enclosure> element: empty, with all data in attributes."""

    def __init__(self, url, length, type):
        self.url, self.length, self.type = url, length, type

    def publish(self, handler):
        """Emit the empty enclosure; ``length`` is converted with str()."""
        attrs = {}
        attrs["url"] = self.url
        attrs["length"] = str(self.length)
        attrs["type"] = self.type
        _element(handler, "enclosure", None, attrs)
---|
282 | |
---|
283 | |
---|
class Source:
    """The item's original source (a <source> element), used by aggregators"""

    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        """Emit ``name`` as the element text and ``url`` as its attribute."""
        attrs = {"url": self.url}
        _element(handler, "source", self.name, attrs)
---|
292 | |
---|
293 | |
---|
class SkipHours:
    """A <skipHours> element.

    Built from a list of hours, as integers.
    """
    # Attributes placed on the <skipHours> element itself.
    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        """Emit one <hour> per entry; emit nothing if ``hours`` is falsy."""
        if not self.hours:
            return
        handler.startElement("skipHours", self.element_attrs)
        for hour in self.hours:
            _element(handler, "hour", str(hour))
        handler.endElement("skipHours")
---|
310 | |
---|
311 | |
---|
class SkipDays:
    """A <skipDays> element.

    Built from a list of days as strings.
    """
    # Attributes placed on the <skipDays> element itself.
    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        """Emit one <day> per entry; emit nothing if ``days`` is falsy."""
        if not self.days:
            return
        handler.startElement("skipDays", self.element_attrs)
        for day in self.days:
            _element(handler, "day", day)
        handler.endElement("skipDays")
---|
328 | |
---|
329 | |
---|
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Holds the channel attributes; the "category" elements live under
    ".categories" and the RSS items under ".items".
    """

    # Attributes of the outer <rss> element and of <channel>.
    rss_attrs = {"version": "2.0"}
    element_attrs = {}

    def __init__(self,
                 title,
                 link,
                 description,

                 language=None,
                 copyright=None,
                 managingEditor=None,
                 webMaster=None,
                 pubDate=None,  # a datetime, *in* *GMT*
                 lastBuildDate=None,  # a datetime

                 categories=None,  # list of strings or Category
                 generator=_generator_name,
                 docs="http://blogs.law.harvard.edu/tech/rss",
                 cloud=None,    # a Cloud
                 ttl=None,      # integer number of minutes

                 image=None,     # an Image
                 rating=None,    # a string; I don't know how it's used
                 textInput=None,  # a TextInput
                 skipHours=None,  # a SkipHours with a list of integers
                 skipDays=None,  # a SkipDays with a list of strings

                 items=None,     # list of RSSItems
                 ):
        # Required channel fields.
        self.title, self.link, self.description = title, link, description

        # Optional plain-text and date fields.
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor
        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        # Each instance gets its own list when none is supplied.
        self.categories = [] if categories is None else categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        self.items = [] if items is None else items

    def publish(self, handler):
        """Emit the <rss> document body (channel plus items) on ``handler``."""
        handler.startElement("rss", self.rss_attrs)
        handler.startElement("channel", self.element_attrs)

        # The three required fields come first.
        for tag in ("title", "link", "description"):
            _element(handler, tag, getattr(self, tag))

        self.publish_extensions(handler)

        for tag in ("language", "copyright", "managingEditor", "webMaster"):
            _opt_element(handler, tag, getattr(self, tag))

        # datetime values are wrapped so they publish as formatted dates;
        # anything else non-None is handed on unchanged.
        for tag in ("pubDate", "lastBuildDate"):
            when = getattr(self, tag)
            if isinstance(when, datetime.datetime):
                when = DateElement(tag, when)
            _opt_element(handler, tag, when)

        # Plain strings are promoted to Category objects.
        for entry in self.categories:
            if isinstance(entry, basestring):
                entry = Category(entry)
            entry.publish(handler)

        _opt_element(handler, "generator", self.generator)
        _opt_element(handler, "docs", self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        minutes = self.ttl
        if isinstance(minutes, int):
            minutes = IntElement("ttl", minutes)
        _opt_element(handler, "ttl", minutes)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, "rating", self.rating)

        for part in (self.textInput, self.skipHours, self.skipDays):
            if part is not None:
                part.publish(handler)

        for entry in self.items:
            entry.publish(handler)

        handler.endElement("channel")
        handler.endElement("rss")

    def publish_extensions(self, handler):
        """Hook for subclasses; called right after the three required fields.

        The base implementation emits nothing.
        """
        pass
---|
455 | |
---|
456 | |
---|
class RSSItem(WriteXmlMixin):
    """A single RSS <item>."""
    # Attributes placed on the <item> element itself.
    element_attrs = {}

    def __init__(self,
                 title=None,  # string
                 link=None,   # url as string
                 description=None,  # string
                 author=None,      # email address as string
                 categories=None,  # list of string or Category
                 comments=None,  # url as string
                 enclosure=None,  # an Enclosure
                 guid=None,    # a unique string
                 pubDate=None,  # a datetime
                 source=None,  # a Source
                 ):
        # An item needs at least one of the two text fields.
        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")

        self.title, self.link = title, link
        self.description, self.author = description, author
        # Each instance gets its own list when none is supplied.
        self.categories = [] if categories is None else categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source

    def publish(self, handler):
        """Emit the <item> element and its children on ``handler``."""
        handler.startElement("item", self.element_attrs)

        for tag in ("title", "link"):
            _opt_element(handler, tag, getattr(self, tag))

        self.publish_extensions(handler)

        for tag in ("description", "author"):
            _opt_element(handler, tag, getattr(self, tag))

        # Plain strings are promoted to Category objects.
        for entry in self.categories:
            if isinstance(entry, basestring):
                entry = Category(entry)
            entry.publish(handler)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, "guid", self.guid)

        # A datetime is wrapped so it publishes as a formatted date.
        when = self.pubDate
        if isinstance(when, datetime.datetime):
            when = DateElement("pubDate", when)
        _opt_element(handler, "pubDate", when)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
        """Hook for subclasses; called after the title and link elements.

        The base implementation emits nothing.
        """
        pass
---|