source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/contrib/autolinks.py

main
Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago

Historial Limpio

  • Property mode set to 100755
File size: 6.2 KB
Line 
1"""
2Developed by Massimo Di Pierro
3Released under the web2py license (LGPL)
4
5What does it do?
6
7if html is a variable containing HTML text and urls in the text, when you call
8
9    html = expand_html(html)
10
11it automatically converts the URLs to links and, when possible, embeds the linked object instead.
12In particular it can embed images, videos, audio files, documents (via the Google Docs viewer),
13as well as pages served through an oEmbed service.
14
15
16Google Doc Support
17==================
18Microsoft Word (.DOC, .DOCX)
19Microsoft Excel (.XLS and .XLSX)
20Microsoft PowerPoint 2007 / 2010 (.PPTX)
21Apple Pages (.PAGES)
22Adobe PDF (.PDF)
23Adobe Illustrator (.AI)
24Adobe Photoshop (.PSD)
25Autodesk AutoCad (.DXF)
26Scalable Vector Graphics (.SVG)
27PostScript (.EPS, .PS)
28TrueType (.TTF)
29XML Paper Specification (.XPS)
30
31Oembed Support
32==============
33flickr.com
34youtube.com
35hulu.com
36vimeo.com
37slideshare.net
38qik.com
39polleverywhere.com
40wordpress.com
41revision3.com
42viddler.com
43"""
44from __future__ import print_function
45from gluon._compat import FancyURLopener, urllib_quote
46
47import re
48import cgi
49import sys
50from json import loads
51import urllib
52import uuid
53try:
54    from BeautifulSoup import BeautifulSoup, Comment
55    have_soup = True
56except ImportError:
57    have_soup = False
58
# Matches any http/https URL up to the next whitespace character.
regex_link = re.compile(r'https?://\S+')

# (url pattern, oEmbed endpoint) pairs. A URL matching the pattern is resolved
# by querying the paired endpoint. Patterns are raw strings with literal dots
# escaped, and accept both http and https page URLs.
EMBED_MAPS = [
    (re.compile(r'https?://\S*?flickr\.com/\S*'),
     'http://www.flickr.com/services/oembed/'),
    # Optional subdomain so that both www.youtube.com and youtube.com match.
    (re.compile(r'https?://(?:\S*\.)?youtu(?:\.be|be\.com)/watch\S*'),
     'http://www.youtube.com/oembed'),
    (re.compile(r'https?://www\.hulu\.com/watch/\S*'),
     'http://www.hulu.com/api/oembed.json'),
    (re.compile(r'https?://vimeo\.com/\S*'),
     'http://vimeo.com/api/oembed.json'),
    (re.compile(r'https?://www\.slideshare\.net/[^/]+/\S*'),
     'http://www.slideshare.net/api/oembed/2'),
    (re.compile(r'https?://qik\.com/\S*'),
     'http://qik.com/api/oembed.json'),
    (re.compile(r'https?://www\.polleverywhere\.com/\w+/\S+'),
     'http://www.polleverywhere.com/services/oembed/'),
    (re.compile(r'https?://\S+\.wordpress\.com/\S+'),
     'http://public-api.wordpress.com/oembed/'),
    # The original used glob syntax ("http://*.revision3.com"), which as a
    # regex never matched a subdomain such as www.revision3.com.
    (re.compile(r'https?://(?:\S+\.)?revision3\.com/\S+'),
     'http://revision3.com/api/oembed/'),
    (re.compile(r'https?://\S+\.viddler\.com/\S+'),
     'http://lab.viddler.com/services/oembed/'),
]
83
84
def image(url):
    """Return an ``<img>`` tag pointing at *url*, capped at 100% width.

    The URL is interpolated verbatim; no HTML escaping is applied here.
    """
    template = '<img src="%s" style="max-width:100%%"/>'
    return template % url
87
88
def audio(url):
    """Return an ``<audio>`` player whose single ``<source>`` is *url*.

    The URL is interpolated verbatim; no HTML escaping is applied here.
    """
    template = '<audio controls="controls" style="max-width:100%%"><source src="%s" /></audio>'
    return template % url
91
92
def video(url):
    """Return a ``<video>`` player whose single ``<source>`` is *url*.

    The URL is interpolated verbatim; no HTML escaping is applied here.
    """
    template = '<video controls="controls" style="max-width:100%%"><source src="%s" /></video>'
    return template % url
95
96
def googledoc_viewer(url):
    """Return an ``<iframe>`` that loads *url* through docs.google.com/viewer.

    The document URL is percent-quoted with ``urllib_quote`` before being
    placed in the viewer's ``url`` query parameter.
    """
    quoted_url = urllib_quote(url)
    template = '<iframe src="https://docs.google.com/viewer?url=%s&embedded=true" style="max-width:100%%"></iframe>'
    return template % quoted_url
99
100
def web2py_component(url):
    """Return a placeholder ``<div>`` plus a script that loads *url* into it.

    A fresh random UUID is used as the div id, and the emitted script calls
    the client-side ``web2py_component(url, id)`` function with that id.
    """
    placeholder_id = str(uuid.uuid4())
    snippet = '<div id="%s"></div><script>\nweb2py_component("%s","%s");\n</script>'
    return snippet % (placeholder_id, url, placeholder_id)
104
# Maps a lower-case file extension (without the dot) to the function that
# renders a URL with that extension as embeddable HTML.
EXTENSION_MAPS = {
    # images
    'png': image,
    'gif': image,
    'jpg': image,
    'jpeg': image,
    # audio
    'wav': audio,
    'ogg': audio,
    'mp3': audio,
    # video
    'mov': video,
    'mpe': video,
    'mp4': video,
    'mpg': video,
    'mpg2': video,
    'mpeg': video,
    'mpeg4': video,
    'movie': video,
    'wmv': video,
    # web2py components
    'load': web2py_component,
    # documents rendered through the Google document viewer
    'pdf': googledoc_viewer,
    'doc': googledoc_viewer,
    'docx': googledoc_viewer,
    'ppt': googledoc_viewer,
    'pptx': googledoc_viewer,
    'xls': googledoc_viewer,
    'xlsx': googledoc_viewer,
    'pages': googledoc_viewer,
    'ai': googledoc_viewer,
    'psd': googledoc_viewer,
    # AutoCad drawings are ".dxf"; the original key 'xdf' looks like a typo
    # and is kept only so existing behaviour is not removed.
    'dxf': googledoc_viewer,
    'xdf': googledoc_viewer,
    'svg': googledoc_viewer,
    # PostScript formats are listed in the module docstring but had no entry.
    'eps': googledoc_viewer,
    'ps': googledoc_viewer,
    'ttf': googledoc_viewer,
    'xps': googledoc_viewer,
}
138
139
# URL opener subclass that only overrides the `version` class attribute, so
# requests do not carry urllib's default identification string.
class VimeoURLOpener(FancyURLopener):
    "Vimeo blocks the urllib user agent for some reason"
    # Browser-like value used in place of the default urllib version string.
    version = "Mozilla/4.0"
# Module-level side effect: store an instance on `urllib._urlopener`.
# NOTE(review): on Python 2 this is the opener `urllib.urlopen` falls back
# to; confirm whether it has any effect on Python 3.
urllib._urlopener = VimeoURLOpener()
144
145
def oembed(url):
    """Look up oEmbed metadata for *url*.

    Every (pattern, endpoint) pair in EMBED_MAPS whose pattern matches the
    start of *url* is tried in order. The first endpoint that returns
    parseable JSON wins.

    Returns the decoded JSON response, or an empty dict when no provider
    matches or every matching provider fails. This is best-effort: network
    and parsing errors are swallowed so that the caller can fall back to a
    plain link.
    """
    for pattern, endpoint in EMBED_MAPS:
        if not pattern.match(url):
            continue
        try:
            # The target URL is a query-string value, so it must be
            # percent-encoded (not HTML-escaped); safe='' also encodes '/'.
            request_url = endpoint + '?format=json&url=' + urllib_quote(url, safe='')
            # Use the opener class directly: urllib.urlopen only exists on
            # Python 2, while the opener's open() exists on both.
            response = VimeoURLOpener().open(request_url)
            try:
                data = response.read()
            finally:
                response.close()
            return loads(data)  # json!
        except Exception:
            # Provider unreachable or returned something unparseable;
            # try the next matching provider, if any.
            continue
    return {}
156
157
def extension(url):
    """Return the lower-cased file extension of *url*, or '' if it has none.

    The query string and fragment are ignored, and only the last path
    segment is inspected. A bare host such as ``http://example.ai`` or a
    dotted directory such as ``/v1.2/file`` therefore yields ''.
    """
    # Drop the query string first, then any fragment.
    path = url.split('?')[0].split('#')[0]
    if '://' in path:
        # Discard the scheme and host; keep what follows the first '/'.
        path = path.split('://', 1)[1].partition('/')[2]
    filename = path.rsplit('/', 1)[-1]
    if '.' not in filename:
        return ''
    return filename.rsplit('.', 1)[-1].lower()
160
161
def _native_str(text):
    """Return *text* as the interpreter's native ``str`` type."""
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        # Only reachable on Python 3, where bytes is not str.
        return text.decode('utf8')
    # Python 2 unicode: encode to a utf8 byte string, as the original did.
    return text.encode('utf8')


def expand_one(url, cdict):
    """Return the HTML that should replace a single *url*.

    Resolution order:
      1. an e-mail address (contains '@' but no '://') becomes a mailto link;
      2. oEmbed data, taken from *cdict* when cached there, otherwise fetched
         with oembed() and stored in *cdict* if that is a dict;
      3. if the oEmbed data has 'html', that markup is returned
         (wrapped in ``<embed>`` when it starts with ``<object``);
      4. if it has 'url', that URL replaces the original one;
      5. the URL's extension is looked up in EXTENSION_MAPS;
      6. otherwise a plain ``<a>`` link is returned.

    *cdict* may be None, in which case nothing is cached.

    NOTE(review): the URL is interpolated into HTML without escaping;
    confirm that callers only pass trusted or pre-sanitised text.
    """
    if '@' in url and '://' not in url:
        return '<a href="mailto:%s">%s</a>' % (url, url)
    # try oembed, but first check the cache
    if cdict and url in cdict:
        r = cdict[url]
    else:
        r = oembed(url)
        if isinstance(cdict, dict):
            cdict[url] = r
    # if oembed service
    if 'html' in r:
        # Encoding unconditionally produced bytes on Python 3, which broke
        # the startswith() check below and leaked b'...' into the output.
        html = _native_str(r['html'])
        if html.startswith('<object'):
            return '<embed style="max-width:100%%">%s</embed>' % html
        return html
    elif 'url' in r:
        url = _native_str(r['url'])
    # embed images, video, audio files
    ext = extension(url)
    if ext in EXTENSION_MAPS:
        return EXTENSION_MAPS[ext](url)
    # else regular link
    return '<a href="%(u)s">%(u)s</a>' % dict(u=url)
187
188
def expand_html(html, cdict=None):
    """Return *html* with the URLs found in its text replaced by embeds/links.

    HTML comments are removed first. Text nodes whose parent tag is one of
    a, script, pre, code, embed, object, audio or video are left untouched.
    In every other text node each match of regex_link is replaced by the
    result of expand_one(), and the node is re-parsed as markup.

    *cdict* is passed through to expand_one() as its cache.

    Raises RuntimeError when BeautifulSoup could not be imported.
    """
    if not have_soup:
        raise RuntimeError("Missing BeautifulSoup")
    soup = BeautifulSoup(html)

    # Strip comments before walking the text nodes.
    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    skipped_parents = ('a', 'script', 'pre', 'code', 'embed', 'object', 'audio', 'video')

    def _expand(match):
        return expand_one(match.group(0), cdict)

    for node in soup.findAll(text=True):
        if node.parent.name in skipped_parents:
            continue
        expanded = regex_link.sub(_expand, node)
        node.replaceWith(BeautifulSoup(expanded))
    return str(soup)
201
202
def test():
    """Run expand_html() over a built-in sample document and return the result.

    The sample contains a YouTube link, an image, a ``.load`` component,
    a PDF and a paragraph of plain text.
    """
    sample_markup = """
<h3>Fringilla nisi parturient nullam</h3>
<p>http://www.youtube.com/watch?v=IWBFiI5RrA0</p>
<p>http://www.web2py.com/examples/static/images/logo_bw.png</p>
<p>http://www.web2py.com/examples/default/index.load</p>
<p>http://www.web2py.com/examples/static/web2py_manual_cutl.pdf</p>
<p>Elementum sodales est varius magna leo sociis erat. Nascetur pretium non
ultricies gravida. Condimentum at nascetur tempus. Porttitor viverra ipsum
accumsan neque aliquet. Ultrices vestibulum tempor quisque eget sem eget.
Ornare malesuada tempus dolor dolor magna consectetur. Nisl dui non curabitur
laoreet tortor.</p>
"""
    expanded = expand_html(sample_markup)
    return expanded
217
# Command-line entry point: expand the HTML file named by the first argument,
# or run the built-in sample when no argument is given.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Close the input file deterministically instead of leaking the handle.
        with open(sys.argv[1]) as source_file:
            markup = source_file.read()
        print(expand_html(markup))
    else:
        print(test())
Note: See TracBrowser for help on using the repository browser.