1 | #!/usr/bin/env python |
---|
2 | # -*- coding: utf-8 -*- |
---|
3 | |
---|
4 | """ |
---|
5 | | This file is part of the web2py Web Framework |
---|
6 | | Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu> |
---|
7 | | License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) |
---|
8 | """ |
---|
9 | |
---|
10 | from pydal._compat import xrange |
---|
11 | from yatl.sanitizer import xmlescape |
---|
12 | import re |
---|
13 | |
---|
14 | __all__ = ['highlight'] |
---|
15 | |
---|
16 | |
---|
class all_styles(object):
    """
    Non-data descriptor that builds the Highlighter token tables on demand.

    The tables are produced by ``_get_all_styles`` the first time the
    attribute is read and are then stored on the owner class under the name
    ``all_styles``, so later lookups through that class get the stored dict.

    see:
    https://docs.python.org/2/reference/datamodel.html#implementing-descriptors
    or
    https://docs.python.org/3/reference/datamodel.html#implementing-descriptors
    """

    def __get__(self, instance, owner):
        # Build the tables for the class the attribute was looked up on.
        styles = _get_all_styles(owner)
        # Cache the result as a plain class attribute of the owner.
        owner.all_styles = styles
        return styles
---|
32 | |
---|
33 | |
---|
class Highlighter(object):
    """Does syntax highlighting.

    Source text is scanned with the per-mode token tables stored in the
    ``all_styles`` class attribute and emitted as HTML in which runs of
    tokens sharing a style are wrapped in ``<span style="...">`` elements.
    """

    def __init__(
        self,
        mode,
        link=None,
        styles=None,
    ):
        """
        Initialize highlighter:
            mode = language (PYTHON, WEB2PY, C, CPP, HTML, HTML_PLAIN),
                   case-insensitive
            link = base URL substituted into link-type styles; a trailing
                   '/' is appended when missing
            styles = optional dict mapping token names to CSS strings that
                     override the built-in style of that token

        Raises SyntaxError when mode is not one of the supported names.
        """
        styles = styles or {}
        mode = mode.upper()
        if link and not link.endswith('/'):
            link = link + '/'
        self.link = link
        self.styles = styles
        self.output = []
        self.span_style = None
        # Opening delimiter of the multi-line string currently being
        # scanned; set by python_tokenizer when such a string starts.
        self.strMultilineString = ''
        # Some public modes are aliases of a base mode and differ only in
        # which tokens of that base mode are ignored.
        if mode == 'WEB2PY':
            (mode, self.suppress_tokens) = ('PYTHON', [])
        elif mode == 'PYTHON':
            self.suppress_tokens = ['GOTOHTML']
        elif mode == 'CPP':
            (mode, self.suppress_tokens) = ('C', [])
        elif mode == 'C':
            self.suppress_tokens = ['CPPKEYWORD']
        elif mode == 'HTML_PLAIN':
            (mode, self.suppress_tokens) = ('HTML', ['GOTOPYTHON'])
        elif mode == 'HTML':
            self.suppress_tokens = []
        else:
            raise SyntaxError('Unknown mode: %s' % mode)
        self.mode = mode

    def c_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for C specific highlighting.

        Switches to the token's style and appends the escaped match.
        Never requests a mode change (returns None).
        """

        value = xmlescape(match.group(), quote=False)
        self.change_style(token, style)
        self.output.append(value)

    def python_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for python specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """

        value = xmlescape(match.group(), quote=False)
        if token == 'MULTILINESTRING':
            self.change_style(token, style)
            self.output.append(value)
            # Remember which delimiter opened the string so that only the
            # same delimiter can close it.
            self.strMultilineString = match.group(1)
            return 'PYTHONMultilineString'
        elif token == 'ENDMULTILINESTRING':
            if match.group(1) == self.strMultilineString:
                self.output.append(value)
                self.strMultilineString = ''
                return 'PYTHON'
            # A different triple quote inside the string does not close it:
            # fall through to the generic handling below and stay in the
            # multi-line string mode.
        if style and style[:5] == 'link:':
            # Link styles have the form 'link:<url>;<css>'.
            self.change_style(None, None)
            (url, style) = style[5:].split(';', 1)
            if url == 'None' or url == '':
                self.output.append('<span style="%s">%s</span>'
                                   % (style, value))
            else:
                self.output.append('<a href="%s%s" style="%s">%s</a>'
                                   % (url, value, style, value))
        else:
            self.change_style(token, style)
            self.output.append(value)
        if token == 'GOTOHTML':
            return 'HTML'
        return None

    def html_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for HTML specific highlighting.

        Returns 'PYTHON' after a GOTOPYTHON token, otherwise None.
        """

        value = xmlescape(match.group(), quote=False)
        self.change_style(token, style)
        self.output.append(value)
        if token == 'GOTOPYTHON':
            return 'PYTHON'
        return None

    all_styles = all_styles()

    def highlight(self, data):
        """
        Syntax highlight some source code in the configured mode.
        Returns html version of code.
        """

        table = Highlighter.all_styles
        i = 0
        mode = self.mode
        while i < len(data):
            (tokenizer, rules) = table[mode]
            for (token, o_re, style) in rules:
                if token in self.suppress_tokens:
                    continue
                match = o_re.match(data, i)
                if not match:
                    continue
                if style:
                    # Fill the %(link)s placeholder used by link styles.
                    style = style % dict(link=self.link)
                new_mode = tokenizer(self, token, match, style)
                if new_mode is not None:
                    mode = new_mode
                # Always advance, even on an empty match.
                i += max(1, len(match.group()))
                break
            else:
                # No rule matched: emit the single character unstyled.
                # It must be escaped like every tokenized value, otherwise
                # characters such as '&' or '<' end up as raw markup.
                self.change_style(None, None)
                self.output.append(xmlescape(data[i], quote=False))
                i += 1
        self.change_style(None, None)
        return ''.join(self.output).expandtabs(4)

    def change_style(self, token, style):
        """
        Generate output to change from existing style to another style only.

        A style registered for token in self.styles replaces the given
        style. The special style 'Keep' leaves the current span untouched.
        """

        if token in self.styles:
            style = self.styles[token]
        if self.span_style == style or style == 'Keep':
            return
        if self.span_style is not None:
            self.output.append('</span>')
        if style is not None:
            self.output.append('<span style="%s">' % style)
        self.span_style = style
---|
190 | |
---|
191 | |
---|
def _get_all_styles(cls):
    """
    Build the token tables used for highlighting.

    cls must provide the c_tokenizer, python_tokenizer and html_tokenizer
    callbacks. The result maps a mode name to a pair
    (tokenizer, rules) where rules is an ordered tuple of
    (token name, compiled regex, style) entries; rules are tried in order,
    so earlier entries take precedence. A style is a CSS string, None for
    no styling, 'Keep' to leave the current style unchanged, or a
    'link:%(link)s;<css>' template.
    """
    return {
        'C': (cls.c_tokenizer, (
            # The line break is optional so that a comment on the last
            # line, without trailing newline, is still recognised.
            ('COMMENT', re.compile(r'//.*\n?'),
             'color: green; font-style: italic'),
            ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL),
             'color: green; font-style: italic'),
            ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n',
                                        re.DOTALL), 'color: magenta; font-style: italic'),
            ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
             'color: red'),
            ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|'
                                   + r'signed|unsigned|float|double|'
                                   + r'goto|break|return|continue|asm|'
                                   + r'case|default|if|else|switch|while|for|do|'
                                   + r'struct|union|enum|typedef|'
                                   + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('CPPKEYWORD',
             re.compile(r'(class|private|protected|public|template|new|delete|'
                        + r'this|friend|using|inline|export|bool|throw|try|catch|'
                        + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'),
             'color: blue; font-weight: bold'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
             'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHON': (cls.python_tokenizer, (
            ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
            ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
             'color: red'),
            ('KEYWORD',
             re.compile(r'(def|class|break|continue|del|exec|finally|pass|'
                        + r'print|raise|return|try|except|global|assert|lambda|'
                        + r'yield|for|while|if|elif|else|and|in|is|not|or|import|'
                        + r'from|True|False)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('WEB2PY',
             re.compile(r'(request|response|session|cache|redirect|local_import|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|COL|COLGROUP|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|IS_EXPR|IS_FILE|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|xmlescape|embed64)(?![a-zA-Z0-9_])'),
             'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
            # Like the keyword rules, only match whole words: without the
            # lookahead the prefix of names such as 'selfish' would match,
            # because this rule is tried before IDENTIFIER.
            ('MAGIC', re.compile(r'(?:self|None)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'),
             'color: #FF9966'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
             'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            # Optional line break: see the C COMMENT rule above.
            ('COMMENT', re.compile(r'\#.*\n?'),
             'color: green; font-style: italic'),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHONMultilineString': (cls.python_tokenizer,
                                  (('ENDMULTILINESTRING',
                                    re.compile(r'.*?("""|\'\'\')',
                                               re.DOTALL), 'color: darkred'), )),
        'HTML': (cls.html_tokenizer, (
            ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
            ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'),
             'color: green; font-style: italic'),
            ('XMLCRAP', re.compile(r'<![^>]*>'),
             'color: blue; font-style: italic'),
            ('SCRIPT', re.compile(r'<script .*?</script>',
                                  re.IGNORECASE | re.DOTALL), 'color: black'),
            ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'),
             'color: darkred; font-weight: bold'),
            ('ENDTAG', re.compile(r'/?>'),
             'color: darkred; font-weight: bold'),
        )),
    }
---|
270 | |
---|
271 | |
---|
def highlight(
    code,
    language,
    link='/examples/globals/vars/',
    counter=1,
    styles=None,
    highlight_line=None,
    context_lines=None,
    attributes=None,
):
    """
    Render code as an HTML table with a line-label column and a code column.

    code = source text to render
    language = PYTHON, WEB2PY, C, CPP, HTML or HTML_PLAIN (case-insensitive)
               to get syntax highlighting; anything else is only escaped
    link = base URL handed to the Highlighter
    counter = None for empty line labels, a string to repeat that
              (escaped) string on every line, otherwise the number of the
              first line
    styles = optional dict of CSS overrides; the keys CODE, LINENUMBERS
             and LINEHIGHLIGHT style the table parts, the whole dict is
             also handed to the Highlighter
    highlight_line = line to emphasise; relative to counter when counter is
                     a truthy non-string, otherwise a 0-based index. A line
                     outside the code is ignored.
    context_lines = when set together with highlight_line, keep only that
                    many lines before and after the highlighted line
    attributes = optional dict; keys starting with '_' become attributes of
                 the <table> tag (None value: bare attribute, truthy value:
                 name="value" with double quotes replaced by single quotes)

    Returns the HTML as a string.
    """
    styles = styles or {}
    attributes = attributes or {}
    code_style = styles.get('CODE', None) or '''
font-size: 11px;
font-family: Bitstream Vera Sans Mono,monospace;
background-color: transparent;
margin: 0;
padding: 5px;
border: none;
overflow: auto;
white-space: pre !important;
'''
    linenumbers_style = styles.get('LINENUMBERS', None) or '''
font-size: 11px;
font-family: Bitstream Vera Sans Mono,monospace;
background-color: transparent;
margin: 0;
padding: 5px;
border: none;
color: #A0A0A0;
'''
    linehighlight_style = styles.get('LINEHIGHLIGHT', None) or \
        'background-color: #EBDDE2;'

    # Every mode accepted by Highlighter is listed here, including
    # HTML_PLAIN (HTML without the switch into python on '{{').
    if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML',
                                         'HTML_PLAIN', 'WEB2PY']:
        code = Highlighter(language, link, styles).highlight(code)
    else:
        code = xmlescape(code, quote=False)
    lines = code.split('\n')

    if counter is None:
        linenumbers = [''] * len(lines)
    elif isinstance(counter, str):
        linenumbers = [xmlescape(counter, quote=False)] * len(lines)
    else:
        linenumbers = [str(i + counter) + '.' for i in range(len(lines))]

    if highlight_line:
        if counter and not isinstance(counter, str):
            lineno = highlight_line - counter
        else:
            lineno = highlight_line
        # Reject negative indexes too: they would silently address lines
        # counted from the end of the code.
        if 0 <= lineno < len(lines):
            lines[lineno] = '<span style="%s">%s</span>' % (
                linehighlight_style, lines[lineno])
            linenumbers[lineno] = '<span style="%s">%s</span>' % (
                linehighlight_style, linenumbers[lineno])

        if context_lines:
            # Keep the window [lineno - context, lineno + context]; both
            # bounds are clamped at 0 so that a window lying before the
            # first line yields nothing instead of wrapping around.
            first = max(0, lineno - context_lines)
            stop = max(0, lineno + context_lines + 1)
            lines = lines[first:stop]
            linenumbers = linenumbers[first:stop]

    code = '<br/>'.join(lines)
    numbers = '<br/>'.join(linenumbers)

    items = attributes.items()
    bare = [key[1:].lower() for (key, value) in items
            if key[:1] == '_' and value is None]
    valued = ['%s="%s"' % (key[1:].lower(), str(value).replace('"', "'"))
              for (key, value) in items if key[:1] == '_' and value]
    fa = ' '.join(bare + valued)
    if fa:
        fa = ' ' + fa
    return '<table%s><tr style="vertical-align:top;">' \
           '<td style="min-width:40px; text-align: right;"><pre style="%s">%s</pre></td>' \
           '<td><pre style="%s">%s</pre></td></tr></table>' % (fa, linenumbers_style, numbers, code_style, code)
---|
356 | |
---|
357 | |
---|
if __name__ == '__main__':
    import sys
    # Command line use: first argument is the file to render, second
    # argument is the language name passed to highlight().
    # The context manager closes the file even when reading fails.
    with open(sys.argv[1]) as argfp:
        data = argfp.read()
    print('<html><body>' + highlight(data, sys.argv[2]) + '</body></html>')
---|