source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/contrib/markmin/markmin2html.py

main
Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago

Historial Limpio

  • Property mode set to 100755
File size: 55.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# created by Massimo Di Pierro
4# recreated by Vladyslav Kozlovskyy
5# license MIT/BSD/GPL
6from __future__ import print_function
7import re
8import sys
9import urllib
10import ast
11
12PY2 = sys.version_info[0] == 2
13
14if PY2:
15    from urllib import quote as urllib_quote
16    from string import maketrans
17else:
18    from urllib.parse import quote as urllib_quote
19    maketrans = str.maketrans
20
21
22"""
23TODO: next version should use MathJax
24
25<script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js">
26MathJax.Hub.Config({
27 extensions: ["tex2jax.js","TeX/AMSmath.js","TeX/AMSsymbols.js"],
28 jax: ["input/TeX", "output/HTML-CSS"],
29 tex2jax: {
30     inlineMath: [ ['$','$'], ["\\(","\\)"] ],
31     displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
32 },
33 "HTML-CSS": { availableFonts: ["TeX"] }
34});
35</script>
36"""
37
38__all__ = ['render', 'markmin2html', 'markmin_escape']
39
40__doc__ = """
41# Markmin markup language
42
43## About
44
45This is a new markup language that we call markmin designed to produce high quality scientific papers and books and also put them online. We provide serializers for html, latex and pdf. It is implemented in the ``markmin2html`` function in the ``markmin2html.py``.
46
47Example of usage:
48
49``
50m = "Hello **world** [[link http://web2py.com]]"
51from markmin2html import markmin2html
52print(markmin2html(m))
53from markmin2latex import markmin2latex
54print(markmin2latex(m))
55from markmin2pdf import markmin2pdf # requires pdflatex
56print(markmin2pdf(m))
57``
58====================
59# This is a test block
60  with new features:
61This is a blockquote with
62a list with tables in it:
63-----------
64  This is a paragraph before list.
65  You can continue paragraph on the
66  next lines.
67
68  This is an ordered list with tables:
69  + Item 1
70  + Item 2
71  + --------
72    aa|bb|cc
73    11|22|33
74    --------:tableclass1[tableid1]
75  + Item 4
76    -----------
77     T1| T2| t3
78    ===========
79    aaa|bbb|ccc
80    ddd|fff|ggg
81    123|0  |5.0
82    -----------:tableclass1
83-----------:blockquoteclass[blockquoteid]
84
85This this a new paragraph
86with a followed table.
87Table has header, footer, sections,
88odd and even rows:
89-------------------------------
90**Title 1**|**Title 2**|**Title 3**
91==============================
92data 1     | data 2    |  2.00
93data 3     |data4(long)| 23.00
94           |data 5     | 33.50
95==============================
96New section|New data   |  5.00
97data 1     |data2(long)|100.45
98           |data 3     | 12.50
99data 4     | data 5    |   .33
100data 6     |data7(long)|  8.01
101           |data 8     |   514
102==============================
103Total:     | 9 items   |698,79
104------------------------------:tableclass1[tableid2]
105
106## Multilevel
107   lists
108
109Now lists can be multilevel:
110
111+ Ordered item 1 on level 1.
112  You can continue item text on
113  next strings
114
115. paragraph in an item
116
117++. Ordered item 1 of sublevel 2 with
118    a paragraph (paragraph can start
119    with point after plus or minus
120    characters, e.g. **++.** or **--.**)
121
122++. This is another item. But with 3 paragraphs,
123    blockquote and sublists:
124
125.. This is the second paragraph in the item. You
126   can add paragraphs to an item, using point
127   notation, where first characters in the string
128   are sequence of points with space between
129   them and another string. For example, this
130   paragraph (in sublevel 2) starts with two points:
131   ``.. This is the second paragraph...``
132
133.. ----------
134     ### this is a blockquote in a list
135
136     You can use blockquote with headers, paragraphs,
137     tables and lists in it:
138
139     Tables can have or have not header and footer.
140     This table is defined without any header
141     and footer in it:
142     ---------------------
143     red  |fox     | 0
144     blue |dolphin | 1000
145     green|leaf    | 10000
146     ---------------------
147   ----------
148
149.. This is yet another paragraph in the item.
150
151--- This is an item of unordered list **(sublevel 3)**
152--- This is the second item of the unordered list ''(sublevel 3)''
153
154++++++ This is a single item of ordered list in sublevel 6
155.... and this is a paragraph in sublevel 4
156---. This is a new item with paragraph in sublevel 3.
157++++ Start ordered list in sublevel 4 with code block: ``
158line 1
159  line 2
160     line 3
161``
162++++. Yet another item with code block (we need to indent \`\` to add code block as part of item):
163 ``
164  line 1
165line 2
166  line 3
167``
168 This item finishes with this paragraph.
169
170... Item in sublevel 3 can be continued with paragraphs.
171
172... ``
173  this is another
174code block
175    in the
176  sublevel 3 item
177``
178
179+++ The last item in sublevel 3
180.. This is a continuous paragraph for item 2 in sublevel 2.
181   You can use such structure to create difficult structured
182   documents.
183
184++ item 3 in sublevel 2
185-- item 1 in sublevel 2 (new unordered list)
186-- item 2 in sublevel 2
187-- item 3 in sublevel 2
188
189++ item 1 in sublevel 2 (new ordered list)
190++ item 2 in sublevel 2
191++ item 3 in sublevle 2
192
193+ item 2 in level 1
194+ item 3 in level 1
195- new unordered list (item 1 in level 1)
196- level 2 in level 1
197
198- level 3 in level 1
199- level 4 in level 1
200## This is the last section of the test
201
202Single paragraph with '----' in it will be turned into separator:
203
204-----------
205
206And this is the last paragraph in
207the test. Be happy!
208
209====================
210
211## Why?
212
213We wanted a markup language with the following requirements:
214- less than 300 lines of functional code
215- easy to read
216- secure
217- support table, ul, ol, code
218- support html5 video and audio elements (html serialization only)
219- can align images and resize them
220- can specify class for tables, blockquotes and code elements
221- can add anchors
222- does not use _ for markup (since it creates odd behavior)
223- automatically links urls
224- fast
225- easy to extend
226- supports latex and pdf including references
227- allows to describe the markup in the markup (this document is generated from markmin syntax)
228
229(results depend on text but in average for text ~100K markmin is 30% faster than markdown, for text ~10K it is 10x faster)
230
231The [[web2py book http://www.lulu.com/product/paperback/web2py-%283rd-edition%29/12822827]] published by lulu, for example, was entirely generated with markmin2pdf from the online [[web2py wiki http://www.web2py.com/book]]
232
233## Download
234
235- http://web2py.googlecode.com/hg/gluon/contrib/markmin/markmin2html.py
236- http://web2py.googlecode.com/hg/gluon/contrib/markmin/markmin2latex.py
237- http://web2py.googlecode.com/hg/gluon/contrib/markmin/markmin2pdf.py
238
239markmin2html.py and markmin2latex.py are single files and have no web2py dependence. Their license is BSD.
240
241## Examples
242
243### Bold, italic, code and links
244
245------------------------------------------------------------------------------
246**SOURCE**                                    | **OUTPUT**
247==============================================================================
248``# title``                                   | **title**
249``## section``                                | **section**
250``### subsection``                            | **subsection**
251``**bold**``                                  | **bold**
252``''italic''``                                | ''italic''
253``~~strikeout~~``                             | ~~strikeout~~
254``!`!`verbatim`!`!``                          | ``verbatim``
255``\`\`color with **bold**\`\`:red``           | ``color with **bold**``:red
256``\`\`many colors\`\`:color[blue:#ffff00]``   | ``many colors``:color[blue:#ffff00]
257``http://google.com``                         | http://google.com
258``[[**click** me #myanchor]]``                | [[**click** me #myanchor]]
259``[[click me [extra info] #myanchor popup]]`` | [[click me [extra info] #myanchor popup]]
260-------------------------------------------------------------------------------
261
262### More on links
263
264The format is always ``[[title link]]`` or ``[[title [extra] link]]``. Notice you can nest bold, italic, strikeout and code inside the link ``title``.
265
266### Anchors [[myanchor]]
267
268You can place an anchor anywhere in the text using the syntax ``[[name]]`` where ''name'' is the name of the anchor.
269You can then link the anchor with [[link #myanchor]], i.e. ``[[link #myanchor]]`` or [[link with an extra info [extra info] #myanchor]], i.e.
270``[[link with an extra info [extra info] #myanchor]]``.
271
272### Images
273
274[[alt-string for the image [the image title] http://www.web2py.com/examples/static/web2py_logo.png right 200px]]
275This paragraph has an image aligned to the right with a width of 200px. Its is placed using the code
276
277``[[alt-string for the image [the image title] http://www.web2py.com/examples/static/web2py_logo.png right 200px]]``.
278
279### Unordered Lists
280
281``
282- Dog
283- Cat
284- Mouse
285``
286
287is rendered as
288- Dog
289- Cat
290- Mouse
291
292Two new lines between items break the list in two lists.
293
294### Ordered Lists
295
296``
297+ Dog
298+ Cat
299+ Mouse
300``
301
302is rendered as
303+ Dog
304+ Cat
305+ Mouse
306
307
308### Multilevel Lists
309
310``
311+ Dogs
312 -- red
313 -- brown
314 -- black
315+ Cats
316 -- fluffy
317 -- smooth
318 -- bald
319+ Mice
320 -- small
321 -- big
322 -- huge
323``
324
325is rendered as
326+ Dogs
327 -- red
328 -- brown
329 -- black
330+ Cats
331 -- fluffy
332 -- smooth
333 -- bald
334+ Mice
335 -- small
336 -- big
337 -- huge
338
339
340### Tables (with optional header and/or footer)
341
342Something like this
343``
344-----------------
345**A**|**B**|**C**
346=================
347  0  |  0  |  X
348  0  |  X  |  0
349  X  |  0  |  0
350=================
351**D**|**F**|**G**
352-----------------:abc[id]
353``
354is a table and is rendered as
355-----------------
356**A**|**B**|**C**
357=================
3580 | 0 | X
3590 | X | 0
360X | 0 | 0
361=================
362**D**|**F**|**G**
363-----------------:abc[id]
364Four or more dashes delimit the table and | separates the columns.
365The ``:abc``, ``:id[abc_1]`` or ``:abc[abc_1]`` at the end sets the class and/or id for the table and it is optional.
366
367### Blockquote
368
369A table with a single cell is rendered as a blockquote:
370
371-----
372Hello world
373-----
374
375Blockquote can contain headers, paragraphs, lists and tables:
376
377``
378-----
379  This is a paragraph in a blockquote
380
381  + item 1
382  + item 2
383  -- item 2.1
384  -- item 2.2
385  + item 3
386
387  ---------
388  0 | 0 | X
389  0 | X | 0
390  X | 0 | 0
391  ---------:tableclass1
392-----
393``
394
395is rendered as:
396-----
397  This is a paragraph in a blockquote
398
399  + item 1
400  + item 2
401  -- item 2.1
402  -- item 2.2
403  + item 3
404
405  ---------
406  0 | 0 | X
407  0 | X | 0
408  X | 0 | 0
409  ---------:tableclass1
410-----
411
412
413### Code, ``<code>``, escaping and extra stuff
414
415``
416def test():
417    return "this is Python code"
418``:python
419
420Optionally a ` inside a ``!`!`...`!`!`` block can be inserted escaped with !`!.
421
422**NOTE:** You can escape markmin constructions (\\'\\',\`\`,\*\*,\~\~,\[,\{,\]\},\$,\@) with '\\\\' character:
423 so \\\\`\\\\` can replace !`!`! escape string
424
425The ``:python`` after the markup is also optional. If present, by default, it is used to set the class of the <code> block.
426The behavior can be overridden by passing an argument ``extra`` to the ``render`` function. For example:
427
428``
429markmin2html("!`!!`!aaa!`!!`!:custom",
430             extra=dict(custom=lambda text: 'x'+text+'x'))
431``:python
432
433generates
434
435``'xaaax'``:python
436
437(the ``!`!`...`!`!:custom`` block is rendered by the ``custom=lambda`` function passed to ``render``).
438
439### Line breaks
440
441``[[NEWLINE]]`` tag is used to break lines:
442``
443#### Multiline [[NEWLINE]]
444   title
445paragraph [[NEWLINE]]
446with breaks[[NEWLINE]]in it
447``
448generates:
449
450#### Multiline [[NEWLINE]]
451   title
452paragraph [[NEWLINE]]
453with breaks[[NEWLINE]]in it
454
455
456### Html5 support
457
458Markmin also supports the <video> and <audio> html5 tags using the notation:
459``
460[[message link video]]
461[[message link audio]]
462
463[[message [title] link video]]
464[[message [title] link audio]]
465``
466where ``message`` will be shown in browsers without HTML5 video/audio tags support.
467
468### Latex and other extensions
469
470Formulas can be embedded into HTML with ''\$\$``formula``\$\$''.
471You can use Google charts to render the formula:
472
473``
474LATEX = '<img src="http://chart.apis.google.com/chart?cht=tx&chl=%s" />'
475markmin2html(text,{'latex':lambda code: LATEX % urllib.quote(code)})
476``
477
478### Code with syntax highlighting
479
480This requires a syntax highlighting tool, such as the web2py CODE helper.
481
482``
483extra={'code_cpp':lambda text: CODE(text,language='cpp').xml(),
484       'code_java':lambda text: CODE(text,language='java').xml(),
485       'code_python':lambda text: CODE(text,language='python').xml(),
486       'code_html':lambda text: CODE(text,language='html').xml()}
487``
488or simple:
489``
490extra={'code':lambda text,lang='python': CODE(text,language=lang).xml()}
491``
492``
493markmin2html(text,extra=extra)
494``
495
496Code can now be marked up as in this example:
497``
498!`!`
499<html><body>example</body></html>
500!`!`:code_html
501``
502OR
503``
504!`!`
505<html><body>example</body></html>
506!`!`:code[html]
507``
508
509### Citations and References
510
511Citations are treated as internal links in html and proper citations in latex if there is a final section called "References". Items like
512
513``
514- [[key]] value
515``
516
517in the References will be translated into Latex
518
519``
520\\bibitem{key} value
521``
522
523Here is an example of usage:
524
525``
526As shown in Ref.!`!`mdipierro`!`!:cite
527
528## References
529
530- [[mdipierro]] web2py Manual, 3rd Edition, lulu.com
531``
532
533### Caveats
534
535``<ul/>``, ``<ol/>``, ``<code/>``, ``<table/>``, ``<blockquote/>``, ``<h1/>``, ..., ``<h6/>`` do not have ``<p>...</p>`` around them.
536
537"""
# The sixteen basic HTML color keywords.
html_colors = ['aqua', 'black', 'blue', 'fuchsia', 'gray', 'green',
               'lime', 'maroon', 'navy', 'olive', 'purple', 'red',
               'silver', 'teal', 'white', 'yellow']

# Single control characters that the expressions below match literally:
# META and DISABLED_META in regex_expand_meta / regex_code, LINK in regex_link.
META = '\x06'
LINK = '\x07'
DISABLED_META = '\x08'
# Image-tag template with one %s slot for a URL-quoted formula.
LATEX = '<img src="http://chart.apis.google.com/chart?cht=tx&chl=%s" />'

# NOTE: every pattern is a raw string so that regex escapes such as \. \+ \-
# \s \[ are never interpreted (and warned about) as Python string escapes.

# @/a/c/f[.ext][/args] references (groups: a, c, f, args).
regex_URL = re.compile(r'@/(?P<a>\w*)/(?P<c>\w*)/(?P<f>\w*(\.\w+)?)(/(?P<args>[\w\.\-/]+))?')
# @{name} or @{name:argument} (groups: a = name, b = argument).
regex_env2 = re.compile(r'@\{(?P<a>[\w\-\.]+?)(\:(?P<b>.*?))?\}')
# A META / DISABLED_META character or four consecutive backticks.
regex_expand_meta = re.compile('(' + META + '|' + DISABLED_META + '|````)')
# $$formula$$ (group: latex).
regex_dd = re.compile(r'\$\$(?P<latex>.*?)\$\$')
# Either the same tokens as regex_expand_meta, or ``text`` with an optional
# :class and [params] suffix (groups: t, c, p); re.S lets text span lines.
regex_code = re.compile(
    '(' + META + '|' + DISABLED_META + r'|````)|(``(?P<t>.+?)``(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[^\]]*)\])?)?)',
    re.S)
# **bold**, ~~strikeout~~ and ''italic'' spans (group: t).
regex_strong = re.compile(r'\*\*(?P<t>[^\s*]+( +[^\s*]+)*)\*\*')
regex_del = re.compile(r'~~(?P<t>[^\s~]+( +[^\s~]+)*)~~')
regex_em = re.compile(r"''(?P<t>([^\s']| |'(?!'))+)''")
# A whole string that is one decimal number (optional sign and exponent).
regex_num = re.compile(r"^\s*[+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)?\s*$")
# Optional line prefix -- 1 to 6 '#', or a run of '.', '+' or '-' with an
# optional trailing '.' -- followed by the rest of the line.
regex_list = re.compile(r'^(?:(?:(#{1,6})|(?:(\.+|\++|\-+)(\.)?))\s*)?(.*)$')
# Optional list prefix followed by a line made only of three or more dashes.
regex_bq_headline = re.compile(r'^(?:(\.+|\++|\-+)(\.)?\s+)?(-{3}-*)$')
# Three or more dashes with an optional :class and [id] suffix (groups: c, p).
regex_tq = re.compile(r'^(-{3}-*)(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[a-zA-Z][_a-zA-Z\-\d]*)\])?)?$')
# proto:scheme://... (groups: p = proto, k = url).
regex_proto = re.compile(r'(?<!["\w>/=])(?P<p>\w+):(?P<k>\w+://[\w\d\-+=?%&/:.]+)', re.M)
# Bare scheme://... urls and name@host tokens (group: k).
regex_auto = re.compile(r'(?<!["\w>/=])(?P<k>\w+://[\w\d\-+_=?%&/:.,;#]+\w|[\w\-.]+@[\w\-.]+)', re.M)
# A LINK character or a [[...]] construct (group: s = inner text).
regex_link = re.compile(r'(' + LINK + r')|\[\[(?P<s>.+?)\]\]', re.S)
# Inner text of [[...]]: title, optional [extra], target and "popup" flag.
regex_link_level2 = re.compile(r'^(?P<t>\S.*?)?(?:\s+\[(?P<a>.+?)\])?(?:\s+(?P<k>\S+))?(?:\s+(?P<p>popup))?\s*$', re.S)
# Same, but ending with a media keyword and an optional width in px.
regex_media_level2 = re.compile(
    r'^(?P<t>\S.*?)?(?:\s+\[(?P<a>.+?)\])?(?:\s+(?P<k>\S+))?\s+(?P<p>img|IMG|left|right|center|video|audio|blockleft|blockright)(?:\s+(?P<w>\d+px))?\s*$',
    re.S)

# A run of backslashes followed by one markmin control character.
regex_markmin_escape = re.compile(r"(\\*)(['`:*~\\[\]{}@\$+\-.#\n])")
# One backslash followed by one markmin control character.
regex_backslash = re.compile(r"\\(['`:*~\\[\]{}@\$+\-.#\n])")
# Translation tables: ttab_in maps each markmin control character to its own
# control-code placeholder, ttab_out maps the placeholders back.
ttab_in = maketrans("'`:*~\\[]{}@$+-.#\n", '\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05')
ttab_out = maketrans('\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05', "'`:*~\\[]{}@$+-.#\n")
# ``name =`` with optional surrounding whitespace (group: name); used by
# make_dict. Raw string so \w and \s are regex escapes, not string escapes.
regex_quote = re.compile(r'(?P<name>\w+?)\s*\=\s*')
573
def local_html_escape(data, quote=False):
    """
    Replace the special characters "&", "<" and ">" with HTML-safe sequences.

    Accepts text and, on Python 3, bytes as well.
    If the optional flag quote is true (it is false by default), both the
    double quote (") and the single quote (') are translated too; the
    single quote becomes "&#x27;".
    """
    if not PY2:
        import html
        if isinstance(data, str):
            return html.escape(data, quote=quote)
        # Non-str (bytes) input: apply the same substitutions by hand.
        substitutions = [(b"&", b"&amp;"),  # Must be done first!
                         (b"<", b"&lt;"),
                         (b">", b"&gt;")]
        if quote:
            substitutions += [(b'"', b"&quot;"), (b'\'', b"&#x27;")]
        for raw, entity in substitutions:
            data = data.replace(raw, entity)
        return data

    import cgi
    escaped = cgi.escape(data, quote)
    if quote:
        # cgi.escape leaves the single quote alone, so finish the job here
        escaped = escaped.replace("'", "&#x27;")
    return escaped
597
def make_dict(b):
    """
    Turn a ``name=value, ...`` argument string into dict-literal source text.

    Every ``name =`` found by regex_quote is rewritten to ``'name':`` and the
    result is wrapped in braces, e.g. ``a=1, b='x'`` gives ``{'a':1, 'b':'x'}``.
    The rewrite is purely textual, so a ``word=`` inside a quoted value is
    rewritten as well. Passing a non-string (e.g. None) raises TypeError.
    """
    # raw string: \g<name> is a regex group reference, not a string escape
    return '{%s}' % regex_quote.sub(r"'\g<name>':", b)
600
601
def safe_eval(node_or_string, env):
    """
    Safely evaluate an expression node or a string containing a Python
    expression.  The string or node provided may only consist of the following
    Python literal structures: strings, numbers (optionally signed, or a
    ``real+imaginary`` complex sum), tuples, lists, dicts, booleans and None,
    plus bare names, which are looked up in ``env``.

    Arguments:
    - node_or_string is the expression source text or an already parsed node
    - env is a mapping with the extra names the expression may reference

    Returns the evaluated object. Raises ValueError('malformed string') for
    any other construct or unknown name; text that is not valid Python makes
    ast.parse raise SyntaxError.
    """
    safe_names = {'None': None, 'True': True, 'False': False}
    safe_names.update(env)

    # basestring/long exist on Python 2 only; never evaluated on Python 3.
    if sys.version_info[0] == 2:
        text_types = (basestring,)
        real_types = (int, long, float)
    else:
        text_types = (str,)
        real_types = (int, float)
    number_types = real_types + (complex,)
    # Python >= 3.8 parses every literal into ast.Constant. The older
    # Str/Num/NameConstant classes are deprecated there (and later removed),
    # so they are only touched on interpreters that still produce them.
    modern_ast = sys.version_info >= (3, 8)

    if isinstance(node_or_string, text_types):
        node_or_string = ast.parse(node_or_string, mode='eval')
    if isinstance(node_or_string, ast.Expression):
        node_or_string = node_or_string.body

    def _literal(node):
        # Return (True, value) for a string/number/bool/None literal node,
        # (False, None) for anything else.
        if modern_ast:
            if isinstance(node, ast.Constant):
                value = node.value
                # bytes and Ellipsis constants are deliberately rejected
                if value is None or isinstance(
                        value, (bool,) + text_types + number_types):
                    return True, value
            return False, None
        if isinstance(node, ast.Str):
            return True, node.s
        if isinstance(node, ast.Num):
            return True, node.n
        name_constant = getattr(ast, 'NameConstant', None)  # Python 3.4-3.7
        if name_constant is not None and isinstance(node, name_constant):
            return True, node.value
        return False, None

    def _number(node):
        # Value of a numeric literal node; bool does not count as a number.
        found, value = _literal(node)
        if found and type(value) in number_types:
            return value
        raise ValueError('malformed string')

    def _signed_number(node):
        # Python 3 parses "-1" as UnaryOp(USub, 1) where Python 2 produced
        # a single negative Num, so the sign is folded back in here.
        if isinstance(node, ast.UnaryOp) and \
                isinstance(node.op, (ast.UAdd, ast.USub)):
            operand = _number(node.operand)
            return +operand if isinstance(node.op, ast.UAdd) else -operand
        return _number(node)

    def _convert(node):
        found, value = _literal(node)
        if found:
            return value
        if isinstance(node, ast.Tuple):
            return tuple(map(_convert, node.elts))
        elif isinstance(node, ast.List):
            return list(map(_convert, node.elts))
        elif isinstance(node, ast.Dict):
            return dict((_convert(k), _convert(v)) for k, v
                        in zip(node.keys, node.values))
        elif isinstance(node, ast.Name):
            if node.id in safe_names:
                return safe_names[node.id]
        elif isinstance(node, ast.UnaryOp):
            return _signed_number(node)
        elif isinstance(node, ast.BinOp) and \
                isinstance(node.op, (ast.Add, ast.Sub)):
            # only "real +/- imaginary" is allowed, i.e. a complex literal
            left = _signed_number(node.left)
            right = _number(node.right)
            if isinstance(left, real_types) and isinstance(right, complex):
                if isinstance(node.op, ast.Add):
                    return left + right
                return left - right
        raise ValueError('malformed string')

    return _convert(node_or_string)
646
647
def markmin_escape(text):
    """
    Return text with every markmin control character (the ones matched by
    regex_markmin_escape) protected by a backslash; backslashes that
    already precede such a character are doubled.
    """
    def _protect(match):
        # the whole match is the backslash run plus the control character
        doubled = match.group(0).replace('\\', '\\\\')
        return '\\' + doubled

    return regex_markmin_escape.sub(_protect, text)
652
653
def replace_autolinks(text, autolinks):
    """
    Replace every url or e-mail-like token found by regex_auto with the
    html returned by autolinks(token).
    """
    def _render(match):
        return autolinks(match.group('k'))

    return regex_auto.sub(_render, text)
656
657
def replace_at_urls(text, url):
    """
    Experimental: replace each reference matched by regex_URL
    (``@/a/c/f`` with an optional ``/args`` tail) with the result of
    url(a=..., c=..., f=..., args=[...], scheme=True, host=True).
    Empty a/c/f parts are passed as None; args is the tail split on '/'.
    """
    def _expand(match, url=url):
        parts = match.groupdict()
        arg_list = (parts['args'] or '').split('/')
        return url(a=parts['a'] or None,
                   c=parts['c'] or None,
                   f=parts['f'] or None,
                   args=arg_list,
                   scheme=True,
                   host=True)

    return regex_URL.sub(_expand, text)
666
667
def replace_components(text, env):
    """
    Expand ``@{name}`` and ``@{name:argument}`` references using env.

    - a name missing from env leaves the reference text unchanged
    - a non-callable value is inserted as str(value)
    - a callable is called and str() of its result is inserted; the argument
      text is first read as ``key=value, ...`` pairs (make_dict + safe_eval)
      and passed as keyword arguments, otherwise it is passed as is
      (None when the reference has no argument)
    - an exception raised by the callable is rendered as 'ERROR: <message>'

    Not perfect but acceptable.
    """
    def u2(match, env=env):
        f = env.get(match.group('a'), match.group(0))
        if callable(f):
            b = match.group('b')
            try:
                b = safe_eval(make_dict(b), env)
            except Exception:
                # best effort: not a key=value list, keep the raw argument
                pass
            try:
                f = f(**b) if isinstance(b, dict) else f(b)
            except Exception as e:
                f = 'ERROR: %s' % e
        # Must run for non-callables too: a callback returning None makes
        # re.sub drop the matched text altogether.
        return str(f)

    return regex_env2.sub(u2, text)
676            except:
677                pass
678            try:
679                f = f(**b) if isinstance(b, dict) else f(b)
680            except Exception as e:
681                f = 'ERROR: %s' % e
682            return str(f)
683
684    text = regex_env2.sub(u2, text)
685    return text
686
687
688def autolinks_simple(url):
689    """
690    it automatically converts the url to link,
691    image, video or audio tag
692    """
693    u_url = url.lower()
694    if '@' in url and '://' not in url:
695        return '<a href="mailto:%s">%s</a>' % (url, url)
696    elif u_url.endswith(('.jpg', '.jpeg', '.gif', '.png')):
697        return '<img src="%s" controls />' % url
698    elif u_url.endswith(('.mp4', '.mpeg', '.mov', '.ogv')):
699        return '<video src="%s" controls></video>' % url
700    elif u_url.endswith(('.mp3', '.wav', '.ogg')):
701        return '<audio src="%s" controls></audio>' % url
702    return '<a href="%s">%s</a>' % (url, url)
703
704
705def protolinks_simple(proto, url):
706    """
707    it converts url to html-string using appropriate proto-prefix:
708    Uses for construction "proto:url", e.g.:
709        "iframe:http://www.example.com/path" will call protolinks()
710        with parameters:
711            proto="iframe"
712            url="http://www.example.com/path"
713    """
714    if proto in ('iframe', 'embed'):  # == 'iframe':
715        return '<iframe src="%s" frameborder="0" allowfullscreen></iframe>' % url
716    # elif proto == 'embed':  # NOTE: embed is a synonym to iframe now
717    #    return '<a href="%s" class="%sembed">%s></a>'%(url,class_prefix,url)
718    elif proto == 'qr':
719        return '<img style="width:100px" src="http://chart.apis.google.com/chart?cht=qr&chs=100x100&chl=%s&choe=UTF-8&chld=H" alt="QR Code" title="QR Code" />' % url
720    return proto + ':' + url
721
722
723def email_simple(email):
724    return '<a href="mailto:%s">%s</a>' % (email, email)
725
726
727def render(text,
728           extra={},
729           allowed={},
730           sep='p',
731           URL=None,
732           environment=None,
733           latex='google',
734           autolinks='default',
735           protolinks='default',
736           class_prefix='',
737           id_prefix='markmin_',
738           pretty_print=False):
739    """
740    Arguments:
741    - text is the text to be processed
742    - extra is a dict like extra=dict(custom=lambda value: value) that process custom code
743      as in " ``this is custom code``:custom "
744    - allowed is a dictionary of list of allowed classes like
745      allowed = dict(code=('python','cpp','java'))
746    - sep can be 'p' to separate text in <p>...</p>
747      or can be 'br' to separate text using <br />
748    - URL -
749    - environment is a dictionary of environment variables (can be accessed with @{variable}
750    - latex -
751    - autolinks is a function to convert auto urls to html-code (default is autolinks(url) )
752    - protolinks is a function to convert proto-urls (e.g."proto:url") to html-code
753      (default is protolinks(proto,url))
754    - class_prefix is a prefix for ALL classes in markmin text. E.g. if class_prefix='my_'
755      then for ``test``:cls class will be changed to "my_cls" (default value is '')
756    - id_prefix is prefix for ALL ids in markmin text (default value is 'markmin_'). E.g.:
757        -- [[id]] will be converted to <span class="anchor" id="markmin_id"></span>
758        -- [[link #id]] will be converted to <a href="#markmin_id">link</a>
759        -- ``test``:cls[id] will be converted to <code class="cls" id="markmin_id">test</code>
760
761    >>> render('this is\\n# a section\\n\\nparagraph')
762    '<p>this is</p><h1>a section</h1><p>paragraph</p>'
763    >>> render('this is\\n## a subsection\\n\\nparagraph')
764    '<p>this is</p><h2>a subsection</h2><p>paragraph</p>'
765    >>> render('this is\\n### a subsubsection\\n\\nparagraph')
766    '<p>this is</p><h3>a subsubsection</h3><p>paragraph</p>'
767    >>> render('**hello world**')
768    '<p><strong>hello world</strong></p>'
769    >>> render('``hello world``')
770    '<code>hello world</code>'
771    >>> render('``hello world``:python')
772    '<code class="python">hello world</code>'
773    >>> render('``\\nhello\\nworld\\n``:python')
774    '<pre><code class="python">hello\\nworld</code></pre>'
775    >>> render('``hello world``:python[test_id]')
776    '<code class="python" id="markmin_test_id">hello world</code>'
777    >>> render('``hello world``:id[test_id]')
778    '<code id="markmin_test_id">hello world</code>'
779    >>> render('``\\nhello\\nworld\\n``:python[test_id]')
780    '<pre><code class="python" id="markmin_test_id">hello\\nworld</code></pre>'
781    >>> render('``\\nhello\\nworld\\n``:id[test_id]')
782    '<pre><code id="markmin_test_id">hello\\nworld</code></pre>'
783    >>> render("''hello world''")
784    '<p><em>hello world</em></p>'
785    >>> render('** hello** **world**')
786    '<p>** hello** <strong>world</strong></p>'
787
788    >>> render('- this\\n- is\\n- a list\\n\\nand this\\n- is\\n- another')
789    '<ul><li>this</li><li>is</li><li>a list</li></ul><p>and this</p><ul><li>is</li><li>another</li></ul>'
790
791    >>> render('+ this\\n+ is\\n+ a list\\n\\nand this\\n+ is\\n+ another')
792    '<ol><li>this</li><li>is</li><li>a list</li></ol><p>and this</p><ol><li>is</li><li>another</li></ol>'
793
794    >>> render("----\\na | b\\nc | d\\n----\\n")
795    '<table><tbody><tr class="first"><td>a</td><td>b</td></tr><tr class="even"><td>c</td><td>d</td></tr></tbody></table>'
796
797    >>> render("----\\nhello world\\n----\\n")
798    '<blockquote><p>hello world</p></blockquote>'
799
800    >>> render('[[myanchor]]')
801    '<p><span class="anchor" id="markmin_myanchor"></span></p>'
802
803    >>> render('[[ http://example.com]]')
804    '<p><a href="http://example.com">http://example.com</a></p>'
805
806    >>> render('[[bookmark [http://example.com] ]]')
807    '<p><span class="anchor" id="markmin_bookmark"><a href="http://example.com">http://example.com</a></span></p>'
808
809    >>> render('[[this is a link http://example.com]]')
810    '<p><a href="http://example.com">this is a link</a></p>'
811
812    >>> render('[[this is an image http://example.com left]]')
813    '<p><img src="http://example.com" alt="this is an image" style="float:left" /></p>'
814
815    >>> render('[[this is an image http://example.com left 200px]]')
816    '<p><img src="http://example.com" alt="this is an image" style="float:left;width:200px" /></p>'
817
818    >>> render("[[Your browser doesn't support <video> HTML5 tag http://example.com video]]")
819    '<p><video controls="controls"><source src="http://example.com" />Your browser doesn\\'t support &lt;video&gt; HTML5 tag</video></p>'
820
821    >>> render("[[Your browser doesn't support <audio> HTML5 tag http://example.com audio]]")
822    '<p><audio controls="controls"><source src="http://example.com" />Your browser doesn\\'t support &lt;audio&gt; HTML5 tag</audio></p>'
823
824    >>> render("[[Your\\nbrowser\\ndoesn't\\nsupport\\n<audio> HTML5 tag http://exam\\\\\\nple.com\\naudio]]")
825    '<p><audio controls="controls"><source src="http://example.com" />Your browser doesn\\'t support &lt;audio&gt; HTML5 tag</audio></p>'
826
827    >>> render('[[this is a **link** http://example.com]]')
828    '<p><a href="http://example.com">this is a <strong>link</strong></a></p>'
829
830    >>> render("``aaa``:custom", extra=dict(custom=lambda text: 'x'+text+'x'))
831    'xaaax'
832
833    >>> print(render(r"$$\int_a^b sin(x)dx$$"))
834    <img src="http://chart.apis.google.com/chart?cht=tx&chl=%5Cint_a%5Eb%20sin%28x%29dx" />
835
836    >>> markmin2html(r"use backslash: \[\[[[mess\[[ag\]]e link]]\]]")
837    '<p>use backslash: [[<a href="link">mess[[ag]]e</a>]]</p>'
838
839    >>> markmin2html("backslash instead of exclamation sign: \``probe``")
840    '<p>backslash instead of exclamation sign: ``probe``</p>'
841
842    >>> render(r"simple image: [[\[[this is an image\]] http://example.com IMG]]!!!")
843    '<p>simple image: <img src="http://example.com" alt="[[this is an image]]" />!!!</p>'
844
845    >>> render(r"simple link no anchor with popup: [[ http://example.com popup]]")
846    '<p>simple link no anchor with popup: <a href="http://example.com" target="_blank">http://example.com</a></p>'
847
848    >>> render("auto-url: http://example.com")
849    '<p>auto-url: <a href="http://example.com">http://example.com</a></p>'
850
851    >>> render("auto-image: (http://example.com/image.jpeg)")
852    '<p>auto-image: (<img src="http://example.com/image.jpeg" controls />)</p>'
853
854    >>> render("qr: (qr:http://example.com/image.jpeg)")
855    '<p>qr: (<img style="width:100px" src="http://chart.apis.google.com/chart?cht=qr&chs=100x100&chl=http://example.com/image.jpeg&choe=UTF-8&chld=H" alt="QR Code" title="QR Code" />)</p>'
856
857    >>> render("embed: (embed:http://example.com/page)")
858    '<p>embed: (<iframe src="http://example.com/page" frameborder="0" allowfullscreen></iframe>)</p>'
859
860    >>> render("iframe: (iframe:http://example.com/page)")
861    '<p>iframe: (<iframe src="http://example.com/page" frameborder="0" allowfullscreen></iframe>)</p>'
862
863    >>> render("title1: [[test message [simple \[test\] title] http://example.com ]] test")
864    '<p>title1: <a href="http://example.com" title="simple [test] title">test message</a> test</p>'
865
866    >>> render("title2: \[\[[[test message [simple title] http://example.com popup]]\]]")
867    '<p>title2: [[<a href="http://example.com" title="simple title" target="_blank">test message</a>]]</p>'
868
869    >>> render("title3: [[ [link w/o anchor but with title] http://www.example.com ]]")
870    '<p>title3: <a href="http://www.example.com" title="link w/o anchor but with title">http://www.example.com</a></p>'
871
872    >>> render("title4: [[ [simple title] http://www.example.com popup]]")
873    '<p>title4: <a href="http://www.example.com" title="simple title" target="_blank">http://www.example.com</a></p>'
874
875    >>> render("title5: [[test message [simple title] http://example.com IMG]]")
876    '<p>title5: <img src="http://example.com" alt="test message" title="simple title" /></p>'
877
878    >>> render("title6: [[[test message w/o title] http://example.com IMG]]")
879    '<p>title6: <img src="http://example.com" alt="[test message w/o title]" /></p>'
880
881    >>> render("title7: [[[this is not a title] [this is a title] http://example.com IMG]]")
882    '<p>title7: <img src="http://example.com" alt="[this is not a title]" title="this is a title" /></p>'
883
884    >>> render("title8: [[test message [title] http://example.com center]]")
885    '<p>title8: <p style="text-align:center"><img src="http://example.com" alt="test message" title="title" /></p></p>'
886
887    >>> render("title9: [[test message [title] http://example.com left]]")
888    '<p>title9: <img src="http://example.com" alt="test message" title="title" style="float:left" /></p>'
889
890    >>> render("title10: [[test message [title] http://example.com right 100px]]")
891    '<p>title10: <img src="http://example.com" alt="test message" title="title" style="float:right;width:100px" /></p>'
892
893    >>> render("title11: [[test message [title] http://example.com center 200px]]")
894    '<p>title11: <p style="text-align:center"><img src="http://example.com" alt="test message" title="title" style="width:200px" /></p></p>'
895
896    >>> render(r"\\[[probe]]")
897    '<p>[[probe]]</p>'
898
899    >>> render(r"\\\\[[probe]]")
900    '<p>\\\\<span class="anchor" id="markmin_probe"></span></p>'
901
902    >>> render(r"\\\\\\[[probe]]")
903    '<p>\\\\[[probe]]</p>'
904
905    >>> render(r"\\\\\\\\[[probe]]")
906    '<p>\\\\\\\\<span class="anchor" id="markmin_probe"></span></p>'
907
908    >>> render(r"\\\\\\\\\[[probe]]")
909    '<p>\\\\\\\\[[probe]]</p>'
910
911    >>> render(r"\\\\\\\\\\\[[probe]]")
912    '<p>\\\\\\\\\\\\<span class="anchor" id="markmin_probe"></span></p>'
913
914    >>> render("``[[ [\\[[probe\]\\]] URL\\[x\\]]]``:red[dummy_params]")
915    '<span style="color: red"><a href="URL[x]" title="[[probe]]">URL[x]</a></span>'
916
917    >>> render("the \\**text**")
918    '<p>the **text**</p>'
919
920    >>> render("the \\``text``")
921    '<p>the ``text``</p>'
922
923    >>> render("the \\\\''text''")
924    "<p>the ''text''</p>"
925
926    >>> render("the [[link [**with** ``<b>title</b>``:red] http://www.example.com]]")
927    '<p>the <a href="http://www.example.com" title="**with** ``&lt;b&gt;title&lt;/b&gt;``:red">link</a></p>'
928
929    >>> render("the [[link \\[**without** ``<b>title</b>``:red\\] http://www.example.com]]")
930    '<p>the <a href="http://www.example.com">link [<strong>without</strong> <span style="color: red">&lt;b&gt;title&lt;/b&gt;</span>]</a></p>'
931
932    >>> render("aaa-META-``code``:text[]-LINK-[[link http://www.example.com]]-LINK-[[image http://www.picture.com img]]-end")
933    '<p>aaa-META-<code class="text">code</code>-LINK-<a href="http://www.example.com">link</a>-LINK-<img src="http://www.picture.com" alt="image" />-end</p>'
934
935    >>> render("[[<a>test</a> [<a>test2</a>] <a>text3</a>]]")
936    '<p><a href="&lt;a&gt;text3&lt;/a&gt;" title="&lt;a&gt;test2&lt;/a&gt;">&lt;a&gt;test&lt;/a&gt;</a></p>'
937
938    >>> render("[[<a>test</a> [<a>test2</a>] <a>text3</a> IMG]]")
939    '<p><img src="&lt;a&gt;text3&lt;/a&gt;" alt="&lt;a&gt;test&lt;/a&gt;" title="&lt;a&gt;test2&lt;/a&gt;" /></p>'
940
941    >>> render("**bold** ''italic'' ~~strikeout~~")
942    '<p><strong>bold</strong> <em>italic</em> <del>strikeout</del></p>'
943
944    >>> render("this is ``a red on yellow text``:c[#FF0000:#FFFF00]")
945    '<p>this is <span style="color: #FF0000;background-color: #FFFF00;">a red on yellow text</span></p>'
946
947    >>> render("this is ``a text with yellow background``:c[:yellow]")
948    '<p>this is <span style="background-color: yellow;">a text with yellow background</span></p>'
949
950    >>> render("this is ``a colored text (RoyalBlue)``:color[rgb(65,105,225)]")
951    '<p>this is <span style="color: rgb(65,105,225);">a colored text (RoyalBlue)</span></p>'
952
953    >>> render("this is ``a green text``:color[green:]")
954    '<p>this is <span style="color: green;">a green text</span></p>'
955
956    >>> render("**@{probe:1}**", environment=dict(probe=lambda t:"test %s" % t))
957    '<p><strong>test 1</strong></p>'
958
959    >>> render("**@{probe:t=a}**", environment=dict(probe=lambda t:"test %s" % t, a=1))
960    '<p><strong>test 1</strong></p>'
961
962    >>> render('[[id1 [span **messag** in ''markmin''] ]] ... [[**link** to id [link\\\'s title] #mark1]]')
963    '<p><span class="anchor" id="markmin_id1">span <strong>messag</strong> in markmin</span> ... <a href="#markmin_mark1" title="link\\\'s title"><strong>link</strong> to id</a></p>'
964
965    >>> render('# Multiline[[NEWLINE]]\\n title\\nParagraph[[NEWLINE]]\\nwith breaks[[NEWLINE]]\\nin it')
966    '<h1>Multiline<br /> title</h1><p>Paragraph<br /> with breaks<br /> in it</p>'
967
968    >>> render("anchor with name 'NEWLINE': [[NEWLINE [ ] ]]")
969    '<p>anchor with name \\'NEWLINE\\': <span class="anchor" id="markmin_NEWLINE"></span></p>'
970
971    >>> render("anchor with name 'NEWLINE': [[NEWLINE [newline] ]]")
972    '<p>anchor with name \\'NEWLINE\\': <span class="anchor" id="markmin_NEWLINE">newline</span></p>'
973    """
974    if autolinks == "default":
975        autolinks = autolinks_simple
976    if protolinks == "default":
977        protolinks = protolinks_simple
978    pp = '\n' if pretty_print else ''
979    text = text if text is None or isinstance(text, str) else text.decode('utf8', 'strict')
980
981    if not (isinstance(text, str)):
982        text = str(text or '')
983    text = regex_backslash.sub(lambda m: m.group(1).translate(ttab_in), text)
984    text = text.replace('\x05', '').replace('\r\n', '\n')  # concatenate strings separeted by \\n
985    if URL is not None:
986        text = replace_at_urls(text, URL)
987
988    if latex == 'google':
989        text = regex_dd.sub('``\g<latex>``:latex ', text)
990
991    #############################################################
992    # replace all blocks marked with ``...``:class[id] with META
993    # store them into segments they will be treated as code
994    #############################################################
995    segments = []
996
997    def mark_code(m):
998        g = m.group(0)
999        if g in (META, DISABLED_META):
1000            segments.append((None, None, None, g))
1001            return m.group()
1002        elif g == '````':
1003            segments.append((None, None, None, ''))
1004            return m.group()
1005        else:
1006            c = m.group('c') or ''
1007            p = m.group('p') or ''
1008            if 'code' in allowed and c not in allowed['code']:
1009                c = ''
1010            code = m.group('t').replace('!`!', '`')
1011            segments.append((code, c, p, m.group(0)))
1012        return META
1013
1014    text = regex_code.sub(mark_code, text)
1015
1016    #############################################################
1017    # replace all blocks marked with [[...]] with LINK
1018    # store them into links they will be treated as link
1019    #############################################################
1020    links = []
1021
1022    def mark_link(m):
1023        links.append(None if m.group() == LINK
1024                     else m.group('s'))
1025        return LINK
1026
1027    text = regex_link.sub(mark_link, text)
1028    text = local_html_escape(text)
1029
1030    if protolinks:
1031        text = regex_proto.sub(lambda m: protolinks(*m.group('p', 'k')), text)
1032
1033    if autolinks:
1034        text = replace_autolinks(text, autolinks)
1035
1036    #############################################################
1037    # normalize spaces
1038    #############################################################
1039    strings = text.split('\n')
1040
1041    def parse_title(t, s):  # out, lev, etags, tag, s):
1042        hlevel = str(len(t))
1043        out.extend(etags[::-1])
1044        out.append("<h%s>%s" % (hlevel, s))
1045        etags[:] = ["</h%s>%s" % (hlevel, pp)]
1046        lev = 0
1047        ltags[:] = []
1048        tlev[:] = []
1049        return (lev, 'h')
1050
    def parse_list(t, p, s, tag, lev, mtag, lineno):
        """Open a new ``<li>`` item, adjusting the list nesting first.

        t      - list marker of the line ("+++", "---", ...); its length is
                 the nesting level of the item
        p      - paragraph point captured after the marker ("++." / "--.")
        s      - rest of the line (the item text)
        tag    - list element to open for new levels ('ol' or 'ul')
        lev    - nesting level in effect before this line
        mtag   - tag marker left by the previous line
        lineno - index of the current line in ``strings``

        Works on the enclosing ``out`` / ``etags`` / ``ltags`` / ``tlev``
        lists and returns the updated ``(lev, mtag, lineno)``.
        """
        lent = len(t)
        if lent < lev:  # current item level < previous item level
            # close every tag that was opened deeper than the new level
            while ltags[-1] > lent:
                ltags.pop()
                out.append(etags.pop())
            lev = lent
            tlev[lev:] = []

        if lent > lev:  # current item level > previous item level
            if lev == 0:  # previous line is not a list (paragraph or title)
                # flush whatever was pending and start with empty stacks
                out.extend(etags[::-1])
                ltags[:] = []
                tlev[:] = []
                etags[:] = []
            if pend and mtag == '.':  # paragraph in a list:
                # close the paragraph opened by the previous line
                out.append(etags.pop())
                ltags.pop()
            # open one list element per missing level
            for i in range(lent - lev):
                out.append('<' + tag + '>' + pp)
                etags.append('</' + tag + '>' + pp)
                lev += 1
                ltags.append(lev)
                tlev.append(tag)
        elif lent == lev:
            if tlev[-1] != tag:
                # type of list is changed (ul<=>ol):
                # close everything recorded for this level, then reopen the
                # level with the new list tag
                for i in range(ltags.count(lent)):
                    ltags.pop()
                    out.append(etags.pop())
                tlev[-1] = tag
                out.append('<' + tag + '>' + pp)
                etags.append('</' + tag + '>' + pp)
                ltags.append(lev)
            else:
                # more than one open tag is recorded for this level: close
                # the most recent one before starting the new item
                if ltags.count(lev) > 1:
                    out.append(etags.pop())
                    ltags.pop()
        mtag = 'l'
        out.append('<li>')
        etags.append('</li>' + pp)
        ltags.append(lev)
        # an item whose text starts with '-' may open a table or blockquote
        if s[:1] == '-':
            (s, mtag, lineno) = parse_table_or_blockquote(s, mtag, lineno)
        if p and mtag == 'l':
            # "++." / "--." form: the item text is emitted as a list paragraph
            (lev, mtag, lineno) = parse_point(t, s, lev, '', lineno)
        else:
            out.append(s)

        return (lev, mtag, lineno)
1101
    def parse_point(t, s, lev, mtag, lineno):
        """Emit a paragraph that belongs to a list item ('.' marker lines).

        t      - marker of the line; its length is the target nesting level
        s      - rest of the line (the paragraph text)
        lev    - nesting level in effect before this line
        mtag   - tag marker left by the previous line
        lineno - index of the current line in ``strings``

        Works on the enclosing ``out`` / ``etags`` / ``ltags`` / ``tlev``
        lists and returns the updated ``(lev, mtag, lineno)``.
        """
        lent = len(t)
        if lent > lev:
            # deeper than the current list: let parse_list open the missing
            # levels (as 'ul') and come back here through its '.' handling
            return parse_list(t, '.', s, 'ul', lev, mtag, lineno)
        elif lent < lev:
            # shallower: close every tag opened deeper than the new level
            while ltags[-1] > lent:
                ltags.pop()
                out.append(etags.pop())
            lev = lent
            tlev[lev:] = []
            mtag = ''
        elif lent == lev:
            # same level: close the paragraph opened by the previous line
            if pend and mtag == '.':
                out.append(etags.pop())
                ltags.pop()
        # br is non-empty only when lines are separated with <br />
        if br and mtag in ('l', '.'):
            out.append(br)
        if s == META:
            # a bare code placeholder is not wrapped in a paragraph
            mtag = ''
        else:
            mtag = '.'
            if s[:1] == '-':
                (s, mtag, lineno) = parse_table_or_blockquote(s, mtag, lineno)
            # still a plain paragraph (no table/blockquote was produced)
            if mtag == '.':
                out.append(pbeg)
                if pend:
                    etags.append(pend)
                    ltags.append(lev)
        out.append(s)
        return (lev, mtag, lineno)
1133
    def parse_table_or_blockquote(s, mtag, lineno):
        """Turn a line of dashes into a table, a blockquote or ``<hr />``.

        s      - the current (stripped) line
        mtag   - tag marker left by the previous line
        lineno - index of the current line in ``strings``

        When ``s`` is not made of at least four '-' characters only, or it is
        the last line, the arguments are returned unchanged. Otherwise the
        following lines are consumed and ``(html, 'q', lineno)`` is returned,
        where ``lineno`` is the index of the last line that was consumed.
        """
        # check next line. If next line :
        # - is empty -> this is an <hr /> tag
        # - consists '|' -> table
        # - consists other characters -> blockquote
        if (lineno + 1 >= strings_len or
                not (s.count('-') == len(s) and len(s) > 3)):
            return (s, mtag, lineno)

        lineno += 1
        s = strings[lineno].strip()
        if s:
            if '|' in s:
                # table
                tout = []  # rows collected since the last '=====' separator
                thead = []
                tbody = []
                rownum = 0
                t_id = ''
                t_cls = ''

                # parse table:
                while lineno < strings_len:
                    s = strings[lineno].strip()
                    if s[:1] == '=':
                        # header or footer
                        if s.count('=') == len(s) and len(s) > 3:
                            # the first separator turns the collected rows
                            # into the header, later ones add them to the body
                            if not thead:  # if thead list is empty:
                                thead = tout
                            else:
                                tbody.extend(tout)
                            tout = []
                            rownum = 0
                            lineno += 1
                            continue

                    # regex_tq is defined elsewhere; as used here it matches
                    # the closing line and captures an optional class (group
                    # 'c') and id (group 'p')
                    m = regex_tq.match(s)
                    if m:
                        t_cls = m.group('c') or ''
                        t_id = m.group('p') or ''
                        break

                    # row classes: "first" for row 0, "even" for odd indexes
                    if rownum % 2:
                        tr = '<tr class="even">'
                    else:
                        tr = '<tr class="first">' if rownum == 0 else '<tr>'
                    # cells matching regex_num get the "num" class
                    tout.append(tr + ''.join(['<td%s>%s</td>' % (
                        ' class="num"'
                        if regex_num.match(f) else '',
                        f.strip()
                    ) for f in s.split('|')]) + '</tr>' + pp)
                    rownum += 1
                    lineno += 1

                # a class named 'id' is not emitted as a class attribute
                t_cls = ' class="%s%s"' % (class_prefix, t_cls) \
                    if t_cls and t_cls != 'id' else ''
                t_id = ' id="%s%s"' % (id_prefix, t_id) if t_id else ''
                s = ''
                if thead:
                    s += '<thead>' + pp + ''.join([l for l in thead]) + '</thead>' + pp
                if not tbody:  # tbody strings are in tout list
                    tbody = tout
                    tout = []
                if tbody:  # if tbody list is not empty:
                    s += '<tbody>' + pp + ''.join([l for l in tbody]) + '</tbody>' + pp
                # rows left after the last separator form the footer
                if tout:  # tfoot is not empty:
                    s += '<tfoot>' + pp + ''.join([l for l in tout]) + '</tfoot>' + pp
                s = '<table%s%s>%s%s</table>%s' % (t_cls, t_id, pp, s, pp)
                # NOTE: not returned - the final return always reports 'q'
                mtag = 't'
            else:
                # parse blockquote:
                bq_begin = lineno
                t_mode = False  # embedded table
                t_cls = ''
                t_id = ''

                # search blockquote closing line:
                while lineno < strings_len:
                    s = strings[lineno].strip()
                    if not t_mode:
                        m = regex_tq.match(s)
                        if m:
                            # a closing line only ends the blockquote when no
                            # table row ('|') follows it
                            if (lineno + 1 == strings_len or
                                        '|' not in strings[lineno + 1]):
                                t_cls = m.group('c') or ''
                                t_id = m.group('p') or ''
                                break

                        # a headline followed by a non-empty line starts an
                        # embedded table/blockquote
                        if regex_bq_headline.match(s):
                            if (lineno + 1 < strings_len and
                                    strings[lineno + 1].strip()):
                                t_mode = True
                            lineno += 1
                            continue
                    elif regex_tq.match(s):
                        # closing line of the embedded block
                        t_mode = False
                        lineno += 1
                        continue

                    lineno += 1

                t_cls = ' class="%s%s"' % (class_prefix, t_cls) \
                    if t_cls and t_cls != 'id' else ''
                t_id = ' id="%s%s"' % (id_prefix, t_id) \
                    if t_id else ''

                # the quoted lines are rendered recursively; only the text is
                # passed, so render() runs with its default options here
                s = '<blockquote%s%s>%s</blockquote>%s' \
                    % (t_cls,
                       t_id,
                       render('\n'.join(strings[bq_begin:lineno])), pp)
                mtag = 'q'
        else:
            # the empty line is handed back to the caller
            s = '<hr />'
            lineno -= 1
            mtag = 'q'
        # the reported tag is always 'q', whatever mtag was set to above
        return (s, 'q', lineno)
1250
1251    if sep == 'p':
1252        pbeg = "<p>"
1253        pend = "</p>" + pp
1254        br = ''
1255    else:
1256        pbeg = pend = ''
1257        br = "<br />" + pp if sep == 'br' else ''
1258
1259    lev = 0  # nesting level of lists
1260    c0 = ''  # first character of current line
1261    out = []  # list of processed lines
1262    etags = []  # trailing tags
1263    ltags = []  # level# correspondent to trailing tag
1264    tlev = []  # list of tags for each level ('ul' or 'ol')
1265    mtag = ''  # marked tag (~last tag) ('l','.','h','p','t'). Used to set <br/>
1266    # and to avoid <p></p> around tables and blockquotes
1267    lineno = 0
1268    strings_len = len(strings)
1269    while lineno < strings_len:
1270        s0 = strings[lineno][:1]
1271        s = strings[lineno].strip()
1272        """ #     +     -     .             ---------------------
1273            ##    ++    --    ..   -------  field | field | field  <-title
1274            ###   +++   ---   ...  quote    =====================
1275            ####  ++++  ----  .... -------  field | field | field  <-body
1276            ##### +++++ ----- .....         ---------------------:class[id]
1277        """
1278        pc0 = c0  # first character of previous line
1279        c0 = s[:1]
1280        if c0:  # for non empty strings
1281            if c0 in "#+-.":  # first character is one of: # + - .
1282                (t1, t2, p, ss) = regex_list.findall(s)[0]
1283                # t1 - tag ("###")
1284                # t2 - tag ("+++", "---", "...")
1285                # p - paragraph point ('.')->for "++." or "--."
1286                # ss - other part of string
1287                if t1 or t2:
1288                    # headers and lists:
1289                    if c0 == '#':  # headers
1290                        (lev, mtag) = parse_title(t1, ss)
1291                        lineno += 1
1292                        continue
1293                    elif c0 == '+':  # ordered list
1294                        (lev, mtag, lineno) = parse_list(t2, p, ss, 'ol', lev, mtag, lineno)
1295                        lineno += 1
1296                        continue
1297                    elif c0 == '-':  # unordered list, table or blockquote
1298                        if p or ss:
1299                            (lev, mtag, lineno) = parse_list(t2, p, ss, 'ul', lev, mtag, lineno)
1300                            lineno += 1
1301                            continue
1302                        else:
1303                            (s, mtag, lineno) = parse_table_or_blockquote(s, mtag, lineno)
1304                    elif lev > 0:  # and c0 == '.' # paragraph in lists
1305                        (lev, mtag, lineno) = parse_point(t2, ss, lev, mtag, lineno)
1306                        lineno += 1
1307                        continue
1308
1309            if lev == 0 and (mtag == 'q' or s == META):
1310                # new paragraph
1311                pc0 = ''
1312
1313            if pc0 == '' or (mtag != 'p' and s0 not in (' ', '\t')):
1314                # paragraph
1315                out.extend(etags[::-1])
1316                etags = []
1317                ltags = []
1318                tlev = []
1319                lev = 0
1320                if br and mtag == 'p':
1321                    out.append(br)
1322                if mtag != 'q' and s != META:
1323                    if pend:
1324                        etags = [pend]
1325                    out.append(pbeg)
1326                    mtag = 'p'
1327                else:
1328                    mtag = ''
1329                out.append(s)
1330            else:
1331                if lev > 0 and mtag == '.' and s == META:
1332                    out.append(etags.pop())
1333                    ltags.pop()
1334                    out.append(s)
1335                    mtag = ''
1336                else:
1337                    out.append(' ' + s)
1338        lineno += 1
1339    out.extend(etags[::-1])
1340    text = ''.join(out)
1341
1342    #############################################################
1343    # do strong,em,del
1344    #############################################################
1345    text = regex_strong.sub('<strong>\g<t></strong>', text)
1346    text = regex_del.sub('<del>\g<t></del>', text)
1347    text = regex_em.sub('<em>\g<t></em>', text)
1348
1349    #############################################################
1350    # deal with images, videos, audios and links
1351    #############################################################
1352    def sub_media(m):
1353        t, a, k, p, w = m.group('t', 'a', 'k', 'p', 'w')
1354        if not k:
1355            return m.group(0)
1356        k = local_html_escape(k)
1357        t = t or ''
1358        style = 'width:%s' % w if w else ''
1359        title = ' title="%s"' % local_html_escape(a).replace(META, DISABLED_META) if a else ''
1360        p_begin = p_end = ''
1361        if p == 'center':
1362            p_begin = '<p style="text-align:center">'
1363            p_end = '</p>' + pp
1364        elif p == 'blockleft':
1365            p_begin = '<p style="text-align:left">'
1366            p_end = '</p>' + pp
1367        elif p == 'blockright':
1368            p_begin = '<p style="text-align:right">'
1369            p_end = '</p>' + pp
1370        elif p in ('left', 'right'):
1371            style = ('float:%s' % p) + (';%s' % style if style else '')
1372        if t and regex_auto.match(t):
1373            p_begin = p_begin + '<a href="%s">' % t
1374            p_end = '</a>' + p_end
1375            t = ''
1376        if style:
1377            style = ' style="%s"' % style
1378        if p in ('video', 'audio'):
1379            t = render(t, {}, {}, 'br', URL, environment, latex,
1380                       autolinks, protolinks, class_prefix, id_prefix, pretty_print)
1381            return '<%(p)s controls="controls"%(title)s%(style)s><source src="%(k)s" />%(t)s</%(p)s>' \
1382                   % dict(p=p, title=title, style=style, k=k, t=t)
1383        alt = ' alt="%s"' % local_html_escape(t).replace(META, DISABLED_META) if t else ''
1384        return '%(begin)s<img src="%(k)s"%(alt)s%(title)s%(style)s />%(end)s' \
1385               % dict(begin=p_begin, k=k, alt=alt, title=title, style=style, end=p_end)
1386
1387    def sub_link(m):
1388        t, a, k, p = m.group('t', 'a', 'k', 'p')
1389        if not k and not t:
1390            return m.group(0)
1391        t = t or ''
1392        a = local_html_escape(a) if a else ''
1393        if k:
1394            if '#' in k and ':' not in k.split('#')[0]:
1395                # wikipage, not external url
1396                k = k.replace('#', '#' + id_prefix)
1397            k = local_html_escape(k)
1398            title = ' title="%s"' % a.replace(META, DISABLED_META) if a else ''
1399            target = ' target="_blank"' if p == 'popup' else ''
1400            t = render(t, {}, {}, 'br', URL, environment, latex, None,
1401                       None, class_prefix, id_prefix, pretty_print) if t else k
1402            return '<a href="%(k)s"%(title)s%(target)s>%(t)s</a>' \
1403                   % dict(k=k, title=title, target=target, t=t)
1404        if t == 'NEWLINE' and not a:
1405            return '<br />' + pp
1406        return '<span class="anchor" id="%s">%s</span>' % (
1407            local_html_escape(id_prefix + t),
1408            render(a, {}, {}, 'br', URL,
1409                   environment, latex, autolinks,
1410                   protolinks, class_prefix,
1411                   id_prefix, pretty_print))
1412
1413    parts = text.split(LINK)
1414    text = parts[0]
1415    for i, s in enumerate(links):
1416        if s is None:
1417            html = LINK
1418        else:
1419            html = regex_media_level2.sub(sub_media, s)
1420            if html == s:
1421                html = regex_link_level2.sub(sub_link, html)
1422            if html == s:
1423                # return unprocessed string as a signal of an error
1424                html = '[[%s]]' % s
1425        text += html + parts[i + 1]
1426
1427    #############################################################
1428    # process all code text
1429    #############################################################
1430    def expand_meta(m):
1431        code, b, p, s = segments.pop(0)
1432        if code is None or m.group() == DISABLED_META:
1433            return local_html_escape(s)
1434        if b in extra:
1435            if code[:1] == '\n':
1436                code = code[1:]
1437            if code[-1:] == '\n':
1438                code = code[:-1]
1439            if p:
1440                return str(extra[b](code, p))
1441            else:
1442                return str(extra[b](code))
1443        elif b == 'cite':
1444            return '[' + ','.join('<a href="#%s" class="%s">%s</a>' %
1445                                  (id_prefix + d, b, d) for d in local_html_escape(code).split(',')) + ']'
1446        elif b == 'latex':
1447            return LATEX % urllib_quote(code)
1448        elif b in html_colors:
1449            return '<span style="color: %s">%s</span>' \
1450                   % (b, render(code, {}, {}, 'br', URL, environment, latex,
1451                                autolinks, protolinks, class_prefix, id_prefix, pretty_print))
1452        elif b in ('c', 'color') and p:
1453            c = p.split(':')
1454            fg = 'color: %s;' % c[0] if c[0] else ''
1455            bg = 'background-color: %s;' % c[1] if len(c) > 1 and c[1] else ''
1456            return '<span style="%s%s">%s</span>' \
1457                   % (fg, bg, render(code, {}, {}, 'br', URL, environment, latex,
1458                                     autolinks, protolinks, class_prefix, id_prefix, pretty_print))
1459        cls = ' class="%s%s"' % (class_prefix, b) if b and b != 'id' else ''
1460        id = ' id="%s%s"' % (id_prefix, local_html_escape(p)) if p else ''
1461        beg = (code[:1] == '\n')
1462        end = [None, -1][code[-1:] == '\n']
1463        if beg and end:
1464            return '<pre><code%s%s>%s</code></pre>%s' % (cls, id, local_html_escape(code[1:-1]), pp)
1465        return '<code%s%s>%s</code>' % (cls, id, local_html_escape(code[beg:end]))
1466
1467    text = regex_expand_meta.sub(expand_meta, text)
1468
1469    if environment:
1470        text = replace_components(text, environment)
1471
1472    return text.translate(ttab_out)
1473
1474
def markmin2html(text, extra={}, allowed={}, sep='p',
                 autolinks='default', protolinks='default',
                 class_prefix='', id_prefix='markmin_', pretty_print=False):
    """Convert markmin ``text`` to HTML by delegating to ``render``.

    ``text``, ``extra``, ``allowed`` and ``sep`` are forwarded positionally,
    the remaining arguments by keyword. Options of ``render`` that are not
    parameters of this function are left at ``render``'s own defaults.
    """
    options = dict(
        autolinks=autolinks,
        protolinks=protolinks,
        class_prefix=class_prefix,
        id_prefix=id_prefix,
        pretty_print=pretty_print,
    )
    return render(text, extra, allowed, sep, **options)
1482
1483
def run_doctests():
    """Run ``doctest.testmod()`` with its default arguments."""
    from doctest import testmod
    testmod()
1487
1488
if __name__ == '__main__':
    # Command-line entry point:
    #   -h                      print the module documentation rendered as HTML
    #   -t                      time markmin2html() on the module documentation
    #   file.markmin [css]      render a file, optionally with a stylesheet
    #   (no arguments)          print the usage text and run the doctests
    import sys
    from textwrap import dedent

    # Page skeleton; [1:] drops the newline that follows the opening quotes.
    html = dedent("""
         <!doctype html>
         <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
         <head>
         <meta http-equiv="content-type" content="text/html; charset=utf-8" />
         %(style)s
         <title>%(title)s</title>
         </head>
         <body>
         %(body)s
         </body>
         </html>""")[1:]

    if sys.argv[1:2] == ['-h']:
        style = dedent("""
              <style>
                blockquote { background-color: #FFFAAE; padding: 7px; }
                table { border-collapse: collapse; }
                thead td { border-bottom: 1px solid; }
                tfoot td { border-top: 1px solid; }
                .tableclass1 { background-color: lime; }
                .tableclass1 thead { color: yellow; background-color: green; }
                .tableclass1 tfoot { color: yellow; background-color: green; }
                .tableclass1 .even td { background-color: #80FF7F; }
                .tableclass1 .first td {border-top: 1px solid; }

                td.num { text-align: right; }
                pre { background-color: #E0E0E0; padding: 5px; }
              </style>""")[1:]

        print(html % dict(title="Markmin markup language",
                          style=style,
                          body=markmin2html(__doc__, pretty_print=True)))
    elif sys.argv[1:2] == ['-t']:
        from timeit import Timer

        loops = 1000
        # Time a callable rather than a source string: a string statement is
        # compiled in timeit's own namespace, where ``__doc__`` is the timeit
        # module's docstring instead of this module's, and it would need this
        # file to be importable under the name ``markmin2html``.
        ts = Timer(lambda: markmin2html(__doc__))
        print('timeit "markmin2html(__doc__)":')
        t = min(ts.timeit(loops) for _ in range(3))
        # timeit() returns the total seconds for ``loops`` executions.
        print("%s loops, best of 3: %.3f ms per loop" % (loops, t * 1000.0 / loops))
    elif len(sys.argv) > 1:
        with open(sys.argv[1], 'r') as fargv:
            markmin_text = fargv.read()

        # embed css file from second parameter into html file
        if len(sys.argv) > 2:
            if sys.argv[2].startswith('@'):
                # "@path" links the stylesheet instead of embedding it
                markmin_style = '<link rel="stylesheet" href="' + sys.argv[2][1:] + '"/>'
            else:
                with open(sys.argv[2], 'r') as fargv2:
                    markmin_style = "<style>\n" + fargv2.read() + "</style>"
        else:
            markmin_style = ""

        print(html % dict(title=sys.argv[1], style=markmin_style,
                          body=markmin2html(markmin_text, pretty_print=True)))

    else:
        print("Usage: " + sys.argv[0] + " -h | -t | file.markmin [file.css|@path_to/css]")
        print("where: -h  - print __doc__")
        print("       -t  - timeit __doc__ (for testing purpuse only)")
        print("       file.markmin  [file.css] - process file.markmin + built in file.css (optional)")
        print("       file.markmin  [@path_to/css] - process file.markmin + link path_to/css (optional)")
        run_doctests()
Note: See TracBrowser for help on using the repository browser.