Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/contrib/markmin/markmin2html.py

main

Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago
Historial Limpio
Property mode set to `100755`
File size: 55.7 KB

Line
1	#!/usr/bin/env python
2	# -*- coding: utf-8 -*-
3	# created by Massimo Di Pierro
4	# recreated by Vladyslav Kozlovskyy
5	# license MIT/BSD/GPL
6	from __future__ import print_function
7	import re
8	import sys
9	import urllib
10	import ast
11
12	PY2 = sys.version_info[0] == 2
13
14	if PY2:
15	from urllib import quote as urllib_quote
16	from string import maketrans
17	else:
18	from urllib.parse import quote as urllib_quote
19	maketrans = str.maketrans
20
21
22	"""
23	TODO: next version should use MathJax
24
25	<script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js">
26	MathJax.Hub.Config({
27	extensions: ["tex2jax.js","TeX/AMSmath.js","TeX/AMSsymbols.js"],
28	jax: ["input/TeX", "output/HTML-CSS"],
29	tex2jax: {
30	inlineMath: [ ['$','$'], ["\\(","\\)"] ],
31	displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
32	},
33	"HTML-CSS": { availableFonts: ["TeX"] }
34	});
35	</script>
36	"""
37
# Names exported by ``from markmin2html import *``.
__all__ = ['render', 'markmin2html', 'markmin_escape']
39
40	__doc__ = """
41	# Markmin markup language
42
43	## About
44
45	This is a new markup language that we call markmin designed to produce high quality scientific papers and books and also put them online. We provide serializers for html, latex and pdf. It is implemented in the ``markmin2html`` function in the ``markmin2html.py``.
46
47	Example of usage:
48
49	``
50	m = "Hello world [[link http://web2py.com]]"
51	from markmin2html import markmin2html
52	print(markmin2html(m))
53	from markmin2latex import markmin2latex
54	print(markmin2latex(m))
55	from markmin2pdf import markmin2pdf # requires pdflatex
56	print(markmin2pdf(m))
57	``
58	====================
59	# This is a test block
60	with new features:
61	This is a blockquote with
62	a list with tables in it:
63	-----------
64	This is a paragraph before list.
65	You can continue paragraph on the
66	next lines.
67
68	This is an ordered list with tables:
69	+ Item 1
70	+ Item 2
71	+ --------
72	aa\|bb\|cc
73	11\|22\|33
74	--------:tableclass1[tableid1]
75	+ Item 4
76	-----------
77	T1\| T2\| t3
78	===========
79	aaa\|bbb\|ccc
80	ddd\|fff\|ggg
81	123\|0 \|5.0
82	-----------:tableclass1
83	-----------:blockquoteclass[blockquoteid]
84
85	This is a new paragraph
86	with a followed table.
87	Table has header, footer, sections,
88	odd and even rows:
89	-------------------------------
90	Title 1\|Title 2\|Title 3
91	==============================
92	data 1 \| data 2 \| 2.00
93	data 3 \|data4(long)\| 23.00
94	\|data 5 \| 33.50
95	==============================
96	New section\|New data \| 5.00
97	data 1 \|data2(long)\|100.45
98	\|data 3 \| 12.50
99	data 4 \| data 5 \| .33
100	data 6 \|data7(long)\| 8.01
101	\|data 8 \| 514
102	==============================
103	Total: \| 9 items \|698,79
104	------------------------------:tableclass1[tableid2]
105
106	## Multilevel
107	lists
108
109	Now lists can be multilevel:
110
111	+ Ordered item 1 on level 1.
112	You can continue item text on
113	next strings
114
115	. paragraph in an item
116
117	++. Ordered item 1 of sublevel 2 with
118	a paragraph (paragraph can start
119	with point after plus or minus
120	characters, e.g. ++. or --.)
121
122	++. This is another item. But with 3 paragraphs,
123	blockquote and sublists:
124
125	.. This is the second paragraph in the item. You
126	can add paragraphs to an item, using point
127	notation, where first characters in the string
128	are sequence of points with space between
129	them and another string. For example, this
130	paragraph (in sublevel 2) starts with two points:
131	``.. This is the second paragraph...``
132
133	.. ----------
134	### this is a blockquote in a list
135
136	You can use blockquote with headers, paragraphs,
137	tables and lists in it:
138
139	Tables can have or have not header and footer.
140	This table is defined without any header
141	and footer in it:
142	---------------------
143	red \|fox \| 0
144	blue \|dolphin \| 1000
145	green\|leaf \| 10000
146	---------------------
147	----------
148
149	.. This is yet another paragraph in the item.
150
151	--- This is an item of unordered list (sublevel 3)
152	--- This is the second item of the unordered list ''(sublevel 3)''
153
154	++++++ This is a single item of ordered list in sublevel 6
155	.... and this is a paragraph in sublevel 4
156	---. This is a new item with paragraph in sublevel 3.
157	++++ Start ordered list in sublevel 4 with code block: ``
158	line 1
159	line 2
160	line 3
161	``
162	++++. Yet another item with code block (we need to indent \`\` to add code block as part of item):
163	``
164	line 1
165	line 2
166	line 3
167	``
168	This item finishes with this paragraph.
169
170	... Item in sublevel 3 can be continued with paragraphs.
171
172	... ``
173	this is another
174	code block
175	in the
176	sublevel 3 item
177	``
178
179	+++ The last item in sublevel 3
180	.. This is a continuous paragraph for item 2 in sublevel 2.
181	You can use such structure to create difficult structured
182	documents.
183
184	++ item 3 in sublevel 2
185	-- item 1 in sublevel 2 (new unordered list)
186	-- item 2 in sublevel 2
187	-- item 3 in sublevel 2
188
189	++ item 1 in sublevel 2 (new ordered list)
190	++ item 2 in sublevel 2
191	++ item 3 in sublevel 2
192
193	+ item 2 in level 1
194	+ item 3 in level 1
195	- new unordered list (item 1 in level 1)
196	- level 2 in level 1
197
198	- level 3 in level 1
199	- level 4 in level 1
200	## This is the last section of the test
201
202	Single paragraph with '----' in it will be turned into separator:
203
204	-----------
205
206	And this is the last paragraph in
207	the test. Be happy!
208
209	====================
210
211	## Why?
212
213	We wanted a markup language with the following requirements:
214	- less than 300 lines of functional code
215	- easy to read
216	- secure
217	- support table, ul, ol, code
218	- support html5 video and audio elements (html serialization only)
219	- can align images and resize them
220	- can specify class for tables, blockquotes and code elements
221	- can add anchors
222	- does not use _ for markup (since it creates odd behavior)
223	- automatically links urls
224	- fast
225	- easy to extend
226	- supports latex and pdf including references
227	- allows to describe the markup in the markup (this document is generated from markmin syntax)
228
229	(results depend on text but on average for text ~100K markmin is 30% faster than markdown, for text ~10K it is 10x faster)
230
231	The [[web2py book http://www.lulu.com/product/paperback/web2py-%283rd-edition%29/12822827]] published by lulu, for example, was entirely generated with markmin2pdf from the online [[web2py wiki http://www.web2py.com/book]]
232
233	## Download
234
235	- http://web2py.googlecode.com/hg/gluon/contrib/markmin/markmin2html.py
236	- http://web2py.googlecode.com/hg/gluon/contrib/markmin/markmin2latex.py
237	- http://web2py.googlecode.com/hg/gluon/contrib/markmin/markmin2pdf.py
238
239	markmin2html.py and markmin2latex.py are single files and have no web2py dependence. Their license is BSD.
240
241	## Examples
242
243	### Bold, italic, code and links
244
245	------------------------------------------------------------------------------
246	SOURCE | OUTPUT
247	==============================================================================
248	``# title`` | title
249	``## section`` | section
250	``### subsection`` | subsection
251	``**bold**`` | **bold**
252	``''italic''`` | ''italic''
253	``~~strikeout~~`` | ~~strikeout~~
254	``!`!`verbatim`!`!`` | ``verbatim``
255	``\`\`color with **bold**\`\`:red`` | ``color with **bold**``:red
256	``\`\`many colors\`\`:color[blue:#ffff00]`` | ``many colors``:color[blue:#ffff00]
257	``http://google.com`` | http://google.com
258	``[[click me #myanchor]]`` | [[click me #myanchor]]
259	``[[click me [extra info] #myanchor popup]]`` | [[click me [extra info] #myanchor popup]]
260	-------------------------------------------------------------------------------
261
262	### More on links
263
264	The format is always ``[[title link]]`` or ``[[title [extra] link]]``. Notice you can nest bold, italic, strikeout and code inside the link ``title``.
265
266	### Anchors [[myanchor]]
267
268	You can place an anchor anywhere in the text using the syntax ``[[name]]`` where ''name'' is the name of the anchor.
269	You can then link the anchor with [[link #myanchor]], i.e. ``[[link #myanchor]]`` or [[link with an extra info [extra info] #myanchor]], i.e.
270	``[[link with an extra info [extra info] #myanchor]]``.
271
272	### Images
273
274	[[alt-string for the image [the image title] http://www.web2py.com/examples/static/web2py_logo.png right 200px]]
275	This paragraph has an image aligned to the right with a width of 200px. It is placed using the code
276
277	``[[alt-string for the image [the image title] http://www.web2py.com/examples/static/web2py_logo.png right 200px]]``.
278
279	### Unordered Lists
280
281	``
282	- Dog
283	- Cat
284	- Mouse
285	``
286
287	is rendered as
288	- Dog
289	- Cat
290	- Mouse
291
292	Two new lines between items break the list in two lists.
293
294	### Ordered Lists
295
296	``
297	+ Dog
298	+ Cat
299	+ Mouse
300	``
301
302	is rendered as
303	+ Dog
304	+ Cat
305	+ Mouse
306
307
308	### Multilevel Lists
309
310	``
311	+ Dogs
312	-- red
313	-- brown
314	-- black
315	+ Cats
316	-- fluffy
317	-- smooth
318	-- bald
319	+ Mice
320	-- small
321	-- big
322	-- huge
323	``
324
325	is rendered as
326	+ Dogs
327	-- red
328	-- brown
329	-- black
330	+ Cats
331	-- fluffy
332	-- smooth
333	-- bald
334	+ Mice
335	-- small
336	-- big
337	-- huge
338
339
340	### Tables (with optional header and/or footer)
341
342	Something like this
343	``
344	-----------------
345	A\|B\|C
346	=================
347	0 \| 0 \| X
348	0 \| X \| 0
349	X \| 0 \| 0
350	=================
351	D\|F\|G
352	-----------------:abc[id]
353	``
354	is a table and is rendered as
355	-----------------
356	A\|B\|C
357	=================
358	0 \| 0 \| X
359	0 \| X \| 0
360	X \| 0 \| 0
361	=================
362	D\|F\|G
363	-----------------:abc[id]
364	Four or more dashes delimit the table and | separates the columns.
365	The ``:abc``, ``:id[abc_1]`` or ``:abc[abc_1]`` at the end sets the class and/or id for the table and it is optional.
366
367	### Blockquote
368
369	A table with a single cell is rendered as a blockquote:
370
371	-----
372	Hello world
373	-----
374
375	Blockquote can contain headers, paragraphs, lists and tables:
376
377	``
378	-----
379	This is a paragraph in a blockquote
380
381	+ item 1
382	+ item 2
383	-- item 2.1
384	-- item 2.2
385	+ item 3
386
387	---------
388	0 \| 0 \| X
389	0 \| X \| 0
390	X \| 0 \| 0
391	---------:tableclass1
392	-----
393	``
394
395	is rendered as:
396	-----
397	This is a paragraph in a blockquote
398
399	+ item 1
400	+ item 2
401	-- item 2.1
402	-- item 2.2
403	+ item 3
404
405	---------
406	0 \| 0 \| X
407	0 \| X \| 0
408	X \| 0 \| 0
409	---------:tableclass1
410	-----
411
412
413	### Code, ``<code>``, escaping and extra stuff
414
415	``
416	def test():
417	return "this is Python code"
418	``:python
419
420	Optionally a ` inside a ``!`!`...`!`!`` block can be inserted escaped with !`!.
421
422	NOTE: You can escape markmin constructions (\\'\\',\`\`,\\,\~\~,\[,\{,\]\},\$,\@) with '\\\\' character:
423	so \\\\`\\\\` can replace !`!`! escape string
424
425	The ``:python`` after the markup is also optional. If present, by default, it is used to set the class of the <code> block.
426	The behavior can be overridden by passing an argument ``extra`` to the ``render`` function. For example:
427
428	``
429	markmin2html("!`!!`!aaa!`!!`!:custom",
430	extra=dict(custom=lambda text: 'x'+text+'x'))
431	``:python
432
433	generates
434
435	``'xaaax'``:python
436
437	(the ``!`!`...`!`!:custom`` block is rendered by the ``custom=lambda`` function passed to ``render``).
438
439	### Line breaks
440
441	``[[NEWLINE]]`` tag is used to break lines:
442	``
443	#### Multiline [[NEWLINE]]
444	title
445	paragraph [[NEWLINE]]
446	with breaks[[NEWLINE]]in it
447	``
448	generates:
449
450	#### Multiline [[NEWLINE]]
451	title
452	paragraph [[NEWLINE]]
453	with breaks[[NEWLINE]]in it
454
455
456	### Html5 support
457
458	Markmin also supports the <video> and <audio> html5 tags using the notation:
459	``
460	[[message link video]]
461	[[message link audio]]
462
463	[[message [title] link video]]
464	[[message [title] link audio]]
465	``
466	where ``message`` will be shown in browsers without HTML5 video/audio tags support.
467
468	### Latex and other extensions
469
470	Formulas can be embedded into HTML with ''\$\$``formula``\$\$''.
471	You can use Google charts to render the formula:
472
473	``
474	LATEX = '<img src="http://chart.apis.google.com/chart?cht=tx&chl=%s" />'
475	markmin2html(text,{'latex':lambda code: LATEX % urllib.quote(code)})
476	``
477
478	### Code with syntax highlighting
479
480	This requires a syntax highlighting tool, such as the web2py CODE helper.
481
482	``
483	extra={'code_cpp':lambda text: CODE(text,language='cpp').xml(),
484	'code_java':lambda text: CODE(text,language='java').xml(),
485	'code_python':lambda text: CODE(text,language='python').xml(),
486	'code_html':lambda text: CODE(text,language='html').xml()}
487	``
488	or simple:
489	``
490	extra={'code':lambda text,lang='python': CODE(text,language=lang).xml()}
491	``
492	``
493	markmin2html(text,extra=extra)
494	``
495
496	Code can now be marked up as in this example:
497	``
498	!`!`
499	<html><body>example</body></html>
500	!`!`:code_html
501	``
502	OR
503	``
504	!`!`
505	<html><body>example</body></html>
506	!`!`:code[html]
507	``
508
509	### Citations and References
510
511	Citations are treated as internal links in html and proper citations in latex if there is a final section called "References". Items like
512
513	``
514	- [[key]] value
515	``
516
517	in the References will be translated into Latex
518
519	``
520	\\bibitem{key} value
521	``
522
523	Here is an example of usage:
524
525	``
526	As shown in Ref.!`!`mdipierro`!`!:cite
527
528	## References
529
530	- [[mdipierro]] web2py Manual, 3rd Edition, lulu.com
531	``
532
533	### Caveats
534
535	``<ul/>``, ``<ol/>``, ``<code/>``, ``<table/>``, ``<blockquote/>``, ``<h1/>``, ..., ``<h6/>`` do not have ``<p>...</p>`` around them.
536
537	"""
# Basic HTML colour names.
# NOTE(review): presumably the names accepted by the ``text``:colour markup;
# the use site is outside this part of the file -- confirm.
html_colors = ['aqua', 'black', 'blue', 'fuchsia', 'gray', 'green',
               'lime', 'maroon', 'navy', 'olive', 'purple', 'red',
               'silver', 'teal', 'white', 'yellow']

# Single control characters that the patterns below match as tokens of their
# own (META / DISABLED_META in regex_expand_meta and regex_code, LINK in
# regex_link).
META = '\x06'
LINK = '\x07'
DISABLED_META = '\x08'
# Template for rendering a formula as an image; %s receives the quoted formula.
LATEX = '<img src="http://chart.apis.google.com/chart?cht=tx&chl=%s" />'

# All patterns are raw strings so that regex escapes such as ``\.`` or ``\s``
# are never interpreted (or warned about) as Python string escapes.

# @/app/controller/function[/args] ; each of the three names may be empty
regex_URL = re.compile(r'@/(?P<a>\w*)/(?P<c>\w*)/(?P<f>\w*(\.\w+)?)(/(?P<args>[\w\.\-/]+))?')
# @{name} or @{name:arguments}
regex_env2 = re.compile(r'@\{(?P<a>[\w\-\.]+?)(\:(?P<b>.*?))?\}')
# a META / DISABLED_META token or four backticks
regex_expand_meta = re.compile('(' + META + '|' + DISABLED_META + '|````)')
# $$formula$$
regex_dd = re.compile(r'\$\$(?P<latex>.*?)\$\$')
# ``text`` optionally followed by :class and [parameter]
regex_code = re.compile(
    '(' + META + '|' + DISABLED_META + r'|````)|(``(?P<t>.+?)``(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[^\]]*)\])?)?)',
    re.S)
# **strong**, ~~deleted~~ and ''emphasised'' text
regex_strong = re.compile(r'\*\*(?P<t>[^\s*]+( +[^\s*]+)*)\*\*')
regex_del = re.compile(r'~~(?P<t>[^\s~]+( +[^\s~]+)*)~~')
regex_em = re.compile(r"''(?P<t>([^\s']| |'(?!'))+)''")
# a whole string holding one (optionally signed / exponent) number
regex_num = re.compile(r"^\s*[+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)?\s*$")
# optional line prefix (1-6 '#', or a run of '.', '+' or '-' with an optional
# trailing '.') followed by the rest of the line
regex_list = re.compile(r'^(?:(?:(#{1,6})|(?:(\.+|\++|\-+)(\.)?))\s*)?(.*)$')
# a line of three or more dashes, optionally preceded by a list prefix
regex_bq_headline = re.compile(r'^(?:(\.+|\++|\-+)(\.)?\s+)?(-{3}-*)$')
# a line of three or more dashes optionally followed by :class and [id]
regex_tq = re.compile(r'^(-{3}-*)(?::(?P<c>[a-zA-Z][_a-zA-Z\-\d]*)(?:\[(?P<p>[a-zA-Z][_a-zA-Z\-\d]*)\])?)?$')
# proto:scheme://... and bare scheme://... urls / e-mail addresses
regex_proto = re.compile(r'(?<!["\w>/=])(?P<p>\w+):(?P<k>\w+://[\w\d\-+=?%&/:.]+)', re.M)
regex_auto = re.compile(r'(?<!["\w>/=])(?P<k>\w+://[\w\d\-+_=?%&/:.,;#]+\w|[\w\-.]+@[\w\-.]+)', re.M)
# a LINK token or [[...]]
regex_link = re.compile(r'(' + LINK + r')|\[\[(?P<s>.+?)\]\]', re.S)
# contents of [[...]]: title, [extra], link and an optional "popup" flag
regex_link_level2 = re.compile(r'^(?P<t>\S.*?)?(?:\s+\[(?P<a>.+?)\])?(?:\s+(?P<k>\S+))?(?:\s+(?P<p>popup))?\s*$', re.S)
# contents of [[...]] for media: title, [extra], link, media keyword, width
regex_media_level2 = re.compile(
    r'^(?P<t>\S.*?)?(?:\s+\[(?P<a>.+?)\])?(?:\s+(?P<k>\S+))?\s+(?P<p>img|IMG|left|right|center|video|audio|blockleft|blockright)(?:\s+(?P<w>\d+px))?\s*$',
    re.S)

# any run of backslashes followed by one markmin control character
regex_markmin_escape = re.compile(r"(\\*)(['`:*~\\[\]{}@\$+\-.#\n])")
# one backslash followed by one markmin control character
regex_backslash = re.compile(r"\\(['`:*~\\[\]{}@\$+\-.#\n])")
# The markmin control characters and, position by position, the control
# character each one is swapped with.
_ttab_markup = "'`:*~\\[]{}@$+-.#\n"
_ttab_placeholders = '\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x05'
# ttab_in maps markup characters to their placeholders; ttab_out is the
# inverse table.
ttab_in = maketrans(_ttab_markup, _ttab_placeholders)
ttab_out = maketrans(_ttab_placeholders, _ttab_markup)
# ``name=`` with optional whitespace on either side of the equals sign
# (used by make_dict to rewrite it as ``'name':``).
regex_quote = re.compile(r'(?P<name>\w+?)\s*\=\s*')
573
def local_html_escape(data, quote=False):
    """
    Replace the special characters "&", "<" and ">" with HTML-safe sequences.

    Works with text and, on Python 3, also with bytes.

    Arguments:
    - data is the text (or bytes) to escape
    - quote, when true, additionally translates the double quote (") to
      ``&quot;`` and the single quote (') to ``&#x27;``; it is off by default

    Returns the escaped value, of the same type as ``data``.
    """
    if PY2:
        import cgi
        data = cgi.escape(data, quote)
        # cgi.escape leaves the single quote alone, so translate it here
        return data.replace("'", "&#x27;") if quote else data
    else:
        import html
        if isinstance(data, str):
            return html.escape(data, quote=quote)
        # html.escape only accepts str: mirror its replacements for bytes
        data = data.replace(b"&", b"&amp;")  # Must be done first!
        data = data.replace(b"<", b"&lt;")
        data = data.replace(b">", b"&gt;")
        if quote:
            data = data.replace(b'"', b"&quot;")
            data = data.replace(b'\'', b"&#x27;")
        return data
597
def make_dict(b):
    """
    Turn an argument string such as ``name=value`` into dict-literal source.

    Every ``name=`` matched by ``regex_quote`` is rewritten as ``'name':``
    and the result is wrapped in braces. The text is only rewritten here,
    not evaluated.
    """
    # raw string: "\g" is not a valid Python string escape
    return '{%s}' % regex_quote.sub(r"'\g<name>':", b)
600
601
def safe_eval(node_or_string, env):
    """
    Safely evaluate an expression node or a string containing a Python
    expression. The string or node provided may only consist of the following
    Python literal structures: strings, numbers, tuples, lists, dicts, booleans,
    and None, plus the names defined in ``env``.

    Arguments:
    - node_or_string is the expression source or an already parsed ast node
    - env is a dict of additional names the expression may refer to

    Returns the evaluated value. Raises ValueError('malformed string') for any
    other construct; a SyntaxError raised while parsing a string propagates.
    """
    safe_names = {'None': None, 'True': True, 'False': False}
    safe_names.update(env)

    # ``basestring`` and ``long`` exist on Python 2 only
    try:
        string_types = basestring
        real_types = (int, long, float)
    except NameError:
        string_types = str
        real_types = (int, float)

    if isinstance(node_or_string, string_types):
        node_or_string = ast.parse(node_or_string, mode='eval')
    if isinstance(node_or_string, ast.Expression):
        node_or_string = node_or_string.body

    constant_cls = getattr(ast, 'Constant', None)

    def _literal(node):
        # (True, value) for a literal constant node, (False, None) otherwise
        if constant_cls is not None and isinstance(node, constant_cls):
            if node.value is Ellipsis:
                return False, None
            return True, node.value
        # Older parsers emit Str / Num / NameConstant nodes; they are told
        # apart by class name so the deprecated ast attributes are not touched.
        kind = type(node).__name__
        if kind == 'Str':
            return True, node.s
        if kind == 'Num':
            return True, node.n
        if kind == 'NameConstant':
            return True, node.value
        return False, None

    def _number(node):
        # (True, n) for an unsigned numeric literal, (False, None) otherwise
        found, value = _literal(node)
        if (found and not isinstance(value, bool)
                and isinstance(value, real_types + (complex,))):
            return True, value
        return False, None

    def _signed_number(node):
        # like _number, but also accepts one leading unary + or -
        if isinstance(node, ast.UnaryOp) and \
                isinstance(node.op, (ast.UAdd, ast.USub)):
            found, value = _number(node.operand)
            if not found:
                return False, None
            return True, (+value if isinstance(node.op, ast.UAdd) else -value)
        return _number(node)

    def _convert(node):
        found, value = _literal(node)
        if found:
            return value
        if isinstance(node, ast.Tuple):
            return tuple(map(_convert, node.elts))
        elif isinstance(node, ast.List):
            return list(map(_convert, node.elts))
        elif isinstance(node, ast.Dict):
            return dict((_convert(k), _convert(v)) for k, v
                        in zip(node.keys, node.values))
        elif isinstance(node, ast.Name):
            if node.id in safe_names:
                return safe_names[node.id]
        elif isinstance(node, ast.UnaryOp):
            found, value = _signed_number(node)
            if found:
                return value
        elif isinstance(node, ast.BinOp) and \
                isinstance(node.op, (ast.Add, ast.Sub)):
            # complex literals are spelled <real> +/- <imaginary>
            left_found, left = _signed_number(node.left)
            right_found, right = _number(node.right)
            if left_found and right_found and \
                    isinstance(right, complex) and \
                    isinstance(left, real_types):
                if isinstance(node.op, ast.Add):
                    return left + right
                else:
                    return left - right
        raise ValueError('malformed string')

    return _convert(node_or_string)
646
647
def markmin_escape(text):
    """ put a \\ in front of the markmin control characters: '`:*~[]{}@$ """
    def _protect(found):
        # double the backslashes already present, then add the escaping one
        doubled = found.group(0).replace('\\', '\\\\')
        return '\\' + doubled

    return regex_markmin_escape.sub(_protect, text)
652
653
def replace_autolinks(text, autolinks):
    """
    Replace every url or e-mail address matched by ``regex_auto`` in text
    with whatever ``autolinks(url)`` returns for it.
    """
    def _to_html(found):
        return autolinks(found.group('k'))

    return regex_auto.sub(_to_html, text)
656
657
def replace_at_urls(text, url):
    """
    Replace every ``@/a/c/f/args`` reference matched by ``regex_URL`` with
    the result of calling ``url`` on its parts (experimental).
    """
    def _expand(found, url=url):
        parts = found.groupdict()
        arg_list = (parts['args'] or '').split('/')
        # empty components are handed over as None
        return url(a=parts['a'] or None,
                   c=parts['c'] or None,
                   f=parts['f'] or None,
                   args=arg_list, scheme=True, host=True)

    return regex_URL.sub(_expand, text)
666
667
def replace_components(text, env):
    """
    Expand ``@{name}`` and ``@{name:arguments}`` references found in text.

    ``name`` is looked up in ``env``; unknown names leave the reference as it
    is. A callable entry is called with the arguments: as keyword arguments
    when they evaluate to a dict (``@{f:a=1}``), otherwise with the raw
    argument text as a single positional argument. An exception raised by the
    call is rendered as ``ERROR: <message>``. The result is always converted
    with ``str``.
    """
    # not perfect but acceptable
    def u2(match, env=env):
        f = env.get(match.group('a'), match.group(0))
        if callable(f):
            b = match.group('b')
            try:
                b = safe_eval(make_dict(b), env)
            except Exception:
                # best effort: arguments that are not a valid dict body are
                # passed to the component unparsed
                pass
            try:
                f = f(**b) if isinstance(b, dict) else f(b)
            except Exception as e:
                f = 'ERROR: %s' % e
        return str(f)

    return regex_env2.sub(u2, text)
686
687
def autolinks_simple(url):
    """
    Return the html for a bare url: a mailto link for an e-mail address, an
    image, video or audio tag when the url ends with a known media extension
    (case-insensitive), and a plain link otherwise.
    """
    lowered = url.lower()
    if '://' not in url and '@' in url:
        return '<a href="mailto:%s">%s</a>' % (url, url)
    media_templates = (
        (('.jpg', '.jpeg', '.gif', '.png'), '<img src="%s" controls />'),
        (('.mp4', '.mpeg', '.mov', '.ogv'), '<video src="%s" controls></video>'),
        (('.mp3', '.wav', '.ogg'), '<audio src="%s" controls></audio>'),
    )
    for suffixes, template in media_templates:
        if lowered.endswith(suffixes):
            return template % url
    return '<a href="%s">%s</a>' % (url, url)
703
704
def protolinks_simple(proto, url):
    """
    Build the html for a "proto:url" construction, e.g.
    "iframe:http://www.example.com/path" arrives here as
    proto="iframe"
    url="http://www.example.com/path"

    "iframe" (and its synonym "embed") gives an <iframe>, "qr" gives a QR
    code image; any other proto is returned unchanged as "proto:url".
    """
    if proto == 'qr':
        return '<img style="width:100px" src="http://chart.apis.google.com/chart?cht=qr&chs=100x100&chl=%s&choe=UTF-8&chld=H" alt="QR Code" title="QR Code" />' % url
    if proto in ('iframe', 'embed'):
        return '<iframe src="%s" frameborder="0" allowfullscreen></iframe>' % url
    return proto + ':' + url
721
722
def email_simple(email):
    """Return a mailto link whose target and text are both ``email``."""
    anchor = '<a href="mailto:%s">%s</a>'
    return anchor % (email, email)
725
726
727	def render(text,
728	extra={},
729	allowed={},
730	sep='p',
731	URL=None,
732	environment=None,
733	latex='google',
734	autolinks='default',
735	protolinks='default',
736	class_prefix='',
737	id_prefix='markmin_',
738	pretty_print=False):
739	"""
740	Arguments:
741	- text is the text to be processed
742	- extra is a dict like extra=dict(custom=lambda value: value) that process custom code
743	as in " ``this is custom code``:custom "
744	- allowed is a dictionary of list of allowed classes like
745	allowed = dict(code=('python','cpp','java'))
746	- sep can be 'p' to separate text in <p>...</p>
747	or can be 'br' to separate text using <br />
748	- URL -
749	- environment is a dictionary of environment variables (can be accessed with @{variable}
750	- latex -
751	- autolinks is a function to convert auto urls to html-code (default is autolinks(url) )
752	- protolinks is a function to convert proto-urls (e.g."proto:url") to html-code
753	(default is protolinks(proto,url))
754	- class_prefix is a prefix for ALL classes in markmin text. E.g. if class_prefix='my_'
755	then for ``test``:cls class will be changed to "my_cls" (default value is '')
756	- id_prefix is prefix for ALL ids in markmin text (default value is 'markmin_'). E.g.:
757	-- [[id]] will be converted to <span class="anchor" id="markmin_id"></span>
758	-- [[link #id]] will be converted to <a href="#markmin_id">link</a>
759	-- ``test``:cls[id] will be converted to <code class="cls" id="markmin_id">test</code>
760
761	>>> render('this is\\n# a section\\n\\nparagraph')
762	'<p>this is</p><h1>a section</h1><p>paragraph</p>'
763	>>> render('this is\\n## a subsection\\n\\nparagraph')
764	'<p>this is</p><h2>a subsection</h2><p>paragraph</p>'
765	>>> render('this is\\n### a subsubsection\\n\\nparagraph')
766	'<p>this is</p><h3>a subsubsection</h3><p>paragraph</p>'
767	>>> render('hello world')
768	'<p><strong>hello world</strong></p>'
769	>>> render('``hello world``')
770	'<code>hello world</code>'
771	>>> render('``hello world``:python')
772	'<code class="python">hello world</code>'
773	>>> render('``\\nhello\\nworld\\n``:python')
774	'<pre><code class="python">hello\\nworld</code></pre>'
775	>>> render('``hello world``:python[test_id]')
776	'<code class="python" id="markmin_test_id">hello world</code>'
777	>>> render('``hello world``:id[test_id]')
778	'<code id="markmin_test_id">hello world</code>'
779	>>> render('``\\nhello\\nworld\\n``:python[test_id]')
780	'<pre><code class="python" id="markmin_test_id">hello\\nworld</code></pre>'
781	>>> render('``\\nhello\\nworld\\n``:id[test_id]')
782	'<pre><code id="markmin_test_id">hello\\nworld</code></pre>'
783	>>> render("''hello world''")
784	'<p><em>hello world</em></p>'
785	>>> render(' hello world')
786	'<p> hello <strong>world</strong></p>'
787
788	>>> render('- this\\n- is\\n- a list\\n\\nand this\\n- is\\n- another')
789	'<ul><li>this</li><li>is</li><li>a list</li></ul><p>and this</p><ul><li>is</li><li>another</li></ul>'
790
791	>>> render('+ this\\n+ is\\n+ a list\\n\\nand this\\n+ is\\n+ another')
792	'<ol><li>this</li><li>is</li><li>a list</li></ol><p>and this</p><ol><li>is</li><li>another</li></ol>'
793
794	>>> render("----\\na \| b\\nc \| d\\n----\\n")
795	'<table><tbody><tr class="first"><td>a</td><td>b</td></tr><tr class="even"><td>c</td><td>d</td></tr></tbody></table>'
796
797	>>> render("----\\nhello world\\n----\\n")
798	'<blockquote><p>hello world</p></blockquote>'
799
800	>>> render('[[myanchor]]')
801	'<p><span class="anchor" id="markmin_myanchor"></span></p>'
802
803	>>> render('[[ http://example.com]]')
804	'<p><a href="http://example.com">http://example.com</a></p>'
805
806	>>> render('[[bookmark [http://example.com] ]]')
807	'<p><span class="anchor" id="markmin_bookmark"><a href="http://example.com">http://example.com</a></span></p>'
808
809	>>> render('[[this is a link http://example.com]]')
810	'<p><a href="http://example.com">this is a link</a></p>'
811
812	>>> render('[[this is an image http://example.com left]]')
813	'<p><img src="http://example.com" alt="this is an image" style="float:left" /></p>'
814
815	>>> render('[[this is an image http://example.com left 200px]]')
816	'<p><img src="http://example.com" alt="this is an image" style="float:left;width:200px" /></p>'
817
818	>>> render("[[Your browser doesn't support <video> HTML5 tag http://example.com video]]")
819	'<p><video controls="controls"><source src="http://example.com" />Your browser doesn\\'t support <video> HTML5 tag</video></p>'
820
821	>>> render("[[Your browser doesn't support <audio> HTML5 tag http://example.com audio]]")
822	'<p><audio controls="controls"><source src="http://example.com" />Your browser doesn\\'t support <audio> HTML5 tag</audio></p>'
823
824	>>> render("[[Your\\nbrowser\\ndoesn't\\nsupport\\n<audio> HTML5 tag http://exam\\\\\\nple.com\\naudio]]")
825	'<p><audio controls="controls"><source src="http://example.com" />Your browser doesn\\'t support <audio> HTML5 tag</audio></p>'
826
827	>>> render('[[this is a link http://example.com]]')
828	'<p><a href="http://example.com">this is a <strong>link</strong></a></p>'
829
830	>>> render("``aaa``:custom", extra=dict(custom=lambda text: 'x'+text+'x'))
831	'xaaax'
832
833	>>> print(render(r"$$\int_a^b sin(x)dx$$"))
834	<img src="http://chart.apis.google.com/chart?cht=tx&chl=%5Cint_a%5Eb%20sin%28x%29dx" />
835
836	>>> markmin2html(r"use backslash: \[\[[[mess\[[ag\]]e link]]\]]")
837	'<p>use backslash: [[<a href="link">mess[[ag]]e</a>]]</p>'
838
839	>>> markmin2html("backslash instead of exclamation sign: \``probe``")
840	'<p>backslash instead of exclamation sign: ``probe``</p>'
841
842	>>> render(r"simple image: [[\[[this is an image\]] http://example.com IMG]]!!!")
843	'<p>simple image: <img src="http://example.com" alt="[[this is an image]]" />!!!</p>'
844
845	>>> render(r"simple link no anchor with popup: [[ http://example.com popup]]")
846	'<p>simple link no anchor with popup: <a href="http://example.com" target="_blank">http://example.com</a></p>'
847
848	>>> render("auto-url: http://example.com")
849	'<p>auto-url: <a href="http://example.com">http://example.com</a></p>'
850
851	>>> render("auto-image: (http://example.com/image.jpeg)")
852	'<p>auto-image: (<img src="http://example.com/image.jpeg" controls />)</p>'
853
854	>>> render("qr: (qr:http://example.com/image.jpeg)")
855	'<p>qr: (<img style="width:100px" src="http://chart.apis.google.com/chart?cht=qr&chs=100x100&chl=http://example.com/image.jpeg&choe=UTF-8&chld=H" alt="QR Code" title="QR Code" />)</p>'
856
857	>>> render("embed: (embed:http://example.com/page)")
858	'<p>embed: (<iframe src="http://example.com/page" frameborder="0" allowfullscreen></iframe>)</p>'
859
860	>>> render("iframe: (iframe:http://example.com/page)")
861	'<p>iframe: (<iframe src="http://example.com/page" frameborder="0" allowfullscreen></iframe>)</p>'
862
863	>>> render("title1: [[test message [simple \[test\] title] http://example.com ]] test")
864	'<p>title1: <a href="http://example.com" title="simple [test] title">test message</a> test</p>'
865
866	>>> render("title2: \[\[[[test message [simple title] http://example.com popup]]\]]")
867	'<p>title2: [[<a href="http://example.com" title="simple title" target="_blank">test message</a>]]</p>'
868
869	>>> render("title3: [[ [link w/o anchor but with title] http://www.example.com ]]")
870	'<p>title3: <a href="http://www.example.com" title="link w/o anchor but with title">http://www.example.com</a></p>'
871
872	>>> render("title4: [[ [simple title] http://www.example.com popup]]")
873	'<p>title4: <a href="http://www.example.com" title="simple title" target="_blank">http://www.example.com</a></p>'
874
875	>>> render("title5: [[test message [simple title] http://example.com IMG]]")
876	'<p>title5: <img src="http://example.com" alt="test message" title="simple title" /></p>'
877
878	>>> render("title6: [[[test message w/o title] http://example.com IMG]]")
879	'<p>title6: <img src="http://example.com" alt="[test message w/o title]" /></p>'
880
881	>>> render("title7: [[[this is not a title] [this is a title] http://example.com IMG]]")
882	'<p>title7: <img src="http://example.com" alt="[this is not a title]" title="this is a title" /></p>'
883
884	>>> render("title8: [[test message [title] http://example.com center]]")
885	'<p>title8: <p style="text-align:center"><img src="http://example.com" alt="test message" title="title" /></p></p>'
886
887	>>> render("title9: [[test message [title] http://example.com left]]")
888	'<p>title9: <img src="http://example.com" alt="test message" title="title" style="float:left" /></p>'
889
890	>>> render("title10: [[test message [title] http://example.com right 100px]]")
891	'<p>title10: <img src="http://example.com" alt="test message" title="title" style="float:right;width:100px" /></p>'
892
893	>>> render("title11: [[test message [title] http://example.com center 200px]]")
894	'<p>title11: <p style="text-align:center"><img src="http://example.com" alt="test message" title="title" style="width:200px" /></p></p>'
895
896	>>> render(r"\\[[probe]]")
897	'<p>[[probe]]</p>'
898
899	>>> render(r"\\\\[[probe]]")
900	'<p>\\\\<span class="anchor" id="markmin_probe"></span></p>'
901
902	>>> render(r"\\\\\\[[probe]]")
903	'<p>\\\\[[probe]]</p>'
904
905	>>> render(r"\\\\\\\\[[probe]]")
906	'<p>\\\\\\\\<span class="anchor" id="markmin_probe"></span></p>'
907
908	>>> render(r"\\\\\\\\\[[probe]]")
909	'<p>\\\\\\\\[[probe]]</p>'
910
911	>>> render(r"\\\\\\\\\\\[[probe]]")
912	'<p>\\\\\\\\\\\\<span class="anchor" id="markmin_probe"></span></p>'
913
914	>>> render("``[[ [\\[[probe\]\\]] URL\\[x\\]]]``:red[dummy_params]")
915	'<span style="color: red"><a href="URL[x]" title="[[probe]]">URL[x]</a></span>'
916
917	>>> render("the \\text")
918	'<p>the text</p>'
919
920	>>> render("the \\``text``")
921	'<p>the ``text``</p>'
922
923	>>> render("the \\\\''text''")
924	"<p>the ''text''</p>"
925
926	>>> render("the [[link [with ``<b>title</b>``:red] http://www.example.com]]")
927	'<p>the <a href="http://www.example.com" title="with ``<b>title</b>``:red">link</a></p>'
928
929	>>> render("the [[link \\[without ``<b>title</b>``:red\\] http://www.example.com]]")
930	'<p>the <a href="http://www.example.com">link [<strong>without</strong> <span style="color: red"><b>title</b></span>]</a></p>'
931
932	>>> render("aaa-META-``code``:text[]-LINK-[[link http://www.example.com]]-LINK-[[image http://www.picture.com img]]-end")
933	'<p>aaa-META-<code class="text">code</code>-LINK-<a href="http://www.example.com">link</a>-LINK-<img src="http://www.picture.com" alt="image" />-end</p>'
934
935	>>> render("[[<a>test</a> [<a>test2</a>] <a>text3</a>]]")
936	'<p><a href="<a>text3</a>" title="<a>test2</a>"><a>test</a></a></p>'
937
938	>>> render("[[<a>test</a> [<a>test2</a>] <a>text3</a> IMG]]")
939	'<p><img src="<a>text3</a>" alt="<a>test</a>" title="<a>test2</a>" /></p>'
940
941	>>> render("bold ''italic'' ~~strikeout~~")
942	'<p><strong>bold</strong> <em>italic</em> <del>strikeout</del></p>'
943
944	>>> render("this is ``a red on yellow text``:c[#FF0000:#FFFF00]")
945	'<p>this is <span style="color: #FF0000;background-color: #FFFF00;">a red on yellow text</span></p>'
946
947	>>> render("this is ``a text with yellow background``:c[:yellow]")
948	'<p>this is <span style="background-color: yellow;">a text with yellow background</span></p>'
949
950	>>> render("this is ``a colored text (RoyalBlue)``:color[rgb(65,105,225)]")
951	'<p>this is <span style="color: rgb(65,105,225);">a colored text (RoyalBlue)</span></p>'
952
953	>>> render("this is ``a green text``:color[green:]")
954	'<p>this is <span style="color: green;">a green text</span></p>'
955
956	>>> render("@{probe:1}", environment=dict(probe=lambda t:"test %s" % t))
957	'<p><strong>test 1</strong></p>'
958
959	>>> render("@{probe:t=a}", environment=dict(probe=lambda t:"test %s" % t, a=1))
960	'<p><strong>test 1</strong></p>'
961
962	>>> render('[[id1 [span messag in ''markmin''] ]] ... [[link to id [link\\\'s title] #mark1]]')
963	'<p><span class="anchor" id="markmin_id1">span <strong>messag</strong> in markmin</span> ... <a href="#markmin_mark1" title="link\\\'s title"><strong>link</strong> to id</a></p>'
964
965	>>> render('# Multiline[[NEWLINE]]\\n title\\nParagraph[[NEWLINE]]\\nwith breaks[[NEWLINE]]\\nin it')
966	'<h1>Multiline<br /> title</h1><p>Paragraph<br /> with breaks<br /> in it</p>'
967
968	>>> render("anchor with name 'NEWLINE': [[NEWLINE [ ] ]]")
969	'<p>anchor with name \\'NEWLINE\\': <span class="anchor" id="markmin_NEWLINE"></span></p>'
970
971	>>> render("anchor with name 'NEWLINE': [[NEWLINE [newline] ]]")
972	'<p>anchor with name \\'NEWLINE\\': <span class="anchor" id="markmin_NEWLINE">newline</span></p>'
973	"""
974	if autolinks == "default":
975	autolinks = autolinks_simple
976	if protolinks == "default":
977	protolinks = protolinks_simple
978	pp = '\n' if pretty_print else ''
979	text = text if text is None or isinstance(text, str) else text.decode('utf8', 'strict')
980
981	if not (isinstance(text, str)):
982	text = str(text or '')
983	text = regex_backslash.sub(lambda m: m.group(1).translate(ttab_in), text)
984	text = text.replace('\x05', '').replace('\r\n', '\n') # concatenate strings separeted by \\n
985	if URL is not None:
986	text = replace_at_urls(text, URL)
987
988	if latex == 'google':
989	text = regex_dd.sub('``\g<latex>``:latex ', text)
990
991	#############################################################
992	# replace all blocks marked with ``...``:class[id] with META
993	# store them into segments they will be treated as code
994	#############################################################
995	segments = []
996
997	def mark_code(m):
998	g = m.group(0)
999	if g in (META, DISABLED_META):
1000	segments.append((None, None, None, g))
1001	return m.group()
1002	elif g == '````':
1003	segments.append((None, None, None, ''))
1004	return m.group()
1005	else:
1006	c = m.group('c') or ''
1007	p = m.group('p') or ''
1008	if 'code' in allowed and c not in allowed['code']:
1009	c = ''
1010	code = m.group('t').replace('!`!', '`')
1011	segments.append((code, c, p, m.group(0)))
1012	return META
1013
1014	text = regex_code.sub(mark_code, text)
1015
1016	#############################################################
1017	# replace all blocks marked with [[...]] with LINK
1018	# store them into links they will be treated as link
1019	#############################################################
1020	links = []
1021
1022	def mark_link(m):
1023	links.append(None if m.group() == LINK
1024	else m.group('s'))
1025	return LINK
1026
1027	text = regex_link.sub(mark_link, text)
1028	text = local_html_escape(text)
1029
1030	if protolinks:
1031	text = regex_proto.sub(lambda m: protolinks(*m.group('p', 'k')), text)
1032
1033	if autolinks:
1034	text = replace_autolinks(text, autolinks)
1035
1036	#############################################################
1037	# normalize spaces
1038	#############################################################
1039	strings = text.split('\n')
1040
1041	def parse_title(t, s): # out, lev, etags, tag, s):
1042	hlevel = str(len(t))
1043	out.extend(etags[::-1])
1044	out.append("<h%s>%s" % (hlevel, s))
1045	etags[:] = ["</h%s>%s" % (hlevel, pp)]
1046	lev = 0
1047	ltags[:] = []
1048	tlev[:] = []
1049	return (lev, 'h')
1050
1051	def parse_list(t, p, s, tag, lev, mtag, lineno):
1052	lent = len(t)
1053	if lent < lev: # current item level < previous item level
1054	while ltags[-1] > lent:
1055	ltags.pop()
1056	out.append(etags.pop())
1057	lev = lent
1058	tlev[lev:] = []
1059
1060	if lent > lev: # current item level > previous item level
1061	if lev == 0: # previous line is not a list (paragraph or title)
1062	out.extend(etags[::-1])
1063	ltags[:] = []
1064	tlev[:] = []
1065	etags[:] = []
1066	if pend and mtag == '.': # paragraph in a list:
1067	out.append(etags.pop())
1068	ltags.pop()
1069	for i in range(lent - lev):
1070	out.append('<' + tag + '>' + pp)
1071	etags.append('</' + tag + '>' + pp)
1072	lev += 1
1073	ltags.append(lev)
1074	tlev.append(tag)
1075	elif lent == lev:
1076	if tlev[-1] != tag:
1077	# type of list is changed (ul<=>ol):
1078	for i in range(ltags.count(lent)):
1079	ltags.pop()
1080	out.append(etags.pop())
1081	tlev[-1] = tag
1082	out.append('<' + tag + '>' + pp)
1083	etags.append('</' + tag + '>' + pp)
1084	ltags.append(lev)
1085	else:
1086	if ltags.count(lev) > 1:
1087	out.append(etags.pop())
1088	ltags.pop()
1089	mtag = 'l'
1090	out.append('<li>')
1091	etags.append('</li>' + pp)
1092	ltags.append(lev)
1093	if s[:1] == '-':
1094	(s, mtag, lineno) = parse_table_or_blockquote(s, mtag, lineno)
1095	if p and mtag == 'l':
1096	(lev, mtag, lineno) = parse_point(t, s, lev, '', lineno)
1097	else:
1098	out.append(s)
1099
1100	return (lev, mtag, lineno)
1101
1102	def parse_point(t, s, lev, mtag, lineno):
1103	""" paragraphs in lists """
1104	lent = len(t)
1105	if lent > lev:
1106	return parse_list(t, '.', s, 'ul', lev, mtag, lineno)
1107	elif lent < lev:
1108	while ltags[-1] > lent:
1109	ltags.pop()
1110	out.append(etags.pop())
1111	lev = lent
1112	tlev[lev:] = []
1113	mtag = ''
1114	elif lent == lev:
1115	if pend and mtag == '.':
1116	out.append(etags.pop())
1117	ltags.pop()
1118	if br and mtag in ('l', '.'):
1119	out.append(br)
1120	if s == META:
1121	mtag = ''
1122	else:
1123	mtag = '.'
1124	if s[:1] == '-':
1125	(s, mtag, lineno) = parse_table_or_blockquote(s, mtag, lineno)
1126	if mtag == '.':
1127	out.append(pbeg)
1128	if pend:
1129	etags.append(pend)
1130	ltags.append(lev)
1131	out.append(s)
1132	return (lev, mtag, lineno)
1133
1134	def parse_table_or_blockquote(s, mtag, lineno):
1135	# check next line. If next line :
1136	# - is empty -> this is an <hr /> tag
1137	# - consists '\|' -> table
1138	# - consists other characters -> blockquote
1139	if (lineno + 1 >= strings_len or
1140	not (s.count('-') == len(s) and len(s) > 3)):
1141	return (s, mtag, lineno)
1142
1143	lineno += 1
1144	s = strings[lineno].strip()
1145	if s:
1146	if '\|' in s:
1147	# table
1148	tout = []
1149	thead = []
1150	tbody = []
1151	rownum = 0
1152	t_id = ''
1153	t_cls = ''
1154
1155	# parse table:
1156	while lineno < strings_len:
1157	s = strings[lineno].strip()
1158	if s[:1] == '=':
1159	# header or footer
1160	if s.count('=') == len(s) and len(s) > 3:
1161	if not thead: # if thead list is empty:
1162	thead = tout
1163	else:
1164	tbody.extend(tout)
1165	tout = []
1166	rownum = 0
1167	lineno += 1
1168	continue
1169
1170	m = regex_tq.match(s)
1171	if m:
1172	t_cls = m.group('c') or ''
1173	t_id = m.group('p') or ''
1174	break
1175
1176	if rownum % 2:
1177	tr = '<tr class="even">'
1178	else:
1179	tr = '<tr class="first">' if rownum == 0 else '<tr>'
1180	tout.append(tr + ''.join(['<td%s>%s</td>' % (
1181	' class="num"'
1182	if regex_num.match(f) else '',
1183	f.strip()
1184	) for f in s.split('\|')]) + '</tr>' + pp)
1185	rownum += 1
1186	lineno += 1
1187
1188	t_cls = ' class="%s%s"' % (class_prefix, t_cls) \
1189	if t_cls and t_cls != 'id' else ''
1190	t_id = ' id="%s%s"' % (id_prefix, t_id) if t_id else ''
1191	s = ''
1192	if thead:
1193	s += '<thead>' + pp + ''.join([l for l in thead]) + '</thead>' + pp
1194	if not tbody: # tbody strings are in tout list
1195	tbody = tout
1196	tout = []
1197	if tbody: # if tbody list is not empty:
1198	s += '<tbody>' + pp + ''.join([l for l in tbody]) + '</tbody>' + pp
1199	if tout: # tfoot is not empty:
1200	s += '<tfoot>' + pp + ''.join([l for l in tout]) + '</tfoot>' + pp
1201	s = '<table%s%s>%s%s</table>%s' % (t_cls, t_id, pp, s, pp)
1202	mtag = 't'
1203	else:
1204	# parse blockquote:
1205	bq_begin = lineno
1206	t_mode = False # embedded table
1207	t_cls = ''
1208	t_id = ''
1209
1210	# search blockquote closing line:
1211	while lineno < strings_len:
1212	s = strings[lineno].strip()
1213	if not t_mode:
1214	m = regex_tq.match(s)
1215	if m:
1216	if (lineno + 1 == strings_len or
1217	'\|' not in strings[lineno + 1]):
1218	t_cls = m.group('c') or ''
1219	t_id = m.group('p') or ''
1220	break
1221
1222	if regex_bq_headline.match(s):
1223	if (lineno + 1 < strings_len and
1224	strings[lineno + 1].strip()):
1225	t_mode = True
1226	lineno += 1
1227	continue
1228	elif regex_tq.match(s):
1229	t_mode = False
1230	lineno += 1
1231	continue
1232
1233	lineno += 1
1234
1235	t_cls = ' class="%s%s"' % (class_prefix, t_cls) \
1236	if t_cls and t_cls != 'id' else ''
1237	t_id = ' id="%s%s"' % (id_prefix, t_id) \
1238	if t_id else ''
1239
1240	s = '<blockquote%s%s>%s</blockquote>%s' \
1241	% (t_cls,
1242	t_id,
1243	render('\n'.join(strings[bq_begin:lineno])), pp)
1244	mtag = 'q'
1245	else:
1246	s = '<hr />'
1247	lineno -= 1
1248	mtag = 'q'
1249	return (s, 'q', lineno)
1250
1251	if sep == 'p':
1252	pbeg = "<p>"
1253	pend = "</p>" + pp
1254	br = ''
1255	else:
1256	pbeg = pend = ''
1257	br = "<br />" + pp if sep == 'br' else ''
1258
1259	lev = 0 # nesting level of lists
1260	c0 = '' # first character of current line
1261	out = [] # list of processed lines
1262	etags = [] # trailing tags
1263	ltags = [] # level# correspondent to trailing tag
1264	tlev = [] # list of tags for each level ('ul' or 'ol')
1265	mtag = '' # marked tag (~last tag) ('l','.','h','p','t'). Used to set <br/>
1266	# and to avoid <p></p> around tables and blockquotes
1267	lineno = 0
1268	strings_len = len(strings)
1269	while lineno < strings_len:
1270	s0 = strings[lineno][:1]
1271	s = strings[lineno].strip()
1272	""" # + - . ---------------------
1273	## ++ -- .. ------- field \| field \| field <-title
1274	### +++ --- ... quote =====================
1275	#### ++++ ---- .... ------- field \| field \| field <-body
1276	##### +++++ ----- ..... ---------------------:class[id]
1277	"""
1278	pc0 = c0 # first character of previous line
1279	c0 = s[:1]
1280	if c0: # for non empty strings
1281	if c0 in "#+-.": # first character is one of: # + - .
1282	(t1, t2, p, ss) = regex_list.findall(s)[0]
1283	# t1 - tag ("###")
1284	# t2 - tag ("+++", "---", "...")
1285	# p - paragraph point ('.')->for "++." or "--."
1286	# ss - other part of string
1287	if t1 or t2:
1288	# headers and lists:
1289	if c0 == '#': # headers
1290	(lev, mtag) = parse_title(t1, ss)
1291	lineno += 1
1292	continue
1293	elif c0 == '+': # ordered list
1294	(lev, mtag, lineno) = parse_list(t2, p, ss, 'ol', lev, mtag, lineno)
1295	lineno += 1
1296	continue
1297	elif c0 == '-': # unordered list, table or blockquote
1298	if p or ss:
1299	(lev, mtag, lineno) = parse_list(t2, p, ss, 'ul', lev, mtag, lineno)
1300	lineno += 1
1301	continue
1302	else:
1303	(s, mtag, lineno) = parse_table_or_blockquote(s, mtag, lineno)
1304	elif lev > 0: # and c0 == '.' # paragraph in lists
1305	(lev, mtag, lineno) = parse_point(t2, ss, lev, mtag, lineno)
1306	lineno += 1
1307	continue
1308
1309	if lev == 0 and (mtag == 'q' or s == META):
1310	# new paragraph
1311	pc0 = ''
1312
1313	if pc0 == '' or (mtag != 'p' and s0 not in (' ', '\t')):
1314	# paragraph
1315	out.extend(etags[::-1])
1316	etags = []
1317	ltags = []
1318	tlev = []
1319	lev = 0
1320	if br and mtag == 'p':
1321	out.append(br)
1322	if mtag != 'q' and s != META:
1323	if pend:
1324	etags = [pend]
1325	out.append(pbeg)
1326	mtag = 'p'
1327	else:
1328	mtag = ''
1329	out.append(s)
1330	else:
1331	if lev > 0 and mtag == '.' and s == META:
1332	out.append(etags.pop())
1333	ltags.pop()
1334	out.append(s)
1335	mtag = ''
1336	else:
1337	out.append(' ' + s)
1338	lineno += 1
1339	out.extend(etags[::-1])
1340	text = ''.join(out)
1341
1342	#############################################################
1343	# do strong,em,del
1344	#############################################################
1345	text = regex_strong.sub('<strong>\g<t></strong>', text)
1346	text = regex_del.sub('<del>\g<t></del>', text)
1347	text = regex_em.sub('<em>\g<t></em>', text)
1348
1349	#############################################################
1350	# deal with images, videos, audios and links
1351	#############################################################
1352	def sub_media(m):
1353	t, a, k, p, w = m.group('t', 'a', 'k', 'p', 'w')
1354	if not k:
1355	return m.group(0)
1356	k = local_html_escape(k)
1357	t = t or ''
1358	style = 'width:%s' % w if w else ''
1359	title = ' title="%s"' % local_html_escape(a).replace(META, DISABLED_META) if a else ''
1360	p_begin = p_end = ''
1361	if p == 'center':
1362	p_begin = '<p style="text-align:center">'
1363	p_end = '</p>' + pp
1364	elif p == 'blockleft':
1365	p_begin = '<p style="text-align:left">'
1366	p_end = '</p>' + pp
1367	elif p == 'blockright':
1368	p_begin = '<p style="text-align:right">'
1369	p_end = '</p>' + pp
1370	elif p in ('left', 'right'):
1371	style = ('float:%s' % p) + (';%s' % style if style else '')
1372	if t and regex_auto.match(t):
1373	p_begin = p_begin + '<a href="%s">' % t
1374	p_end = '</a>' + p_end
1375	t = ''
1376	if style:
1377	style = ' style="%s"' % style
1378	if p in ('video', 'audio'):
1379	t = render(t, {}, {}, 'br', URL, environment, latex,
1380	autolinks, protolinks, class_prefix, id_prefix, pretty_print)
1381	return '<%(p)s controls="controls"%(title)s%(style)s><source src="%(k)s" />%(t)s</%(p)s>' \
1382	% dict(p=p, title=title, style=style, k=k, t=t)
1383	alt = ' alt="%s"' % local_html_escape(t).replace(META, DISABLED_META) if t else ''
1384	return '%(begin)s<img src="%(k)s"%(alt)s%(title)s%(style)s />%(end)s' \
1385	% dict(begin=p_begin, k=k, alt=alt, title=title, style=style, end=p_end)
1386
1387	def sub_link(m):
1388	t, a, k, p = m.group('t', 'a', 'k', 'p')
1389	if not k and not t:
1390	return m.group(0)
1391	t = t or ''
1392	a = local_html_escape(a) if a else ''
1393	if k:
1394	if '#' in k and ':' not in k.split('#')[0]:
1395	# wikipage, not external url
1396	k = k.replace('#', '#' + id_prefix)
1397	k = local_html_escape(k)
1398	title = ' title="%s"' % a.replace(META, DISABLED_META) if a else ''
1399	target = ' target="_blank"' if p == 'popup' else ''
1400	t = render(t, {}, {}, 'br', URL, environment, latex, None,
1401	None, class_prefix, id_prefix, pretty_print) if t else k
1402	return '<a href="%(k)s"%(title)s%(target)s>%(t)s</a>' \
1403	% dict(k=k, title=title, target=target, t=t)
1404	if t == 'NEWLINE' and not a:
1405	return '<br />' + pp
1406	return '<span class="anchor" id="%s">%s</span>' % (
1407	local_html_escape(id_prefix + t),
1408	render(a, {}, {}, 'br', URL,
1409	environment, latex, autolinks,
1410	protolinks, class_prefix,
1411	id_prefix, pretty_print))
1412
1413	parts = text.split(LINK)
1414	text = parts[0]
1415	for i, s in enumerate(links):
1416	if s is None:
1417	html = LINK
1418	else:
1419	html = regex_media_level2.sub(sub_media, s)
1420	if html == s:
1421	html = regex_link_level2.sub(sub_link, html)
1422	if html == s:
1423	# return unprocessed string as a signal of an error
1424	html = '[[%s]]' % s
1425	text += html + parts[i + 1]
1426
1427	#############################################################
1428	# process all code text
1429	#############################################################
1430	def expand_meta(m):
1431	code, b, p, s = segments.pop(0)
1432	if code is None or m.group() == DISABLED_META:
1433	return local_html_escape(s)
1434	if b in extra:
1435	if code[:1] == '\n':
1436	code = code[1:]
1437	if code[-1:] == '\n':
1438	code = code[:-1]
1439	if p:
1440	return str(extra[b](code, p))
1441	else:
1442	return str(extra[b](code))
1443	elif b == 'cite':
1444	return '[' + ','.join('<a href="#%s" class="%s">%s</a>' %
1445	(id_prefix + d, b, d) for d in local_html_escape(code).split(',')) + ']'
1446	elif b == 'latex':
1447	return LATEX % urllib_quote(code)
1448	elif b in html_colors:
1449	return '<span style="color: %s">%s</span>' \
1450	% (b, render(code, {}, {}, 'br', URL, environment, latex,
1451	autolinks, protolinks, class_prefix, id_prefix, pretty_print))
1452	elif b in ('c', 'color') and p:
1453	c = p.split(':')
1454	fg = 'color: %s;' % c[0] if c[0] else ''
1455	bg = 'background-color: %s;' % c[1] if len(c) > 1 and c[1] else ''
1456	return '<span style="%s%s">%s</span>' \
1457	% (fg, bg, render(code, {}, {}, 'br', URL, environment, latex,
1458	autolinks, protolinks, class_prefix, id_prefix, pretty_print))
1459	cls = ' class="%s%s"' % (class_prefix, b) if b and b != 'id' else ''
1460	id = ' id="%s%s"' % (id_prefix, local_html_escape(p)) if p else ''
1461	beg = (code[:1] == '\n')
1462	end = [None, -1][code[-1:] == '\n']
1463	if beg and end:
1464	return '<pre><code%s%s>%s</code></pre>%s' % (cls, id, local_html_escape(code[1:-1]), pp)
1465	return '<code%s%s>%s</code>' % (cls, id, local_html_escape(code[beg:end]))
1466
1467	text = regex_expand_meta.sub(expand_meta, text)
1468
1469	if environment:
1470	text = replace_components(text, environment)
1471
1472	return text.translate(ttab_out)
1473
1474
def markmin2html(text, extra={}, allowed={}, sep='p',
                 autolinks='default', protolinks='default',
                 class_prefix='', id_prefix='markmin_', pretty_print=False):
    """Convert markmin ``text`` to HTML by delegating to ``render``.

    This is a convenience front end: it forwards exactly the arguments
    listed in its signature and leaves every other ``render`` option at
    that function's own default.

    text         -- markmin source to convert
    extra        -- mapping passed through to ``render`` as its ``extra``
                    argument (looked up there by code-block class name)
    allowed      -- mapping passed through to ``render`` as ``allowed``
    sep          -- paragraph separator mode handed to ``render``
                    ('p' wraps paragraphs in <p>...</p>, 'br' emits <br />)
    autolinks    -- autolink handler, or 'default'
    protolinks   -- protocol-link handler, or 'default'
    class_prefix -- prefix prepended to generated class attributes
    id_prefix    -- prefix prepended to generated id attributes
    pretty_print -- when true, ``render`` appends newlines after block tags

    Returns whatever ``render`` returns for these arguments.
    """
    # The first four arguments are passed positionally, the rest by name.
    forwarded = dict(
        autolinks=autolinks,
        protolinks=protolinks,
        class_prefix=class_prefix,
        id_prefix=id_prefix,
        pretty_print=pretty_print,
    )
    return render(text, extra, allowed, sep, **forwarded)
1482
1483
def run_doctests():
    """Invoke ``doctest.testmod()`` with its default arguments.

    With no module argument, doctest examines the ``__main__`` module, so
    this exercises this file's doctests when the file is run as a script.
    The result object returned by ``testmod`` is discarded.
    """
    from doctest import testmod
    testmod()
1487
1488
if __name__ == '__main__':
    # Command-line entry point.
    #   -h                        print this module's docstring as an HTML page
    #   -t                        time markmin2html() on this module's docstring
    #   file.markmin [css|@href]  render a file, embedding or linking a stylesheet
    #   (no arguments)            print usage, then run the doctests
    import sys
    from textwrap import dedent

    # HTML page skeleton. The [1:] slice drops the newline that directly
    # follows the opening triple quote.
    html = dedent("""
        <!doctype html>
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
        <head>
        <meta http-equiv="content-type" content="text/html; charset=utf-8" />
        %(style)s
        <title>%(title)s</title>
        </head>
        <body>
        %(body)s
        </body>
        </html>""")[1:]

    if sys.argv[1:2] == ['-h']:
        # Built-in stylesheet used when rendering the module documentation.
        style = dedent("""
            <style>
            blockquote { background-color: #FFFAAE; padding: 7px; }
            table { border-collapse: collapse; }
            thead td { border-bottom: 1px solid; }
            tfoot td { border-top: 1px solid; }
            .tableclass1 { background-color: lime; }
            .tableclass1 thead { color: yellow; background-color: green; }
            .tableclass1 tfoot { color: yellow; background-color: green; }
            .tableclass1 .even td { background-color: #80FF7F; }
            .tableclass1 .first td {border-top: 1px solid; }

            td.num { text-align: right; }
            pre { background-color: #E0E0E0; padding: 5px; }
            </style>""")[1:]

        print(html % dict(title="Markmin markup language",
                          style=style,
                          body=markmin2html(__doc__, pretty_print=True)))
    elif sys.argv[1:2] == ['-t']:
        from timeit import Timer

        loops = 1000
        # Time a callable rather than a source string: a string statement is
        # executed in timeit's own namespace, where ``__doc__`` is timeit's
        # docstring, not this module's. The callable also avoids re-importing
        # the running script under the name ``markmin2html``.
        ts = Timer(lambda: markmin2html(__doc__))
        print('timeit "markmin2html(__doc__)":')
        t = min(ts.timeit(loops) for _ in range(3))
        # timeit() returns the total seconds for ``loops`` executions, so
        # milliseconds per loop is total * 1000 / loops.
        print("%s loops, best of 3: %.3f ms per loop" % (loops, t * 1000.0 / loops))
    elif len(sys.argv) > 1:
        # Read the markmin source; ``with`` closes the file even on error.
        with open(sys.argv[1], 'r') as fargv:
            markmin_text = fargv.read()

        # Optional second argument: "@href" links an external stylesheet,
        # anything else is a CSS file whose content is embedded in the page.
        if len(sys.argv) > 2:
            if sys.argv[2].startswith('@'):
                markmin_style = '<link rel="stylesheet" href="' + sys.argv[2][1:] + '"/>'
            else:
                with open(sys.argv[2], 'r') as fargv2:
                    markmin_style = "<style>\n" + fargv2.read() + "</style>"
        else:
            markmin_style = ""

        print(html % dict(title=sys.argv[1], style=markmin_style,
                          body=markmin2html(markmin_text, pretty_print=True)))

    else:
        # Plain "|" here: "\|" is not a valid escape sequence and would print
        # literal backslashes in the usage line.
        print("Usage: " + sys.argv[0] + " -h | -t | file.markmin [file.css|@path_to/css]")
        print("where: -h - print __doc__")
        print(" -t - timeit __doc__ (for testing purpuse only)")
        print(" file.markmin [file.css] - process file.markmin + built in file.css (optional)")
        print(" file.markmin [@path_to/css] - process file.markmin + link path_to/css (optional)")
        run_doctests()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: