source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/contrib/markmin/markmin2latex.py

main
Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago

Historial Limpio

  • Property mode set to 100755
File size: 11.8 KB
Line 
1#!/usr/bin/env python
2# created by Massimo Di Pierro
3# license MIT/BSD/GPL
4from __future__ import print_function
5import re
6import cgi
7import sys
8import doctest
9from optparse import OptionParser
10
11__all__ = ['render', 'markmin2latex']
12
13META = 'META'
14regex_newlines = re.compile('(\n\r)|(\r\n)')
15regex_dd = re.compile('\$\$(?P<latex>.*?)\$\$')
16regex_code = re.compile('(' + META + ')|(``(?P<t>.*?)``(:(?P<c>\w+))?)', re.S)
17regex_title = re.compile('^#{1} (?P<t>[^\n]+)', re.M)
18regex_maps = [
19    (re.compile('[ \t\r]+\n'), '\n'),
20    (re.compile('\*\*(?P<t>[^\s\*]+( +[^\s\*]+)*)\*\*'), '{\\\\bf \g<t>}'),
21    (re.compile("''(?P<t>[^\s']+( +[^\s']+)*)''"), '{\\\it \g<t>}'),
22    (re.compile('^#{5,6}\s*(?P<t>[^\n]+)', re.M), '\n\n{\\\\bf \g<t>}\n'),
23    (re.compile('^#{4}\s*(?P<t>[^\n]+)', re.M), '\n\n\\\\goodbreak\\\subsubsection{\g<t>}\n'),
24    (re.compile('^#{3}\s*(?P<t>[^\n]+)', re.M), '\n\n\\\\goodbreak\\\subsection{\g<t>}\n'),
25    (re.compile('^#{2}\s*(?P<t>[^\n]+)', re.M), '\n\n\\\\goodbreak\\\section{\g<t>}\n'),
26    (re.compile('^#{1}\s*(?P<t>[^\n]+)', re.M), ''),
27    (re.compile('^\- +(?P<t>.*)', re.M), '\\\\begin{itemize}\n\\\item \g<t>\n\\\end{itemize}'),
28    (re.compile('^\+ +(?P<t>.*)', re.M), '\\\\begin{itemize}\n\\\item \g<t>\n\\\end{itemize}'),
29    (re.compile('\\\\end\{itemize\}\s+\\\\begin\{itemize\}'), '\n'),
30    (re.compile('\n\s+\n'), '\n\n')]
31regex_table = re.compile('^\-{4,}\n(?P<t>.*?)\n\-{4,}(:(?P<c>\w+))?\n', re.M | re.S)
32
33regex_anchor = re.compile('\[\[(?P<t>\S+)\]\]')
34regex_bibitem = re.compile('\-\s*\[\[(?P<t>\S+)\]\]')
35regex_image_width = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +(?P<p>left|right|center) +(?P<w>\d+px)\]\]')
36regex_image = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +(?P<p>left|right|center)\]\]')
37# regex_video = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +video\]\]')
38# regex_audio = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +audio\]\]')
39regex_link = re.compile('\[\[(?P<t>[^\]]*?) +(?P<k>\S+)\]\]')
40regex_auto = re.compile('(?<!["\w])(?P<k>\w+://[\w\.\-\?&%\:]+)', re.M)
41regex_commas = re.compile('[ ]+(?P<t>[,;\.])')
42regex_noindent = re.compile('\n\n(?P<t>[a-z])')
43
44
45# regex_quote_left = re.compile('"(?=\w)')
46# regex_quote_right = re.compile('(?=\w\.)"')
47
def latex_escape(text, pound=True):
    """Escape characters in *text* that are special to LaTeX.

    A backslash becomes ``{\\textbackslash}``; each of ``^ _ & $ % { }`` is
    prefixed with a backslash; ``#`` is prefixed with a backslash only when
    *pound* is true.

    :param text: the string to escape.
    :param pound: when false, ``#`` is left untouched.
    :return: the escaped string.
    """
    backslash = '{\\textbackslash}'
    # Backslashes go first so the escapes added below are not re-escaped.
    text = text.replace('\\', backslash)
    for c in '^_&$%{}':
        text = text.replace(c, '\\' + c)
    # The loop above also escaped the braces of the placeholder inserted for
    # each backslash; restore them.
    text = text.replace('\\{\\textbackslash\\}', backslash)
    if pound:
        text = text.replace('#', '\\#')
    return text
55
56
def _strip_edge_newlines(code):
    """Return *code* without at most one leading and one trailing newline."""
    if code[:1] == '\n':
        code = code[1:]
    if code[-1:] == '\n':
        code = code[:-1]
    return code


def render(text,
           extra=None,
           allowed=None,
           sep='p',
           image_mapper=lambda x: x,
           chapters=False):
    """Convert markmin *text* into a LaTeX body.

    :param text: markmin source; it is coerced with ``str(text or '')``.
    :param extra: optional mapping from a code class name to a callable that
        receives the code string and returns the LaTeX to emit for it.
    :param allowed: optional mapping; when it has a ``'code'`` key, any code
        class not contained in ``allowed['code']`` is treated as no class.
    :param sep: not used by this function; kept for signature compatibility.
    :param image_mapper: callable applied to the reference of every
        ``[[alt ref left|right|center]]`` image; a falsy result drops the
        image from the output.
    :param chapters: when true, ``\\section`` is promoted to ``\\chapter`` and
        every ``subsection{`` to ``section{``.
    :return: a ``(body, title, authors)`` tuple: the LaTeX body, the text of
        the first ``# heading`` (``'Title'`` when there is none) and the list
        of escaped ``:author`` segments.
    """
    extra = {} if extra is None else extra
    allowed = {} if allowed is None else allowed

    #############################################################
    # replace all blocks marked with ``...``:class with META
    # store them into segments they will be treated as code
    #############################################################
    text = str(text or '')
    segments, i = [], 0
    text = regex_dd.sub(r'``\g<latex>``:latex ', text)
    text = regex_newlines.sub('\n', text)
    while True:
        item = regex_code.search(text, i)
        if not item:
            break
        if item.group() == META:
            # A literal META in the input: remember it so that it is put
            # back verbatim when the segments are re-inserted.
            segments.append((None, None))
        else:
            c = item.group('c') or ''
            if 'code' in allowed and c not in allowed['code']:
                c = ''
            code = item.group('t').replace('!`!', '`')
            segments.append((code, c))
        text = text[:item.start()] + META + text[item.end():]
        # Resume right after the placeholder that was just written.
        i = item.start() + len(META)

    #############################################################
    # do h1,h2,h3,h4,h5,h6,b,i,ol,ul and normalize spaces
    #############################################################

    title_match = regex_title.search(text)
    title = title_match.group('t') if title_match else 'Title'

    # '#' is escaped later, after the heading patterns have consumed it.
    text = latex_escape(text, pound=False)

    texts = text.split('## References', 1)
    text = regex_anchor.sub(r'\\label{\g<t>}', texts[0])
    if len(texts) == 2:
        text += '\n\\begin{thebibliography}{999}\n'
        text += regex_bibitem.sub(r'\n\\bibitem{\g<t>}', texts[1])
        text += '\n\\end{thebibliography}\n'

    text = '\n'.join(t.strip() for t in text.split('\n'))
    for regex, replacement in regex_maps:
        text = regex.sub(replacement, text)
    text = text.replace('#', '\\#')
    text = text.replace('`', "'")

    #############################################################
    # process tables and blockquotes
    #############################################################
    while True:
        item = regex_table.search(text)
        if not item:
            break
        content = item.group('t')
        if ' | ' in content:
            rows = content.replace('\n', '\\\\\n').replace(' | ', ' & ')
            # partition() instead of a two-way unpack of split(): a table
            # with a single row has no separator and must not raise.
            row0, has_body, row2 = rows.partition('\\\\\n')
            cols = row0.count(' & ') + 1
            cal = '{' + 'l' * cols + '}'
            tabular = ('\\begin{center}\n{\\begin{tabular}' + cal + '\\hline\n'
                       + row0 + '\\\\ \\hline\n')
            if has_body:
                tabular += row2 + ' \\\\ \\hline\n'
            tabular += '\\end{tabular}}\n\\end{center}'
            if row2.count('\n') > 20:
                tabular = '\\newpage\n' + tabular
            text = text[:item.start()] + tabular + text[item.end():]
        else:
            text = text[:item.start()] + '\\begin{quote}' + content + '\\end{quote}' + text[item.end():]

    #############################################################
    # deal with images, videos, audios and links
    #############################################################

    def image_sub(match):
        f = image_mapper(match.group('k'))
        if not f:
            # re.sub treats a None result as "insert nothing"; say so
            # explicitly with an empty string.
            return ''
        return '\n\\begin{center}\\includegraphics[width=8cm]{%s}\\end{center}\n' % f

    text = regex_image_width.sub(image_sub, text)
    text = regex_image.sub(image_sub, text)

    text = regex_link.sub(r'{\\footnotesize\\href{\g<k>}{\g<t>}}', text)
    text = regex_commas.sub(r'\g<t>', text)
    text = regex_noindent.sub(r'\n\\noindent \g<t>', text)

    # ## fix paths in images
    # latex_escape() turned '_' into '\_'; undo that inside image file names
    # by dropping the backslash, one occurrence per iteration.
    regex = re.compile(r'\\_\w*\.(eps|png|jpg|gif)')
    while True:
        match = regex.search(text)
        if not match:
            break
        text = text[:match.start()] + text[match.start() + 1:]
    # text = regex_quote_left.sub('``',text)
    # text = regex_quote_right.sub("''",text)

    if chapters:
        text = text.replace(r'\section*{', r'\chapter*{')
        text = text.replace(r'\section{', r'\chapter{')
        text = text.replace(r'subsection{', r'section{')

    #############################################################
    # process all code text
    #############################################################
    parts = text.split(META)
    text = parts[0]
    authors = []
    for i, (code, b) in enumerate(segments):
        if code is None:
            html = META
        elif b == 'hidden':
            html = ''
        elif b == 'author':
            author = latex_escape(code.strip())
            authors.append(author)
            html = ''
        elif b == 'inxx':
            html = '\\inxx{%s}' % latex_escape(code)
        elif b == 'cite':
            html = '~\\cite{%s}' % latex_escape(code.strip())
        elif b == 'ref':
            # The backslash must be doubled: '\r' in a normal string literal
            # is a carriage return, which would corrupt the \ref command.
            html = '~\\ref{%s}' % latex_escape(code.strip())
        elif b == 'latex':
            if '\n' in code:
                html = '\n\\begin{equation}\n%s\n\\end{equation}\n' % code.strip()
            else:
                html = '$%s$' % code.strip()
        elif b == 'latex_eqnarray':
            code = code.strip()
            # Align each row on its first '='.
            code = '\\\\'.join(x.replace('=', '&=&', 1) for x in code.split('\\\\'))
            html = '\n\\begin{eqnarray}\n%s\n\\end{eqnarray}\n' % code
        elif b.startswith('latex_'):
            key = b[6:]
            html = '\\begin{%s}%s\\end{%s}' % (key, code, key)
        elif b in extra:
            html = extra[b](_strip_edge_newlines(code))
        elif code[:1] == '\n' or code[-1:] == '\n':
            # Code that starts or ends with a newline is a block listing.
            code = _strip_edge_newlines(code)
            if code.startswith(('<', '{{', 'http')):
                html = '\\begin{lstlisting}[keywords={}]\n%s\n\\end{lstlisting}' % code
            else:
                html = '\\begin{lstlisting}\n%s\n\\end{lstlisting}' % code
        else:
            # Inline code.
            html = '{\\ft %s}' % latex_escape(code)
        try:
            text = text + html + parts[i + 1]
        except Exception:
            # Best effort: e.g. a placeholder was removed by one of the
            # rewrites above, so there are fewer parts than segments.
            text = text + '... WIKI PROCESSING ERROR ...'
            break
    text = text.replace(' ~\\cite', '~\\cite')
    return text, title, authors
229
230
# Default LaTeX document template. It is filled in with the ``%`` operator and
# expects the keys ``title``, ``author`` and ``body``.
WRAPPER = """
\\documentclass[12pt]{article}
\\usepackage{hyperref}
\\usepackage{listings}
\\usepackage{upquote}
\\usepackage{color}
\\usepackage{graphicx}
\\usepackage{grffile}
\\usepackage[utf8x]{inputenc}
\\usepackage{textgreek}
\\definecolor{lg}{rgb}{0.9,0.9,0.9}
\\definecolor{dg}{rgb}{0.3,0.3,0.3}
\\def\\ft{\\small\\tt}
\\lstset{
   basicstyle=\\footnotesize,
   breaklines=true, basicstyle=\\ttfamily\\color{black}\\footnotesize,
   keywordstyle=\\bf\\ttfamily,
   commentstyle=\\it\\ttfamily,
   stringstyle=\\color{dg}\\it\\ttfamily,
   numbers=left, numberstyle=\\color{dg}\\tiny, stepnumber=1, numbersep=5pt,
   backgroundcolor=\\color{lg}, tabsize=4, showspaces=false,
   showstringspaces=false
}
\\title{%(title)s}
\\author{%(author)s}
\\begin{document}
\\maketitle
\\tableofcontents
\\newpage
%(body)s
\\end{document}
"""
263
264
def markmin2latex(data, image_mapper=lambda x: x, extra=None,
                  wrapper=WRAPPER, chapters=False):
    """Render markmin *data* and embed the result in a LaTeX document.

    :param data: markmin source text.
    :param image_mapper: callable forwarded to ``render``.
    :param extra: optional mapping of code class names to callables,
        forwarded to ``render``; ``None`` means no extra handlers.
    :param wrapper: ``%``-format template with the keys ``title``,
        ``author`` and ``body``.
    :param chapters: forwarded to ``render``; the script entry point of this
        module passes it, so it must be accepted here.
    :return: the wrapper filled in with the rendered title, authors and body.
    """
    body, title, authors = render(data,
                                  extra=extra if extra is not None else {},
                                  image_mapper=image_mapper,
                                  chapters=chapters)
    # Authors are separated by \and; a line break inside one author entry
    # becomes a LaTeX line break followed by \footnotesize.
    author = '\n\\and\n'.join(a.replace('\n', '\\\\\n\\footnotesize ') for a in authors)
    return wrapper % dict(title=title, author=author, body=body)
270
271
if __name__ == '__main__':
    # Command-line entry point: convert the markmin files given as arguments
    # and print the resulting LaTeX on standard output.
    parser = OptionParser()
    parser.add_option("-i", "--info", dest="info",
                      help="markmin help")
    parser.add_option("-t", "--test", dest="test", action="store_true",
                      default=False)
    parser.add_option("-n", "--no_wrapper", dest="no_wrapper",
                      action="store_true", default=False)
    parser.add_option("-c", "--chapters", dest="chapters", action="store_true",
                      default=False, help="switch section for chapter")
    parser.add_option("-w", "--wrapper", dest="wrapper", default=False,
                      help="latex file containing header and footer")

    (options, args) = parser.parse_args()
    if options.info:
        import markmin2html

        # Print the converted help text; the result used to be discarded.
        print(markmin2latex(markmin2html.__doc__))
    elif options.test:
        doctest.testmod()
    else:
        if options.wrapper:
            # Text mode: the wrapper is %-formatted with str values, which
            # does not work on a bytes template.
            with open(options.wrapper, 'r') as fwrapper:
                wrapper = fwrapper.read()
        elif options.no_wrapper:
            wrapper = '%(body)s'
        else:
            wrapper = WRAPPER
        # Collect every input file; the list is created once, before the
        # loop, so earlier files are kept and an empty argument list is fine.
        content_data = []
        for f in args:
            with open(f, 'r') as fargs:
                content_data.append(fargs.read())
        content = '\n'.join(content_data)
        output = markmin2latex(content,
                               wrapper=wrapper,
                               chapters=options.chapters)
        print(output)
Note: See TracBrowser for help on using the repository browser.