1 | #!/usr/bin/env python |
---|
# created by Massimo Di Pierro
---|
3 | # license MIT/BSD/GPL |
---|
4 | from __future__ import print_function |
---|
5 | import re |
---|
6 | import cgi |
---|
7 | import sys |
---|
8 | import doctest |
---|
9 | from optparse import OptionParser |
---|
10 | |
---|
11 | __all__ = ['render', 'markmin2latex'] |
---|
12 | |
---|
# Placeholder that temporarily replaces every ``...`` segment while the
# surrounding text is converted; the segments are re-inserted at the end.
META = 'META'

# NOTE: every pattern/template that contains a regex-only escape (\s, \w, \g,
# \$, ...) is written as a raw string. In a normal string those are invalid
# escape sequences (SyntaxWarning on recent Python versions).

# Windows / old-Mac style line endings, normalized to '\n' by the renderer.
regex_newlines = re.compile('(\n\r)|(\r\n)')
# $$...$$ shorthand for a latex segment.
regex_dd = re.compile(r'\$\$(?P<latex>.*?)\$\$')
# Either an already present META marker or a ``text``:class segment.
regex_code = re.compile('(' + META + r')|(``(?P<t>.*?)``(:(?P<c>\w+))?)', re.S)
# First-level heading ("# Title"), used as the document title.
regex_title = re.compile('^#{1} (?P<t>[^\n]+)', re.M)
# Ordered (pattern, replacement-template) pairs applied one after the other.
regex_maps = [
    # trailing blanks before a newline
    (re.compile('[ \t\r]+\n'), '\n'),
    # **bold**
    (re.compile(r'\*\*(?P<t>[^\s\*]+( +[^\s\*]+)*)\*\*'), r'{\\bf \g<t>}'),
    # ''italic''
    (re.compile(r"''(?P<t>[^\s']+( +[^\s']+)*)''"), r'{\\it \g<t>}'),
    # headings, deepest first so that "####" is not consumed by "##"
    (re.compile(r'^#{5,6}\s*(?P<t>[^\n]+)', re.M), r'\n\n{\\bf \g<t>}\n'),
    (re.compile(r'^#{4}\s*(?P<t>[^\n]+)', re.M), r'\n\n\\goodbreak\\subsubsection{\g<t>}\n'),
    (re.compile(r'^#{3}\s*(?P<t>[^\n]+)', re.M), r'\n\n\\goodbreak\\subsection{\g<t>}\n'),
    (re.compile(r'^#{2}\s*(?P<t>[^\n]+)', re.M), r'\n\n\\goodbreak\\section{\g<t>}\n'),
    # the first-level heading is removed from the body
    (re.compile(r'^#{1}\s*(?P<t>[^\n]+)', re.M), ''),
    # "- item" and "+ item" each become a one-item itemize environment ...
    (re.compile(r'^\- +(?P<t>.*)', re.M), r'\\begin{itemize}\n\\item \g<t>\n\\end{itemize}'),
    (re.compile(r'^\+ +(?P<t>.*)', re.M), r'\\begin{itemize}\n\\item \g<t>\n\\end{itemize}'),
    # ... and adjacent itemize environments are then merged into one
    (re.compile(r'\\end\{itemize\}\s+\\begin\{itemize\}'), '\n'),
    # collapse whitespace-only gaps into a single blank line
    (re.compile(r'\n\s+\n'), '\n\n')]
# Block delimited by lines of four or more dashes, with an optional :class.
regex_table = re.compile(r'^\-{4,}\n(?P<t>.*?)\n\-{4,}(:(?P<c>\w+))?\n', re.M | re.S)

# [[name]] with no spaces inside
regex_anchor = re.compile(r'\[\[(?P<t>\S+)\]\]')
# "- [[name]]" list entry
regex_bibitem = re.compile(r'\-\s*\[\[(?P<t>\S+)\]\]')
# [[text target left|right|center NNNpx]]
regex_image_width = re.compile(r'\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +(?P<p>left|right|center) +(?P<w>\d+px)\]\]')
# [[text target left|right|center]]
regex_image = re.compile(r'\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +(?P<p>left|right|center)\]\]')
# regex_video = re.compile(r'\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +video\]\]')
# regex_audio = re.compile(r'\[\[(?P<t>[^\]]*?) +(?P<k>\S+) +audio\]\]')
# [[text target]]
regex_link = re.compile(r'\[\[(?P<t>[^\]]*?) +(?P<k>\S+)\]\]')
# scheme://... not preceded by a double quote or a word character
regex_auto = re.compile(r'(?<!["\w])(?P<k>\w+://[\w\.\-\?&%\:]+)', re.M)
# blanks in front of , ; or .
regex_commas = re.compile(r'[ ]+(?P<t>[,;\.])')
# lowercase letter that opens a paragraph
regex_noindent = re.compile('\n\n(?P<t>[a-z])')
---|
43 | |
---|
44 | |
---|
45 | # regex_quote_left = re.compile('"(?=\w)') |
---|
46 | # regex_quote_right = re.compile('(?=\w\.)"') |
---|
47 | |
---|
def latex_escape(text, pound=True):
    """Return ``text`` with LaTeX special characters escaped.

    A backslash becomes ``{\\textbackslash}``; each of ``^ _ & $ % { }`` is
    prefixed with a backslash; ``#`` is prefixed with a backslash only when
    ``pound`` is true.
    """
    # One replacement per source character, so the braces that belong to the
    # inserted {\textbackslash} are never escaped themselves.
    replacements = {'\\': '{\\textbackslash}'}
    for special in '^_&$%{}':
        replacements[special] = '\\' + special
    if pound:
        replacements['#'] = '\\#'
    return ''.join(replacements.get(ch, ch) for ch in text)
---|
55 | |
---|
56 | |
---|
def _strip_edge_newlines(code):
    """Drop at most one leading and one trailing newline from ``code``."""
    if code[:1] == '\n':
        code = code[1:]
    if code[-1:] == '\n':
        code = code[:-1]
    return code


def _render_segment(code, kind, extra):
    """Return the LaTeX replacement for one ``code``:kind segment.

    ``kind`` is the (possibly empty) class that followed the segment and
    ``extra`` maps custom class names to callables that receive the code.
    """
    if kind == 'hidden':
        return ''
    if kind == 'inxx':
        return '\\inxx{%s}' % latex_escape(code)
    if kind == 'cite':
        return '~\\cite{%s}' % latex_escape(code.strip())
    if kind == 'ref':
        # The backslash has to be escaped here: a bare "\r" inside a normal
        # string literal is a carriage return, not the start of "\ref".
        return '~\\ref{%s}' % latex_escape(code.strip())
    if kind == 'latex':
        # multi-line formulas are displayed, single-line ones stay inline
        if '\n' in code:
            return '\n\\begin{equation}\n%s\n\\end{equation}\n' % code.strip()
        return '$%s$' % code.strip()
    if kind == 'latex_eqnarray':
        # align every row on its first "="
        rows = code.strip().split('\\\\')
        body = '\\\\'.join(row.replace('=', '&=&', 1) for row in rows)
        return '\n\\begin{eqnarray}\n%s\n\\end{eqnarray}\n' % body
    if kind.startswith('latex_'):
        # ``...``:latex_NAME wraps the code in \begin{NAME}...\end{NAME}
        env = kind[6:]
        return '\\begin{%s}%s\\end{%s}' % (env, code, env)
    if kind in extra:
        return extra[kind](_strip_edge_newlines(code))
    # A segment that starts or ends with a newline is a code listing,
    # anything else is inline code.
    is_block = code[:1] == '\n' or code[-1:] == '\n'
    code = _strip_edge_newlines(code)
    if is_block:
        if code.startswith(('<', '{{', 'http')):
            # no keyword highlighting for markup, templates and URLs
            return '\\begin{lstlisting}[keywords={}]\n%s\n\\end{lstlisting}' % code
        return '\\begin{lstlisting}\n%s\n\\end{lstlisting}' % code
    return '{\\ft %s}' % latex_escape(code)


def render(text,
           extra=None,
           allowed=None,
           sep='p',
           image_mapper=lambda x: x,
           chapters=False):
    """Convert markmin ``text`` into a LaTeX body.

    Parameters:
        text: markmin source; a false value is treated as an empty string.
        extra: optional dict mapping a segment class name to a callable that
            receives the segment code and returns its LaTeX replacement.
        allowed: optional dict; when it has a 'code' key, segment classes
            not listed under that key are reset to '' (plain code).
        sep: accepted for signature compatibility; not used by this function.
        image_mapper: callable applied to the target of an image reference;
            a false result removes the image from the output.
        chapters: when true, sections become chapters and every deeper
            heading moves up one level.

    Returns:
        A ``(latex, title, authors)`` tuple: the converted body, the text of
        the first "# " heading (or 'Title' when there is none) and the list
        of escaped ``...``:author segments.
    """
    extra = {} if extra is None else extra
    allowed = {} if allowed is None else allowed

    #############################################################
    # replace all blocks marked with ``...``:class with META
    # store them into segments they will be treated as code
    #############################################################
    text = str(text or '')
    segments, pos = [], 0
    text = regex_dd.sub(r'``\g<latex>``:latex ', text)
    text = regex_newlines.sub('\n', text)
    while True:
        item = regex_code.search(text, pos)
        if not item:
            break
        if item.group() == META:
            # a literal META in the input: remember to put it back verbatim
            segments.append((None, None))
        else:
            kind = item.group('c') or ''
            if 'code' in allowed and kind not in allowed['code']:
                kind = ''
            code = item.group('t').replace('!`!', '`')
            segments.append((code, kind))
            text = text[:item.start()] + META + text[item.end():]
        # resume the search right after the marker that is now at item.start()
        pos = item.start() + len(META)

    #############################################################
    # do h1,h2,h3,h4,h5,h6,b,i,ol,ul and normalize spaces
    #############################################################
    title_match = regex_title.search(text)
    title = title_match.group('t') if title_match else 'Title'

    # '#' is escaped later because the heading regexes still need it
    text = latex_escape(text, pound=False)

    texts = text.split('## References', 1)
    text = regex_anchor.sub(r'\\label{\g<t>}', texts[0])
    if len(texts) == 2:
        text += '\n\\begin{thebibliography}{999}\n'
        text += regex_bibitem.sub(r'\n\\bibitem{\g<t>}', texts[1])
        text += '\n\\end{thebibliography}\n'

    text = '\n'.join(line.strip() for line in text.split('\n'))
    for regex, template in regex_maps:
        text = regex.sub(template, text)
    text = text.replace('#', '\\#')
    text = text.replace('`', "'")

    #############################################################
    # process tables and blockquotes
    #############################################################
    while True:
        item = regex_table.search(text)
        if not item:
            break
        content = item.group('t')
        if ' | ' in content:
            rows = content.replace('\n', '\\\\\n').replace(' | ', ' & ')
            # partition() instead of split()+unpack: a table with a single
            # row has no row separator and must not raise ValueError
            row0, has_more_rows, row2 = rows.partition('\\\\\n')
            cols = row0.count(' & ') + 1
            cal = '{' + 'l' * cols + '}'
            tabular = ('\\begin{center}\n{\\begin{tabular}' + cal + '\\hline\n' +
                       row0 + '\\\\ \\hline\n')
            if has_more_rows:
                tabular += row2 + ' \\\\ \\hline\n'
            tabular += '\\end{tabular}}\n\\end{center}'
            if row2.count('\n') > 20:
                tabular = '\\newpage\n' + tabular
            replacement = tabular
        else:
            replacement = '\\begin{quote}' + content + '\\end{quote}'
        text = text[:item.start()] + replacement + text[item.end():]

    #############################################################
    # deal with images, videos, audios and links
    #############################################################
    def include_image(match):
        path = image_mapper(match.group('k'))
        if not path:
            return ''
        return '\n\\begin{center}\\includegraphics[width=8cm]{%s}\\end{center}\n' % path

    text = regex_image_width.sub(include_image, text)
    text = regex_image.sub(include_image, text)

    text = regex_link.sub(r'{\\footnotesize\\href{\g<k>}{\g<t>}}', text)
    text = regex_commas.sub(r'\g<t>', text)
    text = regex_noindent.sub(r'\n\\noindent \g<t>', text)

    # ## fix paths in images
    # remove, one at a time, the backslash that latex_escape put in front of
    # an underscore belonging to an image file name
    regex = re.compile(r'\\_\w*\.(eps|png|jpg|gif)')
    while True:
        match = regex.search(text)
        if not match:
            break
        text = text[:match.start()] + text[match.start() + 1:]

    if chapters:
        text = text.replace(r'\section*{', r'\chapter*{')
        text = text.replace(r'\section{', r'\chapter{')
        text = text.replace(r'subsection{', r'section{')

    #############################################################
    # process all code text
    #############################################################
    parts = text.split(META)
    text = parts[0]
    authors = []
    for index, (code, kind) in enumerate(segments):
        if code is None:
            chunk = META
        elif kind == 'author':
            authors.append(latex_escape(code.strip()))
            chunk = ''
        else:
            chunk = _render_segment(code, kind, extra)
        try:
            text = text + chunk + parts[index + 1]
        except (IndexError, TypeError):
            # a marker was lost during the conversion, or an ``extra``
            # callable returned something that is not a string
            text = text + '... WIKI PROCESSING ERROR ...'
            break
    text = text.replace(' ~\\cite', '~\\cite')
    return text, title, authors
---|
229 | |
---|
230 | |
---|
# Default LaTeX document template. It is filled in with the "%" operator and
# expects the keys "title", "author" and "body". Written as a raw string so
# the LaTeX commands appear exactly as they are emitted.
WRAPPER = r"""
\documentclass[12pt]{article}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{upquote}
\usepackage{color}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage[utf8x]{inputenc}
\usepackage{textgreek}
\definecolor{lg}{rgb}{0.9,0.9,0.9}
\definecolor{dg}{rgb}{0.3,0.3,0.3}
\def\ft{\small\tt}
\lstset{
basicstyle=\footnotesize,
breaklines=true, basicstyle=\ttfamily\color{black}\footnotesize,
keywordstyle=\bf\ttfamily,
commentstyle=\it\ttfamily,
stringstyle=\color{dg}\it\ttfamily,
numbers=left, numberstyle=\color{dg}\tiny, stepnumber=1, numbersep=5pt,
backgroundcolor=\color{lg}, tabsize=4, showspaces=false,
showstringspaces=false
}
\title{%(title)s}
\author{%(author)s}
\begin{document}
\maketitle
\tableofcontents
\newpage
%(body)s
\end{document}
"""
---|
263 | |
---|
264 | |
---|
def markmin2latex(data, image_mapper=lambda x: x, extra=None,
                  wrapper=WRAPPER, chapters=False):
    """Render markmin ``data`` and embed the result in a LaTeX document.

    Parameters:
        data: markmin source text.
        image_mapper: callable forwarded to render() to translate the target
            of an image reference.
        extra: optional dict forwarded to render(); maps a segment class
            name to a callable producing its LaTeX replacement.
        wrapper: "%"-style template that receives the keys "title",
            "author" and "body".
        chapters: forwarded to render(); when true, sections are promoted to
            chapters. The command line entry point passes this keyword, so
            it has to be accepted here.

    Returns:
        ``wrapper`` filled in with the rendered title, authors and body.
    """
    body, title, authors = render(data,
                                  extra={} if extra is None else extra,
                                  image_mapper=image_mapper,
                                  chapters=chapters)
    # authors are separated by \and; extra lines of one author entry are
    # printed below the name in a smaller font
    author = '\n\\and\n'.join(a.replace('\n', '\\\\\n\\footnotesize ') for a in authors)
    return wrapper % dict(title=title, author=author, body=body)
---|
270 | |
---|
271 | |
---|
# Command line entry point: convert each file given as argument and print the
# resulting LaTeX to standard output.
if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option("-i", "--info", dest="info",
                      help="markmin help")
    parser.add_option("-t", "--test", dest="test", action="store_true",
                      default=False)
    parser.add_option("-n", "--no_wrapper", dest="no_wrapper",
                      action="store_true", default=False)
    parser.add_option("-c", "--chapters", dest="chapters", action="store_true",
                      default=False, help="switch section for chapter")
    parser.add_option("-w", "--wrapper", dest="wrapper", default=False,
                      help="latex file containing header and footer")

    (options, args) = parser.parse_args()
    if options.info:
        import markmin2html

        # the converted documentation used to be computed and then discarded
        print(markmin2latex(markmin2html.__doc__))
    elif options.test:
        doctest.testmod()
    else:
        if options.wrapper:
            # text mode: the template is combined with str values through
            # the "%" operator, which fails for bytes on Python 3
            with open(options.wrapper, 'r') as fwrapper:
                wrapper = fwrapper.read()
        elif options.no_wrapper:
            wrapper = '%(body)s'
        else:
            wrapper = WRAPPER
        # every input file is converted and printed on its own
        for f in args:
            with open(f, 'r') as fargs:
                content = fargs.read()
            output = markmin2latex(content,
                                   wrapper=wrapper,
                                   chapters=options.chapters)
            print(output)
---|