1 | # -*- coding: latin-1 -*- |
---|
2 | |
---|
3 | "HTML Renderer for FPDF.py" |
---|
4 | |
---|
5 | __author__ = "Mariano Reingart <reingart@gmail.com>" |
---|
6 | __copyright__ = "Copyright (C) 2010 Mariano Reingart" |
---|
7 | __license__ = "LGPL 3.0" |
---|
8 | |
---|
9 | # Inspired by tuto5.py and several examples from fpdf.org, html2fpdf, etc. |
---|
10 | |
---|
11 | from .fpdf import FPDF |
---|
12 | from .py3k import PY3K, basestring, unicode, HTMLParser |
---|
13 | |
---|
14 | DEBUG = False |
---|
15 | |
---|
def px2mm(px):
    """Convert a pixel length to millimetres, at 72 pixels per inch.

    px -- an integer, or anything int() accepts.  Strings that int() rejects
        but that still denote a number of pixels ("12.5", " 100px ") are
        accepted too; like numeric input they are truncated to whole pixels.

    Returns the length in millimetres as a float.
    Raises ValueError when the value is not numeric at all.
    """
    try:
        pixels = int(px)
    except ValueError:
        # HTML authors frequently write width="100px" or fractional values.
        text = str(px).strip().lower()
        if text.endswith("px"):
            text = text[:-2]
        pixels = int(float(text))
    return pixels * 25.4 / 72.0
---|
18 | |
---|
def hex2dec(color = "#000000"):
    """Convert an HTML hexadecimal colour to an (r, g, b) tuple of ints.

    color -- "#rrggbb" or the shorthand "#rgb"; the leading "#" is optional
        and surrounding whitespace is ignored.

    Returns None when color is empty (or None), otherwise the three colour
    components in the range 0-255.
    Raises ValueError when the digits are not valid hexadecimal.
    """
    if not color:
        return None
    digits = color.strip()
    if digits.startswith("#"):
        digits = digits[1:]
    if len(digits) == 3:
        # shorthand notation: every digit stands for a doubled pair
        digits = "".join([ch * 2 for ch in digits])
    r = int(digits[0:2], 16)
    g = int(digits[2:4], 16)
    b = int(digits[4:6], 16)
    return r, g, b
---|
25 | |
---|
class HTML2FPDF(HTMLParser):
    """Render basic HTML to FPDF.

    The parser walks the markup fed to it and issues drawing calls on the
    pdf object given to the constructor.  Tags acted upon: b, i, u, strong,
    em, a, br, p, h1-h6, hr, pre, blockquote, ul, ol, li, font, center, img
    and the table family (table, thead, tbody, tfoot, tr, th, td).  Any other
    tag is ignored; its text content is still rendered.
    """

    def __init__(self, pdf, image_map=None):
        """Create a renderer that draws on pdf.

        pdf -- object receiving the drawing calls (set_font, write, cell...)
        image_map -- optional callable applied to every <img> "src" value
            before it is passed to pdf.image(); defaults to the identity.
        """
        HTMLParser.__init__(self)
        self.style = {}                 # enabled flags keyed by 'b', 'i', 'u'
        self.pre = False                # True while inside <pre>
        self.href = ''                  # target of the enclosing <a>
        self.align = ''                 # alignment set by p / h* / center
        self.page_links = {}
        self.font = None
        self.font_stack = []            # (face, size, color) saved by <font>
        self.pdf = pdf
        self.image_map = image_map or (lambda src: src)
        self.r = self.g = self.b = 0    # current text color components
        self.indent = 0                 # list nesting depth
        self.bullet = []                # per level: bullet text or counter
        self.set_font("times", 12)
        self.font_face = "times"        # initialize font
        self.color = 0                  # initialize font color
        self.table = None               # table attributes
        self.table_col_width = None     # column (header) widths
        self.table_col_index = None     # current column index
        self.td = None                  # cell attributes
        self.th = False                 # header enabled
        self.tr = None
        self.theader = None             # table header cells
        self.tfooter = None             # table footer cells
        self.thead = None
        self.tfoot = None
        self.theader_out = self.tfooter_out = False
        self.hsize = dict(h1=2, h2=1.5, h3=1.17, h4=1, h5=0.83, h6=0.67)

    def _lower_keys(self, attrs):
        "Return a copy of attrs whose attribute names are lower-cased"
        return dict((k.lower(), v) for k, v in attrs.items())

    def width2mm(self, length):
        """Convert a width attribute value (string) to millimetres.

        A value ending in '%' is relative to the printable page width, scaled
        by the table width when that is a percentage as well.  Any other
        value is parsed as an integer and divided by 6.
        """
        if length[-1]=='%':
            total = self.pdf.w - self.pdf.r_margin - self.pdf.l_margin
            if self.table['width'][-1]=='%':
                total *= int(self.table['width'][:-1])/100.0
            # NOTE(review): the divisor is 101 rather than 100; presumably it
            # leaves slack so that columns adding up to 100% stay inside the
            # margins -- confirm before changing, it affects every layout.
            return int(length[:-1]) * total / 101.0
        else:
            return int(length) / 6.0

    def handle_data(self, txt):
        """Render a run of character data according to the parser state.

        Inside a table cell the text is drawn with pdf.cell() (or queued when
        it belongs to thead/tfoot); other text inside a table is dropped;
        aligned text is drawn as a full-width cell; everything else is
        written inline, as a hyperlink when inside <a href>.

        Raises RuntimeError when a cell has neither its own width nor a
        column width taken from the header row.
        """
        if self.td is not None: # drawing a table?
            if 'width' not in self.td and 'colspan' not in self.td:
                try:
                    widths = [self.table_col_width[self.table_col_index]]
                except IndexError:
                    raise RuntimeError("Table column/cell width not specified, unable to continue")
            elif 'colspan' in self.td:
                first = self.table_col_index
                colspan = int(self.td['colspan'])
                widths = self.table_col_width[first:first+colspan]
            else:
                widths = [self.td.get('width','240')]
            w = sum([self.width2mm(length) for length in widths])
            h = int(self.td.get('height', 0)) // 4 or self.h*1.30
            self.table_h = h
            border = int(self.table.get('border', 0))
            if not self.th:
                align = self.td.get('align', 'L')[0].upper()
                border = border and 'LR'
            else:
                self.set_style('B',True)
                border = border or 'B'
                align = self.td.get('align', 'C')[0].upper()
            # a cell outside any <tr> has no row attributes to inherit
            row = self.tr or {}
            bgcolor = hex2dec(self.td.get('bgcolor', row.get('bgcolor', '')))
            # parsing table header/footer (drawn later):
            if self.thead is not None:
                self.theader.append(((w,h,txt,border,0,align), bgcolor))
            if self.tfoot is not None:
                self.tfooter.append(((w,h,txt,border,0,align), bgcolor))
            # check if reached end of page, add table footer and header:
            height = h + (self.tfooter and self.tfooter[0][0][1] or 0)
            if self.pdf.y+height>self.pdf.page_break_trigger and not self.th:
                self.output_table_footer()
                self.pdf.add_page(same = True)
                self.theader_out = self.tfooter_out = False
            if self.tfoot is None and self.thead is None:
                if not self.theader_out:
                    self.output_table_header()
                self.box_shadow(w, h, bgcolor)
                if DEBUG: print("td cell", self.pdf.x, w, txt, "*")
                self.pdf.cell(w,h,txt,border,0,align)
        elif self.table is not None:
            # ignore anything else than td inside a table
            pass
        elif self.align:
            if DEBUG: print("cell", txt, "*")
            self.pdf.cell(0,self.h,txt,0,1,self.align[0].upper(), self.href)
        else:
            txt = txt.replace("\n"," ")
            if self.href:
                self.put_link(self.href,txt)
            else:
                if DEBUG: print("write", txt, "*")
                self.pdf.write(self.h,txt)

    def box_shadow(self, w, h, bgcolor):
        """Fill a w x h rectangle at the current position with bgcolor.

        Nothing is drawn when bgcolor is empty; the fill color attribute of
        the pdf object is put back afterwards.
        """
        if DEBUG: print("box_shadow", w, h, bgcolor)
        if bgcolor:
            fill_color = self.pdf.fill_color
            self.pdf.set_fill_color(*bgcolor)
            self.pdf.rect(self.pdf.x, self.pdf.y, w, h, 'F')
            self.pdf.fill_color = fill_color

    def output_table_header(self):
        "Draw the queued <thead> cells in bold and mark the header as output"
        if self.theader:
            # remember the bold flag itself (self.b is the blue component)
            bold = self.style.get('b')
            self.pdf.set_x(self.table_offset)
            self.set_style('B',True)
            for cell, bgcolor in self.theader:
                self.box_shadow(cell[0], cell[1], bgcolor)
                self.pdf.cell(*cell)
            self.set_style('B',bold)
            self.pdf.ln(self.theader[0][0][1])
            self.pdf.set_x(self.table_offset)
        self.theader_out = True

    def output_table_footer(self):
        """Draw the queued <tfoot> cells and mark the footer as output.

        When the table has a border, a closing separator line is drawn too.
        """
        if self.tfooter:
            x = self.pdf.x
            self.pdf.set_x(self.table_offset)
            #TODO: self.output_table_sep()
            for cell, bgcolor in self.tfooter:
                self.box_shadow(cell[0], cell[1], bgcolor)
                self.pdf.cell(*cell)
            self.pdf.ln(self.tfooter[0][0][1])
            self.pdf.set_x(x)
        if int(self.table.get('border', 0)):
            self.output_table_sep()
        self.tfooter_out = True

    def output_table_sep(self):
        "Draw a horizontal line across all table columns at the current y"
        self.pdf.set_x(self.table_offset)
        x1 = self.pdf.x
        y1 = self.pdf.y
        w = sum([self.width2mm(length) for length in self.table_col_width])
        self.pdf.line(x1,y1,x1+w,y1)

    def handle_starttag(self, tag, attrs):
        "Update the rendering state for an opening tag"
        attrs = dict(attrs)
        if DEBUG: print("STARTTAG", tag, attrs)
        # <strong> / <em> are aliases, mirroring what handle_endtag does
        if tag=='strong':
            tag = 'b'
        elif tag=='em':
            tag = 'i'
        if tag in ('b', 'i', 'u'):
            self.set_style(tag, True)
        if tag=='a':
            # named anchors (<a name="...">) carry no href
            self.href = attrs.get('href') or ''
        if tag=='br':
            self.pdf.ln(5)
        if tag=='p':
            self.pdf.ln(5)
            if attrs:
                self.align = attrs.get('align')
        if tag in self.hsize:
            k = self.hsize[tag]
            self.pdf.ln(5*k)
            self.pdf.set_text_color(150,0,0)
            self.pdf.set_font_size(12 * k)
            if attrs: self.align = attrs.get('align')
        if tag=='hr':
            self.put_line()
        if tag=='pre':
            self.pdf.set_font('Courier','',11)
            self.pdf.set_font_size(11)
            self.set_style('B',False)
            self.set_style('I',False)
            self.pre = True
        if tag=='blockquote':
            self.set_text_color(100,0,45)
            self.pdf.ln(3)
        if tag=='ul':
            self.indent+=1
            self.bullet.append('\x95')
        if tag=='ol':
            self.indent+=1
            self.bullet.append(0)
        if tag=='li':
            self.pdf.ln(self.h+2)
            self.pdf.set_text_color(190,0,0)
            if self.bullet:
                bullet = self.bullet[self.indent-1]
                if not isinstance(bullet, basestring):
                    # ordered list: advance and remember the item counter
                    bullet += 1
                    self.bullet[self.indent-1] = bullet
                    bullet = "%s. " % bullet
            else:
                # <li> outside of any <ul>/<ol>: draw it as a plain bullet
                bullet = '\x95'
            self.pdf.write(self.h,'%s%s ' % (' '*5*self.indent, bullet))
            self.set_text_color()
        if tag=='font':
            # save previous font state; the color is read from r/g/b because
            # that is what set_text_color() re-applies later on
            self.font_stack.append((self.font_face, self.font_size,
                                    (self.r, self.g, self.b)))
            if attrs.get('color'):
                color = hex2dec(attrs['color'])
                self.set_text_color(*color)
                self.color = color
            if attrs.get('face'):
                face = attrs.get('face').lower()
                try:
                    self.pdf.set_font(face)
                    self.font_face = face
                except RuntimeError:
                    pass # font not found, ignore
            if attrs.get('size'):
                size = int(attrs.get('size'))
                self.pdf.set_font(self.font_face, size=size)
                self.font_size = size
        if tag=='table':
            self.table = self._lower_keys(attrs)
            if not self.table.get('width'):
                self.table['width'] = '100%'
            width = self.table['width']
            if width[-1]=='%':
                w = self.pdf.w - self.pdf.r_margin - self.pdf.l_margin
                w *= int(width[:-1])/100.0
                self.table_offset = (self.pdf.w-w)/2.0
            else:
                # absolute width: start at the left margin instead of leaving
                # table_offset undefined (or stale from a previous table)
                self.table_offset = self.pdf.l_margin
            self.table_col_width = []
            self.theader_out = self.tfooter_out = False
            self.theader = []
            self.tfooter = []
            self.thead = None
            self.tfoot = None
            self.table_h = 0
            self.pdf.ln()
        if tag=='tr':
            self.tr = self._lower_keys(attrs)
            self.table_col_index = 0
            self.pdf.set_x(self.table_offset)
        if tag=='td':
            self.td = self._lower_keys(attrs)
        if tag=='th':
            self.td = self._lower_keys(attrs)
            self.th = True
            # header cells define the column widths used by later rows
            if 'width' in self.td:
                self.table_col_width.append(self.td['width'])
        if tag=='thead':
            self.thead = {}
        if tag=='tfoot':
            self.tfoot = {}
        if tag=='img':
            if 'src' in attrs:
                x = self.pdf.get_x()
                y = self.pdf.get_y()
                w = px2mm(attrs.get('width', 0))
                h = px2mm(attrs.get('height',0))
                if self.align and self.align[0].upper() == 'C':
                    # NOTE(review): this is not (page width - w) / 2, so the
                    # image ends up left of the page centre -- confirm intent
                    x = (self.pdf.w-x)/2.0 - w/2.0
                self.pdf.image(self.image_map(attrs['src']),
                               x, y, w, h, link=self.href)
                self.pdf.set_x(x+w)
                self.pdf.set_y(y+h)
        if tag=='center':
            self.align = 'Center'

    def handle_endtag(self, tag):
        "Update the rendering state for a closing tag"
        if DEBUG: print("ENDTAG", tag)
        if tag in self.hsize:
            # every heading level changed the font size on opening
            self.pdf.ln(6)
            self.set_font()
            self.set_style()
            self.align = None
        if tag=='pre':
            # go back to the tracked face and size, not a fixed Times 12
            self.pdf.set_font(self.font_face or 'Times','',self.font_size or 12)
            self.pdf.set_font_size(self.font_size or 12)
            self.pre=False
        if tag=='blockquote':
            self.set_text_color(0,0,0)
            self.pdf.ln(3)
        if tag=='strong':
            tag='b'
        if tag=='em':
            tag='i'
        if tag in ('b', 'i', 'u'):
            self.set_style(tag, False)
        if tag=='a':
            self.href=''
        if tag=='p':
            self.align=''
        if tag in ('ul', 'ol'):
            if self.bullet:     # ignore an unmatched closing tag
                self.indent-=1
                self.bullet.pop()
        if tag=='table':
            if not self.tfooter_out:
                self.output_table_footer()
            self.table = None
            self.th = False
            self.theader = None
            self.tfooter = None
            self.pdf.ln()
        if tag=='thead':
            self.thead = None
        if tag=='tfoot':
            self.tfoot = None
        if tag=='tbody':
            # draw a line separator between table bodies
            self.pdf.set_x(self.table_offset)
            self.output_table_sep()
        if tag=='tr':
            h = self.table_h
            if self.tfoot is None:
                self.pdf.ln(h)
            self.tr = None
        if tag=='td' or tag=='th':
            if self.th:
                if DEBUG: print("revert style")
                self.set_style('B', False) # revert style
            self.table_col_index += int(self.td.get('colspan','1'))
            self.td = None
            self.th = False
        if tag=='font':
            if self.font_stack:     # ignore an unmatched closing tag
                # recover last font state
                face, size, color = self.font_stack.pop()
                self.set_font(face, size)
                # set_font() re-applies the current r/g/b, so the saved color
                # has to be restored after it, not before
                self.set_text_color(*color)
                self.color = color
                self.font = None
        if tag=='center':
            self.align = None

    def set_font(self, face=None, size=None):
        """Select a font face and/or size and reset style and text color.

        face -- family name; the current face is kept when omitted
        size -- size in points; when given the line height self.h (in mm) is
            recomputed from it

        Bold, italic and underline are switched off and the tracked text
        color is applied again.
        """
        if face:
            self.font_face = face
        if size:
            self.font_size = size
            self.h = size / 72.0*25.4
            if DEBUG: print("H", self.h)
        self.pdf.set_font(self.font_face or 'times','',12)
        self.pdf.set_font_size(self.font_size or 12)
        self.set_style('u', False)
        self.set_style('b', False)
        self.set_style('i', False)
        self.set_text_color()

    def set_style(self, tag=None, enable=None):
        """Modify style and select corresponding font.

        tag -- 'b', 'i' or 'u' (any case); when omitted only the font is
            re-selected from the flags already stored
        enable -- truthy to switch the style on, falsy to switch it off
        """
        if tag:
            self.style[tag.lower()] = enable
        style = ''.join([s for s in ('b','i','u') if self.style.get(s)])
        if DEBUG: print("SET_FONT_STYLE", style)
        self.pdf.set_font('',style)

    def set_text_color(self, r=None, g=0, b=0):
        """Set the text color, or re-apply the tracked one.

        Called without arguments it applies the stored r/g/b components;
        otherwise it applies the given components and stores them.
        """
        if r is None:
            self.pdf.set_text_color(self.r,self.g,self.b)
        else:
            self.pdf.set_text_color(r, g, b)
            self.r = r
            self.g = g
            self.b = b

    def put_link(self, url, txt):
        """Put a hyperlink: txt in underlined blue, pointing to url.

        The text color and underline state in effect before the call are
        restored afterwards.
        """
        previous_color = (self.r, self.g, self.b)
        was_underlined = bool(self.style.get('u'))
        self.set_text_color(0,0,255)
        self.set_style('u', True)
        # NOTE(review): fixed line height of 5 instead of self.h, which is
        # what plain text uses -- confirm before changing
        self.pdf.write(5,txt,url)
        self.set_style('u', was_underlined)
        self.set_text_color(*previous_color)

    def put_line(self):
        "Draw a horizontal rule with a little vertical spacing around it"
        self.pdf.ln(2)
        # NOTE(review): the rule length is a fixed 187 units, it does not
        # follow the page width or margins
        self.pdf.line(self.pdf.get_x(),self.pdf.get_y(),self.pdf.get_x()+187,self.pdf.get_y())
        self.pdf.ln(3)
---|
395 | |
---|
class HTMLMixin(object):
    "Mixin adding HTML rendering (write_html) to a class used as the pdf"

    def write_html(self, text, image_map=None):
        """Parse HTML and convert it to PDF.

        text -- HTML markup to render on this object
        image_map -- optional callable forwarded to HTML2FPDF; it is applied
            to every <img> "src" value before the image is loaded
        """
        h2p = HTML2FPDF(self, image_map)
        try:
            # HTMLParser.unescape() was removed in Python 3.9
            from html import unescape
        except ImportError:
            unescape = h2p.unescape     # Python 2
        # NOTE(review): entities are decoded before parsing, so escaped
        # markup such as "&lt;b&gt;" is parsed as a real tag afterwards
        text = unescape(text) # To deal with HTML entities
        h2p.feed(text)
---|
402 | |
---|