1 | from ._compat import PY2, text_type, long_type, JYTHON, IRONPYTHON, unichr |
---|
2 | |
---|
3 | import datetime |
---|
4 | from decimal import Decimal |
---|
5 | import re |
---|
6 | import time |
---|
7 | |
---|
8 | from .constants import FIELD_TYPE, FLAG |
---|
9 | from .charset import charset_by_id, charset_to_encoding |
---|
10 | |
---|
11 | |
---|
def escape_item(val, charset, mapping=None):
    """Escape a single Python value into its SQL literal form.

    The encoder is looked up by the exact type of *val* in *mapping*
    (the module-level ``encoders`` table when *mapping* is None).  When
    no encoder is registered for that type, the encoder registered for
    ``text_type`` is used instead; if that one is missing as well,
    TypeError is raised.
    """
    table = encoders if mapping is None else mapping
    handler = table.get(type(val))

    if not handler:
        # No encoder for this exact type: fall back to the text encoder.
        try:
            handler = table[text_type]
        except KeyError:
            raise TypeError("no default type converter defined")

    # Only the dict and sequence encoders take the charset argument.
    if handler in (escape_dict, escape_sequence):
        return handler(val, charset, table)
    return handler(val, table)
---|
29 | |
---|
def escape_dict(val, charset, mapping=None):
    """Return a new dict with the keys of *val* and every value escaped.

    Keys are copied untouched; each value goes through escape_item().
    """
    return dict(
        (key, escape_item(item, charset, mapping))
        for key, item in val.items()
    )
---|
36 | |
---|
def escape_sequence(val, charset, mapping=None):
    """Escape every element of *val* and return them as ``(a,b,c)``."""
    escaped = [escape_item(element, charset, mapping) for element in val]
    return "(" + ",".join(escaped) + ")"
---|
43 | |
---|
def escape_set(val, charset, mapping=None):
    """Escape every member of *val* and join them with commas.

    Unlike escape_sequence(), no surrounding parentheses are added.
    """
    parts = []
    for member in val:
        parts.append(escape_item(member, charset, mapping))
    return ','.join(parts)
---|
46 | |
---|
def escape_bool(value, mapping=None):
    """Render *value* as the decimal string of its integer form."""
    as_int = int(value)
    return str(as_int)
---|
49 | |
---|
def escape_object(value, mapping=None):
    """Render *value* with ``str()``; no quoting or escaping is applied."""
    rendered = str(value)
    return rendered
---|
52 | |
---|
def escape_int(value, mapping=None):
    """Render an integer as its ``str()`` representation."""
    digits = str(value)
    return digits
---|
55 | |
---|
def escape_float(value, mapping=None):
    """Render a float with up to 15 significant digits (``%.15g``)."""
    template = '%.15g'
    return template % value
---|
58 | |
---|
# Translation table used with ``translate()``: the index is a code point
# in 0-127 and the entry is its replacement text.  Every character maps
# to itself except the ones overridden below.
_escape_table = [unichr(x) for x in range(128)]
for _plain, _escaped in (
    (u'\0', u'\\0'),
    (u'\\', u'\\\\'),
    (u'\n', u'\\n'),
    (u'\r', u'\\r'),
    (u'\032', u'\\Z'),
    (u'"', u'\\"'),
    (u"'", u"\\'"),
):
    _escape_table[ord(_plain)] = _escaped
# Do not leave the loop temporaries behind in the module namespace.
del _plain, _escaped
---|
67 | |
---|
def _escape_unicode(value, mapping=None):
    """Escape *value* through ``_escape_table`` without adding quotes.

    Value should be unicode.
    """
    escaped = value.translate(_escape_table)
    return escaped
---|
74 | |
---|
if PY2:
    def escape_string(value, mapping=None):
        """escape_string escapes *value* but does not surround it with quotes.

        Value should be bytes or unicode.
        """
        if isinstance(value, unicode):
            return _escape_unicode(value)
        assert isinstance(value, (bytes, bytearray))
        # The backslash is replaced first; doing it later would double the
        # backslashes introduced by the replacements that follow.
        value = value.replace('\\', '\\\\')
        value = value.replace('\0', '\\0')
        value = value.replace('\n', '\\n')
        value = value.replace('\r', '\\r')
        value = value.replace('\032', '\\Z')
        value = value.replace("'", "\\'")
        value = value.replace('"', '\\"')
        return value

    def escape_bytes(value, mapping=None):
        """Escape *value* and wrap it as a ``_binary'...'`` literal."""
        assert isinstance(value, (bytes, bytearray))
        return b"_binary'%s'" % escape_string(value)
else:
    # Outside Python 2, escape_string is simply the unicode escaper.
    escape_string = _escape_unicode

    # On Python ~3.5, bytes.decode('ascii', 'surrogateescape') is slow.
    # (fixed in Python 3.6, http://bugs.python.org/issue24870)
    # Workaround is bytes.decode('latin1') then translate 0x80-0xff into
    # U+DC80-U+DCFF.
    # We can escape special chars and surrogateescape at once.
    _escape_bytes_table = _escape_table + [chr(i) for i in range(0xdc80, 0xdd00)]

    def escape_bytes(value, mapping=None):
        """Escape *value* and wrap it as a ``_binary'...'`` literal.

        The bytes are decoded as latin1 and translated through
        ``_escape_bytes_table`` in a single pass.
        """
        return "_binary'%s'" % value.decode('latin1').translate(_escape_bytes_table)
---|
107 | |
---|
108 | |
---|
def escape_unicode(value, mapping=None):
    """Escape a unicode *value* and wrap it in single quotes."""
    body = _escape_unicode(value)
    return u"'%s'" % body
---|
111 | |
---|
def escape_str(value, mapping=None):
    """Convert *value* with ``str()``, escape it and wrap it in single quotes."""
    text = str(value)
    escaped = escape_string(text, mapping)
    return "'%s'" % escaped
---|
114 | |
---|
def escape_None(value, mapping=None):
    """Return the SQL ``NULL`` keyword; both arguments are ignored."""
    null_literal = 'NULL'
    return null_literal
---|
117 | |
---|
def escape_timedelta(obj, mapping=None):
    """Format a timedelta as a quoted ``[-]HH:MM:SS[.ffffff]`` literal.

    Hours are not wrapped at 24: whole days are folded into the hour
    field.  The fractional part is emitted only when the microsecond
    component is non-zero.

    A timedelta stores a negative duration as negative ``days`` plus
    non-negative ``seconds`` and ``microseconds``, so formatting the
    fields separately would turn ``timedelta(minutes=-30)`` into
    ``-1:30:00``.  The magnitude is therefore computed first and the sign
    is written once in front of it.
    """
    total_us = (
        (int(obj.days) * 86400 + int(obj.seconds)) * 1000000
        + int(obj.microseconds)
    )
    sign = '-' if total_us < 0 else ''
    total_seconds, microseconds = divmod(abs(total_us), 1000000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    if microseconds:
        fmt = "'{0}{1:02d}:{2:02d}:{3:02d}.{4:06d}'"
    else:
        fmt = "'{0}{1:02d}:{2:02d}:{3:02d}'"
    return fmt.format(sign, hours, minutes, seconds, microseconds)
---|
127 | |
---|
def escape_time(obj, mapping=None):
    """Format a time as a quoted ``HH:MM:SS[.ffffff]`` literal.

    The fractional part is emitted only when ``obj.microsecond`` is
    non-zero.
    """
    if not obj.microsecond:
        return "'{0.hour:02}:{0.minute:02}:{0.second:02}'".format(obj)
    return "'{0.hour:02}:{0.minute:02}:{0.second:02}.{0.microsecond:06}'".format(obj)
---|
134 | |
---|
def escape_datetime(obj, mapping=None):
    """Format a datetime as a quoted ``YYYY-MM-DD HH:MM:SS[.ffffff]`` literal.

    The fractional part is emitted only when ``obj.microsecond`` is
    non-zero.
    """
    if not obj.microsecond:
        return "'{0.year:04}-{0.month:02}-{0.day:02} {0.hour:02}:{0.minute:02}:{0.second:02}'".format(obj)
    return "'{0.year:04}-{0.month:02}-{0.day:02} {0.hour:02}:{0.minute:02}:{0.second:02}.{0.microsecond:06}'".format(obj)
---|
141 | |
---|
def escape_date(obj, mapping=None):
    """Format a date as a quoted, zero-padded ``YYYY-MM-DD`` literal."""
    return "'{0.year:04}-{0.month:02}-{0.day:02}'".format(obj)
---|
145 | |
---|
def escape_struct_time(obj, mapping=None):
    """Format a ``time.struct_time`` like a datetime literal.

    Only the first six fields of *obj* are used; they are turned into a
    datetime and handed to escape_datetime().
    """
    as_datetime = datetime.datetime(*obj[:6])
    return escape_datetime(as_datetime)
---|
148 | |
---|
149 | def _convert_second_fraction(s): |
---|
150 | if not s: |
---|
151 | return 0 |
---|
152 | # Pad zeros to ensure the fraction length in microseconds |
---|
153 | s = s.ljust(6, '0') |
---|
154 | return int(s[:6]) |
---|
155 | |
---|
# Matches "YYYY-MM-DD HH:MM:SS[.ffffff]" with either "T" or a space between
# date and time.  The fraction separator is a literal dot ("\."); an
# unescaped "." would accept any character there and misread suffixes
# such as "+05" as a fraction of a second.
DATETIME_RE = re.compile(r"(\d{1,4})-(\d{1,2})-(\d{1,2})[T ](\d{1,2}):(\d{1,2}):(\d{1,2})(?:\.(\d{1,6}))?")
---|
157 | |
---|
158 | |
---|
def convert_datetime(obj):
    """Returns a DATETIME or TIMESTAMP column value as a datetime object:

      >>> convert_datetime('2007-02-25 23:06:20')
      datetime.datetime(2007, 2, 25, 23, 6, 20)
      >>> convert_datetime('2007-02-25T23:06:20')
      datetime.datetime(2007, 2, 25, 23, 6, 20)

    Values that do not match the datetime pattern, or whose fields do not
    form a valid datetime, are handed to convert_date() instead, which
    gives None for illegal input:

      >>> convert_datetime('2007-02-31T23:06:20') is None
      True
      >>> convert_datetime('0000-00-00 00:00:00') is None
      True

    """
    if not PY2 and isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    matched = DATETIME_RE.match(obj)
    if matched is None:
        return convert_date(obj)

    parts = list(matched.groups())
    try:
        # The last group is the optional fraction; turn it into microseconds.
        parts[-1] = _convert_second_fraction(parts[-1])
        fields = [int(part) for part in parts]
        return datetime.datetime(*fields)
    except ValueError:
        return convert_date(obj)
---|
188 | |
---|
# Matches "[-]HHH:MM:SS[.ffffff]".  The fraction separator is a literal dot
# ("\."); an unescaped "." would accept any character in that position.
TIMEDELTA_RE = re.compile(r"(-)?(\d{1,3}):(\d{1,2}):(\d{1,2})(?:\.(\d{1,6}))?")
---|
190 | |
---|
191 | |
---|
def convert_timedelta(obj):
    """Returns a TIME column as a timedelta object:

      >>> convert_timedelta('25:06:17')
      datetime.timedelta(1, 3977)
      >>> convert_timedelta('-25:06:17')
      datetime.timedelta(-2, 83177)

    Illegal values are returned as None:

      >>> convert_timedelta('random crap') is None
      True

    Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but
    can accept values as (+|-)DD HH:MM:SS. The latter format will not
    be parsed correctly by this function.
    """
    if not PY2 and isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    matched = TIMEDELTA_RE.match(obj)
    if matched is None:
        return None

    try:
        sign, hours, minutes, seconds, fraction = matched.groups()
        microseconds = _convert_second_fraction(fraction)
        magnitude = datetime.timedelta(
            hours=int(hours),
            minutes=int(minutes),
            seconds=int(seconds),
            microseconds=int(microseconds),
        )
        # A leading "-" was captured: flip the whole duration.
        return magnitude * (-1 if sign else 1)
    except ValueError:
        return None
---|
231 | |
---|
# Matches "HH:MM:SS[.ffffff]".  The fraction separator is a literal dot
# ("\."); an unescaped "." would accept any character in that position.
TIME_RE = re.compile(r"(\d{1,2}):(\d{1,2}):(\d{1,2})(?:\.(\d{1,6}))?")
---|
233 | |
---|
234 | |
---|
def convert_time(obj):
    """Returns a TIME column as a time object:

      >>> convert_time('15:06:17')
      datetime.time(15, 6, 17)

    Illegal values are returned as None:

      >>> convert_time('-25:06:17') is None
      True
      >>> convert_time('random crap') is None
      True

    Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but
    can accept values as (+|-)DD HH:MM:SS. The latter format will not
    be parsed correctly by this function.

    Also note that MySQL's TIME column corresponds more closely to
    Python's timedelta and not time. However if you want TIME columns
    to be treated as time-of-day and not a time offset, then you can
    set this function as the converter for FIELD_TYPE.TIME.
    """
    if not PY2 and isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    matched = TIME_RE.match(obj)
    if matched is None:
        return None

    try:
        hour, minute, second, fraction = matched.groups()
        microsecond = _convert_second_fraction(fraction)
        return datetime.time(
            hour=int(hour),
            minute=int(minute),
            second=int(second),
            microsecond=int(microsecond),
        )
    except ValueError:
        # Out-of-range fields (e.g. hour 25) are not a valid time of day.
        return None
---|
272 | |
---|
273 | |
---|
def convert_date(obj):
    """Returns a DATE column as a date object:

      >>> convert_date('2007-02-26')
      datetime.date(2007, 2, 26)

    Illegal values are returned as None:

      >>> convert_date('2007-02-31') is None
      True
      >>> convert_date('0000-00-00') is None
      True
      >>> convert_date('2007-02') is None
      True

    """
    if isinstance(obj, (bytes, bytearray)) and not PY2:
        obj = obj.decode('ascii')
    try:
        return datetime.date(*[int(x) for x in obj.split('-', 2)])
    except (ValueError, TypeError):
        # ValueError: a field is not a number or is out of range.
        # TypeError: fewer than three '-'-separated fields were supplied,
        # so datetime.date() was called with too few arguments.
        return None
---|
294 | |
---|
295 | |
---|
def convert_mysql_timestamp(timestamp):
    """Convert a MySQL TIMESTAMP to a datetime object.

    MySQL >= 4.1 returns TIMESTAMP in the same format as DATETIME:

      >>> convert_mysql_timestamp('2007-02-25 22:32:17')
      datetime.datetime(2007, 2, 25, 22, 32, 17)

    MySQL < 4.1 uses a big string of numbers:

      >>> convert_mysql_timestamp('20070225223217')
      datetime.datetime(2007, 2, 25, 22, 32, 17)

    Illegal values are returned as None:

      >>> convert_mysql_timestamp('2007-02-31 22:32:17') is None
      True
      >>> convert_mysql_timestamp('00000000000000') is None
      True
      >>> convert_mysql_timestamp('random crap') is None
      True

    """
    if isinstance(timestamp, (bytes, bytearray)) and not PY2:
        timestamp = timestamp.decode('ascii')
    # Slice rather than index so that values shorter than five characters
    # do not raise IndexError.
    if timestamp[4:5] == '-':
        return convert_datetime(timestamp)
    timestamp += "0" * (14 - len(timestamp))  # padding
    try:
        # The int() conversions sit inside the try block so that
        # non-numeric input gives None instead of an uncaught ValueError.
        year, month, day, hour, minute, second = (
            int(timestamp[:4]), int(timestamp[4:6]), int(timestamp[6:8]),
            int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14])
        )
        return datetime.datetime(year, month, day, hour, minute, second)
    except ValueError:
        return None
---|
329 | |
---|
def convert_set(s):
    """Split a comma-separated SET value into a Python set.

    The separator matches the input: ``b","`` for bytes-like input and
    ``","`` otherwise.
    """
    separator = b"," if isinstance(s, (bytes, bytearray)) else ","
    return set(s.split(separator))
---|
334 | |
---|
335 | |
---|
def through(x):
    """Identity converter: hand *x* back unchanged."""
    return x
---|
338 | |
---|
339 | |
---|
# def convert_bit(b):
#     b = "\x00" * (8 - len(b)) + b  # pad w/ zeroes
#     return struct.unpack(">Q", b)[0]
#
# The snippet above is right, but MySQLdb doesn't process bits,
# so we shouldn't either: BIT values are handed back unchanged.
convert_bit = through
---|
347 | |
---|
348 | |
---|
def convert_characters(connection, field, data):
    """Convert the raw *data* of a character column.

    The field's charset is resolved to an encoding first.  Then:

    * fields flagged FLAG.SET are decoded and split with convert_set();
    * fields flagged FLAG.BINARY are returned untouched;
    * otherwise *data* is decoded when ``connection.use_unicode`` is set,
      re-encoded to ``connection.encoding`` when the connection charset
      differs from the field charset, and returned as-is in any other case.
    """
    field_charset = charset_by_id(field.charsetnr).name
    encoding = charset_to_encoding(field_charset)

    flags = field.flags
    if flags & FLAG.SET:
        return convert_set(data.decode(encoding))
    if flags & FLAG.BINARY:
        return data

    if connection.use_unicode:
        return data.decode(encoding)
    if connection.charset != field_charset:
        # Transcode from the field's encoding to the connection's.
        decoded = data.decode(encoding)
        return decoded.encode(connection.encoding)
    return data
---|
363 | |
---|
# Maps a Python type to the function that renders a value of that type as
# an SQL literal.  escape_item() looks values up here by their exact type.
# NOTE(review): entry order matters if two keys are the same type
# (presumably int/long_type and str/text_type outside Python 2 -- confirm
# in _compat); in that case the later entry wins.
encoders = {
    bool: escape_bool,
    int: escape_int,
    long_type: escape_int,
    float: escape_float,
    str: escape_str,
    text_type: escape_unicode,
    tuple: escape_sequence,
    list: escape_sequence,
    set: escape_sequence,
    frozenset: escape_sequence,
    dict: escape_dict,
    bytearray: escape_bytes,
    type(None): escape_None,
    datetime.date: escape_date,
    datetime.datetime: escape_datetime,
    datetime.timedelta: escape_timedelta,
    datetime.time: escape_time,
    time.struct_time: escape_struct_time,
    Decimal: escape_object,
}

# bytes is registered as binary everywhere except plain Python 2
# (presumably because bytes is str there -- TODO confirm).
if not PY2 or JYTHON or IRONPYTHON:
    encoders[bytes] = escape_bytes
---|
388 | |
---|
# Maps a MySQL FIELD_TYPE code to the callable that converts a column
# value of that type into a Python object.  ``through`` leaves the value
# unchanged.
decoders = {
    FIELD_TYPE.BIT: convert_bit,
    # Numeric types.
    FIELD_TYPE.TINY: int,
    FIELD_TYPE.SHORT: int,
    FIELD_TYPE.LONG: int,
    FIELD_TYPE.FLOAT: float,
    FIELD_TYPE.DOUBLE: float,
    FIELD_TYPE.LONGLONG: int,
    FIELD_TYPE.INT24: int,
    FIELD_TYPE.YEAR: int,
    # Temporal types.
    FIELD_TYPE.TIMESTAMP: convert_mysql_timestamp,
    FIELD_TYPE.DATETIME: convert_datetime,
    FIELD_TYPE.TIME: convert_timedelta,
    FIELD_TYPE.DATE: convert_date,
    FIELD_TYPE.SET: convert_set,
    # Blob and string types are handed back as received.
    FIELD_TYPE.BLOB: through,
    FIELD_TYPE.TINY_BLOB: through,
    FIELD_TYPE.MEDIUM_BLOB: through,
    FIELD_TYPE.LONG_BLOB: through,
    FIELD_TYPE.STRING: through,
    FIELD_TYPE.VAR_STRING: through,
    FIELD_TYPE.VARCHAR: through,
    # Exact decimals.
    FIELD_TYPE.DECIMAL: Decimal,
    FIELD_TYPE.NEWDECIMAL: Decimal,
}
---|
414 | |
---|
415 | |
---|
# for MySQLdb compatibility: one table holding both directions (the
# decoders are layered over a copy of the encoders), plus the legacy
# Thing2Literal alias.
conversions = dict(encoders)
conversions.update(decoders)
Thing2Literal = escape_str
---|