Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/cache.py

main

Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago
Historial Limpio
Property mode set to `100755`
File size: 26.0 KB

Line
1	#!/usr/bin/env python
2	# -*- coding: utf-8 -*-
3
4	"""
5	| This file is part of the web2py Web Framework
6	| Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
7	| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
8
9	Basic caching classes and methods
10	---------------------------------
11
12	- Cache - The generic caching object interfacing with the others
13	- CacheInRam - providing caching in ram
14	- CacheOnDisk - provides caches on disk
15
16	Memcache is also available via a different module (see gluon.contrib.memcache)
17
18	When web2py is running on Google App Engine,
19	caching will be provided by the GAE memcache
20	(see gluon.contrib.gae_memcache)
21	"""
22	import time
23	import os
24	import gc
25	import sys
26	import logging
27	import re
28	import random
29	import hashlib
30	import datetime
31	import tempfile
32	from gluon import recfile
33	from collections import defaultdict
34	from collections import OrderedDict
35
36	try:
37	from gluon import settings
38	have_settings = True
39	except ImportError:
40	have_settings = False
41
42	from pydal.contrib import portalocker
43	from gluon._compat import pickle, thread, to_bytes, to_native, hashlib_md5
44
45	try:
46	import psutil
47	HAVE_PSUTIL = True
48	except ImportError:
49	HAVE_PSUTIL = False
50
51
def remove_oldest_entries(storage, percentage=90):
    """
    Drop entries from the front of `storage` while system memory use is high.

    Args:
        storage: mapping whose `popitem(last=False)` removes its first
            entry (the signature `collections.OrderedDict` provides)
        percentage: memory utilization threshold, in percent; entries are
            only removed while the measured utilization is above it
    """
    usage = psutil.virtual_memory().percent
    while True:
        # nothing left to evict, or utilization is no longer above the limit
        if not storage or not usage > percentage:
            return
        # evict the first (oldest inserted) entry, then collect garbage
        storage.popitem(last=False)
        gc.collect(1)
        # measure again; give up as soon as evicting stops freeing memory
        measured = psutil.virtual_memory().percent
        if measured >= usage:
            return
        usage = measured
68
69
# Logger shared by the cache classes in this module.
logger = logging.getLogger("web2py.cache")

# Names exported by `from <module> import *`.
__all__ = ['Cache', 'lazy_cache']


# Default value, in seconds, of the `time_expire` argument used throughout
# this module.
DEFAULT_TIME_EXPIRE = 300
76
77
class CacheAbstract(object):
    """
    Base class describing the API every cache backend exposes.

    It carries no behaviour of its own beyond the `_clear` helper: every
    public method raises `NotImplementedError`. Use one of the derived
    classes, CacheInRam or CacheOnDisk, instead.

    Note:
        Michele says: there are signatures inside gdbm files that are used
        directly by the python gdbm adapter that often are lagging behind in
        the detection code in python part.
        On every occasion that a gdbm store is probed by the python adapter,
        the probe fails, because gdbm file version is newer.
        Using gdbm directly from C would work, because there is backward
        compatibility, but not from python!
        The .shelve file is discarded and a new one created (with new
        signature) and it works until it is probed again...
        The possible consequences are memory leaks and broken sessions.
    """

    # key under which hit/miss statistics are stored
    cache_stats_name = 'web2py_cache_statistics'
    # memory utilization threshold in percent; None leaves it unset
    max_ram_utilization = None

    def __init__(self, request=None):
        """Set up the cache object.

        Args:
            request: the global request object
        """
        raise NotImplementedError

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        """
        Return the cached value for `key`, computing it with `f` if needed.

        When an entry exists for `key` and has not expired it is returned;
        otherwise `f` is called, its output is stored under `key`, and that
        output is returned.

        Args:
            key(str): the key of the object to be stored or retrieved
            f(function): the function whose output is to be cached.

                If `f` is `None` the cache is cleared.
            time_expire(int): expiration of the cache in seconds.

                It is compared against the time the requested object was
                last saved in cache and does not affect future requests.
                A value of 0 or a negative value forces a refresh.
        """
        raise NotImplementedError

    def clear(self, regex=None):
        """
        Remove cache entries, optionally restricted by a regular expression.

        Args:
            regex: when given, only keys matching it are removed; when
                omitted, every entry is removed.
        """
        raise NotImplementedError

    def increment(self, key, value=1):
        """
        Add `value` to the entry cached under `key`.

        Args:
            key(str): key of the cached object to be incremented
            value(int): amount to add (defaults to 1, can be negative)
        """
        raise NotImplementedError

    def _clear(self, storage, regex):
        """
        Helper for `clear`: delete from `storage` every key matching `regex`.

        Keys are converted with `str()` before matching, and matching uses
        `match`, so the pattern is anchored at the start of the key.
        """
        pattern = re.compile(regex)
        doomed = [name for name in list(storage.keys())
                  if pattern.match(str(name))]
        for name in doomed:
            del storage[name]
162
163
class CacheInRam(CacheAbstract):
    """
    Ram based caching

    This is implemented as global (per process, shared by all threads)
    dictionary.
    A mutex-lock mechanism avoids conflicts.
    """

    # One lock, one storage registry and one statistics registry for the
    # whole process; the registries are keyed by application name.
    locker = thread.allocate_lock()
    meta_storage = {}
    stats = {}

    def __init__(self, request=None):
        """
        Args:
            request: the global request object; its `application` attribute
                selects the per-application storage ('' when no request)
        """
        self.initialized = False
        self.request = request
        self.storage = OrderedDict() if HAVE_PSUTIL else {}
        self.app = request.application if request else ''

    def initialize(self):
        """Bind `self.storage` to the shared per-application storage (once)."""
        if self.initialized:
            return
        self.initialized = True
        # `with` guarantees the shared lock is released even on error.
        with self.locker:
            if self.app not in self.meta_storage:
                self.storage = self.meta_storage[self.app] = \
                    OrderedDict() if HAVE_PSUTIL else {}
                self.stats[self.app] = {'hit_total': 0, 'misses': 0}
            else:
                self.storage = self.meta_storage[self.app]

    def clear(self, regex=None):
        """
        Remove all entries, or only those whose key matches `regex`.

        Args:
            regex: optional regular expression; an invalid pattern raises
                from `re.compile` and the lock is still released.
        """
        self.initialize()
        with self.locker:
            storage = self.storage
            if regex is None:
                storage.clear()
            else:
                self._clear(storage, regex)

            if self.app not in self.stats:
                self.stats[self.app] = {'hit_total': 0, 'misses': 0}

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 destroyer=None):
        """
        Attention! cache.ram does not copy the cached object.
        It just stores a reference to it. Turns out the deepcopying the object
        has some problems:

        - would break backward compatibility
        - would be limiting because people may want to cache live objects
        - would work unless we deepcopy on storage and retrieval which would
          make things slow.

        Anyway. You can deepcopy explicitly in the function generating the
        value to be cached.

        Args:
            key: the key of the object to be stored or retrieved
            f: callable producing the value; `None` removes the entry
            time_expire: maximum age in seconds of a reusable entry;
                `None` means an existing entry is always reused
            destroyer: optional callable invoked with the old value when an
                entry is removed or found expired

        Returns:
            The cached or freshly computed value, or `None` when `f` is None.
        """
        self.initialize()

        dt = time_expire
        now = time.time()

        # The lock is held only around storage access, never around f().
        # Using `with` means a raising `destroyer` cannot leave it locked.
        with self.locker:
            item = self.storage.get(key, None)
            if item and f is None:
                del self.storage[key]
                if destroyer:
                    destroyer(item[1])
            # counts every call, whether it ends up a hit or a miss
            self.stats[self.app]['hit_total'] += 1

        if f is None:
            return None
        if item and (dt is None or item[0] > now - dt):
            return item[1]
        elif item and (item[0] < now - dt) and destroyer:
            destroyer(item[1])
        value = f()

        with self.locker:
            self.storage[key] = (now, value)
            self.stats[self.app]['misses'] += 1
            # Roughly one store in ten checks memory pressure and evicts.
            if HAVE_PSUTIL and self.max_ram_utilization is not None and random.random() < 0.10:
                remove_oldest_entries(self.storage, percentage=self.max_ram_utilization)
        return value

    def increment(self, key, value=1):
        """
        Add `value` to the entry stored under `key` and return the result.

        If `key` is not present, `value` itself is stored and returned.
        """
        self.initialize()
        with self.locker:
            if key in self.storage:
                value = self.storage[key][1] + value
            self.storage[key] = (time.time(), value)
        return value
269
270
class CacheOnDisk(CacheAbstract):
    """
    Disk based cache

    This is implemented as a key value store where each key corresponds to a
    single file in disk which is replaced when the value changes.

    Disk cache provides persistence when web2py is started/stopped but it is
    slower than `CacheInRam`

    Values stored in disk cache must be picklable.
    """

    class PersistentStorage(object):
        """
        Implements a key based thread/process-safe storage in disk.
        """

        def __init__(self, folder, file_lock_time_wait=0.1):
            """
            Args:
                folder: directory holding one file per key
                file_lock_time_wait: seconds to sleep before retrying to
                    lock a file held by another process
            """
            self.folder = folder
            self.key_filter_in = lambda key: key
            self.key_filter_out = lambda key: key
            self.file_lock_time_wait = file_lock_time_wait
            # We still need a mutex for each file as portalocker only blocks
            # other processes. NOTE: one lock object is created per key ever
            # acquired and is never discarded.
            self.file_locks = defaultdict(thread.allocate_lock)

            # Make sure we use valid filenames.
            if sys.platform == "win32":
                import base64

                def key_filter_in_windows(key):
                    """
                    Windows doesn't allow the characters
                    backslash / : * ? " < > | in filenames.
                    To go around this encode the keys with base32.
                    """
                    return to_native(base64.b32encode(to_bytes(key)))

                def key_filter_out_windows(key):
                    """
                    We need to decode the keys so regex based removal works.
                    """
                    return to_native(base64.b32decode(to_bytes(key)))

                self.key_filter_in = key_filter_in_windows
                self.key_filter_out = key_filter_out_windows

        def wait_portalock(self, val_file):
            """
            Wait for the process file lock, retrying until it is obtained.
            """
            while True:
                try:
                    portalocker.lock(val_file, portalocker.LOCK_EX)
                    break
                except Exception:
                    # Retry on locking errors only; KeyboardInterrupt and
                    # SystemExit must be able to break out of this loop.
                    time.sleep(self.file_lock_time_wait)

        def acquire(self, key):
            """Take the in-process mutex associated with `key`."""
            self.file_locks[key].acquire()

        def release(self, key):
            """Release the in-process mutex associated with `key`."""
            self.file_locks[key].release()

        def __setitem__(self, key, value):
            """Pickle `value` into the file for `key`, replacing its content."""
            key = self.key_filter_in(key)
            val_file = recfile.open(key, mode='wb', path=self.folder)
            try:
                self.wait_portalock(val_file)
                pickle.dump(value, val_file, pickle.HIGHEST_PROTOCOL)
            finally:
                # always close, otherwise a failed dump leaks the handle
                val_file.close()

        def __getitem__(self, key):
            """
            Unpickle and return the value stored for `key`.

            Raises:
                KeyError: when the file for `key` cannot be opened.
            """
            key = self.key_filter_in(key)
            try:
                val_file = recfile.open(key, mode='rb', path=self.folder)
            except IOError:
                raise KeyError

            try:
                self.wait_portalock(val_file)
                # NOTE(review): the pickle is read from the cache folder;
                # that folder must not be writable by untrusted parties.
                value = pickle.load(val_file)
            finally:
                val_file.close()
            return value

        def __contains__(self, key):
            """True if a mutex was ever created for the filtered key or its file exists."""
            key = self.key_filter_in(key)
            return (key in self.file_locks) or recfile.exists(key, path=self.folder)

        def __delitem__(self, key):
            """
            Remove the file for `key`.

            Raises:
                KeyError: when removal fails with IOError.
            """
            key = self.key_filter_in(key)
            try:
                recfile.remove(key, path=self.folder)
            except IOError:
                raise KeyError

        def __iter__(self):
            """Yield the decoded key of every file found under the folder."""
            for dirpath, dirnames, filenames in os.walk(self.folder):
                for filename in filenames:
                    yield self.key_filter_out(filename)

        def safe_apply(self, key, function, default_value=None):
            """
            Safely apply a function to the value of a key in storage and set
            the return value of the function to it.

            When no file exists for `key`, `function` receives
            `default_value`. The result is stored as `(timestamp, result)`.

            Return the result of applying the function.
            """
            key = self.key_filter_in(key)
            exists = True
            try:
                val_file = recfile.open(key, mode='r+b', path=self.folder)
            except IOError:
                exists = False
                val_file = recfile.open(key, mode='wb', path=self.folder)
            try:
                self.wait_portalock(val_file)
                if exists:
                    timestamp, value = pickle.load(val_file)
                else:
                    value = default_value
                new_value = function(value)
                # rewrite in place, then cut off any leftover of the old data
                val_file.seek(0)
                pickle.dump((time.time(), new_value), val_file, pickle.HIGHEST_PROTOCOL)
                val_file.truncate()
            finally:
                val_file.close()
            return new_value

        def keys(self):
            """Return the list of all keys currently on disk."""
            return list(self.__iter__())

        def get(self, key, default=None):
            """Return the value for `key`, or `default` on KeyError."""
            try:
                return self[key]
            except KeyError:
                return default

    def __init__(self, request=None, folder=None):
        """
        Args:
            request: the global request object; its `folder` is used when
                `folder` is not given
            folder: optional base folder; the cache lives in its `cache`
                subdirectory
        """
        self.initialized = False
        self.request = request
        self.folder = folder
        self.storage = None

    def initialize(self):
        """Create the cache folder if needed and open the storage (once)."""
        if self.initialized:
            return
        self.initialized = True

        folder = self.folder
        request = self.request

        # Lets test if the cache folder exists, if not
        # we are going to create it
        folder = os.path.join(folder or request.folder, 'cache')

        if not os.path.exists(folder):
            try:
                os.mkdir(folder)
            except OSError:
                # another thread/process may have created it in the meantime
                if not os.path.isdir(folder):
                    raise

        self.storage = CacheOnDisk.PersistentStorage(folder)

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        """
        Return the value cached on disk under `key`, computing it if needed.

        Args:
            key: the key of the object to be stored or retrieved
            f: callable producing the value; `None` removes the entry
            time_expire: maximum age in seconds of a reusable entry;
                `None` means an existing entry is always reused

        Returns:
            The cached or freshly computed value, or `None` when `f` is None.
        """
        self.initialize()

        def inc_hit_total(v):
            v['hit_total'] += 1
            return v

        def inc_misses(v):
            v['misses'] += 1
            return v

        dt = time_expire
        storage = self.storage
        stats_key = CacheAbstract.cache_stats_name

        # try/finally guarantees both mutexes are released whatever fails
        # in between (unreadable pickle, unpicklable value, f() raising...).
        storage.acquire(key)
        try:
            storage.acquire(stats_key)
            try:
                item = storage.get(key)
                # counts every call, whether it ends up a hit or a miss
                storage.safe_apply(stats_key, inc_hit_total,
                                   default_value={'hit_total': 0, 'misses': 0})

                if item and f is None:
                    del storage[key]

                if f is None:
                    return None

                now = time.time()

                if item and ((dt is None) or (item[0] > now - dt)):
                    value = item[1]
                else:
                    value = f()
                    storage[key] = (now, value)
                    storage.safe_apply(stats_key, inc_misses,
                                       default_value={'hit_total': 0, 'misses': 0})
                return value
            finally:
                storage.release(stats_key)
        finally:
            storage.release(key)

    def clear(self, regex=None):
        """
        Remove all entries, or only those whose key matches `regex`.

        Entries that disappear while clearing are silently skipped.
        """
        self.initialize()
        storage = self.storage
        if regex is None:
            keys = storage
        else:
            r = re.compile(regex)
            keys = (key for key in storage if r.match(key))
        for key in keys:
            storage.acquire(key)
            try:
                del storage[key]
            except KeyError:
                pass
            finally:
                storage.release(key)

    def increment(self, key, value=1):
        """
        Add `value` to the entry stored under `key` and return the result.

        A missing entry is treated as 0.
        """
        self.initialize()
        self.storage.acquire(key)
        try:
            value = self.storage.safe_apply(key, lambda x: x + value, default_value=0)
        finally:
            self.storage.release(key)
        return value
497
498
class CacheAction(object):
    """
    Callable wrapper that routes calls to `func` through a cache model.

    The cache key is either built from the function name and the call
    arguments, or derived from the `key` template by substituting the
    `%(name)s`, `%(args)s` and `%(vars)s` placeholders.
    """

    def __init__(self, func, key, time_expire, cache, cache_model):
        self.func = func
        self.key = key
        self.time_expire = time_expire
        self.cache = cache
        self.cache_model = cache_model
        # present the wrapped function's identity
        self.__name__ = func.__name__
        self.__doc__ = func.__doc__

    def __call__(self, a, *b):
        if self.key:
            # fill the template one placeholder at a time, in this order
            key2 = self.key.replace('%(name)s', self.__name__)
            key2 = key2.replace('%(args)s', str(a))
            key2 = key2.replace('%(vars)s', str(b))
        else:
            key2 = ':'.join((self.__name__, repr(a), repr(b)))

        model = self.cache_model
        if model and not isinstance(model, str):
            # already a callable cache model
            backend = model
        else:
            # a name (or nothing): look it up on the cache, default 'ram'
            backend = getattr(self.cache, model or 'ram')

        def compute(a=a, b=b):
            return self.func(a, *b)

        return backend(key2, compute, self.time_expire)
521
522
class Cache(object):
    """
    Sets up generic caching, creating an instance of both CacheInRam and
    CacheOnDisk.
    In case of GAE will make use of gluon.contrib.gae_memcache.

    - self.ram is an instance of CacheInRam
    - self.disk is an instance of CacheOnDisk
    """

    autokey = ':%(name)s:%(args)s:%(vars)s'

    def __init__(self, request):
        """
        Args:
            request: the global request object
        """
        # GAE will have a special caching
        if have_settings and settings.global_settings.web2py_runtime_gae:
            from gluon.contrib.gae_memcache import MemcacheClient
            self.ram = self.disk = MemcacheClient(request)
        else:
            # Otherwise use ram (and try also disk)
            self.ram = CacheInRam(request)
            try:
                self.disk = CacheOnDisk(request)
            except IOError:
                logger.warning('no cache.disk (IOError)')
            except AttributeError:
                # normally not expected anymore, as GAE has already
                # been accounted for
                logger.warning('no cache.disk (AttributeError)')

    def action(self, time_expire=DEFAULT_TIME_EXPIRE, cache_model=None,
               prefix=None, session=False, vars=True, lang=True,
               user_agent=False, public=True, valid_statuses=None,
               quick=None):
        """Better fit for caching an action

        Warning:
            Experimental!

        Currently only HTTP 1.1 compliant
        reference : http://code.google.com/p/doctype-mirror/wiki/ArticleHttpCaching

        Only GET requests are handled; any other method calls the action
        directly.

        Args:
            time_expire(int): same as @cache
            cache_model: callable cache model (e.g. cache.ram); it is called
                directly, so it must be a callable. When falsy no
                server-side caching is done and only the headers are set
            prefix(str): add a prefix to the calculated key
            session(bool): adds response.session_id to the key
            vars(bool): adds request.env.query_string
            lang(bool): adds T.accepted_language
            user_agent(bool or dict): if True, adds is_mobile and is_tablet to the key.
                Pass a dict to use all the needed values (uses str(.items()))
                (e.g. user_agent=request.user_agent()). Used only if session is
                not True
            public(bool): if False forces the Cache-Control to be 'private'
            valid_statuses: by default only status codes starting with 1,2,3 will be cached.
                pass an explicit list of statuses on which turn the cache on
            quick: Session,Vars,Lang,User-agent,Public:
                fast overrides with initials, e.g. 'SVLP' or 'VLP'
        """
        from gluon import current
        from gluon.http import HTTP

        def wrap(func):
            def wrapped_f():
                if current.request.env.request_method != 'GET':
                    return func()

                if quick:
                    # a flag is on exactly when its initial appears in `quick`
                    session_ = 'S' in quick
                    vars_ = 'V' in quick
                    lang_ = 'L' in quick
                    user_agent_ = 'U' in quick
                    public_ = 'P' in quick
                else:
                    (session_, vars_, lang_, user_agent_, public_) = \
                        (session, vars, lang, user_agent, public)

                expires = 'Fri, 01 Jan 1990 00:00:00 GMT'
                if time_expire:
                    cache_control = 'max-age=%(time_expire)s, s-maxage=%(time_expire)s' % dict(time_expire=time_expire)
                    expires = (current.request.utcnow + datetime.timedelta(seconds=time_expire)).strftime(
                        '%a, %d %b %Y %H:%M:%S GMT')
                else:
                    cache_control = 'no-store, no-cache, must-revalidate, post-check=0, pre-check=0'
                cache_control += ', public' if not session_ and public_ else ', private'

                # `http` stays None unless the action raises HTTP. Any other
                # exception raised by the action simply propagates.
                http = None
                if cache_model:
                    # figure out the correct cache key
                    cache_key = [current.request.env.path_info, current.response.view]
                    if session_:
                        cache_key.append(current.response.session_id)
                    elif user_agent_:
                        if user_agent_ is True:
                            cache_key.append("%(is_mobile)s_%(is_tablet)s" % current.request.user_agent())
                        else:
                            cache_key.append(str(list(user_agent_.items())))
                    if vars_:
                        cache_key.append(current.request.env.query_string)
                    if lang_:
                        cache_key.append(current.T.accepted_language)
                    cache_key = hashlib_md5('__'.join(cache_key)).hexdigest()
                    if prefix:
                        cache_key = prefix + cache_key
                    try:
                        # action returns something
                        rtn = cache_model(cache_key, lambda: func(), time_expire=time_expire)
                        status = current.response.status
                    except HTTP as e:
                        # action raises HTTP (can still be valid)
                        rtn = cache_model(cache_key, lambda: e.body, time_expire=time_expire)
                        http, status = HTTP(e.status, rtn, **e.headers), e.status
                else:
                    # no server-cache side involved
                    try:
                        # action returns something
                        rtn = func()
                        status = current.response.status
                    except HTTP as e:
                        # action raises HTTP (can still be valid)
                        status = e.status
                        http = HTTP(e.status, e.body, **e.headers)

                send_headers = False
                if isinstance(valid_statuses, list):
                    # An explicit list applies to normal returns as well as
                    # to raised HTTP responses; previously a normal return
                    # was never considered valid when a list was given.
                    send_headers = status in valid_statuses
                elif valid_statuses is None:
                    send_headers = str(status)[0] in '123'
                if send_headers:
                    headers = {'Pragma': None,
                               'Expires': expires,
                               'Cache-Control': cache_control}
                    current.response.headers.update(headers)
                if cache_model and not send_headers:
                    # we cached already the value, but the status is not valid
                    # so we need to delete the cached value
                    cache_model(cache_key, None)
                if http:
                    if send_headers:
                        http.headers.update(current.response.headers)
                    raise http
                return rtn
            wrapped_f.__name__ = func.__name__
            wrapped_f.__doc__ = func.__doc__
            return wrapped_f
        return wrap

    def __call__(self,
                 key=None,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 cache_model=None):
        """
        Decorator function that can be used to cache any function/method.

        Args:
            key(str) : the key of the object to be stored or retrieved
            time_expire(int) : expiration of the cache in seconds
                `time_expire` is used to compare the current time with the time
                when the requested object was last saved in cache.
                It does not affect future requests.
                Setting `time_expire` to 0 or negative value forces the cache to
                refresh.
            cache_model(str): can be "ram", "disk" or other (like "memcache"),
                or a cache model object such as cache.ram.
                Defaults to "ram"

        When the function `f` is called, web2py tries to retrieve
        the value corresponding to `key` from the cache if the
        object exists and if it did not expire, else it calls the function `f`
        and stores the output in the cache corresponding to `key`. In either
        case the output of the function is returned.

        Example: ::

          @cache('key', 5000, cache.ram)
          def f():
              return time.ctime()

        Note:
            If the function `f` is an action, we suggest using
            @cache.action instead
        """

        def tmp(func, cache=self, cache_model=cache_model):
            return CacheAction(func, key, time_expire, self, cache_model)
        return tmp

    @staticmethod
    def with_prefix(cache_model, prefix):
        """
        allow replacing cache.ram with cache.with_prefix(cache.ram,'prefix')
        it will add prefix to all the cache keys used.
        """
        return lambda key, f, time_expire=DEFAULT_TIME_EXPIRE, prefix=prefix: cache_model(prefix + key, f, time_expire)
725
726
def lazy_cache(key=None, time_expire=None, cache_model='ram'):
    """
    Can be used to cache any function including ones in modules,
    as long as the cached function is only called within a web2py request

    The cache object is looked up on `gluon.current` at call time, not at
    decoration time.

    Args:
        key: cache key; if not provided, one is generated with `repr()` of
            the decorated function
        time_expire: expiration in seconds; defaults to None
            (no cache expiration)
        cache_model: if "ram" then the model is current.cache.ram, etc.

    Returns:
        A decorator. The decorated function must be called with at least
        one positional argument.
    """
    def decorator(f, key=key, time_expire=time_expire, cache_model=cache_model):
        key = key or repr(f)

        def g(c, *d):
            from gluon import current
            return current.cache(key, time_expire, cache_model)(f)(c, *d)
        # keep the wrapped function's identity, docstring included, as the
        # other cache wrappers in this module do
        g.__name__ = f.__name__
        g.__doc__ = f.__doc__
        return g
    return decorator

Note: See TracBrowser for help on using the repository browser.

Download in other formats: