source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/cache.py

main
Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago

Historial Limpio

  • Property mode set to 100755
File size: 26.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4"""
5| This file is part of the web2py Web Framework
6| Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
7| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
8
9Basic caching classes and methods
10---------------------------------
11
12- Cache - The generic caching object interfacing with the others
13- CacheInRam - providing caching in ram
14- CacheOnDisk - provides caches on disk
15
16Memcache is also available via a different module (see gluon.contrib.memcache)
17
18When web2py is running on Google App Engine,
19caching will be provided by the GAE memcache
20(see gluon.contrib.gae_memcache)
21"""
22import time
23import os
24import gc
25import sys
26import logging
27import re
28import random
29import hashlib
30import datetime
31import tempfile
32from gluon import recfile
33from collections import defaultdict
34from collections import OrderedDict
35
36try:
37    from gluon import settings
38    have_settings = True
39except ImportError:
40    have_settings = False
41
42from pydal.contrib import portalocker
43from gluon._compat import pickle, thread, to_bytes, to_native, hashlib_md5
44
45try:
46    import psutil
47    HAVE_PSUTIL = True
48except ImportError:
49    HAVE_PSUTIL = False
50
51
def remove_oldest_entries(storage, percentage=90):
    """
    Evicts entries from the front of `storage` while RAM usage is too high.

    Entries are popped one at a time for as long as the memory utilization
    reported by `psutil.virtual_memory().percent` stays above `percentage`.
    The loop gives up as soon as an eviction does not lower the measured
    utilization.

    Args:
        storage: ordered mapping supporting `popitem(last=False)`; it is
            modified in place
        percentage: memory utilization threshold (percent)
    """
    usage = psutil.virtual_memory().percent
    while True:
        # stop when there is nothing left to evict or usage is acceptable
        if not storage or not (usage > percentage):
            return
        # drop the entry sitting at the front of the mapping
        storage.popitem(last=False)
        # run a garbage collection (generation 1) before measuring again
        gc.collect(1)
        usage_after = psutil.virtual_memory().percent
        # the eviction did not free memory: stop instead of looping on
        if usage_after >= usage:
            return
        # keep the new measurement as the reference for the next round
        usage = usage_after
68
69
# Logger used by this module (e.g. for the "no cache.disk" warnings)
logger = logging.getLogger("web2py.cache")

# Public names exported on `import *`
__all__ = ['Cache', 'lazy_cache']


# Default value of the `time_expire` arguments, in seconds
DEFAULT_TIME_EXPIRE = 300
76
77
class CacheAbstract(object):
    """
    Base class documenting the API shared by the cache implementations.

    Apart from the `_clear` helper every method here just raises
    `NotImplementedError`; the methods only carry the reference
    documentation of the API.

    Use CacheInRam or CacheOnDisk instead, which are derived from this class.

    Note:
        Michele says: there are signatures inside gdbm files that are used
        directly by the python gdbm adapter that often are lagging behind in the
        detection code in python part.
        On every occasion that a gdbm store is probed by the python adapter,
        the probe fails, because gdbm file version is newer.
        Using gdbm directly from C would work, because there is backward
        compatibility, but not from python!
        The .shelve file is discarded and a new one created (with new
        signature) and it works until it is probed again...
        The possible consequences are memory leaks and broken sessions.
    """

    # key under which the hit/miss statistics are stored
    cache_stats_name = 'web2py_cache_statistics'
    max_ram_utilization = None  # percent

    def __init__(self, request=None):
        """Initializes the object

        Args:
            request: the global request object
        """
        raise NotImplementedError

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        """
        Looks up the value stored under `key`. If the entry exists and has not
        expired it is returned, otherwise the function `f` is called and its
        output is stored in the cache under `key` and returned.

        Args:
            key(str): the key of the object to be stored or retrieved
            f(function): the function whose output is to be cached.

                If `f` is `None` the cache is cleared.
            time_expire(int): expiration of the cache in seconds.

                It's used to compare the current time with the time
                when the requested object was last saved in cache. It does not
                affect future requests. Setting `time_expire` to 0 or negative
                value forces the cache to refresh.
        """
        raise NotImplementedError

    def clear(self, regex=None):
        """
        Removes from the cache all the keys matching the given regular
        expression. Without a regular expression every entry is removed.

        Args:
            regex: if provided, only keys matching the regex will be cleared,
                otherwise all keys are cleared.
        """

        raise NotImplementedError

    def increment(self, key, value=1):
        """
        Adds `value` to the value cached under `key`.

        Args:
            key(str): key for the cached object to be incremented
            value(int): amount of the increment (defaults to 1, can be negative)
        """
        raise NotImplementedError

    def _clear(self, storage, regex):
        """
        Helper for `clear`: deletes from `storage` every key whose string
        form matches `regex` (matching is anchored at the start of the key,
        as done by `re.match`).
        """
        matches = re.compile(regex).match
        # loop over a snapshot of the keys because entries are deleted
        # while iterating
        for name in list(storage.keys()):
            if matches(str(name)):
                del storage[name]
162
163
class CacheInRam(CacheAbstract):
    """
    Ram based caching

    This is implemented as a global (per process, shared by all threads)
    dictionary: `meta_storage` maps each application name to its own storage
    and `stats` keeps the matching hit/miss counters.
    A mutex-lock mechanism avoids conflicts.
    """

    # one lock for the whole process: it protects `meta_storage`, `stats`
    # and every per-application storage
    locker = thread.allocate_lock()
    meta_storage = {}
    stats = {}

    def __init__(self, request=None):
        """
        Args:
            request: the global request object; its `application` attribute
                selects the storage to use ('' when no request is given)
        """
        self.initialized = False
        self.request = request
        # placeholder, replaced by the shared storage in `initialize`
        self.storage = OrderedDict() if HAVE_PSUTIL else {}
        self.app = request.application if request else ''

    def initialize(self):
        """
        Binds `self.storage` to the process-wide storage of the application,
        creating the storage and its statistics on first use.
        Does nothing when called again on the same instance.
        """
        if self.initialized:
            return
        self.initialized = True
        with self.locker:
            if self.app not in self.meta_storage:
                # an OrderedDict is needed by remove_oldest_entries, which is
                # only used when psutil is available
                self.storage = self.meta_storage[self.app] = \
                    OrderedDict() if HAVE_PSUTIL else {}
                self.stats[self.app] = {'hit_total': 0, 'misses': 0}
            else:
                self.storage = self.meta_storage[self.app]

    def clear(self, regex=None):
        """
        Removes the entries whose key matches `regex`, or all the entries of
        the application when `regex` is None.

        The lock is released even if `regex` is not a valid regular
        expression, so that a failing call cannot block the other threads.
        """
        self.initialize()
        with self.locker:
            storage = self.storage
            if regex is None:
                storage.clear()
            else:
                self._clear(storage, regex)

            if self.app not in self.stats:
                self.stats[self.app] = {'hit_total': 0, 'misses': 0}

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 destroyer=None):
        """
        Returns the value cached under `key`, calling `f` to (re)compute and
        store it when the entry is missing or expired.

        Attention! cache.ram does not copy the cached object.
        It just stores a reference to it. Turns out that deepcopying the
        object has some problems:

        - would break backward compatibility
        - would be limiting because people may want to cache live objects
        - would only work if we deep-copied on both storage and retrieval,
          which would make things slow.

        Anyway. You can deepcopy explicitly in the function generating the value
        to be cached.

        Args:
            key: the key of the object to be stored or retrieved
            f: the function whose output is to be cached; if None the entry
                stored under `key` is removed and None is returned
            time_expire: expiration in seconds; None means the entry never
                expires
            destroyer: optional callable, called with the cached value when
                the entry is removed (`f` is None) or when an expired entry
                is about to be replaced

        Note:
            'hit_total' counts every call, 'misses' counts the calls that
            had to evaluate `f`.
        """
        self.initialize()

        dt = time_expire
        now = time.time()

        # `with` guarantees the process-wide lock is released even when
        # `destroyer` raises
        with self.locker:
            item = self.storage.get(key, None)
            if item and f is None:
                del self.storage[key]
                if destroyer:
                    destroyer(item[1])
            self.stats[self.app]['hit_total'] += 1

        if f is None:
            return None
        if item and (dt is None or item[0] > now - dt):
            return item[1]
        elif item and (item[0] < now - dt) and destroyer:
            destroyer(item[1])
        # `f` runs outside the lock so that slow computations do not block
        # the other threads
        value = f()

        with self.locker:
            self.storage[key] = (now, value)
            self.stats[self.app]['misses'] += 1
            # only check memory usage on about 10% of the misses
            if HAVE_PSUTIL and self.max_ram_utilization is not None and random.random() < 0.10:
                remove_oldest_entries(self.storage, percentage=self.max_ram_utilization)
        return value

    def increment(self, key, value=1):
        """
        Adds `value` to the value cached under `key` (or stores `value` when
        the key is missing), refreshes the entry timestamp and returns the
        new value.
        """
        self.initialize()
        with self.locker:
            if key in self.storage:
                value = self.storage[key][1] + value
            self.storage[key] = (time.time(), value)
        return value
269
270
class CacheOnDisk(CacheAbstract):
    """
    Disk based cache

    This is implemented as a key value store where each key corresponds to a
    single file in disk which is replaced when the value changes.

    Disk cache provides persistence when web2py is started/stopped but it is
    slower than `CacheInRam`

    Values stored in disk cache must be picklable.
    """

    class PersistentStorage(object):
        """
        Implements a key based thread/process-safe storage on disk.
        """

        def __init__(self, folder, file_lock_time_wait=0.1):
            """
            Args:
                folder: directory holding one file per key
                file_lock_time_wait: seconds to sleep before retrying to lock
                    a file held by another process
            """
            self.folder = folder
            self.key_filter_in = lambda key: key
            self.key_filter_out = lambda key: key
            # How long we should wait before retrying to lock a file held by another process
            self.file_lock_time_wait = file_lock_time_wait
            # We still need a mutex for each file as portalocker only blocks other processes
            self.file_locks = defaultdict(thread.allocate_lock)

            # Make sure we use valid filenames.
            if sys.platform == "win32":
                import base64

                def key_filter_in_windows(key):
                    r"""
                    Windows doesn't allow \ / : * ? "< > | in filenames.
                    To go around this encode the keys with base32.
                    """
                    return to_native(base64.b32encode(to_bytes(key)))

                def key_filter_out_windows(key):
                    """
                    We need to decode the keys so regex based removal works.
                    """
                    return to_native(base64.b32decode(to_bytes(key)))

                self.key_filter_in = key_filter_in_windows
                self.key_filter_out = key_filter_out_windows

        def wait_portalock(self, val_file):
            """
            Wait for the process file lock.

            Retries every `file_lock_time_wait` seconds until the exclusive
            lock is obtained. Only `Exception` subclasses trigger a retry, so
            KeyboardInterrupt and SystemExit can still stop the loop.
            """
            while True:
                try:
                    portalocker.lock(val_file, portalocker.LOCK_EX)
                    break
                except Exception:
                    time.sleep(self.file_lock_time_wait)

        def acquire(self, key):
            """Acquires the in-process mutex associated to `key`."""
            self.file_locks[key].acquire()

        def release(self, key):
            """Releases the in-process mutex associated to `key`."""
            self.file_locks[key].release()

        def __setitem__(self, key, value):
            """Pickles `value` into the file associated to `key`."""
            key = self.key_filter_in(key)
            val_file = recfile.open(key, mode='wb', path=self.folder)
            try:
                self.wait_portalock(val_file)
                pickle.dump(value, val_file, pickle.HIGHEST_PROTOCOL)
            finally:
                # always close the file, even if `value` cannot be pickled
                val_file.close()

        def __getitem__(self, key):
            """
            Returns the object pickled in the file associated to `key`.

            Raises:
                KeyError: if the file cannot be opened
            """
            key = self.key_filter_in(key)
            try:
                val_file = recfile.open(key, mode='rb', path=self.folder)
            except IOError:
                raise KeyError(key)

            try:
                self.wait_portalock(val_file)
                return pickle.load(val_file)
            finally:
                # always close the file, even if unpickling fails
                val_file.close()

        def __contains__(self, key):
            """
            Tells if `key` has an in-process lock or a file in the folder.
            """
            key = self.key_filter_in(key)
            # NOTE(review): `acquire`/`release` index `file_locks` with the
            # unfiltered key while the filtered one is tested here; the two
            # only differ on win32. Entries of `file_locks` are never
            # removed, so a key stays "contained" once it has been locked -
            # confirm this is intended.
            return (key in self.file_locks) or recfile.exists(key, path=self.folder)

        def __delitem__(self, key):
            """
            Removes the file associated to `key`.

            Raises:
                KeyError: if the file cannot be removed
            """
            key = self.key_filter_in(key)
            try:
                recfile.remove(key, path=self.folder)
            except (IOError, OSError):
                # IOError and OSError are distinct classes on Python 2
                raise KeyError(key)

        def __iter__(self):
            """Yields the (decoded) name of every file found in the folder."""
            for dirpath, dirnames, filenames in os.walk(self.folder):
                for filename in filenames:
                    yield self.key_filter_out(filename)

        def safe_apply(self, key, function, default_value=None):
            """
            Safely apply a function to the value of a key in storage and set
            the return value of the function to it.

            The file is expected to hold a `(timestamp, value)` tuple; when
            it does not exist `function` receives `default_value`. The result
            is stored together with the current time.

            Return the result of applying the function.
            """
            key = self.key_filter_in(key)
            exists = True
            try:
                val_file = recfile.open(key, mode='r+b', path=self.folder)
            except IOError:
                exists = False
                val_file = recfile.open(key, mode='wb', path=self.folder)
            try:
                self.wait_portalock(val_file)
                if exists:
                    _timestamp, value = pickle.load(val_file)
                else:
                    value = default_value
                new_value = function(value)
                # overwrite the previous content in place, then cut whatever
                # is left of a longer previous pickle
                val_file.seek(0)
                pickle.dump((time.time(), new_value), val_file, pickle.HIGHEST_PROTOCOL)
                val_file.truncate()
            finally:
                # always close the file, even if `function` or pickle fail
                val_file.close()
            return new_value

        def keys(self):
            """Returns the list of the keys found in the folder."""
            return list(self.__iter__())

        def get(self, key, default=None):
            """Returns the value stored under `key`, or `default` if missing."""
            try:
                return self[key]
            except KeyError:
                return default

    def __init__(self, request=None, folder=None):
        """
        Args:
            request: the global request object; its `folder` attribute is
                used when `folder` is not given
            folder: directory under which the 'cache' folder lives
        """
        self.initialized = False
        self.request = request
        self.folder = folder
        self.storage = None

    def initialize(self):
        """
        Creates the 'cache' folder if needed and sets up `self.storage`.
        Does nothing once a previous call has succeeded.
        """
        if self.initialized:
            return

        # Lets test if the cache folder exists, if not
        # we are going to create it
        folder = os.path.join(self.folder or self.request.folder, 'cache')

        if not os.path.exists(folder):
            try:
                os.mkdir(folder)
            except OSError:
                # the folder may have been created by someone else between
                # the check and the mkdir: only fail if it is really missing
                if not os.path.isdir(folder):
                    raise

        self.storage = CacheOnDisk.PersistentStorage(folder)
        # flagged only on success, so that a failed attempt is retried
        # instead of leaving `self.storage` set to None
        self.initialized = True

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        """
        Returns the value cached under `key`, calling `f` to (re)compute and
        store it when the entry is missing or expired.

        Args:
            key: the key of the object to be stored or retrieved
            f: the function whose output is to be cached; if None the entry
                stored under `key` is removed and None is returned
            time_expire: expiration in seconds; None means the entry never
                expires

        Note:
            The statistics stored under `cache_stats_name` count every call
            in 'hit_total' and every evaluation of `f` in 'misses'.
        """
        self.initialize()

        def inc_hit_total(v):
            v['hit_total'] += 1
            return v

        def inc_misses(v):
            v['misses'] += 1
            return v

        dt = time_expire
        storage = self.storage
        stats_name = CacheAbstract.cache_stats_name

        # the locks are released in `finally` blocks so that any failure
        # (f, pickling, file access) cannot leave them held
        storage.acquire(key)
        try:
            storage.acquire(stats_name)
            try:
                item = storage.get(key)
                storage.safe_apply(stats_name, inc_hit_total,
                                   default_value={'hit_total': 0, 'misses': 0})

                if f is None:
                    if item:
                        del storage[key]
                    return None

                now = time.time()

                if item and ((dt is None) or (item[0] > now - dt)):
                    value = item[1]
                else:
                    value = f()
                    storage[key] = (now, value)
                    storage.safe_apply(stats_name, inc_misses,
                                       default_value={'hit_total': 0, 'misses': 0})
                return value
            finally:
                storage.release(stats_name)
        finally:
            storage.release(key)

    def clear(self, regex=None):
        """
        Removes the entries whose key matches `regex`, or every entry when
        `regex` is None.
        """
        self.initialize()
        storage = self.storage
        if regex is None:
            keys = storage
        else:
            r = re.compile(regex)
            keys = (key for key in storage if r.match(key))
        for key in keys:
            storage.acquire(key)
            try:
                del storage[key]
            except KeyError:
                # already gone: nothing to do
                pass
            finally:
                storage.release(key)

    def increment(self, key, value=1):
        """
        Adds `value` to the value cached under `key` (starting from 0 when
        the key is missing) and returns the new value.
        """
        self.initialize()
        self.storage.acquire(key)
        try:
            return self.storage.safe_apply(key, lambda x: x + value, default_value=0)
        finally:
            self.storage.release(key)
497
498
class CacheAction(object):
    """
    Callable wrapper sending every call to `func` through a cache model.

    The cache key is built from the function name and the call arguments,
    either with the default layout or by filling the `%(name)s`, `%(args)s`
    and `%(vars)s` placeholders of `key`.
    """

    def __init__(self, func, key, time_expire, cache, cache_model):
        """
        Args:
            func: the function to be cached
            key: key template, or a false value to use the default layout
            time_expire: expiration passed on to the cache model
            cache: object whose attributes are the named cache models
            cache_model: a cache model callable, the name of an attribute
                of `cache`, or a false value for 'ram'
        """
        # expose the name and the documentation of the wrapped function
        self.__name__ = func.__name__
        self.__doc__ = func.__doc__
        self.func = func
        self.key = key
        self.time_expire = time_expire
        self.cache = cache
        self.cache_model = cache_model

    def __call__(self, *a, **b):
        """Returns `func(*a, **b)` as provided by the cache model."""
        name = self.__name__
        if self.key:
            full_key = self.key
            substitutions = (('%(name)s', name),
                             ('%(args)s', str(a)),
                             ('%(vars)s', str(b)))
            for placeholder, text in substitutions:
                full_key = full_key.replace(placeholder, text)
        else:
            full_key = ':'.join((name, repr(a), repr(b)))

        model = self.cache_model
        # a missing model or a model name is looked up on the cache object
        if not model or isinstance(model, str):
            model = getattr(self.cache, model or 'ram')

        def compute():
            return self.func(*a, **b)

        return model(full_key, compute, self.time_expire)
521
522
class Cache(object):
    """
    Sets up generic caching, creating an instance of both CacheInRam and
    CacheOnDisk.
    In case of GAE will make use of gluon.contrib.gae_memcache.

    - self.ram is an instance of CacheInRam
    - self.disk is an instance of CacheOnDisk
    """

    autokey = ':%(name)s:%(args)s:%(vars)s'

    def __init__(self, request):
        """
        Args:
            request: the global request object

        Note:
            If creating the disk cache fails with IOError or AttributeError
            a warning is logged and `self.disk` is left unset.
        """
        # GAE will have a special caching
        if have_settings and settings.global_settings.web2py_runtime_gae:
            from gluon.contrib.gae_memcache import MemcacheClient
            self.ram = self.disk = MemcacheClient(request)
        else:
            # Otherwise use ram (and try also disk)
            self.ram = CacheInRam(request)
            try:
                self.disk = CacheOnDisk(request)
            except IOError:
                logger.warning('no cache.disk (IOError)')
            except AttributeError:
                # normally not expected anymore, as GAE has already
                # been accounted for
                logger.warning('no cache.disk (AttributeError)')

    def action(self, time_expire=DEFAULT_TIME_EXPIRE, cache_model=None,
               prefix=None, session=False, vars=True, lang=True,
               user_agent=False, public=True, valid_statuses=None,
               quick=None):
        """Better fit for caching an action

        Warning:
            Experimental!

        Currently only HTTP 1.1 compliant
        reference : http://code.google.com/p/doctype-mirror/wiki/ArticleHttpCaching

        Only GET requests are handled; for any other method the action is
        called directly. The client-side cache headers are always computed,
        the result is also stored server-side when `cache_model` is given.

        Args:
            time_expire(int): same as @cache
            cache_model(str): same as @cache
            prefix(str): add a prefix to the calculated key
            session(bool): adds response.session_id to the key
            vars(bool): adds request.env.query_string
            lang(bool): adds T.accepted_language
            user_agent(bool or dict): if True, adds is_mobile and is_tablet to the key.
                Pass a dict to use all the needed values (uses str(.items()))
                (e.g. user_agent=request.user_agent()). Used only if session is
                not True
            public(bool): if False forces the Cache-Control to be 'private'
            valid_statuses: by default only status codes starting with 1,2,3 will be cached.
                pass an explicit list of statuses on which turn the cache on
            quick: Session,Vars,Lang,User-agent,Public:
                fast overrides with initials, e.g. 'SVLP' or 'VLP'.
                When given it replaces all the five flags above: the ones
                whose initial is missing are turned off
        """
        from gluon import current
        from gluon.http import HTTP

        def wrap(func):
            def wrapped_f():
                if current.request.env.request_method != 'GET':
                    return func()

                if quick:
                    session_ = 'S' in quick
                    vars_ = 'V' in quick
                    lang_ = 'L' in quick
                    user_agent_ = 'U' in quick
                    public_ = 'P' in quick
                else:
                    (session_, vars_, lang_, user_agent_, public_) = \
                        (session, vars, lang, user_agent, public)

                # a date in the past: used when the response must not be cached
                expires = 'Fri, 01 Jan 1990 00:00:00 GMT'
                if time_expire:
                    cache_control = 'max-age=%(time_expire)s, s-maxage=%(time_expire)s' % dict(time_expire=time_expire)
                    expires = (current.request.utcnow + datetime.timedelta(seconds=time_expire)).strftime(
                        '%a, %d %b %Y %H:%M:%S GMT')
                else:
                    cache_control = 'no-store, no-cache, must-revalidate, post-check=0, pre-check=0'
                cache_control += ', public' if not session_ and public_ else ', private'

                # `http` stays None unless the action raises HTTP
                http = None
                if cache_model:
                    # figure out the correct cache key
                    cache_key = [current.request.env.path_info, current.response.view]
                    if session_:
                        cache_key.append(current.response.session_id)
                    elif user_agent_:
                        if user_agent_ is True:
                            cache_key.append("%(is_mobile)s_%(is_tablet)s" % current.request.user_agent())
                        else:
                            cache_key.append(str(list(user_agent_.items())))
                    if vars_:
                        cache_key.append(current.request.env.query_string)
                    if lang_:
                        cache_key.append(current.T.accepted_language)
                    cache_key = hashlib_md5('__'.join(cache_key)).hexdigest()
                    if prefix:
                        cache_key = prefix + cache_key
                    try:
                        # action returns something
                        rtn = cache_model(cache_key, lambda: func(), time_expire=time_expire)
                        status = current.response.status
                    except HTTP as e:
                        # action raises HTTP (can still be valid)
                        rtn = cache_model(cache_key, lambda: e.body, time_expire=time_expire)
                        http, status = HTTP(e.status, rtn, **e.headers), e.status
                else:
                    # no server-cache side involved
                    try:
                        # action returns something
                        rtn = func()
                        status = current.response.status
                    except HTTP as e:
                        # action raises HTTP (can still be valid)
                        status = e.status
                        http = HTTP(e.status, e.body, **e.headers)

                # an explicit list of statuses applies to plain returns as
                # well as to raised HTTP exceptions
                send_headers = False
                if isinstance(valid_statuses, list):
                    send_headers = status in valid_statuses
                elif valid_statuses is None:
                    send_headers = str(status)[0] in '123'
                if send_headers:
                    headers = {'Pragma': None,
                               'Expires': expires,
                               'Cache-Control': cache_control}
                    current.response.headers.update(headers)
                if cache_model and not send_headers:
                    # we cached already the value, but the status is not valid
                    # so we need to delete the cached value
                    cache_model(cache_key, None)
                if http:
                    if send_headers:
                        http.headers.update(current.response.headers)
                    raise http
                return rtn
            wrapped_f.__name__ = func.__name__
            wrapped_f.__doc__ = func.__doc__
            return wrapped_f
        return wrap

    def __call__(self,
                 key=None,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 cache_model=None):
        """
        Decorator function that can be used to cache any function/method.

        Args:
            key(str) : the key of the object to be store or retrieved
            time_expire(int) : expiration of the cache in seconds
                `time_expire` is used to compare the current time with the time
                when the requested object was last saved in cache.
                It does not affect future requests.
                Setting `time_expire` to 0 or negative value forces the cache to
                refresh.
            cache_model(str): can be "ram", "disk" or other (like "memcache").
                Defaults to "ram"

        When the decorated function `f` is called, web2py tries to retrieve
        the value corresponding to `key` from the cache if the
        object exists and if it did not expire, else it calls the function `f`
        and stores the output in the cache corresponding to `key`. In both
        cases the output of the function is returned.

        Example: ::

          @cache('key', 5000, cache.ram)
          def f():
              return time.ctime()

        Note:
            If the function `f` is an action, we suggest using
            @cache.action instead
        """

        def tmp(func, cache=self, cache_model=cache_model):
            # `cache` defaults to this object and is the one handed over to
            # CacheAction to resolve named cache models
            return CacheAction(func, key, time_expire, cache, cache_model)
        return tmp

    @staticmethod
    def with_prefix(cache_model, prefix):
        """
        allow replacing cache.ram with cache.with_prefix(cache.ram,'prefix')
        it will add prefix to all the cache keys used.
        """
        return lambda key, f, time_expire=DEFAULT_TIME_EXPIRE, prefix=prefix: cache_model(prefix + key, f, time_expire)
725
726
def lazy_cache(key=None, time_expire=None, cache_model='ram'):
    """
    Can be used to cache any function including ones in modules,
    as long as the cached function is only called within a web2py request

    The cache object (`current.cache`) is only looked up when the decorated
    function is called, not when it is decorated.

    If a key is not provided, `repr()` of the function is used as the key
    `time_expire` defaults to None (no cache expiration)

    If cache_model is "ram" then the model is current.cache.ram, etc.
    """
    def decorator(f, key=key, time_expire=time_expire, cache_model=cache_model):
        key = key or repr(f)

        def g(*c, **d):
            # imported here because `current` is only needed at call time
            from gluon import current
            return current.cache(key, time_expire, cache_model)(f)(*c, **d)
        # make the wrapper look like the decorated function
        g.__name__ = f.__name__
        g.__doc__ = f.__doc__
        return g
    return decorator
Note: See TracBrowser for help on using the repository browser.