source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/rewrite.py

main
Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago

Historial Limpio

  • Property mode set to 100755
File size: 51.6 KB
Line 
1#!/bin/env python
2# -*- coding: utf-8 -*-
3
4"""
5| This file is part of the web2py Web Framework
6| Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
7| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
8
9gluon.rewrite parses incoming URLs and formats outgoing URLs for gluon.html.URL.
10
11In addition, it rewrites both incoming and outgoing URLs based on the (optional) user-supplied routes.py,
12which also allows for rewriting of certain error messages.
13
14routes.py supports two styles of URL rewriting, depending on whether 'routers' is defined.
15Refer to router.example.py and routes.example.py for additional documentation.
16
17"""
18
19import os
20import re
21import logging
22import traceback
23import threading
24from gluon.storage import Storage, List
25from gluon.http import HTTP
26from gluon.fileutils import abspath, read_file
27from gluon.settings import global_settings
28from gluon._compat import urllib_unquote, urllib_quote, iteritems, xrange, urllib_quote_plus
29
# short aliases for the os.path helpers used throughout this module
isdir = os.path.isdir
isfile = os.path.isfile
exists = os.path.exists
pjoin = os.path.join

logger = logging.getLogger('web2py.rewrite')
THREAD_LOCAL = threading.local()  # thread-local storage for routing params

# $name placeholder in a routes pattern (not preceded by a backslash)
regex_at = re.compile(r'(?<!\\)\$[a-zA-Z]\w*')
# the special $anything placeholder (not preceded by a backslash)
regex_anything = re.compile(r'(?<!\\)\$anything')
# rewrite result of the form '<status>-><location>'
regex_redirect = re.compile(r'(\d+)->(.*)')
# absolute http(s) URL split into scheme, host and the remaining uri
regex_full_url = re.compile(
    r'^(?P<scheme>http|https|HTTP|HTTPS)\://(?P<host>[^/]*)(?P<uri>.*)')
# leading path element of the form '_<n>.<n>.<n>'
regex_version = re.compile(r'^(_[\d]+\.[\d]+\.[\d]+)$')

# pattern to find valid paths in url /application/controller/...
#   this could be:
#     for static pages:
#        /<b:application>/static/<x:file>
#     for dynamic pages:
#        /<a:application>[/<c:controller>[/<f:function>[.<e:ext>][/<s:args>]]]
#   application, controller, function and ext may only contain [a-zA-Z0-9_]
#   file and args may also contain '-', '=', '.' and '/'
#   apps in routes_apps_raw must parse raw_args into args

# NOTE: raw strings are required here; '\w' and '\.' are not valid escape
# sequences in ordinary string literals.
regex_url = re.compile(r'^/((?P<a>\w+)(/(?P<c>\w+)(/(?P<z>(?P<f>\w+)(\.(?P<e>[\w.]+))?(?P<s>.*)))?)?)?$')
# any character that is not allowed in parsed args
regex_args = re.compile(r'[^\w/.@=-]')
57
58
def _router_default():
    """Build and return a fresh Storage holding the default BASE router.

    A new object is created on every call, so callers may modify the
    result freely.
    """
    defaults = dict(
        default_application='init',
        applications='ALL',
        default_controller='default',
        controllers='DEFAULT',
        default_function='index',
        functions=dict(),
        default_language=None,
        languages=None,
        root_static=['favicon.ico', 'robots.txt'],
        map_static=None,
        domains=None,
        exclusive_domain=False,
        map_hyphen=False,
        # legal app/ctlr/fcn/ext
        acfe_match=r'\w+$',
        #
        #  Implementation note:
        #  The file_match & args_match patterns use look-behind to avoid
        #  pathological backtracking from nested patterns.
        #
        # legal static subpath
        file_match=r'([-+=@$%\w]|(?<=[-+=@$%\w])[./])*$',
        args_match=r'([\w@ =-]|(?<=[\w@ -])[.])*$',
    )
    return Storage(defaults)
85
86
def _params_default(app=None):
    """Build and return a fresh Storage of default regex-routing parameters.

    `app` names the application the parameters belong to; when it is not
    given the set is named "BASE" and the default application is "init".
    """
    ticket_page = (
        '<html><body><h1>Internal error</h1>Ticket issued: <a href="/admin/default/ticket/%(ticket)s" target="_blank">%(ticket)s</a></body><!-- this is junk text else IE does not display the page: '
        + ('x' * 512)
        + ' //--></html>')
    return Storage({
        'name': app or "BASE",
        'default_application': app or "init",
        'default_controller': "default",
        'default_function': "index",
        'routes_app': [],
        'routes_in': [],
        'routes_out': [],
        'routes_onerror': [],
        'routes_apps_raw': [],
        'error_handler': None,
        'error_message': '<html><body><h1>%s</h1></body></html>',
        'error_message_ticket': ticket_page,
        'routers': None,
        'logging': 'off',
    })
106
# Module-level routing state; all of it is rebuilt by load().
#   params_apps: per-application parameter sets, keyed by application name
#   params:      the base parameter set
#   routers:     router configuration, or None when routers are not in use
params_apps = dict()
params = _params_default(app=None)  # regex rewrite parameters
THREAD_LOCAL.routes = params  # default to base regex rewrite parameters
routers = None
111
112
def log_rewrite(string):
    """Emit a rewrite trace message according to params.logging.

    'off' (or any false value) discards the message, 'print' writes it to
    stdout, 'info'/'warning'/'error'/'critical' use the matching logger
    method, and every other value (including 'debug') logs at debug level.
    """
    level = params.logging
    if not level or level == 'off':
        return
    if level == 'print':
        print(string)
        return
    if level in ('info', 'warning', 'error', 'critical'):
        emit = getattr(logger, level)
    else:
        # 'debug' and unrecognized settings both end up here
        emit = logger.debug
    emit(string)
131
# every key that may appear in a router definition
ROUTER_KEYS = {
    'default_application',
    'applications',
    'default_controller',
    'controllers',
    'default_function',
    'functions',
    'default_language',
    'languages',
    'domain',
    'domains',
    'root_static',
    'path_prefix',
    'exclusive_domain',
    'map_hyphen',
    'map_static',
    'acfe_match',
    'file_match',
    'args_match',
}

# keys that are only accepted in the BASE router
ROUTER_BASE_KEYS = {
    'applications',
    'default_application',
    'domains',
    'path_prefix',
}
144
145#  The external interface to rewrite consists of:
146#
147#  load: load routing configuration file(s)
148#  url_in: parse and rewrite incoming URL
149#  url_out: assemble and rewrite outgoing URL
150#
151#  THREAD_LOCAL.routes.default_application
152#  THREAD_LOCAL.routes.error_message
153#  THREAD_LOCAL.routes.error_message_ticket
154#  THREAD_LOCAL.routes.try_redirect_on_error
155#  THREAD_LOCAL.routes.error_handler
156#
157#  filter_url: helper for doctest & unittest
158#  filter_err: helper for doctest & unittest
159#  regex_filter_out: doctest
160
161
def fixup_missing_path_info(environ):
    """Fill in missing PATH_INFO, QUERY_STRING, REQUEST_URI and HTTP_HOST.

    `environ` is modified in place:

    - if PATH_INFO is empty but REQUEST_URI is set, PATH_INFO and
      QUERY_STRING are derived from REQUEST_URI;
    - otherwise, if REQUEST_URI is empty, it is rebuilt from PATH_INFO and
      QUERY_STRING;
    - if HTTP_HOST is empty, it is set to 'SERVER_NAME:SERVER_PORT'.
    """
    eget = environ.get
    path_info = eget('PATH_INFO')
    request_uri = eget('REQUEST_URI')
    if not path_info and request_uri:
        # for fcgi, get path_info and query_string from request_uri;
        # split on the first '?' only so that a query string which itself
        # contains '?' is kept intact
        path_info, _, query_string = request_uri.partition('?')
        environ['PATH_INFO'] = path_info
        environ['QUERY_STRING'] = query_string
    elif not request_uri:
        query_string = eget('QUERY_STRING')
        if query_string:
            environ['REQUEST_URI'] = '%s?%s' % (path_info, query_string)
        else:
            environ['REQUEST_URI'] = path_info
    if not eget('HTTP_HOST'):
        environ['HTTP_HOST'] = \
            '%s:%s' % (eget('SERVER_NAME'), eget('SERVER_PORT'))
181
182
def url_in(request, environ):
    """Parse and rewrite an incoming URL.

    Delegates to map_url_in() when routers are configured and to
    regex_url_in() otherwise.
    """
    if not routers:
        return regex_url_in(request, environ)
    return map_url_in(request, environ)
188
189
def url_out(request, environ, application, controller, function,
            args, other, scheme, host, port, language=None):
    """Assemble and rewrite an outgoing URL.

    The path is produced by map_url_out() when routers are configured and
    by regex_filter_out() otherwise. When a host is available (given, or
    taken from the request because host is True or a scheme/port was
    requested) the result is an absolute URL, otherwise just the path.
    """
    if not routers:
        plain = '/%s/%s/%s%s' % (application, controller, function, other)
        url = regex_filter_out(plain, environ)
    else:
        acf = map_url_out(request, environ, application, controller,
                          function, args, other, scheme, host, port, language)
        url = '%s%s' % (acf, other)

    #  fill in scheme and host if absolute URL is requested
    #  scheme can be a string, eg 'http', 'https', 'ws', 'wss'
    use_request_host = host is True or (
        host is None and (scheme or port is not None))
    if use_request_host:
        host = request.env.http_host
    if not scheme or scheme is True:
        if request:
            scheme = request.env.get('wsgi_url_scheme', 'http').lower()
        else:
            scheme = 'http'
    if not host:
        return url
    if port:
        # an explicit port replaces whatever port the host string carries
        host_port = host.split(':', 1)[0] + ':%s' % port
    else:
        host_port = host
    return '%s://%s%s' % (scheme, host_port, url)
212
213
def try_rewrite_on_error(http_response, request, environ, ticket=None):
    """
    Called from main.wsgibase to rewrite the http response.

    Looks up the response status in routes_onerror. Returns a
    (response, environ) pair: the original response when nothing applies,
    a 303 HTTP object for an absolute http(s) target, or None together
    with an environ rewritten to the error path for a local target.
    """
    status = int(str(http_response.status).split()[0])
    if status < 399 or not THREAD_LOCAL.routes.routes_onerror:
        # do nothing!
        return http_response, environ

    keys = {'%s/%s' % (request.application, status),
            '%s/*' % (request.application),
            '*/%s' % (status),
            '*/*'}
    for key, uri in THREAD_LOCAL.routes.routes_onerror:
        if key not in keys:
            continue
        if uri == '!':
            # do nothing!
            return http_response, environ
        path_info, has_query, query_string = uri.partition('?')
        if has_query:
            query_string += '&'
        query_string += \
            'code=%s&ticket=%s&requested_uri=%s&request_url=%s' % \
            (status, ticket, urllib_quote_plus(
                request.env.request_uri), request.url)
        if uri.startswith('http://') or uri.startswith('https://'):
            # make up a response
            url = path_info + '?' + query_string
            message = 'You are being redirected <a href="%s">here</a>'
            return HTTP(303, message % url, Location=url), environ
        if not environ.get('__ROUTES_ONERROR__', False):
            # wsgibase will be called recursively with
            # the routes_onerror path.
            environ['__ROUTES_ONERROR__'] = True  # limit recursion
            environ['PATH_INFO'] = '/' + path_info.lstrip('/')
            environ['QUERY_STRING'] = query_string
            environ['WEB2PY_STATUS_CODE'] = status
            return None, environ
    # do nothing!
    return http_response, environ
254
255
def try_redirect_on_error(http_object, request, ticket=None):
    """Called from main.wsgibase to rewrite the http response.

    For a status above 399 with a matching routes_onerror entry, returns a
    303 HTTP object redirecting to that entry (with code, ticket and the
    requested URI appended as query parameters). An entry of '!' or no
    match leaves `http_object` untouched.
    """
    status = int(str(http_object.status).split()[0])
    if status > 399 and THREAD_LOCAL.routes.routes_onerror:
        keys = {'%s/%s' % (request.application, status),
                '%s/*' % (request.application),
                '*/%s' % (status),
                '*/*'}
        for key, redir in THREAD_LOCAL.routes.routes_onerror:
            if key not in keys:
                continue
            if redir == '!':
                break
            # extend an existing query string, or start a new one
            joiner = '&' if '?' in redir else '?'
            url = '%s%scode=%s&ticket=%s&requested_uri=%s&request_url=%s' % (
                redir, joiner, status, ticket,
                urllib_quote_plus(request.env.request_uri),
                request.url)
            return HTTP(303, 'You are being redirected <a href="%s">here</a>' % url, Location=url)
    return http_object
280
281
def load(routes='routes.py', app=None, data=None, rdict=None):
    """
    load: read (if file) and parse routes
    store results in params
    (called from main.py at web2py initialization time)
    If data is present, it's used instead of the routes.py contents.
    If rdict is present, it must be a dict to be used for routers (unit test)

    Args:
        routes: name of the routes file; looked up at the top level when
            app is None, otherwise under applications/<app>/
        app: application name, or None to (re)load the base configuration
            and then every application found under applications/
        data: routes source text to execute instead of reading a file
        rdict: dict used directly as the `routers` definition

    Returns None. Returns early, without installing anything, when the
    routes file has to be read but does not exist.

    Raises:
        SyntaxError: if the routes source has a syntax error (logged and
            re-raised), or if an app router uses a BASE-only key
    """
    global params
    global routers
    if app is None:
        # reinitialize
        global params_apps
        params_apps = dict()
        params = _params_default(app=None)  # regex rewrite parameters
        THREAD_LOCAL.routes = params              # default to base regex rewrite parameters
        routers = None

    if isinstance(rdict, dict):
        symbols = dict(routers=rdict)
        path = 'rdict'
    else:
        if data is not None:
            path = 'routes'
        else:
            if app is None:
                path = abspath(routes)
            else:
                path = abspath('applications', app, routes)
            if not exists(path):
                return
            data = read_file(path).replace('\r\n', '\n')

        # the routes source is executed as Python code; the names it
        # defines are collected in `symbols`
        symbols = dict(app=app)
        try:
            exec(data, symbols)
        except SyntaxError as e:
            logger.error(
                '%s has a syntax error and will not be loaded\n' % path
                + traceback.format_exc())
            raise e

    p = _params_default(app)

    # regex route lists are compiled entry by entry
    for sym in ('routes_app', 'routes_in', 'routes_out'):
        if sym in symbols:
            for items in symbols[sym]:
                p[sym].append(compile_regex(*items))
    # the remaining recognized settings are copied over as they are
    for sym in ('routes_onerror', 'routes_apps_raw',
                'error_handler', 'error_message', 'error_message_ticket',
                'default_application', 'default_controller', 'default_function',
                'logging'):
        if sym in symbols:
            p[sym] = symbols[sym]
    if 'routers' in symbols:
        p.routers = Storage(symbols['routers'])
        for key in p.routers:
            if isinstance(p.routers[key], dict):
                p.routers[key] = Storage(p.routers[key])

    if app is None:
        params = p                  # install base rewrite parameters
        THREAD_LOCAL.routes = params      # install default as current routes
        #
        #  create the BASE router if routers in use
        #
        routers = params.routers    # establish routers if present
        if isinstance(routers, dict):
            routers = Storage(routers)
        if routers is not None:
            router = _router_default()
            if routers.BASE:
                router.update(routers.BASE)
            routers.BASE = router

        #  scan each app in applications/
        #    create a router, if routers are in use
        #    parse the app-specific routes.py if present
        #
        all_apps = []
        apppath = abspath('applications')
        for appname in os.listdir(apppath):
            # an application is a non-hidden directory that contains a
            # 'controllers' subdirectory
            if not appname.startswith('.') and \
                    isdir(abspath(apppath, appname)) and \
                    isdir(abspath(apppath, appname, 'controllers')):
                all_apps.append(appname)
                if routers:
                    router = Storage(routers.BASE)   # new copy
                    if appname in routers:
                        for key in routers[appname].keys():
                            if key in ROUTER_BASE_KEYS:
                                raise SyntaxError("BASE-only key '%s' in router '%s'" % (key, appname))
                        router.update(routers[appname])
                    routers[appname] = router
                if exists(abspath('applications', appname, routes)):
                    # recursive call: loads the app-specific routes file
                    load(routes, appname)

        if routers:
            load_routers(all_apps)

    else:  # app
        params_apps[app] = p
        # merge the app's own router entry into the router already built
        # for it from the base configuration
        if routers and p.routers:
            if app in p.routers:
                routers[app].update(p.routers[app])

    log_rewrite('URL rewrite is on. configuration in %s' % path)
389
390
def compile_regex(k, v, env=None):
    """
    Preprocess and compile the regular expressions in routes_app/in/out
    The resulting regex will match a pattern of the form::

        [remote address]:[protocol]://[host]:[method] [path]

    We allow abbreviated regexes on input; here we try to complete them.

    Args:
        k: the (possibly abbreviated) match pattern
        v: the replacement pattern
        env: optional dict stored with the compiled entry

    Returns:
        a tuple (compiled regex, replacement template, env or {})

    Raises:
        SyntaxError: if the pattern has no '://' and no ':/' either, i.e.
            the path part lacks its leading '/'
    """
    k0 = k  # original k for error reporting
    # bracket regex in ^...$ if not already done
    if not k[0] == '^':
        k = '^%s' % k
    if not k[-1] == '$':
        k = '%s$' % k
    # if there are no :-separated parts, prepend a catch-all for the IP address
    if k.find(':') < 0:
        # k = '^.*?:%s' % k[1:]
        k = '^.*?:https?://[^:/]+:[a-z]+ %s' % k[1:]
    # if there's no ://, provide a catch-all for the protocol, host & method
    if k.find('://') < 0:
        i = k.find(':/')
        if i < 0:
            raise SyntaxError("routes pattern syntax error: path needs leading '/' [%s]" % k0)
        k = r'%s:https?://[^:/]+:[a-z]+ %s' % (k[:i], k[i + 1:])
    # Placeholders are substituted with a single regex pass each, using a
    # function as replacement so the replacement text is taken literally.
    # Replacing each findall() hit with str.replace() would corrupt a
    # placeholder that has another one as a prefix ($a vs $ab) and would
    # also touch backslash-escaped placeholders.
    # $anything -> ?P<anything>.*
    k = regex_anything.sub(lambda m: '(?P<anything>.*)', k)
    # $a (etc) -> ?P<a>\w+
    k = regex_at.sub(lambda m: r'(?P<%s>\w+)' % m.group(0)[1:], k)
    # same for replacement pattern, but with \g
    v = regex_at.sub(lambda m: r'\g<%s>' % m.group(0)[1:], v)
    return (re.compile(k, re.DOTALL), v, env or {})
426
427
def load_routers(all_apps):
    """Load-time post-processing of routers

    Normalizes every entry of the module-level `routers` in place:
    validates keys, converts controllers/languages/functions to sets,
    compiles the validation patterns and rewrites BASE.domains into a
    dict keyed by (domain, port).

    Args:
        all_apps: list of known application names; routers that are not
            listed are appended to it

    Raises:
        SyntaxError: on a BASE-only key in an app router, an unknown
            router key, or an unknown app in BASE.domains
    """

    for app in routers:
        # initialize apps with routers that aren't present,
        # on behalf of unit tests
        if app not in all_apps:
            all_apps.append(app)
            router = Storage(routers.BASE)   # new copy
            if app != 'BASE':
                keys = set(routers[app]).intersection(ROUTER_BASE_KEYS)
                if keys:
                    raise SyntaxError("BASE-only key(s) %s in router '%s'" % (
                        tuple(keys), app))
            router.update(routers[app])
            routers[app] = router
        router = routers[app]
        keys = set(router).difference(ROUTER_KEYS)
        if keys:
            raise SyntaxError("unknown key(s) %s in router '%s'" % (
                tuple(keys), app))
        # a string value (such as 'DEFAULT') is kept as a string here
        if not router.controllers:
            router.controllers = set()
        elif not isinstance(router.controllers, str):
            router.controllers = set(router.controllers)
        if router.languages:
            router.languages = set(router.languages)
        else:
            router.languages = set()
        if router.functions:
            # a flat collection of functions is attached to the default
            # controller; afterwards functions is a dict of sets
            if isinstance(router.functions, (set, tuple, list)):
                functions = set(router.functions)
                if isinstance(router.default_function, str):
                    functions.add(
                        router.default_function)  # legacy compatibility
                router.functions = {router.default_controller: functions}
            for controller in router.functions:
                router.functions[controller] = set(
                    router.functions[controller])
        else:
            router.functions = dict()
        if app != 'BASE':
            # drop the BASE-only keys copied over from the BASE router
            for base_only in ROUTER_BASE_KEYS:
                router.pop(base_only, None)
            if 'domain' in router:
                routers.BASE.domains[router.domain] = app
            # 'DEFAULT' means: every .py file in the app's controllers folder
            if isinstance(router.controllers, str) and router.controllers == 'DEFAULT':
                router.controllers = set()
                if isdir(abspath('applications', app)):
                    cpath = abspath('applications', app, 'controllers')
                    for cname in os.listdir(cpath):
                        if isfile(abspath(cpath, cname)) and cname.endswith('.py'):
                            router.controllers.add(cname[:-3])
            if router.controllers:
                router.controllers.add('static')
                router.controllers.add(router.default_controller)

    # 'ALL' means: every app collected in all_apps
    if isinstance(routers.BASE.applications, str) and routers.BASE.applications == 'ALL':
        routers.BASE.applications = list(all_apps)
    if routers.BASE.applications:
        routers.BASE.applications = set(routers.BASE.applications)
    else:
        routers.BASE.applications = set()

    for app in routers.keys():
        # set router name
        router = routers[app]
        router.name = app
        # compile URL validation patterns
        router._acfe_match = re.compile(router.acfe_match)
        router._file_match = re.compile(router.file_match)
        if router.args_match:
            router._args_match = re.compile(router.args_match)
        # convert path_prefix to a list of path elements
        if router.path_prefix:
            if isinstance(router.path_prefix, str):
                router.path_prefix = router.path_prefix.strip('/').split('/')

    #  rewrite BASE.domains as tuples
    #
    #      key:   'domain[:port]' -> (domain, port)
    #      value: 'application[/controller[/function]]'
    #                 -> (application, controller, function)
    #      (port, controller and function may be None)
    #
    domains = dict()
    if routers.BASE.domains:
        for (d, a) in iteritems(routers.BASE.domains):
            (domain, app) = (d.strip(':'), a.strip('/'))
            if ':' in domain:
                (domain, port) = domain.split(':')
            else:
                port = None
            if '/' in app:
                (app, ctlr) = app.split('/', 1)
            else:
                ctlr = None
            if ctlr and '/' in ctlr:
                (ctlr, fcn) = ctlr.split('/')
            else:
                fcn = None
            if app not in all_apps and app not in routers:
                raise SyntaxError("unknown app '%s' in domains" % app)
            domains[(domain, port)] = (app, ctlr, fcn)
    routers.BASE.domains = domains
532
533
def regex_uri(e, regexes, tag, default=None):
    """Match the request described by environ `e` against `regexes`.

    A key of the form 'remote:scheme://host:method path' is built from
    `e`. The first (regex, value, custom_env) entry that matches the key
    wins: its custom_env is merged into `e` and the rewritten key is
    returned. `default` is returned when nothing matches. `tag` only
    labels the log messages.
    """
    path = e['PATH_INFO']
    host = e.get('HTTP_HOST', e.get('SERVER_NAME', 'localhost')).lower()
    colon = host.find(':')
    if colon > 0:
        # drop the port
        host = host[:colon]
    remote = e.get('REMOTE_ADDR', 'localhost')
    scheme = e.get('wsgi.url_scheme', 'http').lower()
    method = e.get('REQUEST_METHOD', 'get').lower()
    key = '%s:%s://%s:%s %s' % (remote, scheme, host, method, path)
    for pattern, value, custom_env in regexes:
        if not pattern.match(key):
            continue
        e.update(custom_env)
        rewritten = pattern.sub(value, key)
        log_rewrite('%s: [%s] [%s] -> %s' % (tag, key, value, rewritten))
        return rewritten
    log_rewrite('%s: [%s] -> %s (not rewritten)' % (tag, key, default))
    return default
553
554
def regex_select(env=None, app=None, request=None):
    """
    Selects a set of regex rewrite params for the current request

    With an explicit `app`, that app's parameters (or the base ones) are
    selected. Otherwise, when `env` is given and routes_app is defined,
    the app is determined from the request; in every other case the base
    parameters are selected. Returns the app name (possibly None).
    """
    if app:
        THREAD_LOCAL.routes = params_apps.get(app, params)
    elif not env or not params.routes_app:
        THREAD_LOCAL.routes = params  # default to base rewrite parameters
    elif routers:
        map_url_in(request, env, app=True)
    else:
        app = regex_uri(env, params.routes_app, "routes_app")
        THREAD_LOCAL.routes = params_apps.get(app, params)
    log_rewrite("select routing parameters: %s" % THREAD_LOCAL.routes.name)
    return app  # for doctest
571
572
def regex_filter_in(e):
    """Regex rewrite incoming URL

    Records the original URI in WEB2PY_ORIGINAL_URI, applies routes_in
    (if any) to PATH_INFO, merges a query string produced by the rewrite
    in front of the existing one and rebuilds REQUEST_URI. A rewrite
    result of the form '<status>-><location>' raises the matching HTTP
    redirect instead. Returns the updated environ `e`.
    """
    routes = THREAD_LOCAL.routes
    query = e.get('QUERY_STRING', None)
    original_suffix = '?' + query if query else ''
    e['WEB2PY_ORIGINAL_URI'] = e['PATH_INFO'] + original_suffix
    if routes.routes_in:
        path = regex_uri(e, routes.routes_in,
                         "routes_in", e['PATH_INFO'])
        rmatch = regex_redirect.match(path)
        if rmatch:
            raise HTTP(int(rmatch.group(1)), location=rmatch.group(2))
        new_path, has_query, new_query = path.partition('?')
        e['PATH_INFO'] = new_path
        if has_query:
            # query produced by the rewrite goes first
            if query:
                query = new_query + '&' + query
            else:
                query = new_query
            e['QUERY_STRING'] = query
    final_suffix = '?' + query if query else ''
    e['REQUEST_URI'] = e['PATH_INFO'] + final_suffix
    return e
594
595
def sluggify(key):
    """Return `key` lower-cased with every '.' replaced by '_'."""
    lowered = key.lower()
    return '_'.join(lowered.split('.'))
598
599
def invalid_url(routes):
    """Raise an HTTP 400 'invalid request' built from routes.error_message."""
    body = routes.error_message % 'invalid request'
    raise HTTP(400, body, web2py_error='invalid path')
604
605
def regex_url_in(request, environ):
    """Rewrites and parses incoming URL

    Selects the routing parameters, applies routes_in, copies the environ
    into request.env and then parses the path.

    Returns:
        (static_file, version, environ) for a request under
        /<app>/static/, where version is the leading '_x.y.z' path
        element or None; (None, None, environ) otherwise, after setting
        application, controller, function, extension and args on
        `request`.

    Raises:
        HTTP 400 when the path is malformed or a static path resolves
        outside the application's static folder; HTTP 404 when a static
        request names no file.
    """

    # ##################################################
    # select application
    # rewrite URL if routes_in is defined
    # update request.env
    # ##################################################

    regex_select(env=environ, request=request)
    routes = THREAD_LOCAL.routes
    if routes.routes_in:
        environ = regex_filter_in(environ)
    request.env.update(
        (k.lower().replace('.', '_'), v) for k, v in iteritems(environ))

    # ##################################################
    # serve if a static file
    # ##################################################

    path = urllib_unquote(request.env.path_info) or '/'
    path = path.replace('\\', '/')
    if path.endswith('/') and len(path) > 1:
        path = path[:-1]
    match = regex_url.match(path)
    if not match:
        invalid_url(routes)
    request.raw_args = (match.group('s') or '')
    if request.raw_args.startswith('/'):
        request.raw_args = request.raw_args[1:]
    if match.group('c') == 'static':
        application = match.group('a')
        version, filename = None, match.group('z')
        if not filename:
            raise HTTP(404)
        filename = filename.replace(' ', '_')
        items = filename.split('/', 1)
        if regex_version.match(items[0]):
            version, filename = items
        static_folder = pjoin(global_settings.applications_parent,
                              'applications', application, 'static')
        static_file = os.path.abspath(pjoin(static_folder, filename))
        # The file must live *inside* the static folder. Comparing against
        # the folder plus a path separator prevents '..' sequences from
        # reaching a sibling directory whose name merely starts with
        # 'static' (e.g. 'static_private').
        if not static_file.startswith(static_folder + os.sep):
            invalid_url(routes)
        return (static_file, version, environ)
    else:
        # ##################################################
        # parse application, controller and function
        # ##################################################
        request.application = match.group('a') or routes.default_application
        request.controller = match.group('c') or routes.default_controller
        request.function = match.group('f') or routes.default_function
        request.raw_extension = match.group('e')
        request.extension = request.raw_extension or 'html'
        if request.application in routes.routes_apps_raw:
            # application is responsible for parsing args
            request.args = None
        elif request.raw_args:
            # characters that are not allowed in args become '_'
            args = regex_args.sub('_', request.raw_args)
            request.args = List(args.split('/'))
        else:
            request.args = List([])
    return (None, None, environ)
669
670
def regex_filter_out(url, e=None):
    """Regex rewrite outgoing URL

    Applies the first matching routes_out entry to the path part of `url`
    (the query string is carried over unchanged). Returns `url` as it is
    when routers are in use or when no entry matches.
    """
    if not hasattr(THREAD_LOCAL, 'routes'):
        regex_select()    # ensure routes is set (for application threads)
    routes = THREAD_LOCAL.routes
    if routers:
        return url  # already filtered
    if routes.routes_out:
        path, has_query, query = url.partition('?')
        if e:
            host = e.get('http_host', 'localhost').lower()
            colon = host.find(':')
            if colon > 0:
                host = host[:colon]
            key = '%s:%s://%s:%s %s' % (
                e.get('remote_addr', ''),
                e.get('wsgi_url_scheme', 'http').lower(), host,
                e.get('request_method', 'get').lower(), path)
        else:
            key = ':http://localhost:get %s' % path
        for pattern, value, _unused in routes.routes_out:
            if not pattern.match(key):
                continue
            rewritten = pattern.sub(value, key) + has_query + query
            log_rewrite('routes_out: [%s] -> %s' % (url, rewritten))
            return rewritten
    log_rewrite('routes_out: [%s] not rewritten' % url)
    return url
698
699
def filter_url(url, method='get', remote='0.0.0.0',
               out=False, app=False, lang=None,
               domain=(None, None), env=False, scheme=None,
               host=None, port=None, language=None):
    """
    doctest/unittest interface to regex_filter_in() and regex_filter_out()

    Builds a simulated request for the absolute http(s) `url` and then,
    depending on the flags:

    - app: returns only the application selected for the URL
    - out: returns the rewritten outgoing URL (the path must contain at
      least /a/c/f)
    - otherwise: rewrites the URL as an incoming one and returns either
      the static file path, request.env (when `env` is true) or a summary
      string '/a/c/f[.ext] [args] [?query] [(language)]'

    `method` and `remote` fill REQUEST_METHOD and REMOTE_ADDR; `domain` is
    a (application, controller) pair, or just an application name, used
    for outgoing URLs; `scheme`, `host`, `port` and `language` are passed
    on to map_url_out().
    """
    # NOTE(review): a url that is not an absolute http(s) URL makes match
    # None, so the next line fails with AttributeError
    match = regex_full_url.match(url)
    urlscheme = match.group('scheme').lower()
    urlhost = match.group('host').lower()
    uri = match.group('uri')
    k = uri.find('?')
    if k < 0:
        k = len(uri)
    if isinstance(domain, str):
        domain = (domain, None)
    (path_info, query_string) = (uri[:k], uri[k + 1:])
    path_info = urllib_unquote(path_info)   # simulate server
    e = {
        'REMOTE_ADDR': remote,
        'REQUEST_METHOD': method,
        'wsgi.url_scheme': urlscheme,
        'HTTP_HOST': urlhost,
        'REQUEST_URI': uri,
        'PATH_INFO': path_info,
        'QUERY_STRING': query_string,
        #for filter_out request.env use lowercase
        'remote_addr': remote,
        'request_method': method,
        'wsgi_url_scheme': urlscheme,
        'http_host': urlhost
    }

    request = Storage()
    e["applications_parent"] = global_settings.applications_parent
    request.env = Storage(e)
    request.uri_language = lang

    #  determine application only
    #
    if app:
        if routers:
            return map_url_in(request, e, app=True)
        return regex_select(e)

    #  rewrite outbound URL
    #
    if out:
        (request.env.domain_application,
         request.env.domain_controller) = domain
        items = path_info.lstrip('/').split('/')
        if items[-1] == '':
            items.pop()  # adjust trailing empty args
        assert len(items) >= 3, "at least /a/c/f is required"
        # whatever is left in items after a/c/f are the args
        a = items.pop(0)
        c = items.pop(0)
        f = items.pop(0)
        if not routers:
            return regex_filter_out(uri, e)
        acf = map_url_out(
            request, None, a, c, f, items, None, scheme, host, port, language=language)
        if items:
            url = '%s/%s' % (acf, '/'.join(items))
            if items[-1] == '':
                url += '/'
        else:
            url = acf
        if query_string:
            url += '?' + query_string
        return url

    #  rewrite inbound URL
    #
    (static, version, e) = url_in(request, e)
    if static:
        return static
    result = "/%s/%s/%s" % (
        request.application, request.controller, request.function)
    if request.extension and request.extension != 'html':
        result += ".%s" % request.extension
    if request.args:
        result += " %s" % request.args
    if e['QUERY_STRING']:
        result += " ?%s" % e['QUERY_STRING']
    if request.uri_language:
        result += " (%s)" % request.uri_language
    if env:
        return request.env
    return result
789
790
def filter_err(status, application='app', ticket='tkt'):
    """doctest/unittest interface to routes_onerror

    Returns the redirection URL (with code and ticket appended to its query
    string) for the first matching routes_onerror entry, or `status` itself
    when nothing matches or the matching entry is '!'.
    """
    routes = THREAD_LOCAL.routes
    if not (status > 399 and routes.routes_onerror):
        return status  # no action

    # every key form that applies to this application/status pair
    candidates = set(('%s/%s' % (application, status),
                      '%s/*' % (application),
                      '*/%s' % (status),
                      '*/*'))
    for (pattern, target) in routes.routes_onerror:
        if pattern not in candidates:
            continue
        if target == '!':
            break  # explicit "leave this error alone"
        joiner = '&' if '?' in target else '?'
        return target + joiner + 'code=%s&ticket=%s' % (status, ticket)  # redirection
    return status  # no action
809
810#  router support
811#
812
813
class MapUrlIn(object):
    """Logic for mapping incoming URLs

    The request path is split into ``self.args``; the ``map_*`` methods then
    consume leading args one at a time to settle application, language,
    controller and function, and ``update_request`` copies the result onto
    the request object.
    """

    def __init__(self, request=None, env=None):
        """Initializes a map-in object

        Args:
            request: request object that the map_* methods fill in
            env: environment dict; must contain 'PATH_INFO'. It is modified
                in place: PATH_INFO is normalized to one leading slash and
                WEB2PY_ORIGINAL_URI is added.
        """
        self.request = request
        self.env = env

        self.router = None
        self.application = None
        self.language = None
        self.controller = None
        self.function = None
        self.extension = 'html'

        self.controllers = set()
        self.functions = dict()
        self.languages = set()
        self.default_language = None
        self.map_hyphen = False
        self.exclusive_domain = False

        path = self.env['PATH_INFO']
        self.query = self.env.get('QUERY_STRING', None)
        path = path.lstrip('/')
        self.env['PATH_INFO'] = '/' + path
        self.env['WEB2PY_ORIGINAL_URI'] = self.env['PATH_INFO'] + (
            self.query and ('?' + self.query) or '')

        # to handle empty args, strip exactly one trailing slash, if present
        # .../arg1// represents one trailing empty arg
        #
        if path.endswith('/'):
            path = path[:-1]
        self.args = List(path and path.split('/') or [])

        # see http://www.python.org/dev/peps/pep-3333/#url-reconstruction for URL composition
        self.remote_addr = self.env.get('REMOTE_ADDR', 'localhost')
        self.scheme = self.env.get('wsgi.url_scheme', 'http').lower()
        self.method = self.env.get('REQUEST_METHOD', 'get').lower()
        (self.host, self.port) = (self.env.get('HTTP_HOST'), None)
        if not self.host:
            (self.host, self.port) = (
                self.env.get('SERVER_NAME'), self.env.get('SERVER_PORT'))
        if not self.host:
            (self.host, self.port) = ('localhost', '80')
        # rsplit keeps a bracketed ipv6 literal intact when a port follows
        # ("[::1]:8000"); a host ending in ']' carries no port at all, so it
        # must not be split ("[::1]" would otherwise become '[:' + '1]')
        if ':' in self.host and not self.host.endswith(']'):
            (self.host, self.port) = self.host.rsplit(':', 1)  # for ipv6 support
        if not self.port:
            self.port = '443' if self.scheme == 'https' else '80'

    def map_prefix(self):
        """Strips path prefix, if present in its entirety"""
        prefix = routers.BASE.path_prefix
        if prefix:
            prefixlen = len(prefix)
            if prefixlen > len(self.args):
                return
            for i in xrange(prefixlen):
                if prefix[i] != self.args[i]:
                    return  # prefix didn't match
            self.args = List(self.args[prefixlen:])  # strip the prefix

    def map_app(self):
        """Determines application name

        Selection order: the (hyphen-mapped) first arg when the base router
        is not domain-exclusive and the arg is an acceptable application;
        then a (host, port) or (host, None) entry in the base router's
        domains; then the first arg again (domain-exclusive case); finally
        the base default application. The first arg is consumed when it is
        the selected application.

        Once the application is known, the per-application router settings
        are copied onto this object.

        Raises:
            HTTP: 400 when the application name is malformed or unknown
        """
        base = routers.BASE  # base router
        self.domain_application = None
        self.domain_controller = None
        self.domain_function = None
        self.map_hyphen = base.map_hyphen
        arg0 = self.harg0
        if not base.exclusive_domain and base.applications and arg0 in base.applications:
            self.application = arg0
        elif not base.exclusive_domain and arg0 and not base.applications:
            self.application = arg0
        elif (self.host, self.port) in base.domains:
            (self.application, self.domain_controller,
             self.domain_function) = base.domains[(self.host, self.port)]
            self.env['domain_application'] = self.application
            self.env['domain_controller'] = self.domain_controller
            self.env['domain_function'] = self.domain_function
        elif (self.host, None) in base.domains:
            (self.application, self.domain_controller,
             self.domain_function) = base.domains[(self.host, None)]
            self.env['domain_application'] = self.application
            self.env['domain_controller'] = self.domain_controller
            self.env['domain_function'] = self.domain_function
        elif base.applications and arg0 in base.applications:
            self.application = arg0
        elif arg0 and not base.applications:
            self.application = arg0
        else:
            self.application = base.default_application or ''
        self.pop_arg_if(self.application == arg0)

        if not base._acfe_match.match(self.application):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error="invalid application: '%s'" % self.application)

        if self.application not in routers and \
                (self.application != THREAD_LOCAL.routes.default_application or self.application == 'welcome'):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error="unknown application: '%s'" % self.application)

        #  set the application router
        #
        log_rewrite("select application=%s" % self.application)
        self.request.application = self.application
        if self.application not in routers:
            self.router = routers.BASE                # support gluon.main.wsgibase init->welcome
        else:
            self.router = routers[self.application]   # application router
        self.controllers = self.router.controllers
        self.default_controller = self.domain_controller or self.router.default_controller
        self.functions = self.router.functions
        self.languages = self.router.languages
        self.default_language = self.router.default_language
        self.map_hyphen = self.router.map_hyphen
        self.exclusive_domain = self.router.exclusive_domain
        self._acfe_match = self.router._acfe_match
        self.file_match = self.router.file_match
        self._file_match = self.router._file_match
        self._args_match = self.router._args_match

    def map_root_static(self):
        """
        Handles root-static files (no hyphen mapping)

        a root-static file is one whose incoming URL expects it to be at the root,
        typically robots.txt & favicon.ico

        Returns (file path, None) for a root-static request,
        (None, None) otherwise.
        """

        if len(self.args) == 1 and self.arg0 in self.router.root_static:
            self.controller = self.request.controller = 'static'
            root_static_file = pjoin(global_settings.applications_parent,
                                     'applications', self.application,
                                     self.controller, self.arg0)
            log_rewrite("route: root static=%s" % root_static_file)
            return root_static_file, None
        return None, None

    def map_language(self):
        """Handles language (no hyphen mapping)

        Uses the first arg when it is one of the router's languages (and
        consumes it), otherwise falls back to the default language.
        """
        arg0 = self.arg0  # no hyphen mapping
        if arg0 and self.languages and arg0 in self.languages:
            self.language = arg0
        else:
            self.language = self.default_language
        if self.language:
            log_rewrite("route: language=%s" % self.language)
            self.pop_arg_if(self.language == arg0)

    def map_controller(self):
        """Identifies controller

        Uses the (hyphen-mapped) first arg unless it is missing or not in the
        router's controller list, in which case the default controller is
        used. The arg is consumed when it is the selected controller.

        Raises:
            HTTP: 400 when the controller name is malformed
        """
        #  handle controller
        #
        arg0 = self.harg0    # map hyphens
        if not arg0 or (self.controllers and arg0 not in self.controllers):
            self.controller = self.default_controller or ''
        else:
            self.controller = arg0
        self.pop_arg_if(arg0 == self.controller)
        log_rewrite("route: controller=%s" % self.controller)
        if not self.router._acfe_match.match(self.controller):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error='invalid controller')

    def map_static(self):
        """
        Handles static files
        file_match but no hyphen mapping

        Returns (None, None) when the controller is not 'static'; otherwise
        (static_file, version) where version is the regex_version match on
        the first arg, or None when there is no version prefix.

        Raises:
            HTTP: 400 when no file name is given or the path is rejected
        """
        if self.controller != 'static':
            return None, None
        # self.args(0) is None for an empty arg list and matching None raises
        # TypeError, so only look for a version prefix when an arg exists;
        # the empty case is then rejected below with a proper 400
        version = regex_version.match(self.args(0)) if self.args else None
        if version:
            filename = '/'.join(self.args[1:])
        else:
            filename = '/'.join(self.args)
        if len(self.args) == 0:
            bad_static = True   # require a file name
        elif '/' in self.file_match:
            # match the path
            bad_static = not self.router._file_match.match(filename)
        else:
            # match path elements
            bad_static = any(
                name in ('', '.', '..') or not self.router._file_match.match(name)
                for name in self.args)
        if bad_static:
            log_rewrite('bad static path=%s' % filename)
            raise HTTP(400,
                       THREAD_LOCAL.routes.error_message % 'invalid request',
                       web2py_error='invalid static file')
        #
        #  support language-specific static subdirectories,
        #  eg /appname/en/static/filename => applications/appname/static/en/filename
        #  if language-specific file doesn't exist, try same file in static
        #
        if self.language:
            static_file = pjoin(global_settings.applications_parent,
                                'applications', self.application,
                                'static', self.language, filename)
        if not self.language or not isfile(static_file):
            static_file = pjoin(global_settings.applications_parent,
                                'applications', self.application,
                                'static', filename)
        self.extension = None
        log_rewrite("route: static=%s" % static_file)
        return static_file, version

    def map_function(self):
        """Handles function.extension

        Uses the (hyphen-mapped) first arg as ``function[.extension]`` unless
        it is missing or its function part is not in the controller's function
        list, in which case the default function (domain default first) is
        used.

        Raises:
            HTTP: 400 when the function or extension is malformed
        """
        arg0 = self.harg0    # map hyphens
        functions = self.functions.get(self.controller, set())
        if isinstance(self.router.default_function, dict):
            default_function = self.router.default_function.get(
                self.controller, None)
        else:
            default_function = self.router.default_function  # str or None
        default_function = self.domain_function or default_function
        if not arg0 or functions and arg0.split('.')[0] not in functions:
            self.function = default_function or ""
            self.pop_arg_if(arg0 and self.function == arg0)
        else:
            func_ext = arg0.split('.')
            if len(func_ext) > 1:
                self.function = func_ext[0]
                self.extension = func_ext[-1]
            else:
                self.function = arg0
            self.pop_arg_if(True)
        log_rewrite(
            "route: function.ext=%s.%s" % (self.function, self.extension))

        if not self.router._acfe_match.match(self.function):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error='invalid function')
        if self.extension and not self.router._acfe_match.match(self.extension):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error='invalid extension')

    def validate_args(self):
        """
        Checks args against validation pattern

        Raises:
            HTTP: 400 on the first arg that the router's args pattern rejects
        """
        for arg in self.args:
            if not self.router._args_match.match(arg):
                raise HTTP(
                    400, THREAD_LOCAL.routes.error_message % 'invalid request',
                    web2py_error='invalid arg <%s>' % arg)

    def sluggify(self):
        """Copies env into request.env with lower-cased keys and '.' replaced by '_'"""
        self.request.env.update(
            (k.lower().replace('.', '_'), v) for k, v in iteritems(self.env))

    def update_request(self):
        """
        Updates request from self
        Builds env.request_uri
        Makes lower-case versions of http headers in env
        """
        self.request.application = self.application
        self.request.controller = self.controller
        self.request.function = self.function
        self.request.extension = self.extension
        self.request.args = self.args
        if self.language:
            self.request.uri_language = self.language
        uri = '/%s/%s' % (self.controller, self.function)
        app = self.application
        if self.map_hyphen:
            # present the names the way they appear in hyphen-mapped URLs
            uri = uri.replace('_', '-')
            app = app.replace('_', '-')
        if self.extension and self.extension != 'html':
            uri += '.' + self.extension
        if self.language:
            uri = '/%s%s' % (self.language, uri)
        uri = '/%s%s%s%s' % (
            app,
            uri,
            urllib_quote('/' + '/'.join(
                str(x) for x in self.args)) if self.args else '',
            ('?' + self.query) if self.query else '')
        self.env['REQUEST_URI'] = uri
        self.sluggify()

    @property
    def arg0(self):
        """Returns first arg"""
        return self.args(0)

    @property
    def harg0(self):
        """Returns first arg with optional hyphen mapping"""
        if self.map_hyphen and self.args(0):
            return self.args(0).replace('-', '_')
        return self.args(0)

    def pop_arg_if(self, dopop):
        """Removes the first arg when `dopop` is true (returns nothing)"""
        if dopop:
            self.args.pop(0)
1124
1125
class MapUrlOut(object):
    """Builds the /a/lang/c/f part of an outgoing URL, dropping what can be omitted"""

    def __init__(self, request, env, application, controller,
                 function, args, other, scheme, host, port, language):
        """Stores the URL components together with the router settings that apply"""
        base = routers.BASE
        self.default_application = base.default_application
        self.router = routers[application] if application in routers else base
        router = self.router

        self.request = request
        self.env = env
        self.application = application
        self.controller = controller
        self.is_static = (
            controller == 'static' or controller.startswith('static/'))
        self.function = function
        self.args = args
        self.other = other
        self.scheme = scheme
        self.host = host
        self.port = port
        self.language = language

        # settings taken from the base router and the selected router
        self.applications = base.applications
        self.controllers = router.controllers
        self.functions = router.functions.get(self.controller, set())
        self.languages = router.languages
        self.default_language = router.default_language
        self.exclusive_domain = router.exclusive_domain
        self.map_hyphen = router.map_hyphen
        self.map_static = router.map_static
        self.path_prefix = base.path_prefix

        # domain defaults recorded on the current request, if there is one
        self.domain_application = request and request.env.domain_application
        self.domain_controller = request and request.env.domain_controller

        # default_function is either one name or a per-controller dict
        fallback = router.default_function
        if isinstance(fallback, dict):
            fallback = fallback.get(self.controller, None)
        self.default_function = fallback

        crosses_domain = (self.exclusive_domain
                          and self.domain_application
                          and self.domain_application != self.application)
        if crosses_domain and not self.host:
            raise SyntaxError('cross-domain conflict: must specify host')

        # explicit language wins over the request's; unknown languages are dropped
        lang = self.language or (request and request.uri_language)
        known = lang and self.languages and lang in self.languages
        self.language = lang if known else None

        self.omit_application = self.omit_language = False
        self.omit_controller = self.omit_function = False

    def omit_lang(self):
        """Marks the language as omittable when it is absent or the default"""
        if self.language and self.language != self.default_language:
            return
        self.omit_language = True

    def omit_acf(self):
        """Decides which of application/controller/function can be left out"""
        router = self.router
        app, ctrl, func = self.application, self.controller, self.function

        #  tail defaults without args: /a/c  /a  /
        #
        if not self.args and func == self.default_function:
            self.omit_function = True
            if ctrl == router.default_controller:
                self.omit_controller = True
                if app == self.default_application:
                    self.omit_application = True

        #  default application (possibly the domain's) can go
        #
        if app == (self.domain_application or self.default_application):
            self.omit_application = True

        #  default controller (possibly the domain's) can go
        #
        domain_default = (app == self.domain_application) and self.domain_controller
        if ctrl == (domain_default or router.default_controller or ''):
            self.omit_controller = True

        #  a listed function that is also the default can go
        #
        if self.functions and func in self.functions and func == self.default_function:
            self.omit_function = True

        #  now undo every omission that would make the URL ambiguous;
        #  a language string is presumed unambiguous, so it shields
        #  the application omission
        #
        applications = (
            [self.domain_application] if self.exclusive_domain
            else self.applications)
        if self.omit_language and (not applications or ctrl in applications):
            self.omit_application = False
        if self.omit_language and self.omit_application and (
                not applications or func in applications):
            self.omit_controller = False
        if not self.controllers or func in self.controllers:
            self.omit_controller = False
        if self.args:
            head = self.args[0]
            if any(head in names for names in
                   (self.functions, self.controllers, applications)):
                self.omit_function = False
        if self.omit_controller and (
                func in self.controllers or func in applications):
            self.omit_controller = False
        if self.omit_application and ctrl in applications:
            self.omit_application = False

        #  static is special-cased (easier for external static handling)
        #
        if not self.is_static:
            return
        if not self.map_static:
            self.omit_application = False
            if self.language:
                self.omit_language = False
        self.omit_controller = False
        self.omit_function = False

    def build_acf(self):
        """Assembles the a/lang/c/f string from the parts that were not omitted"""
        if self.map_hyphen:
            self.controller = self.controller.replace('_', '-')
            static_like = (self.controller == 'static'
                           or self.controller.startswith('static/'))
            if not static_like:
                self.application = self.application.replace('_', '-')
                self.function = self.function.replace('_', '-')

        pieces = []
        if not self.omit_application:
            pieces.append(self.application)
        # handle case of flipping lang/static/file to static/lang/file for external rewrite
        if self.is_static and self.map_static is False and not self.omit_language:
            pieces.extend((self.controller, self.language))
        else:
            if not self.omit_language:
                pieces.append(self.language)
            if not self.omit_controller:
                pieces.append(self.controller)
        if not self.omit_function:
            pieces.append(self.function)

        acf = ''.join('/' + piece for piece in pieces)
        if self.path_prefix:
            acf = '/' + '/'.join(self.path_prefix) + acf
        # with args following, an empty prefix must stay empty
        return acf if self.args else (acf or '/')

    def acf(self):
        """Returns /app/lang/controller/function, or None when routers are not in use"""
        if routers:
            self.omit_lang()         # try to omit language
            self.omit_acf()          # try to omit a/c/f
            return self.build_acf()  # build and return the /a/lang/c/f string
        return None                  # use regex filter
1292
1293
def map_url_in(request, env, app=False):
    """Routes incoming URL

    Args:
        request: request object, filled in through MapUrlIn
        env: environment dict of the request (must contain PATH_INFO)
        app: when true, only determine the application

    Returns:
        the application name when `app` is true; otherwise a tuple
        (static_file, version, env) in which static_file is None for
        non-static requests
    """
    #  initialize router-url object
    #
    THREAD_LOCAL.routes = params  # default to base routes
    mapper = MapUrlIn(request=request, env=env)
    mapper.sluggify()
    mapper.map_prefix()  # strip prefix if present
    mapper.map_app()     # determine application

    #  configure THREAD_LOCAL.routes for error rewrite
    #
    if params.routes_app:
        # params_apps is looked up by the selected application name;
        # `app` is only the boolean "application only" flag
        THREAD_LOCAL.routes = params_apps.get(mapper.application, params)

    if app:
        return mapper.application

    root_static_file, version = mapper.map_root_static()  # handle root-static files
    if root_static_file:
        mapper.update_request()
        return (root_static_file, version, mapper.env)
    # handle mapping of lang/static to static/lang in externally-rewritten URLs
    # in case we have to handle them ourselves
    # (the setting lives on the router: MapUrlIn.map_static is a method, so
    # testing it against False could never succeed)
    if (mapper.languages and mapper.router.map_static is False
            and mapper.arg0 == 'static' and mapper.args(1) in mapper.languages):
        mapper.map_controller()
        mapper.map_language()
    else:
        mapper.map_language()
        mapper.map_controller()
    static_file, version = mapper.map_static()
    if static_file:
        mapper.update_request()
        return (static_file, version, mapper.env)
    mapper.map_function()
    mapper.validate_args()
    mapper.update_request()
    return (None, None, mapper.env)
1333
1334
def map_url_out(request, env, application, controller,
                function, args, other, scheme, host, port, language=None):
    """
    Return the /a/c/f (or /a/lang/c/f) part of an outgoing url

    Only transformations that map_url_in is able to undo are allowed.

    Call the incoming values a,c,f,args,lang and the router defaults
    da, dc, df, dl.

    With args=[] and lang=None or dl the trailing defaults simply drop off::

        /da/dc/df => /
        /a/dc/df => /a
        /a/c/df => /a/c

    The default application, or application/controller, may also be dropped
    in front of a function with args::

        /da/c/f/args  => /c/f/args
        /da/dc/f/args => /f/args

    [applications], [controllers] and {functions} are consulted to refuse
    omissions that would be ambiguous.

    Language names are assumed not to collide with a/c/f names.
    """
    return MapUrlOut(request, env, application, controller,
                     function, args, other, scheme, host, port,
                     language).acf()
1365
1366
def get_effective_router(appname):
    """Gives back a private copy of the application's effective router, or None if it has none"""
    if routers and appname in routers:
        return Storage(routers[appname])  # return a copy
    return None
Note: See TracBrowser for help on using the repository browser.