PNG  IHDRxsBIT|d pHYs+tEXtSoftwarewww.inkscape.org<,tEXtComment File Manager

File Manager

Path: /opt/alt/python311/lib/python3.11/site-packages/validators/

Viewing File: url.py

"""URL."""

# standard
from functools import lru_cache
import re
from typing import Callable, Optional
from urllib.parse import parse_qs, unquote, urlsplit

# local
from .hostname import hostname
from .utils import validator


@lru_cache
def _username_regex():
    """Return the compiled, case-insensitive pattern for a URL username.

    The pattern is a single group of three alternatives (extended latin,
    dot-atom and non-quoted-string). The result is cached, so the pattern
    is compiled only on the first call.
    """
    alternatives = (
        # extended latin
        r"(^[\u0100-\u017F\u0180-\u024F]",
        # dot-atom
        r"|[-!#$%&'*+/=?^_`{}|~0-9a-z]+(\.[-!#$%&'*+/=?^_`{}|~0-9a-z]+)*$",
        # non-quoted-string
        r"|^([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\011.])*$)",
    )
    return re.compile("".join(alternatives), flags=re.IGNORECASE)


@lru_cache
def _path_regex():
    """Return the compiled, case-insensitive pattern a whole URL path must match.

    The pattern is one character class repeated one or more times and anchored
    at both ends. The result is cached, so it is compiled only on the first call.
    """
    allowed_chars = "".join(
        (
            # allowed symbols
            r"\/a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\@\%",
            # symbols / pictographs
            r"\U0001F300-\U0001F5FF",
            # emoticons / emoji
            r"\U0001F600-\U0001F64F",
            # multilingual unicode ranges
            r"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF",
        )
    )
    return re.compile(f"^[{allowed_chars}]+$", flags=re.IGNORECASE)


def _validate_scheme(value: str):
    """Tell whether ``value`` is one of the schemes accepted by default.

    An empty (falsy) value is never accepted.
    """
    if not value:
        return False
    # More schemes will be considered later.
    # fmt: off
    accepted_schemes = {
        "ftp",
        "ftps",
        "git",
        "http",
        "https",
        "irc",
        "rtmp",
        "rtmps",
        "rtsp",
        "sftp",
        "ssh",
        "telnet",
    }
    # fmt: on
    return value in accepted_schemes


def _confirm_ipv6_skip(value: str, skip_ipv6_addr: bool):
    """Decide whether the IPv6 check should be skipped for ``value``.

    The check is skipped when the caller asked for it, when ``value`` holds
    fewer than two colons, or when ``value`` does not start with ``[``.
    """
    if skip_ipv6_addr:
        return skip_ipv6_addr
    if value.count(":") < 2:
        return True
    return not value.startswith("[")


def _validate_auth_segment(value: str):
    """Check the authentication segment (the part of the netloc before ``@``).

    An empty segment is valid. Otherwise the result is truthy when the
    username matches the username pattern and, if exactly one colon splits
    off a password, that password holds none of ``/``, ``?``, ``#``, ``@``.
    """
    if not value:
        return True

    matches_username = _username_regex().match
    separators = value.count(":")

    if separators > 1:
        # everything before @ is then considered as a username
        # this is a bad practice, but syntactically valid URL
        return matches_username(unquote(value))
    if separators == 0:
        return matches_username(value)

    username, password = value.rsplit(":", 1)
    return matches_username(username) and not any(
        forbidden in password for forbidden in "/?#@"
    )


def _validate_netloc(
    value: str,
    skip_ipv6_addr: bool,
    skip_ipv4_addr: bool,
    may_have_port: bool,
    simple_host: bool,
    consider_tld: bool,
    private: Optional[bool],
    rfc_1034: bool,
    rfc_2782: bool,
):
    """Validate netloc.

    The netloc is split on its last ``@`` into an optional authentication
    segment and a host segment. The host segment is checked with `hostname`
    and, when present, the authentication segment is checked with
    `_validate_auth_segment`.

    Args:
        value:
            Netloc part of the URL (``[userinfo@]host[:port]``).
        skip_ipv6_addr:
            Caller's request to skip the IPv6 check; combined with the shape
            of the host via `_confirm_ipv6_skip` before being forwarded.
        skip_ipv4_addr:
            Forwarded to `hostname`.
        may_have_port:
            Forwarded to `hostname`.
        simple_host:
            Forwarded to `hostname` as ``maybe_simple``.
        consider_tld:
            Forwarded to `hostname`.
        private:
            Forwarded to `hostname`.
        rfc_1034:
            Forwarded to `hostname`.
        rfc_2782:
            Forwarded to `hostname`.

    Returns:
        `False` for an empty netloc or one with more than one ``@``.
        Otherwise the result of `hostname`, and-ed with the result of
        `_validate_auth_segment` when the netloc contains an ``@``.
    """
    if not value or value.count("@") > 1:
        return False

    # without an "@" both the auth segment and the separator are empty
    # strings and the host segment is the whole netloc
    basic_auth, at_sign, host = value.rpartition("@")

    skip_ipv6 = _confirm_ipv6_skip(host, skip_ipv6_addr)
    # Brackets are kept when the IPv6 check is skipped or when the host
    # segment itself contains "]:" (a bracketed address followed by a port;
    # presumably `hostname` splits the port off itself - confirm there).
    # The test must look at the host segment only: a "]:" inside the
    # authentication segment says nothing about the host, and looking at the
    # whole netloc left the brackets on hosts such as "[::1]".
    if not (skip_ipv6 or "]:" in host):
        host = host.lstrip("[").replace("]", "", 1)

    host_result = hostname(
        host,
        skip_ipv6_addr=skip_ipv6,
        skip_ipv4_addr=skip_ipv4_addr,
        may_have_port=may_have_port,
        maybe_simple=simple_host,
        consider_tld=consider_tld,
        private=private,
        rfc_1034=rfc_1034,
        rfc_2782=rfc_2782,
    )
    if not at_sign:
        return host_result
    return host_result and _validate_auth_segment(basic_auth)


def _validate_optionals(path: str, query: str, fragment: str, strict_query: bool):
    """Check the optional URL parts: path, query and fragment.

    Empty parts are skipped. The path and the fragment are each matched
    against a pattern. The query is only parsed: a parsing error raised by
    `parse_qs` (possible when `strict_query` is set) is not handled here and
    propagates to the caller.
    """
    is_valid = True

    if path:
        is_valid = bool(_path_regex().match(path))

    if query:
        try:
            # ref: https://github.com/python/cpython/issues/117109
            # the second parse only runs when the first one yields fields
            if parse_qs(query, strict_parsing=strict_query, separator="&"):
                parse_qs(query, strict_parsing=strict_query, separator=";")
        except TypeError:
            # for Python < v3.9.2 (official v3.10)
            parse_qs(query, strict_parsing=strict_query)

    if fragment:
        # See RFC3986 Section 3.5 Fragment for allowed characters
        # Adding "#", see https://github.com/python-validators/validators/issues/403
        fragment_match = re.fullmatch(
            r"[0-9a-z?/:@\-._~%!$&'()*+,;=#]*", fragment, re.IGNORECASE
        )
        is_valid = is_valid and bool(fragment_match)

    return is_valid


@validator
def url(
    value: str,
    /,
    *,
    skip_ipv6_addr: bool = False,
    skip_ipv4_addr: bool = False,
    may_have_port: bool = True,
    simple_host: bool = False,
    strict_query: bool = True,
    consider_tld: bool = False,
    private: Optional[bool] = None,  # only for ip-addresses
    rfc_1034: bool = False,
    rfc_2782: bool = False,
    validate_scheme: Callable[[str], bool] = _validate_scheme,
):
    r"""Check whether the given value is a valid URL.

    The value is split into scheme, netloc, path, query and fragment, and
    each part is validated in turn. This validator was originally inspired
    from [URL validator of dperini][1]. The following diagram is from
    [urlly][2]::


            foo://admin:hunter1@example.com:8042/over/there?name=ferret#nose
            \_/   \___/ \_____/ \_________/ \__/\_________/ \_________/ \__/
             |      |       |       |        |       |          |         |
          scheme username password hostname port    path      query    fragment

    [1]: https://gist.github.com/dperini/729294
    [2]: https://github.com/treeform/urlly

    Examples:
        >>> url('http://duck.com')
        True
        >>> url('ftp://foobar.dk')
        True
        >>> url('http://10.0.0.1')
        True
        >>> url('http://example.com/">user@example.com')
        ValidationError(func=url, args={'value': 'http://example.com/">user@example.com'})

    Args:
        value:
            URL string to validate.
        skip_ipv6_addr:
            When URL string cannot contain an IPv6 address.
        skip_ipv4_addr:
            When URL string cannot contain an IPv4 address.
        may_have_port:
            URL string may contain port number.
        simple_host:
            URL string maybe only hyphens and alpha-numerals.
        strict_query:
            Fail validation on query string parsing error.
        consider_tld:
            Restrict domain to TLDs allowed by IANA.
        private:
            Embedded IP address is public if `False`, private/local if `True`.
        rfc_1034:
            Allow trailing dot in domain/host name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain/Host name is of type service record.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).
        validate_scheme:
            Function that validates URL scheme.

    Returns:
        (Literal[True]): If `value` is a valid url.
        (ValidationError): If `value` is an invalid url.
    """
    # an empty value is rejected, and so is any raw
    # white space: in a url it must be encoded
    if not value or re.search(r"\s", value):
        return False

    try:
        parts = urlsplit(value)
    except ValueError:
        return False

    # each check hands back its own falsy result, so the value
    # returned is the one of the first check that fails
    scheme_result = validate_scheme(parts.scheme)
    if not scheme_result:
        return scheme_result

    netloc_result = _validate_netloc(
        parts.netloc,
        skip_ipv6_addr,
        skip_ipv4_addr,
        may_have_port,
        simple_host,
        consider_tld,
        private,
        rfc_1034,
        rfc_2782,
    )
    if not netloc_result:
        return netloc_result

    return _validate_optionals(parts.path, parts.query, parts.fragment, strict_query)
b IDATxytVսϓ22 A@IR :hCiZ[v*E:WũZA ^dQeQ @ !jZ'>gsV仿$|?g)&x-EIENT ;@xT.i%-X}SvS5.r/UHz^_$-W"w)Ɗ/@Z &IoX P$K}JzX:;` &, ŋui,e6mX ԵrKb1ԗ)DADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADADA݀!I*]R;I2$eZ#ORZSrr6mteffu*((Pu'v{DIߔ4^pIm'77WEEE;vƎ4-$]'RI{\I&G :IHJ DWBB=\WR޽m o$K(V9ABB.}jѢv`^?IOȅ} ڶmG}T#FJ`56$-ھ}FI&v;0(h;Б38CӧOWf!;A i:F_m9s&|q%=#wZprrrla A &P\\СC[A#! {olF} `E2}MK/vV)i{4BffV\|ۭX`b@kɶ@%i$K z5zhmX[IXZ` 'b%$r5M4º/l ԃߖxhʔ)[@=} K6IM}^5k㏷݆z ΗÿO:gdGBmyT/@+Vɶ纽z񕏵l.y޴it뭷zV0[Y^>Wsqs}\/@$(T7f.InݺiR$푔n.~?H))\ZRW'Mo~v Ov6oԃxz! S,&xm/yɞԟ?'uaSѽb,8GלKboi&3t7Y,)JJ c[nzӳdE&KsZLӄ I?@&%ӟ۶mSMMњ0iؐSZ,|J+N ~,0A0!5%Q-YQQa3}$_vVrf9f?S8`zDADADADADADADADADAdqP,تmMmg1V?rSI꒟]u|l RCyEf٢9 jURbztѰ!m5~tGj2DhG*{H9)꒟ר3:(+3\?/;TUݭʴ~S6lڧUJ*i$d(#=Yݺd{,p|3B))q:vN0Y.jkק6;SɶVzHJJЀ-utѹսk>QUU\޲~]fFnK?&ߡ5b=z9)^|u_k-[y%ZNU6 7Mi:]ۦtk[n X(e6Bb."8cۭ|~teuuw|ήI-5"~Uk;ZicEmN/:]M> cQ^uiƞ??Ңpc#TUU3UakNwA`:Y_V-8.KKfRitv޲* 9S6ֿj,ՃNOMߤ]z^fOh|<>@Å5 _/Iu?{SY4hK/2]4%it5q]GGe2%iR| W&f*^]??vq[LgE_3f}Fxu~}qd-ږFxu~I N>\;͗O֊:̗WJ@BhW=y|GgwܷH_NY?)Tdi'?խwhlmQi !SUUsw4kӺe4rfxu-[nHtMFj}H_u~w>)oV}(T'ebʒv3_[+vn@Ȭ\S}ot}w=kHFnxg S 0eޢm~l}uqZfFoZuuEg `zt~? b;t%>WTkķh[2eG8LIWx,^\thrl^Ϊ{=dž<}qV@ ⠨Wy^LF_>0UkDuʫuCs$)Iv:IK;6ֲ4{^6եm+l3>݆uM 9u?>Zc }g~qhKwڭeFMM~pМuqǿz6Tb@8@Y|jx](^]gf}M"tG -w.@vOqh~/HII`S[l.6nØXL9vUcOoB\xoǤ'T&IǍQw_wpv[kmO{w~>#=P1Pɞa-we:iǏlHo׈꒟f9SzH?+shk%Fs:qVhqY`jvO'ρ?PyX3lх]˾uV{ݞ]1,MzYNW~̈́ joYn}ȚF߾׮mS]F z+EDxm/d{F{-W-4wY듏:??_gPf ^3ecg ҵs8R2מz@TANGj)}CNi/R~}c:5{!ZHӋӾ6}T]G]7W6^n 9*,YqOZj:P?Q DFL|?-^.Ɵ7}fFh׶xe2Pscz1&5\cn[=Vn[ĶE鎀uˌd3GII k;lNmشOuuRVfBE]ۣeӶu :X-[(er4~LHi6:Ѻ@ԅrST0trk%$Č0ez" *z"T/X9|8.C5Feg}CQ%͞ˣJvL/?j^h&9xF`њZ(&yF&Iݻfg#W;3^{Wo^4'vV[[K';+mӍִ]AC@W?1^{එyh +^]fm~iԵ]AB@WTk̏t uR?l.OIHiYyԶ]Aˀ7c:q}ힽaf6Z~қm(+sK4{^6}T*UUu]n.:kx{:2 _m=sAߤU@?Z-Vކеz왍Nэ{|5 pڶn b p-@sPg]0G7fy-M{GCF'%{4`=$-Ge\ eU:m+Zt'WjO!OAF@ik&t݆ϥ_ e}=]"Wz_.͜E3leWFih|t-wZۍ-uw=6YN{6|} |*={Ѽn.S.z1zjۻTH]흾 DuDvmvK.`V]yY~sI@t?/ϓ. 
m&["+P?MzovVЫG3-GRR[(!!\_,^%?v@ҵő m`Y)tem8GMx.))A]Y i`ViW`?^~!S#^+ѽGZj?Vģ0.))A꨷lzL*]OXrY`DBBLOj{-MH'ii-ϰ ok7^ )쭡b]UXSְmռY|5*cֽk0B7镹%ڽP#8nȎq}mJr23_>lE5$iwui+ H~F`IjƵ@q \ @#qG0".0" l`„.0! ,AQHN6qzkKJ#o;`Xv2>,tێJJ7Z/*A .@fفjMzkg @TvZH3Zxu6Ra'%O?/dQ5xYkU]Rֽkق@DaS^RSּ5|BeHNN͘p HvcYcC5:y #`οb;z2.!kr}gUWkyZn=f Pvsn3p~;4p˚=ē~NmI] ¾ 0lH[_L hsh_ғߤc_њec)g7VIZ5yrgk̞W#IjӪv>՞y睝M8[|]\շ8M6%|@PZڨI-m>=k='aiRo-x?>Q.}`Ȏ:Wsmu u > .@,&;+!!˱tﭧDQwRW\vF\~Q7>spYw$%A~;~}6¾ g&if_=j,v+UL1(tWake:@Ș>j$Gq2t7S?vL|]u/ .(0E6Mk6hiۺzښOrifޱxm/Gx> Lal%%~{lBsR4*}{0Z/tNIɚpV^#Lf:u@k#RSu =S^ZyuR/.@n&΃z~B=0eg뺆#,Þ[B/?H uUf7y Wy}Bwegל`Wh(||`l`.;Ws?V@"c:iɍL֯PGv6zctM̠':wuW;d=;EveD}9J@B(0iհ bvP1{\P&G7D޴Iy_$-Qjm~Yrr&]CDv%bh|Yzni_ˆR;kg}nJOIIwyuL}{ЌNj}:+3Y?:WJ/N+Rzd=hb;dj͒suݔ@NKMԄ jqzC5@y°hL m;*5ezᕏ=ep XL n?מ:r`۵tŤZ|1v`V뽧_csج'ߤ%oTuumk%%%h)uy]Nk[n 'b2 l.=͜E%gf$[c;s:V-͞WߤWh-j7]4=F-X]>ZLSi[Y*We;Zan(ӇW|e(HNNP5[= r4tP &0<pc#`vTNV GFqvTi*Tyam$ߏWyE*VJKMTfFw>'$-ؽ.Ho.8c"@DADADADADADADADADA~j*֘,N;Pi3599h=goضLgiJ5փy~}&Zd9p֚ e:|hL``b/d9p? fgg+%%hMgXosج, ΩOl0Zh=xdjLmhݻoO[g_l,8a]٭+ӧ0$I]c]:粹:Teꢢ"5a^Kgh,&= =՟^߶“ߢE ܹS J}I%:8 IDAT~,9/ʃPW'Mo}zNƍ쨓zPbNZ~^z=4mswg;5 Y~SVMRXUյڱRf?s:w ;6H:ºi5-maM&O3;1IKeamZh͛7+##v+c ~u~ca]GnF'ټL~PPPbn voC4R,ӟgg %hq}@#M4IÇ Oy^xMZx ) yOw@HkN˖-Sǎmb]X@n+i͖!++K3gd\$mt$^YfJ\8PRF)77Wא!Cl$i:@@_oG I{$# 8磌ŋ91A (Im7֭>}ߴJq7ޗt^ -[ԩSj*}%]&' -ɓ'ꫯVzzvB#;a 7@GxI{j޼ƌ.LÇWBB7`O"I$/@R @eee@۷>}0,ɒ2$53Xs|cS~rpTYYY} kHc %&k.], @ADADADADADADADADA@lT<%''*Lo^={رc5h %$+CnܸQ3fҥK}vUVVs9G R,_{xˇ3o߾;TTTd}馛]uuuG~iԩ@4bnvmvfϞ /Peeeq}}za I~,誫{UWW뮻}_~YƍSMMMYχ֝waw\ďcxꩧtEƍկ_?۷5@u?1kNׯWzz/wy>}zj3 k(ٺuq_Zvf̘:~ ABQ&r|!%KҥKgԞ={<_X-z !CyFUUz~ ABQIIIjݺW$UXXDٳZ~ ABQƍecW$<(~<RSSvZujjjԧOZQu@4 8m&&&jԩg$ď1h ͟?_{768@g =@`)))5o6m3)ѣƌJ;wҿUTT /KZR{~a=@0o<*狔iFɶ[ˎ;T]]OX@?K.ۈxN pppppppppppppppppPfl߾] ,{ァk۶mڿo5BTӦMӴiӴ|r DB2e|An!Dy'tkΝ[A $***t5' "!駟oaDnΝ:t֭[gDШQ06qD;@ x M6v(PiizmZ4ew"@̴ixf [~-Fٱc&IZ2|n!?$@{[HTɏ#@hȎI# _m(F /6Z3z'\r,r!;w2Z3j=~GY7"I$iI.p_"?pN`y DD?: _  Gÿab7J !Bx@0 Bo cG@`1C[@0G @`0C_u V1 aCX>W ` | `!<S `"<. 
`#c`?cAC4 ?c p#~@0?:08&_MQ1J h#?/`7;I  q 7a wQ A 1 Hp !#<8/#@1Ul7=S=K.4Z?E_$i@!1!E4?`P_  @Bă10#: "aU,xbFY1 [n|n #'vEH:`xb #vD4Y hi.i&EΖv#O H4IŶ}:Ikh @tZRF#(tXҙzZ ?I3l7q@õ|ۍ1,GpuY Ꮿ@hJv#xxk$ v#9 5 }_$c S#=+"K{F*m7`#%H:NRSp6I?sIՖ{Ap$I$I:QRv2$Z @UJ*$]<FO4IENDB`