<,�tEXtComment File Manager

File Manager

Path: /opt/cloudlinux/venv/lib/python3.11/site-packages/lxml/html/

Viewing File: html5parser.py

"""
An interface to html5lib that mimics the lxml.html interface.
"""
import sys
import string

from html5lib import HTMLParser as _HTMLParser
from html5lib.treebuilders.etree_lxml import TreeBuilder
from lxml import etree
from lxml.html import Element, XHTML_NAMESPACE, _contains_block_level_tag

# Python 2 / Python 3 compatibility shims.
try:
    # Python 2: ``basestring`` exists and is used for the string checks.
    _strings = basestring
except NameError:
    # No ``basestring`` (Python 3): accept both byte and text strings.
    _strings = (bytes, str)
try:
    # Python 2 module layout.
    from urllib2 import urlopen
except ImportError:
    # Python 3 module layout.
    from urllib.request import urlopen
try:
    # Python 2 module layout.
    from urlparse import urlparse
except ImportError:
    # Python 3 module layout.
    from urllib.parse import urlparse


class HTMLParser(_HTMLParser):
    """An html5lib HTML parser with lxml as tree.

    Thin subclass of html5lib's ``HTMLParser`` that always passes
    ``tree=TreeBuilder`` (imported from ``html5lib.treebuilders.etree_lxml``)
    to the base constructor.
    """

    def __init__(self, strict=False, **kwargs):
        # ``strict`` and any additional keyword arguments are forwarded
        # unchanged; only the ``tree`` argument is fixed by this class.
        _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)


# XHTML support is optional: the wrapper class and its module-level
# instance are only defined when html5lib exposes ``XHTMLParser``.
try:
    from html5lib import XHTMLParser as _XHTMLParser
except ImportError:
    # No XHTMLParser available in html5lib; skip the XHTML wrapper.
    pass
else:
    class XHTMLParser(_XHTMLParser):
        """An html5lib XHTML Parser with lxml as tree."""

        def __init__(self, strict=False, **kwargs):
            # Forward everything to html5lib, fixing ``tree`` to TreeBuilder.
            _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)

    # Module-level XHTML parser instance (exists only if the import worked).
    xhtml_parser = XHTMLParser()


def _find_tag(tree, tag):
    """Return the first child of `tree` named `tag`.

    The plain tag name is tried first; if that finds nothing, the lookup
    is repeated with the tag qualified by the XHTML namespace.  The result
    of that second lookup is returned as-is (it may be None).
    """
    found = tree.find(tag)
    if found is None:
        # Fall back to the namespaced spelling of the same tag.
        found = tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))
    return found


def document_fromstring(html, guess_charset=None, parser=None):
    """
    Parse a whole document from a string and return its root element.

    `html` must be a byte or text string, otherwise TypeError is raised.
    `parser` defaults to the module-level `html_parser`.

    If `guess_charset` is left as None and `html` is a byte string,
    charset guessing is switched on.  Any non-None value is handed to the
    parser as its ``useChardet`` option; for text input with no explicit
    value the option is not passed at all.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    # html5lib does not accept useChardet when the input is already
    # unicode, so the option is only defaulted for byte strings.
    if guess_charset is None and isinstance(html, bytes):
        use_chardet = True
    else:
        use_chardet = guess_charset

    parse_kwargs = {} if use_chardet is None else {'useChardet': use_chardet}
    tree = active_parser.parse(html, **parse_kwargs)
    return tree.getroot()


def fragments_fromstring(html, no_leading_text=False,
                         guess_charset=None, parser=None):
    """Parse several HTML elements and return them as a list.

    The first item of the returned list may be a string (leading text).
    When `no_leading_text` is true, non-whitespace leading text raises
    ``etree.ParserError`` and whitespace-only leading text is dropped, so
    the list then holds elements only.

    `html` must be a byte or text string, otherwise TypeError is raised.
    `parser` defaults to the module-level `html_parser`.

    If `guess_charset` is left as None and `html` is a byte string,
    charset guessing is switched off.  Any non-None value is handed to
    the parser as its ``useChardet`` option.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    # html5lib does not accept useChardet when the input is already
    # unicode, so the option is only defaulted for byte strings.
    if guess_charset is None and isinstance(html, bytes):
        use_chardet = False
    else:
        use_chardet = guess_charset
    parse_kwargs = {} if use_chardet is None else {'useChardet': use_chardet}

    nodes = active_parser.parseFragment(html, 'div', **parse_kwargs)

    starts_with_text = bool(nodes) and isinstance(nodes[0], _strings)
    if no_leading_text and starts_with_text:
        if nodes[0].strip():
            raise etree.ParserError('There is leading text: %r' %
                                    nodes[0])
        # Whitespace-only leading text is silently discarded.
        del nodes[0]
    return nodes


def fragment_fromstring(html, create_parent=False,
                        guess_charset=None, parser=None):
    """Parse a single HTML element from a string.

    Without `create_parent`, exactly one element must be present and only
    whitespace may surround it; otherwise ``etree.ParserError`` is raised.

    If `create_parent` is true (or is a tag name), a new parent element
    is created to hold the parsed content (a ``div`` unless a tag name
    was given).  Leading and trailing text is allowed in that mode.

    `guess_charset` and `parser` are passed through to
    `fragments_fromstring`.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    # Leading text is tolerated only when a wrapper element will hold it.
    wants_parent = bool(create_parent)
    nodes = fragments_fromstring(
        html,
        no_leading_text=not wants_parent,
        guess_charset=guess_charset,
        parser=parser)

    if not create_parent:
        if not nodes:
            raise etree.ParserError('No elements found')
        if len(nodes) > 1:
            raise etree.ParserError('Multiple elements found')
        only = nodes[0]
        if only.tail and only.tail.strip():
            raise etree.ParserError('Element followed by text: %r' % only.tail)
        # Any remaining tail is whitespace only; drop it.
        only.tail = None
        return only

    parent_tag = create_parent if isinstance(create_parent, _strings) else 'div'
    wrapper = Element(parent_tag)
    if nodes:
        if isinstance(nodes[0], _strings):
            # Leading text becomes the text of the wrapper element.
            wrapper.text = nodes[0]
            del nodes[0]
        wrapper.extend(nodes)
    return wrapper


def fromstring(html, guess_charset=None, parser=None):
    """Parse the html, returning a single element/document.

    The input is always parsed as a full document first; the result is
    then reduced to the smallest sensible piece:

    * the document root, if the input starts with ``<html`` or a doctype,
      or if the parsed ``<head>`` has children;
    * the single child of ``<body>``, if that is all the body holds
      (ignoring surrounding whitespace);
    * otherwise the ``<body>`` element itself, renamed to ``div`` when it
      contains a block-level tag and to ``span`` when it does not.

    `html` must be a byte or text string, otherwise TypeError is raised.
    `guess_charset` and `parser` are passed to `document_fromstring`.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    doc = document_fromstring(html, parser=parser,
                              guess_charset=guess_charset)

    # Inspect only the beginning of the input for a doctype or <html> tag.
    prefix = html[:50]
    if isinstance(prefix, bytes):
        # Decode as ascii so text comparison works in python3; that also
        # covers latin-1 and utf-8 for the characters we need.
        prefix = prefix.decode('ascii', 'replace')
    prefix = prefix.lstrip().lower()
    if prefix.startswith(('<html', '<!doctype')):
        return doc

    # A non-empty head means a full document was passed in.
    if len(_find_tag(doc, 'head')):
        return doc

    body = _find_tag(doc, 'body')

    # Exactly one child and nothing but whitespace around it: the input
    # was probably that single element.
    if len(body) == 1:
        only_child = body[0]
        text_blank = not (body.text and body.text.strip())
        tail_blank = not (only_child.tail and only_child.tail.strip())
        if text_blank and tail_blank:
            return only_child

    # Otherwise reuse the body as a fake container, renamed because
    # <body> implies too much structure.
    body.tag = 'div' if _contains_block_level_tag(body) else 'span'
    return body


def parse(filename_url_or_file, guess_charset=None, parser=None):
    """Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.

    If ``guess_charset`` is true, the ``useChardet`` option is passed into
    html5lib to enable character detection.  This option is on by default
    when parsing from URLs, off by default when parsing from file(-like)
    objects (which tend to return Unicode more often than not), and on by
    default when parsing from a file path (which is read in binary mode).

    ``parser`` defaults to the module-level ``html_parser``.

    Streams opened by this function (for a file path or a URL) are closed
    once parsing has finished, including when the parser raises.  A
    file-like object supplied by the caller is never closed here.
    """
    if parser is None:
        parser = html_parser

    # Only streams opened below are closed again; a caller-supplied
    # file-like object stays under the caller's control.
    owns_stream = True
    if not isinstance(filename_url_or_file, _strings):
        fp = filename_url_or_file
        owns_stream = False
        if guess_charset is None:
            # assume that file-like objects return Unicode more often than bytes
            guess_charset = False
    elif _looks_like_url(filename_url_or_file):
        fp = urlopen(filename_url_or_file)
        if guess_charset is None:
            # assume that URLs return bytes
            guess_charset = True
    else:
        fp = open(filename_url_or_file, 'rb')
        if guess_charset is None:
            guess_charset = True

    options = {}
    # html5lib does not accept useChardet as an argument, if it
    # detected the html argument would produce unicode objects.
    if guess_charset:
        options['useChardet'] = guess_charset
    try:
        return parser.parse(fp, **options)
    finally:
        if owns_stream:
            fp.close()


def _looks_like_url(str):
    scheme = urlparse(str)[0]
    if not scheme:
        return False
    elif (sys.platform == 'win32' and
            scheme in string.ascii_letters
            and len(scheme) == 1):
        # looks like a 'normal' absolute path
        return False
    else:
        return True


# Shared default parser instance; the parsing functions in this module
# fall back to it when their ``parser`` argument is None.
html_parser = HTMLParser()

��b IDATx��ytVս��ϓ22 �A@�IR��:�h�CiZ[�v��*E��:�W��ũZA� ^d��QeQ ��@ !��j�Z�'�>g�s��V��仿$�|?g�)&x-E�IE�N�T ��;@x�T.i��%-��X��}S�v�S�5�.��r/UHz^�_$-��W"�w��)�Ɗ�/@Z �&IoX��P�$K��}��JzX��:�;`�� &�, ��ŋu�i��,�e��6��m��X� Ե��rKb1�ԗ��)D�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�A�݀!��I*��]R;I�2$eZ#OR��Z��Sr��r�6m��teff�u��*((P��u�'��v{��DIߔ4^�pI��m�'77WEEE;v�Ǝ��4�-��$]'�RI��{��\�I�&�G�� :I��HJ�� DWBB��=�\��W�R�޽m� o$K�(�V9��ABB�.��}��jѢ��v�`^?IO�ȅ}ڶm�G}T#F��J`�5�6��$-��ھ}�F��I�&��v;0(h;��Б�3�8CӧOWf��!�;A i:��F_m��9s&�|�q%=�#��w��Z�p�rrrl�a A� &��P\\��СC�[A#!� �{��o��l��F�} `��E2��}��M��K/��vV��)i��{4Bff�V�\��|ۭX�`��b�@��k��ɶ�@��%i��$K�z��5z�h�mX�[�I�XZ`� '��b%$��r�5��M��4�º��/�l� ԃ��ߖ�xhʔ)�[@=�} K�6IM}^��5k��㏷݆��z ��Η�ÿO��:gdG��B�m��y�T/��@+��Vɶ�纽z񕏵l�.��y�޴i�t뭷zV��0[�Y^>�Ws�qs}�\/�@$�(�T7�f��.��I�nݺi��R$푔n��.�~?H))\Z�RW'M�o�~v� Ov6o��ԃ��x��z��!�S,&�xm/�y�ɞԟ?�'ua��S�ѽ��b�,8�Gל�K��b��o�i�&��3t7Y,��)JJ��c[�n��z��ӳ�d�E��&K�sZ�Lӄ��I?@��&�%ӟ�۶mSMM�њ0��i�ؐSZ�,�|J+��N�� ~�,��0A0!5%Q-��YQQa��3}$_vV��r�f9f?S8`��zD�AD�AD�AD�AD�AD�AD�AD�AD�A�d��q�P��,ت��m��M��mg��1V?�r�S��I꒟��]�u|�l ��R��CyE�f�٢9�j��URb�zt�Ѱ!m5~tG��j�2�D��h�G�*�{H9�)꒟��ר3:(+3\?�/;TU��ݭ��ʴ~S�6lڧ��U��J*�i�$�d�(#=Yݺd�{�,�p|3��B))��q�:��vN0Y.�jk��ק6��;�SɶVzHJJЀ�-u��tѹ�սk��>�QUU��\��޲��~]fF�n��K?��&��ߡ��5��b��=z9�)��^�|u_�k�-[��y%Z��NU6 ��7M�i��:�]ۦt�k��[�n� ��X��(�e6B�b�.��"�8�cۭ|��~�t�e��u��u��w�|��ή��I-5�"��~��U��k��;��Zi�cE�m��N/��:�]�M��> cQ�^�ui�ƞ��??Ң��p�c#TUU�3�U��a��k��Nw�A`��:��Y_V��-8.KKf�R��itv�޲* 9S�6ֿ��j�,Ճ��NOMߤ��]��z�^��f��O�h��|��<�>@Å5��_��/I��u?{S��Y��4h�K��/2��]��4�%i��t�5��q�]�G�G��e�2�%i�R��| ��W��&f�*^]�??��vq[��Lg��E��_��3f}��F�xu~��}q��d�-��ږF�xu~I N>\��;��͗��O��֊�:�̗��W��J@Bh�W=��y��|��Ggwܷ�H�_NY��?��)T��d��i�'?��խw�h�lm�Qi� �!SUU��sw4kӺ�e4rf��x�u�-[n�Ht��MFj}�H�_��u�~w�>)�o�V}�(�T'��e��bʒ�v��3_�[+v�n@Ȭ\S�}��o��t��}w��=��k�H��F�n�xg��S�� 0eޢm~�l��}��u��q��Z�f��F��oZuu��E��g� 
��`z�t~?b�;t�%�>��WTkķ�h��[�2e�G8L��IW�x��,��^\thr�l��^��Ϊ��{��=�ǆ��<}�q��V�@ ��⠨��W��y^��L��F_��>�0��U�k��D�u�ʫu��Cs$)I��v��:�IK��;6ֲ�4{^��6��ե��m+l��3>�݆��uM �9��u��?>�Z��c��}g�~q��h��Kw��ڭ��eFMM�~p�М�uq�ǿz��6T��b@8��@Y�|��jx��]�(^]�gf�}�M�"tG ��-w��.@�vOqh~/�HII��`��S�[l��.��6�nØXL��9�v�U�cOo��B�\�x�o��Ǥ�'�T�&I��Ǎ�Qw��_w�p�v��[�k�m��O�{�w�~�>�#=P1P�ɞ�a�-w��e�:i�Ǐl��H��o�׈��꒟�f9��SzH�?��+s�h�k%F��s:��q��Vh�qY��`��j�vO�'ρ?PyX3�lх��]�˾u��V�{��ݞ]1��,�M�z�YN�W~̈́�joY�n��}��ȚF߾׮mS]��F�� z�+�E��D�xm/��d{�F��{-�W��-�4w�Y��듏:�?��?��_�g�P�f� ^3��e��cg ��ҵs��8R��2�מ�z�@T��A��N��Gj�)��}CNi��/�R~��}��c:5�{��!��Z�HӋӾ��6}T�]��G�]�7W�6^��n �9*,��Y��qOZj��:P?�Q�� DF��L�|��?��-�^��.��Ɵ�7��}f�F�h׶��xe��2P�s��c��z�1��&5\cn��[�=�V��n[��ĶE鎀uˌ��d3G�II ��k�;�l�NmشOuuRVf��BE��]ۣ�eӶu :��X�-[��(��e��r�4��~��LH�i�6:��Ѻ@ԅ��r��ST�0��trk%$Č��0ez��"� *��z�"��T�/X9|8��.��C5F�eg}��C�Q%�͞�ˣ��JvL��/��?�j�^��h��&�9x�F��`�њ�Z��(��&�y��F��&Iݻf�g��#��W��;�3^�{Wo^4�'v�V[[�K'��;+��m��Ӎִ�]AC@��W?1^{�එyh��+^]��f��m��~�i��Ե�]AB@��WTk�̏t��uR�?�l�.O�IH�i�Yy�Զ�]A�ˀ7c��:��q}ힽ��a�f�6��Z~�қm(��+sK4{^�6}T��*UUu�]��n��.��:kx{�:��2�� _m��=�sA�ߤ�U��@?��Z��-V�ކ�е��z왍��Nэ��{|5� pڶn��b� �p-@��sPg]0G7�fy��-��M�{GCF��'%�{�4`��=�$-�Ge\��eU:m�+Z�t�'�W��jO�!O�AF@��i�k&t�݆��ϥ_�� e��}��=]"��Wz��_��.��͜�E3�l�e�W��F�i��h|t��-w��Z��ۍ��-�u��w=�6�YN��{��6|��}��|��*={��Ѽ��n.�S�.��z��1z��j�ۻT��H]��흾� �D�u��D��v��mv��K��.`V]yY�~s�I��@��t?/��ϓ.� ��m�&�["�+��P��?M��z�ovV��ЫG3�-�G��RR��[(!!\�_��,��^��%?�v@��ҵ�ő�� m��`�Y)�te�m8��G��Mx.))A�]Y��i`�V��i��W�`�?�^��~!�S#��^+�ѽ��GZj��?V�ģ�0.))A�꨷��l�z�L�*��]��O��X�r��Y�`DBBL�Oj��{��-M�H'�ii�-ϰ��ok�7^�� )쭡�b��]�UX��S�ְ�mռY��|5��*��c�ֽk��0B��7镹%ڽ��P#8n�Ȏ��q}mJr�23��_>��l��E�5��$i��wu��i�+ ��H�~�F`��IjƵ@�q �\ �@#qG�0"��.�0"� l��`��.�0!� ,�AQ�HN6�q��z�k��KJ�#��o;`X�v2��>,tێJ��J��7Z/*��A��.@f�ف�jM��zk��g��@TvZ�H3Z�xu�6Ra��'%��O��?/d�Q��5�x��Yk��U]��R�ֽk�ق@��Da�S^�RS�ּ�5��|��B��e��HNN�͘p �Hvc�Y�c��C5:��y �#��`�οb��;z��2��.��!��k��r��}g��U��Wk��yZ��n�=�f�� Pv��sn��3�p��~�;4p�˚=�ē~�Nm�I] 
��¾0lH[��_��L��h�sh��_��ғߤ��c_њ�e��c�)��g�7��V�IZ��5��yr�gk̞W��#��IjӪ�v�>��՞��y睝��M�8�[�|�]��\�շ��8�M��6�%�|@P��Z��ڨ�I-m��>=�k��=�'a��iRo�-x�?>Q��.��}`�Ȏ:�Ws�mu� �u�� > �.@,&��;+!!�˱�tﭧD��Q�w��RW\��vF\~Q7�>�s�p�Y�w$��%A~�;~}��6��¾��g�&if_��=��j�,�v+�U��L�1(tW��a��ke��:@Ș>�j�$�Gq�2�t7S?�vL��|��]u�/�� .�(�0�E��6M��k�6�h��iۺ��z�ښ��O��r��i�f�ޱ�xm/��G�x>�� La�l%%��~�{��l�Bs��R4�*��}{�0Z/��t��N�I��ɚ��p�V^#�L�f�:u@k#�RSu�� =��S^��Zy��uR/��.@n�&��΃z~��B=��0eg뺆��#,��Þ�[�B/?�H� uUf�7�y ��W��y}Bw��eg��ל`��Wh�(|��|��`l`.�;�Ws��?��V�@"��c:i�ɍ��L֯��PG�v�6z�c�tM��̠��'��:w��uW��;d��=�;E�v�e��D��}��9�J@B(��0�iհ�b�v�P�1{�\P��&��G�7��D��޴I��y��_��$-Q�jm�~Yrr�&]�C��Dv%b�h|�Yz�n�i_�R;�k��g�}n��JOII��w��yuL}{�Ќǋ�}�:+3Y�?:��W��J/N+Rz��d=�h��b��;d��j͒su��ݔ��@NKMԄ�j��qz��C��5@��y°�h��L��m;*5�ezᕏ��=�ep�� X��L�n?�מ:��r�`��۵�tŤ�Z�|�1�v`�V�뽧��_c��s�ج'�ߤ%o�Tuum�k%%%h�)�u�y]��N�k��[�n� ��'b�2� �l�.=��͜�E%��gf$��[c;�s:�V-�͞W��ߤW�h��-��j�7��]4��=��F-X�]�>��Z�LS�i��[�Y��*�W��e;��Z�a��n(ӇW|e(HNNP��5[= r4tP �&0�<��p�c#��`�v�TNV GFq��v�T�i�*��Ty�a�m�$��ߏWyE�*�V��JKMTfF�w��>'��$-�ؽ�.Ho��.��8��c��"@D�AD�AD�AD�AD�AD�AD�AD�AD�A~�j��*֘,N;Pi�3599�h=��g�o�ضL�g��i��J�5��փy~�}&��Z��d9p֚ e:|��h��L��`��`��b/��d9p�?�fgg+%%�hM��gXo��s�ج��, Ω��Ol0Z��h=x��d��j��L��m��h��ݻ�o��O�[�g_�l�,8a��]�٭+��ӧ��0��$��I�]��c��]:粹:Te��ꢢ"�5a^��K�g�h,&��=��=��՟^��߶�ߢE�ܹS �J}��I%�:8 �IDAT~,�9/ʃPW'M�o�}z��N�ƍ쨓z�Pb��NZ�~�^z�=4m��s��w��g;5�� Y�~��S�VM��RXUյڱ�R��f��?��s�:w �;6�H:�º��i��5��-�maM��&O��3;1I�K�eam�Z�h�͛7+##�v+�c ~u�~ca]�Gn��F'��ټL~��PPP��b��n� v�o��C�4R,ӟ��gg��%�hq}@#M�4IÇ�� O�y^�x�M��Z�x�� )� �yOw@H�k�N˖-Sǎm�b]X@n��+i��͖��!++K3g�d��\�$m�t�$^��Y�f��J��\8PR��F�)77W��א!Cl��$i��:�@@��_o��G�� I{$��#� ��8磌��ŋ9�1A��(�I�m7��֭��>}�ߴ�J�q�7ޗt��^� -[��ԩS�j�*��}��%]&�'� -��ɓ'�ꫯVzz��vB#�;a �7@G��xI��{��j޼�ƌ��.�LÇWBB�7��`O��"I�$/�@R�@eee��@�۷��>}�0��,ɒ2$53Xs��|c��S~��rpTYYY��} kH�c�%��&k��.]��, @��AD�AD�AD�AD�AD�AD�AD�AD�A��@�l��T��<%''�*��L�o�^={��رc5h� %$+CnܸQ3f��ҥK�}�vUVV��s�9G� 
R,�_{�x��ˇ��3��o߾��;TTTd�}��馛��]uuuG��~��i�ԩ��@4��b��n��v�m�vfϞ�/�Peeeq}}za I~,�誫��{UWW뮻��}��_~Y�ƍSMMM��Yχ֝w��aw\�ď�cxꩧt�E��ƍկ_?�۷��5��@�u�?�1�k�N�ׯWzz�/�w�y�>}z�j��3�� k�(�ٺu��q_�Z�v�f̘��:~�AB�Q&�r��|��!��%K��ҥK��g��Ԟ={<_��X-�z� !��C�y�FUU��z~�AB�QIIIjݺ��W�$UXX��D��ٳ��Z~�AB�Q�ƍ�e��c��W�$<(~<�RSS�v�Zu��jjjԧO�Z��Qu��@4� 8��m�&&&j�ԩ��g�$�ď��1h� ͟?_��{�768��@��g �=@�`)))��5o��6m��3��)��ѣ�ƌ��J�;w��ҿUTT��/��K��ZR�{~a=@��0o�<��*狔��i�F��ɶ[�ˎ;T]]��OX@��?��K�.�ۈ�xN ��pppppppppppppppppP��fl߾],�{ｧ��k۶mڿ��o�5B��TӦMӴiӴ|�r�� DB��2e�|�A��n!D��y�'t��k�Ν�[A� $***t�5��'�� "�!�駟��o�a�Dn�Ν:t�֭[g�D��ШQ��0�6q�D��;��@ ��x� M�6�v�(Pii��z�m��Z�4e��w��"@��̴i�x��f��[��~-F�ٱc��&I�Z�2�|��n��!�?$@��{�[��H�T��ɏ�#��@�h��Ȏ��I#�_�m�(F��/6Z3��z��'�\r�,��r��!��;��w2Z3j��=~�G��Y��7��"I�$��i�I.�p��_"��?�p��N`�y��DD��?: ��_�� G�ÿa��b�7J��!B��x@�0�� B��o ��c��G��@`1��C��[@�0��G ��@`0��C��_�u�V1�� a��CX��>�W�` |��`!<��S�`"<��.��`#�c�`�?c��A��C4 �?��c�� p#��~@�0��?:��0��8&��_��M�Q1��J�h#��?��/`��7��;I��q7�aw�Q��A�1Hp ��!�#��<8��/#@�1��U�l7��=�S�=��K.�4Z�?E��_$i��@��!�1�!E4�?��`��P_� ��@��Bă�1��0�#��:��"��a��U�,�x�b��F��Y1� ��[��n|��n� ��#'��v�E��H:`�x��b� ��#��v��D��4��Y ��h��i.i��&��E�Ζ��v#��O� H��4�I�Ŷ��}�:I�k�h��@t��ZR��F��#��(�tXҙ��zZ �?��I��3l7q��@õ��|ۍ�1,G��p�u��Y� ��Ꮿ@h��J��v#�x��xk$ ��v#�9��5��}��_��$��c �S�#��=+��"K�{F��*m7�`#��%�H:NRS�p�6I?��sIՖ{Ap��$I$I:QR��v�2$�Z�@�UJ*��$�]<��F��O4IEND�B`�