<,�tEXtComment File Manager

File Manager

Path: /opt/alt/python27/lib/python2.7/site-packages/pip/_vendor/html5lib/

Viewing File: _inputstream.py

from __future__ import absolute_import, division, unicode_literals

from pip._vendor.six import text_type
from pip._vendor.six.moves import http_client, urllib

import codecs
import re
from io import BytesIO, StringIO

from pip._vendor import webencodings

from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import _ReparseException
from . import _utils

# Non-unicode versions of constants for use in the pre-parser
# (each character of the imported constants is encoded to a one-byte
# ASCII bytes object so it can be compared with slices of the raw input)
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
# Bytes that end a tag name or an unquoted attribute value in the pre-parser
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])


# Character class (as regex source text) of the code points reported as
# "invalid-codepoint", deliberately excluding the surrogate range, which is
# appended below only on platforms that can represent lone surrogates.
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"  # noqa

if _utils.supports_lone_surrogates:
    # Use one extra step of indirection and create surrogates with
    # eval. Not using this indirection would introduce an illegal
    # unicode literal on platforms not supporting such lone
    # surrogates.
    # The assert guarantees that stripping the final character below removes
    # exactly the closing bracket of the character class.
    assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
                                    eval('"\\uD800-\\uDFFF"') +  # pylint:disable=eval-used
                                    "]")
else:
    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)

# Invalid code points outside the Basic Multilingual Plane; consulted by
# HTMLUnicodeInputStream.characterErrorsUCS2 after it has combined a
# surrogate pair into a single code point value.
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                              0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
                              0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
                              0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
                              0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
                              0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
                              0x10FFFE, 0x10FFFF}

# Matches a single ASCII whitespace/control (U+0009-U+000D), space or
# punctuation character.
# NOTE(review): not referenced elsewhere in this file's visible code;
# presumably kept for importers - confirm before removing.
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")

# Cache for charsUntil(): maps (characters, opposite) to the compiled regex
# built for that argument pair
charsUntilRegEx = {}


class BufferedStream(object):
    """Buffering for streams that do not have buffering of their own

    Everything read from the wrapped stream is remembered so that earlier
    data can be read again after a ``seek()``.

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    """

    def __init__(self, stream):
        """Wrap ``stream``, an object providing a ``read(size)`` method."""
        self.stream = stream
        # Chunks in the order they were read from the underlying stream
        self.buffer = []
        # chunk number, offset; the chunk number is -1 while nothing has
        # been buffered yet
        self.position = [-1, 0]

    def tell(self):
        """Return the current offset, counted from the first byte read."""
        chunkIndex, offset = self.position
        # max() keeps the "nothing buffered" marker (-1) from being
        # interpreted as a negative slice bound.
        consumed = sum(len(chunk) for chunk in self.buffer[:max(chunkIndex, 0)])
        return consumed + offset

    def seek(self, pos):
        """Move the read position to ``pos``, an offset into buffered data.

        Only data that has already been read from the underlying stream can
        be addressed; ``pos`` must lie between 0 and the number of buffered
        bytes (checked with an assertion).
        """
        assert 0 <= pos <= self._bufferedBytes()
        if not self.buffer:
            # Nothing has been read yet, so the only reachable offset is
            # the very start.  Indexing self.buffer[0] here used to raise
            # IndexError.
            self.position = [-1, 0]
            return
        offset = pos
        i = 0
        # Walk forward chunk by chunk until the offset falls inside (or at
        # the end of) chunk i.
        while len(self.buffer[i]) < offset:
            offset -= len(self.buffer[i])
            i += 1
        self.position = [i, offset]

    def read(self, bytes):
        """Return up to ``bytes`` bytes, replaying buffered data first."""
        if not self.buffer:
            return self._readStream(bytes)
        # _readFromBuffer() itself falls through to the underlying stream
        # once the buffered data is exhausted, so no separate "at end of
        # buffer" case is needed here.
        return self._readFromBuffer(bytes)

    def _bufferedBytes(self):
        """Return the total number of bytes held in the buffer."""
        return sum(len(item) for item in self.buffer)

    def _readStream(self, bytes):
        """Read from the underlying stream and remember the data.

        The new chunk is appended to the buffer and the position is moved
        to its end.
        """
        data = self.stream.read(bytes)
        self.buffer.append(data)
        self.position[0] += 1
        self.position[1] = len(data)
        return data

    def _readFromBuffer(self, bytes):
        """Serve a read from buffered chunks, then from the stream.

        Starts at the current position, consumes as many buffered chunks as
        needed and, if the request is still not satisfied, reads the
        remainder from the underlying stream.
        """
        remainingBytes = bytes
        rv = []
        bufferIndex = self.position[0]
        bufferOffset = self.position[1]
        while bufferIndex < len(self.buffer) and remainingBytes != 0:
            assert remainingBytes > 0
            bufferedData = self.buffer[bufferIndex]

            if remainingBytes <= len(bufferedData) - bufferOffset:
                # The request ends inside this chunk
                bytesToRead = remainingBytes
                self.position = [bufferIndex, bufferOffset + bytesToRead]
            else:
                # Take the rest of this chunk and continue with the next
                bytesToRead = len(bufferedData) - bufferOffset
                self.position = [bufferIndex, len(bufferedData)]
                bufferIndex += 1
            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
            remainingBytes -= bytesToRead

            # Every chunk after the first is read from its beginning
            bufferOffset = 0

        if remainingBytes:
            rv.append(self._readStream(remainingBytes))

        return b"".join(rv)


def HTMLInputStream(source, **kwargs):
    """Build the input stream class that matches the type of ``source``.

    Text input yields an HTMLUnicodeInputStream, anything else an
    HTMLBinaryInputStream; ``kwargs`` are forwarded to the chosen class.
    Raises TypeError when a ``*_encoding`` keyword is given together with
    text input.
    """
    # Work around Python bug #20007: read(0) closes the connection.
    # http://bugs.python.org/issue20007
    # HTTP responses are therefore classified as binary without probing.
    isHttpResponse = isinstance(source, http_client.HTTPResponse)
    if not isHttpResponse:
        # Also check for addinfourl wrapping HTTPResponse
        isHttpResponse = (isinstance(source, urllib.response.addbase) and
                          isinstance(source.fp, http_client.HTTPResponse))

    if isHttpResponse:
        isUnicode = False
    elif hasattr(source, "read"):
        # A zero-length read reveals whether the file object yields text
        isUnicode = isinstance(source.read(0), text_type)
    else:
        isUnicode = isinstance(source, text_type)

    if not isUnicode:
        return HTMLBinaryInputStream(source, **kwargs)

    encodings = [name for name in kwargs if name.endswith("_encoding")]
    if encodings:
        raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)

    return HTMLUnicodeInputStream(source, **kwargs)


class HTMLUnicodeInputStream(object):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    The input is already text, so no decoding happens here.  The class
    reads the source in chunks, converts CR LF and CR to LF, records an
    "invalid-codepoint" error for every disallowed code point and provides
    column and line tracking.

    """

    _defaultChunkSize = 10240

    def __init__(self, source):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source) -> Normalized stream from source
        for use by html5lib.

        source can be either a file object (anything with a ``read``
        attribute) or a string; a string is wrapped in a StringIO.

        """

        if not _utils.supports_lone_surrogates:
            # Such platforms will have already checked for such
            # surrogate errors, so no need to do this checking.
            self.reportCharacterErrors = None
        elif len("\U0010FFFF") == 1:
            # One character per non-BMP code point: the regex can be
            # applied directly.
            self.reportCharacterErrors = self.characterErrorsUCS4
        else:
            # Non-BMP code points are stored as surrogate pairs.
            self.reportCharacterErrors = self.characterErrorsUCS2

        # List of where new lines occur
        self.newLines = [0]

        self.charEncoding = (lookupEncoding("utf-8"), "certain")
        self.dataStream = self.openStream(source)

        self.reset()

    def reset(self):
        """Discard the current chunk, recorded errors and position state."""
        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0
        self.errors = []

        # number of (complete) lines in previous chunks
        self.prevNumLines = 0
        # number of columns in the last line of the previous chunk
        self.prevNumCols = 0

        # Deal with CR LF and surrogates split over chunk boundaries
        self._bufferedCharacter = None

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object or a string.

        """
        # Already a file object
        if hasattr(source, 'read'):
            return source
        return StringIO(source)

    def _position(self, offset):
        """Return the zero-based (line, column) of ``offset`` in the chunk.

        Lines and columns contributed by earlier chunks are included.
        """
        chunk = self.chunk
        nLines = chunk.count('\n', 0, offset)
        positionLine = self.prevNumLines + nLines
        lastLinePos = chunk.rfind('\n', 0, offset)
        if lastLinePos == -1:
            # Still on the line that started in a previous chunk
            positionColumn = self.prevNumCols + offset
        else:
            positionColumn = offset - (lastLinePos + 1)
        return (positionLine, positionColumn)

    def position(self):
        """Returns (line, col) of the current position in the stream."""
        line, col = self._position(self.chunkOffset)
        return (line + 1, col)

    def char(self):
        """ Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        """
        # Read a new chunk from the input stream if necessary
        if self.chunkOffset >= self.chunkSize:
            if not self.readChunk():
                return EOF

        chunkOffset = self.chunkOffset
        char = self.chunk[chunkOffset]
        self.chunkOffset = chunkOffset + 1

        return char

    def readChunk(self, chunkSize=None):
        """Load the next chunk from the data stream.

        Returns True when a new chunk is available and False when the
        stream is exhausted.  ``chunkSize`` defaults to
        ``_defaultChunkSize``.
        """
        if chunkSize is None:
            chunkSize = self._defaultChunkSize

        # Remember where the chunk being replaced ended, before it is
        # thrown away.
        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)

        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0

        data = self.dataStream.read(chunkSize)

        # Deal with CR LF and surrogates broken across chunks
        if self._bufferedCharacter:
            data = self._bufferedCharacter + data
            self._bufferedCharacter = None
        elif not data:
            # We have no more data, bye-bye stream
            return False

        if len(data) > 1:
            lastv = ord(data[-1])
            # Hold back a trailing CR or high surrogate: its partner may be
            # the first character of the next chunk.
            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
                self._bufferedCharacter = data[-1]
                data = data[:-1]

        if self.reportCharacterErrors:
            self.reportCharacterErrors(data)

        # Normalize newlines: CR LF and lone CR both become LF
        data = data.replace("\r\n", "\n")
        data = data.replace("\r", "\n")

        self.chunk = data
        self.chunkSize = len(data)

        return True

    def characterErrorsUCS4(self, data):
        """Record one "invalid-codepoint" error per invalid character."""
        invalidCount = len(invalid_unicode_re.findall(data))
        self.errors.extend(["invalid-codepoint"] * invalidCount)

    def characterErrorsUCS2(self, data):
        """Record "invalid-codepoint" errors when non-BMP characters are
        stored as surrogate pairs.

        A valid surrogate pair is combined and reported only when the
        resulting code point is in ``non_bmp_invalid_codepoints``; every
        other match of ``invalid_unicode_re`` is reported as is.
        """
        # Someone picked the wrong compile option
        # You lose
        # Index of a low surrogate already handled as the second half of a
        # pair.  Only that single match is skipped; previously a flag was
        # set here and never cleared, which silenced every later error in
        # the chunk.
        pairedLowSurrogate = -1
        for match in invalid_unicode_re.finditer(data):
            pos = match.start()
            if pos == pairedLowSurrogate:
                continue
            # Pretty sure there should be endianness issues here
            if _utils.isSurrogatePair(data[pos:pos + 2]):
                # We have a surrogate pair!
                char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
                if char_val in non_bmp_invalid_codepoints:
                    self.errors.append("invalid-codepoint")
                pairedLowSurrogate = pos + 1
            else:
                # Lone surrogate or any other invalid code point
                self.errors.append("invalid-codepoint")

    def charsUntil(self, characters, opposite=False):
        """ Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.

        With 'opposite' set, the string returned consists of the leading
        characters that ARE in 'characters' instead.
        """

        # Use a cache of regexps to find the required characters
        try:
            chars = charsUntilRegEx[(characters, opposite)]
        except KeyError:
            if __debug__:
                for c in characters:
                    assert(ord(c) < 128)
            regex = "".join(["\\x%02x" % ord(c) for c in characters])
            if not opposite:
                regex = "^%s" % regex
            chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)

        rv = []

        while True:
            # Find the longest matching prefix
            m = chars.match(self.chunk, self.chunkOffset)
            if m is None:
                # If nothing matched, and it wasn't because we ran out of chunk,
                # then stop
                if self.chunkOffset != self.chunkSize:
                    break
            else:
                end = m.end()
                # If not the whole chunk matched, return everything
                # up to the part that didn't match
                if end != self.chunkSize:
                    rv.append(self.chunk[self.chunkOffset:end])
                    self.chunkOffset = end
                    break
            # If the whole remainder of the chunk matched,
            # use it all and read the next chunk
            rv.append(self.chunk[self.chunkOffset:])
            if not self.readChunk():
                # Reached EOF
                break

        return "".join(rv)

    def unget(self, char):
        """Push ``char`` back so that the next read returns it again.

        Ungetting EOF is a no-op.
        """
        # Only one character is allowed to be ungotten at once - it must
        # be consumed again before any further call to unget
        if char is not EOF:
            if self.chunkOffset == 0:
                # unget is called quite rarely, so it's a good idea to do
                # more work here if it saves a bit of work in the frequently
                # called char and charsUntil.
                # So, just prepend the ungotten character onto the current
                # chunk:
                self.chunk = char + self.chunk
                self.chunkSize += 1
            else:
                self.chunkOffset -= 1
                assert self.chunk[self.chunkOffset] == char


class HTMLBinaryInputStream(HTMLUnicodeInputStream):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    """

    def __init__(self, source, override_encoding=None, transport_encoding=None,
                 same_origin_parent_encoding=None, likely_encoding=None,
                 default_encoding="windows-1252", useChardet=True):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file object (anything with a ``read``
        attribute) or a bytes string; a bytes string is wrapped in a
        BytesIO.

        The encoding arguments are all optional encoding labels.  They are
        consulted by determineEncoding() in this order, after a byte order
        mark and with a meta element pre-scan between transport_encoding
        and same_origin_parent_encoding: override_encoding,
        transport_encoding, same_origin_parent_encoding, likely_encoding,
        (chardet, if useChardet is true and it can be imported),
        default_encoding.

        """
        # Raw Stream - the undecoded byte stream; wrapped in a
        #              BufferedStream by openStream() if it cannot seek
        self.rawStream = self.openStream(source)

        HTMLUnicodeInputStream.__init__(self, self.rawStream)

        # Encoding Information
        # Number of bytes to use when looking for a meta element with
        # encoding information
        self.numBytesMeta = 1024
        # Number of bytes to use when using detecting encoding using chardet
        self.numBytesChardet = 100
        # Things from args
        self.override_encoding = override_encoding
        self.transport_encoding = transport_encoding
        self.same_origin_parent_encoding = same_origin_parent_encoding
        self.likely_encoding = likely_encoding
        self.default_encoding = default_encoding

        # Determine encoding
        self.charEncoding = self.determineEncoding(useChardet)
        assert self.charEncoding[0] is not None

        # Call superclass
        self.reset()

    def reset(self):
        """Recreate the decoding stream for the current encoding and reset
        the chunk state.

        Undecodable input is handled with the codec's 'replace' error
        handler.
        """
        self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
        HTMLUnicodeInputStream.reset(self)

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object or a bytes string.  The result
        supports tell() and seek(); streams that do not are wrapped in a
        BufferedStream.

        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = BytesIO(source)

        # Probe for seekability; any failure at all (missing method,
        # unsupported operation, ...) means buffering is needed, hence the
        # deliberately broad except.
        try:
            stream.seek(stream.tell())
        except Exception:
            stream = BufferedStream(stream)

        return stream

    def determineEncoding(self, chardet=True):
        """Return an (encoding, confidence) pair for the raw stream.

        confidence is "certain" or "tentative".  ``chardet`` controls
        whether the chardet detector is tried when it is importable.
        """
        # BOMs take precedence over everything
        # This will also read past the BOM if present
        charEncoding = self.detectBOM(), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # If we've been overridden, we've been overridden
        charEncoding = lookupEncoding(self.override_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Now check the transport layer
        charEncoding = lookupEncoding(self.transport_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding

        # Look for meta elements with encoding information
        charEncoding = self.detectEncodingMeta(), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Parent document encoding
        charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
        if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
            return charEncoding

        # "likely" encoding
        charEncoding = lookupEncoding(self.likely_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Guess with chardet, if available
        if chardet:
            try:
                from pip._vendor.chardet.universaldetector import UniversalDetector
            except ImportError:
                pass
            else:
                detector = UniversalDetector()
                while not detector.done:
                    chunk = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(chunk, bytes)
                    if not chunk:
                        break
                    detector.feed(chunk)
                detector.close()
                encoding = lookupEncoding(detector.result['encoding'])
                # Rewind so that decoding starts from the first byte
                self.rawStream.seek(0)
                if encoding is not None:
                    return encoding, "tentative"

        # Try the default encoding
        charEncoding = lookupEncoding(self.default_encoding), "tentative"
        if charEncoding[0] is not None:
            return charEncoding

        # Fallback to html5lib's default if even that hasn't worked
        return lookupEncoding("windows-1252"), "tentative"

    def changeEncoding(self, newEncoding):
        """Switch to ``newEncoding`` after a declaration was found while
        parsing.

        Must only be called while the current encoding is tentative.
        Unknown labels are ignored.  A UTF-16 label is treated as UTF-8.
        If the resulting encoding equals the current one it is merely
        marked "certain"; otherwise the raw stream is rewound, the stream
        is reset for the new encoding and _ReparseException is raised so
        that the caller starts over.
        """
        assert self.charEncoding[1] != "certain"
        newEncoding = lookupEncoding(newEncoding)
        if newEncoding is None:
            return
        if newEncoding.name in ("utf-16be", "utf-16le"):
            newEncoding = lookupEncoding("utf-8")
            assert newEncoding is not None

        # The comparison below must also run for the UTF-8 substituted
        # above; chaining it with ``elif`` made a UTF-16 declaration a
        # silent no-op.
        oldEncoding = self.charEncoding[0]
        if newEncoding == oldEncoding:
            self.charEncoding = (oldEncoding, "certain")
            return

        self.rawStream.seek(0)
        self.charEncoding = (newEncoding, "certain")
        self.reset()
        # Report the encoding that was in use before the switch, not the
        # freshly assigned one.
        raise _ReparseException("Encoding changed from %s to %s" % (oldEncoding, newEncoding))

    def detectBOM(self):
        """Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the result of
        lookupEncoding() for it, otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
        }

        # Read in the first 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])         # UTF-8
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)         # UTF-32
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        if encoding:
            self.rawStream.seek(seek)
            return lookupEncoding(encoding)
        else:
            self.rawStream.seek(0)
            return None

    def detectEncodingMeta(self):
        """Report the encoding declared by the meta element

        Only the first ``numBytesMeta`` bytes are scanned and the stream is
        rewound afterwards.  A declared UTF-16 encoding is reported as
        UTF-8.  Returns None when no usable declaration is found.
        """
        buffer = self.rawStream.read(self.numBytesMeta)
        assert isinstance(buffer, bytes)
        parser = EncodingParser(buffer)
        self.rawStream.seek(0)
        encoding = parser.getEncoding()

        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
            encoding = lookupEncoding("utf-8")

        return encoding


class EncodingBytes(bytes):
    """Lower-cased bytes object that carries a read position.

    The position starts before the first byte.  Once it has moved to or
    past the end of the data, reading it through the ``position`` property
    raises StopIteration."""
    def __new__(cls, value):
        assert isinstance(value, bytes)
        return bytes.__new__(cls, value.lower())

    def __init__(self, value):
        # pylint:disable=unused-argument
        self._position = -1

    def __iter__(self):
        return self

    def __next__(self):
        """Advance by one and return the byte there as a length-1 bytes."""
        self._position += 1
        index = self._position
        if index >= len(self):
            raise StopIteration
        if index < 0:
            raise TypeError
        return self[index:index + 1]

    def next(self):
        # Py2 compat
        return self.__next__()

    def previous(self):
        """Step back by one and return the byte at the new position."""
        index = self._position
        if index >= len(self):
            raise StopIteration
        if index < 0:
            raise TypeError
        index -= 1
        self._position = index
        return self[index:index + 1]

    def setPosition(self, position):
        # The check is made against the position being left, not the one
        # being set.
        if self._position >= len(self):
            raise StopIteration
        self._position = position

    def getPosition(self):
        current = self._position
        if current >= len(self):
            raise StopIteration
        return current if current >= 0 else None

    position = property(getPosition, setPosition)

    def getCurrentByte(self):
        here = self.position
        return self[here:here + 1]

    currentByte = property(getCurrentByte)

    def _scan(self, chars, stopOnMember):
        """Move forward to the first byte whose membership in ``chars``
        equals ``stopOnMember``.

        Returns that byte, or None (with the position left at the end of
        the data) when there is no such byte.
        """
        index = self.position               # use property for the error-checking
        size = len(self)
        while index < size:
            byte = self[index:index + 1]
            if (byte in chars) == stopOnMember:
                self._position = index
                return byte
            index += 1
        self._position = index
        return None

    def skip(self, chars=spaceCharactersBytes):
        """Skip past a list of characters"""
        return self._scan(chars, False)

    def skipUntil(self, chars):
        """Skip forward until one of the given characters is found"""
        return self._scan(chars, True)

    def matchBytes(self, bytes):
        """Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone"""
        matched = self.startswith(bytes, self.position)
        if matched:
            self.position += len(bytes)
        return matched

    def jumpTo(self, bytes):
        """Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the match.
        Raises StopIteration when there is no match"""
        try:
            found = self.index(bytes, self.position)
        except ValueError:
            raise StopIteration
        self._position = found + len(bytes) - 1
        return True


class EncodingParser(object):
    """Mini parser for detecting character encoding from meta elements

    Works directly on bytes (wrapped in an EncodingBytes, which lower-cases
    them).  Running off the end of the data surfaces as StopIteration,
    which getEncoding() treats as "stop parsing".
    """

    def __init__(self, data):
        """data - the bytes to work on for encoding detection"""
        self.data = EncodingBytes(data)
        # Set by handleMeta() once a usable declaration has been found
        self.encoding = None

    def getEncoding(self):
        """Scan the data and return the declared encoding, or None.

        The value returned is whatever lookupEncoding() produced for the
        declaration found by handleMeta().
        """
        # Cheap early exit: without a meta tag there is nothing to find
        if b"<meta" not in self.data:
            return None

        # Order matters: the first matching prefix wins, so longer and more
        # specific prefixes come before the bare "<".
        methodDispatch = (
            (b"<!--", self.handleComment),
            (b"<meta", self.handleMeta),
            (b"</", self.handlePossibleEndTag),
            (b"<!", self.handleOther),
            (b"<?", self.handleOther),
            (b"<", self.handlePossibleStartTag))
        # Iterating self.data advances its position by one byte per pass
        for _ in self.data:
            keepParsing = True
            try:
                self.data.jumpTo(b"<")
            except StopIteration:
                # No further "<" in the data
                break
            for key, method in methodDispatch:
                if self.data.matchBytes(key):
                    try:
                        keepParsing = method()
                        break
                    except StopIteration:
                        # The handler ran off the end of the data
                        keepParsing = False
                        break
            if not keepParsing:
                break

        return self.encoding

    def handleComment(self):
        """Skip over comments"""
        return self.data.jumpTo(b"-->")

    def handleMeta(self):
        """Examine the attributes of a meta element for an encoding.

        Returns False (stop parsing) after storing a recognised encoding in
        self.encoding, True (keep parsing) otherwise.
        """
        if self.data.currentByte not in spaceCharactersBytes:
            # if we have <meta not followed by a space so just keep going
            return True
        # We have a valid meta element we want to search for attributes
        # hasPragma: an http-equiv="content-type" attribute has been seen.
        # pendingEncoding: encoding taken from a content attribute that is
        # only accepted once hasPragma becomes true.
        hasPragma = False
        pendingEncoding = None
        while True:
            # Try to find the next attribute after the current position
            attr = self.getAttribute()
            if attr is None:
                return True
            else:
                if attr[0] == b"http-equiv":
                    hasPragma = attr[1] == b"content-type"
                    if hasPragma and pendingEncoding is not None:
                        self.encoding = pendingEncoding
                        return False
                elif attr[0] == b"charset":
                    tentativeEncoding = attr[1]
                    codec = lookupEncoding(tentativeEncoding)
                    if codec is not None:
                        self.encoding = codec
                        return False
                elif attr[0] == b"content":
                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                    tentativeEncoding = contentParser.parse()
                    if tentativeEncoding is not None:
                        codec = lookupEncoding(tentativeEncoding)
                        if codec is not None:
                            if hasPragma:
                                self.encoding = codec
                                return False
                            else:
                                pendingEncoding = codec

    def handlePossibleStartTag(self):
        """Handle "<" that may begin a start tag."""
        return self.handlePossibleTag(False)

    def handlePossibleEndTag(self):
        """Handle "</" that may begin an end tag."""
        # Advance the position by one byte before inspecting the tag name
        next(self.data)
        return self.handlePossibleTag(True)

    def handlePossibleTag(self, endTag):
        """Skip over a start or end tag, including its attributes.

        Always returns True (keep parsing).
        """
        data = self.data
        if data.currentByte not in asciiLettersBytes:
            # If the next byte is not an ascii letter either ignore this
            # fragment (possible start tag case) or treat it according to
            # handleOther
            if endTag:
                data.previous()
                self.handleOther()
            return True

        c = data.skipUntil(spacesAngleBrackets)
        if c == b"<":
            # return to the first step in the overall "two step" algorithm
            # reprocessing the < byte
            data.previous()
        else:
            # Read all attributes
            attr = self.getAttribute()
            while attr is not None:
                attr = self.getAttribute()
        return True

    def handleOther(self):
        """Skip forward to the next ">"."""
        return self.data.jumpTo(b">")

    def getAttribute(self):
        """Return a name,value pair for the next attribute in the stream,
        if one is found, or None"""
        # NOTE(review): the "Step N" labels below presumably follow the
        # numbered steps of the HTML specification's attribute pre-scan
        # algorithm - confirm against the spec revision targeted.
        data = self.data
        # Step 1 (skip chars)
        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
        assert c is None or len(c) == 1
        # Step 2
        if c in (b">", None):
            return None
        # Step 3
        # Name and value are collected as lists of single bytes and joined
        # when returned
        attrName = []
        attrValue = []
        # Step 4 attribute name
        while True:
            if c == b"=" and attrName:
                break
            elif c in spaceCharactersBytes:
                # Step 6!
                c = data.skip()
                break
            elif c in (b"/", b">"):
                return b"".join(attrName), b""
            elif c in asciiUppercaseBytes:
                attrName.append(c.lower())
            elif c is None:
                return None
            else:
                attrName.append(c)
            # Step 5
            c = next(data)
        # Step 7
        if c != b"=":
            # Attribute without a value: step back so that the byte just
            # examined is processed again by the caller
            data.previous()
            return b"".join(attrName), b""
        # Step 8
        next(data)
        # Step 9
        c = data.skip()
        # Step 10
        if c in (b"'", b'"'):
            # 10.1
            quoteChar = c
            while True:
                # 10.2
                c = next(data)
                # 10.3
                if c == quoteChar:
                    next(data)
                    return b"".join(attrName), b"".join(attrValue)
                # 10.4
                elif c in asciiUppercaseBytes:
                    attrValue.append(c.lower())
                # 10.5
                else:
                    attrValue.append(c)
        elif c == b">":
            return b"".join(attrName), b""
        elif c in asciiUppercaseBytes:
            attrValue.append(c.lower())
        elif c is None:
            return None
        else:
            attrValue.append(c)
        # Step 11
        # Unquoted value: runs until whitespace, "<" or ">"
        while True:
            c = next(data)
            if c in spacesAngleBrackets:
                return b"".join(attrName), b"".join(attrValue)
            elif c in asciiUppercaseBytes:
                attrValue.append(c.lower())
            elif c is None:
                return None
            else:
                attrValue.append(c)


class ContentAttrParser(object):
    """Extracts the charset value from the content attribute of a meta
    element."""

    def __init__(self, data):
        assert isinstance(data, bytes)
        self.data = data

    def parse(self):
        """Return the bytes following ``charset=`` in the data, or None.

        The value may be quoted with single or double quotes or unquoted,
        in which case it ends at the first space character.  None is
        returned when there is no charset, no "=" after it, or the data
        ends prematurely.
        """
        stream = self.data
        try:
            # Check if the attr name is charset
            # otherwise return
            stream.jumpTo(b"charset")
            stream.position += 1
            stream.skip()
            if stream.currentByte != b"=":
                # If there is no = sign keep looking for attrs
                return None
            stream.position += 1
            stream.skip()

            opener = stream.currentByte
            if opener in (b'"', b"'"):
                # Look for an encoding between matching quote marks
                stream.position += 1
                start = stream.position
                if not stream.jumpTo(opener):
                    return None
                return stream[start:stream.position]

            # Unquoted value
            start = stream.position
            try:
                stream.skipUntil(spaceCharactersBytes)
                return stream[start:stream.position]
            except StopIteration:
                # Return the whole remaining value
                return stream[start:]
        except StopIteration:
            return None


def lookupEncoding(encoding):
    """Resolve an encoding label through ``webencodings.lookup``.

    ``encoding`` may be bytes, text or None.  Bytes are decoded as ASCII
    first.  None is returned when the bytes are not valid ASCII, when
    ``encoding`` is None, or when the lookup raises ``AttributeError``;
    otherwise whatever ``webencodings.lookup`` returns is passed through.
    """
    label = encoding
    if isinstance(label, bytes):
        try:
            label = label.decode("ascii")
        except UnicodeDecodeError:
            # Non-ASCII bytes are rejected before any lookup is attempted.
            return None

    if label is None:
        return None

    try:
        return webencodings.lookup(label)
    except AttributeError:
        return None

��b IDATx��ytVս��ϓ22 �A@�IR��:�h�CiZ[�v��*E��:�W��ũZA� ^d��QeQ ��@ !��j�Z�'�>g�s��V��仿$�|?g�)&x-E�IE�N�T ��;@x�T.i��%-��X��}S�v�S�5�.��r/UHz^�_$-��W"�w��)�Ɗ�/@Z �&IoX��P�$K��}��JzX��:�;`�� &�, ��ŋu�i��,�e��6��m��X� Ե��rKb1�ԗ��)D�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�A�݀!��I*��]R;I�2$eZ#OR��Z��Sr��r�6m��teff�u��*((P��u�'��v{��DIߔ4^�pI��m�'77WEEE;v�Ǝ��4�-��$]'�RI��{��\�I�&�G�� :I��HJ�� DWBB��=�\��W�R�޽m� o$K�(�V9��ABB�.��}��jѢ��v�`^?IO�ȅ}ڶm�G}T#F��J`�5�6��$-��ھ}�F��I�&��v;0(h;��Б�3�8CӧOWf��!�;A i:��F_m��9s&�|�q%=�#��w��Z�p�rrrl�a A� &��P\\��СC�[A#!� �{��o��l��F�} `��E2��}��M��K/��vV��)i��{4Bff�V�\��|ۭX�`��b�@��k��ɶ�@��%i��$K�z��5z�h�mX�[�I�XZ`� '��b%$��r�5��M��4�º��/�l� ԃ��ߖ�xhʔ)�[@=�} K�6IM}^��5k��㏷݆��z ��Η�ÿO��:gdG��B�m��y�T/��@+��Vɶ�纽z񕏵l�.��y�޴i�t뭷zV��0[�Y^>�Ws�qs}�\/�@$�(�T7�f��.��I�nݺi��R$푔n��.�~?H))\Z�RW'M�o�~v� Ov6o��ԃ��x��z��!�S,&�xm/�y�ɞԟ?�'ua��S�ѽ��b�,8�Gל�K��b��o�i�&��3t7Y,��)JJ��c[�n��z��ӳ�d�E��&K�sZ�Lӄ��I?@��&�%ӟ�۶mSMM�њ0��i�ؐSZ�,�|J+��N�� ~�,��0A0!5%Q-��YQQa��3}$_vV��r�f9f?S8`��zD�AD�AD�AD�AD�AD�AD�AD�AD�A�d��q�P��,ت��m��M��mg��1V?�r�S��I꒟��]�u|�l ��R��CyE�f�٢9�j��URb�zt�Ѱ!m5~tG��j�2�D��h�G�*�{H9�)꒟��ר3:(+3\?�/;TU��ݭ��ʴ~S�6lڧ��U��J*�i�$�d�(#=Yݺd�{�,�p|3��B))��q�:��vN0Y.�jk��ק6��;�SɶVzHJJЀ�-u��tѹ�սk��>�QUU��\��޲��~]fF�n��K?��&��ߡ��5��b��=z9�)��^�|u_�k�-[��y%Z��NU6 ��7M�i��:�]ۦt�k��[�n� ��X��(�e6B�b�.��"�8�cۭ|��~�t�e��u��u��w�|��ή��I-5�"��~��U��k��;��Zi�cE�m��N/��:�]�M��> cQ�^�ui�ƞ��??Ң��p�c#TUU�3�U��a��k��Nw�A`��:��Y_V��-8.KKf�R��itv�޲* 9S�6ֿ��j�,Ճ��NOMߤ��]��z�^��f��O�h��|��<�>@Å5��_��/I��u?{S��Y��4h�K��/2��]��4�%i��t�5��q�]�G�G��e�2�%i�R��| ��W��&f�*^]�??��vq[��Lg��E��_��3f}��F�xu~��}q��d�-��ږF�xu~I N>\��;��͗��O��֊�:�̗��W��J@Bh�W=��y��|��Ggwܷ�H�_NY��?��)T��d��i�'?��խw�h�lm�Qi� �!SUU��sw4kӺ�e4rf��x�u�-[n�Ht��MFj}�H�_��u�~w�>)�o�V}�(�T'��e��bʒ�v��3_�[+v�n@Ȭ\S�}��o��t��}w��=��k�H��F�n�xg��S�� 0eޢm~�l��}��u��q��Z�f��F��oZuu��E��g� 
��`z�t~?b�;t�%�>��WTkķ�h��[�2e�G8L��IW�x��,��^\thr�l��^��Ϊ��{��=�ǆ��<}�q��V�@ ��⠨��W��y^��L��F_��>�0��U�k��D�u�ʫu��Cs$)I��v��:�IK��;6ֲ�4{^��6��ե��m+l��3>�݆��uM �9��u��?>�Z��c��}g�~q��h��Kw��ڭ��eFMM�~p�М�uq�ǿz��6T��b@8��@Y�|��jx��]�(^]�gf�}�M�"tG ��-w��.@�vOqh~/�HII��`��S�[l��.��6�nØXL��9�v�U�cOo��B�\�x�o��Ǥ�'�T�&I��Ǎ�Qw��_w�p�v��[�k�m��O�{�w�~�>�#=P1P�ɞ�a�-w��e�:i�Ǐl��H��o�׈��꒟�f9��SzH�?��+s�h�k%F��s:��q��Vh�qY��`��j�vO�'ρ?PyX3�lх��]�˾u��V�{��ݞ]1��,�M�z�YN�W~̈́�joY�n��}��ȚF߾׮mS]��F�� z�+�E��D�xm/��d{�F��{-�W��-�4w�Y��듏:�?��?��_�g�P�f� ^3��e��cg ��ҵs��8R��2�מ�z�@T��A��N��Gj�)��}CNi��/�R~��}��c:5�{��!��Z�HӋӾ��6}T�]��G�]�7W�6^��n �9*,��Y��qOZj��:P?�Q�� DF��L�|��?��-�^��.��Ɵ�7��}f�F�h׶��xe��2P�s��c��z�1��&5\cn��[�=�V��n[��ĶE鎀uˌ��d3G�II ��k�;�l�NmشOuuRVf��BE��]ۣ�eӶu :��X�-[��(��e��r�4��~��LH�i�6:��Ѻ@ԅ��r��ST�0��trk%$Č��0ez��"� *��z�"��T�/X9|8��.��C5F�eg}��C�Q%�͞�ˣ��JvL��/��?�j�^��h��&�9x�F��`�њ�Z��(��&�y��F��&Iݻf�g��#��W��;�3^�{Wo^4�'v�V[[�K'��;+��m��Ӎִ�]AC@��W?1^{�එyh��+^]��f��m��~�i��Ե�]AB@��WTk�̏t��uR�?�l�.O�IH�i�Yy�Զ�]A�ˀ7c��:��q}ힽ��a�f�6��Z~�қm(��+sK4{^�6}T��*UUu�]��n��.��:kx{�:��2�� _m��=�sA�ߤ�U��@?��Z��-V�ކ�е��z왍��Nэ��{|5� pڶn��b� �p-@��sPg]0G7�fy��-��M�{GCF��'%�{�4`��=�$-�Ge\��eU:m�+Z�t�'�W��jO�!O�AF@��i�k&t�݆��ϥ_�� e��}��=]"��Wz��_��.��͜�E3�l�e�W��F�i��h|t��-w��Z��ۍ��-�u��w=�6�YN��{��6|��}��|��*={��Ѽ��n.�S�.��z��1z��j�ۻT��H]��흾� �D�u��D��v��mv��K��.`V]yY�~s�I��@��t?/��ϓ.� ��m�&�["�+��P��?M��z�ovV��ЫG3�-�G��RR��[(!!\�_��,��^��%?�v@��ҵ�ő�� m��`�Y)�te�m8��G��Mx.))A�]Y��i`�V��i��W�`�?�^��~!�S#��^+�ѽ��GZj��?V�ģ�0.))A�꨷��l�z�L�*��]��O��X�r��Y�`DBBL�Oj��{��-M�H'�ii�-ϰ��ok�7^�� )쭡�b��]�UX��S�ְ�mռY��|5��*��c�ֽk��0B��7镹%ڽ��P#8n�Ȏ��q}mJr�23��_>��l��E�5��$i��wu��i�+ ��H�~�F`��IjƵ@�q �\ �@#qG�0"��.�0"� l��`��.�0!� ,�AQ�HN6�q��z�k��KJ�#��o;`X�v2��>,tێJ��J��7Z/*��A��.@f�ف�jM��zk��g��@TvZ�H3Z�xu�6Ra��'%��O��?/d�Q��5�x��Yk��U]��R�ֽk�ق@��Da�S^�RS�ּ�5��|��B��e��HNN�͘p �Hvc�Y�c��C5:��y �#��`�οb��;z��2��.��!��k��r��}g��U��Wk��yZ��n�=�f�� Pv��sn��3�p��~�;4p�˚=�ē~�Nm�I] 
��¾0lH[��_��L��h�sh��_��ғߤ��c_њ�e��c�)��g�7��V�IZ��5��yr�gk̞W��#��IjӪ�v�>��՞��y睝��M�8�[�|�]��\�շ��8�M��6�%�|@P��Z��ڨ�I-m��>=�k��=�'a��iRo�-x�?>Q��.��}`�Ȏ:�Ws�mu� �u�� > �.@,&��;+!!�˱�tﭧD��Q�w��RW\��vF\~Q7�>�s�p�Y�w$��%A~�;~}��6��¾��g�&if_��=��j�,�v+�U��L�1(tW��a��ke��:@Ș>�j�$�Gq�2�t7S?�vL��|��]u�/�� .�(�0�E��6M��k�6�h��iۺ��z�ښ��O��r��i�f�ޱ�xm/��G�x>�� La�l%%��~�{��l�Bs��R4�*��}{�0Z/��t��N�I��ɚ��p�V^#�L�f�:u@k#�RSu�� =��S^��Zy��uR/��.@n�&��΃z~��B=��0eg뺆��#,��Þ�[�B/?�H� uUf�7�y ��W��y}Bw��eg��ל`��Wh�(|��|��`l`.�;�Ws��?��V�@"��c:i�ɍ��L֯��PG�v�6z�c�tM��̠��'��:w��uW��;d��=�;E�v�e��D��}��9�J@B(��0�iհ�b�v�P�1{�\P��&��G�7��D��޴I��y��_��$-Q�jm�~Yrr�&]�C��Dv%b�h|�Yz�n�i_�R;�k��g�}n��JOII��w��yuL}{�Ќǋ�}�:+3Y�?:��W��J/N+Rz��d=�h��b��;d��j͒su��ݔ��@NKMԄ�j��qz��C��5@��y°�h��L��m;*5�ezᕏ��=�ep�� X��L�n?�מ:��r�`��۵�tŤ�Z�|�1�v`�V�뽧��_c��s�ج'�ߤ%o�Tuum�k%%%h�)�u�y]��N�k��[�n� ��'b�2� �l�.=��͜�E%��gf$��[c;�s:�V-�͞W��ߤW�h��-��j�7��]4��=��F-X�]�>��Z�LS�i��[�Y��*�W��e;��Z�a��n(ӇW|e(HNNP��5[= r4tP �&0�<��p�c#��`�v�TNV GFq��v�T�i�*��Ty�a�m�$��ߏWyE�*�V��JKMTfF�w��>'��$-�ؽ�.Ho��.��8��c��"@D�AD�AD�AD�AD�AD�AD�AD�AD�A~�j��*֘,N;Pi�3599�h=��g�o�ضL�g��i��J�5��փy~�}&��Z��d9p֚ e:|��h��L��`��`��b/��d9p�?�fgg+%%�hM��gXo��s�ج��, Ω��Ol0Z��h=x��d��j��L��m��h��ݻ�o��O�[�g_�l�,8a��]�٭+��ӧ��0��$��I�]��c��]:粹:Te��ꢢ"�5a^��K�g�h,&��=��=��՟^��߶�ߢE�ܹS �J}��I%�:8 �IDAT~,�9/ʃPW'M�o�}z��N�ƍ쨓z�Pb��NZ�~�^z�=4m��s��w��g;5�� Y�~��S�VM��RXUյڱ�R��f��?��s�:w �;6�H:�º��i��5��-�maM��&O��3;1I�K�eam�Z�h�͛7+##�v+�c ~u�~ca]�Gn��F'��ټL~��PPP��b��n� v�o��C�4R,ӟ��gg��%�hq}@#M�4IÇ�� O�y^�x�M��Z�x�� )� �yOw@H�k�N˖-Sǎm�b]X@n��+i��͖��!++K3g�d��\�$m�t�$^��Y�f��J��\8PR��F�)77W��א!Cl��$i��:�@@��_o��G�� I{$��#� ��8磌��ŋ9�1A��(�I�m7��֭��>}�ߴ�J�q�7ޗt��^� -[��ԩS�j�*��}��%]&�'� -��ɓ'�ꫯVzz��vB#�;a �7@G��xI��{��j޼�ƌ��.�LÇWBB�7��`O��"I�$/�@R�@eee��@�۷��>}�0��,ɒ2$53Xs��|c��S~��rpTYYY��} kH�c�%��&k��.]��, @��AD�AD�AD�AD�AD�AD�AD�AD�A��@�l��T��<%''�*��L�o�^={��رc5h� %$+CnܸQ3f��ҥK�}�vUVV��s�9G� 
R,�_{�x��ˇ��3��o߾��;TTTd�}��馛��]uuuG��~��i�ԩ��@4��b��n��v�m�vfϞ�/�Peeeq}}za I~,�誫��{UWW뮻��}��_~Y�ƍSMMM��Yχ֝w��aw\�ď�cxꩧt�E��ƍկ_?�۷��5��@�u�?�1�k�N�ׯWzz�/�w�y�>}z�j��3�� k�(�ٺu��q_�Z�v�f̘��:~�AB�Q&�r��|��!��%K��ҥK��g��Ԟ={<_��X-�z� !��C�y�FUU��z~�AB�QIIIjݺ��W�$UXX��D��ٳ��Z~�AB�Q�ƍ�e��c��W�$<(~<�RSS�v�Zu��jjjԧO�Z��Qu��@4� 8��m�&&&j�ԩ��g�$�ď��1h� ͟?_��{�768��@��g �=@�`)))��5o��6m��3��)��ѣ�ƌ��J�;w��ҿUTT��/��K��ZR�{~a=@��0o�<��*狔��i�F��ɶ[�ˎ;T]]��OX@��?��K�.�ۈ�xN ��pppppppppppppppppP��fl߾],�{ｧ��k۶mڿ��o�5B��TӦMӴiӴ|�r�� DB��2e�|�A��n!D��y�'t��k�Ν�[A� $***t�5��'�� "�!�駟��o�a�Dn�Ν:t�֭[g�D��ШQ��0�6q�D��;��@ ��x� M�6�v�(Pii��z�m��Z�4e��w��"@��̴i�x��f��[��~-F�ٱc��&I�Z�2�|��n��!�?$@��{�[��H�T��ɏ�#��@�h��Ȏ��I#�_�m�(F��/6Z3��z��'�\r�,��r��!��;��w2Z3j��=~�G��Y��7��"I�$��i�I.�p��_"��?�p��N`�y��DD��?: ��_�� G�ÿa��b�7J��!B��x@�0�� B��o ��c��G��@`1��C��[@�0��G ��@`0��C��_�u�V1�� a��CX��>�W�` |��`!<��S�`"<��.��`#�c�`�?c��A��C4 �?��c�� p#��~@�0��?:��0��8&��_��M�Q1��J�h#��?��/`��7��;I��q7�aw�Q��A�1Hp ��!�#��<8��/#@�1��U�l7��=�S�=��K.�4Z�?E��_$i��@��!�1�!E4�?��`��P_� ��@��Bă�1��0�#��:��"��a��U�,�x�b��F��Y1� ��[��n|��n� ��#'��v�E��H:`�x��b� ��#��v��D��4��Y ��h��i.i��&��E�Ζ��v#��O� H��4�I�Ŷ��}�:I�k�h��@t��ZR��F��#��(�tXҙ��zZ �?��I��3l7q��@õ��|ۍ�1,G��p�u��Y� ��Ꮿ@h��J��v#�x��xk$ ��v#�9��5��}��_��$��c �S�#��=+��"K�{F��*m7�`#��%�H:NRS�p�6I?��sIՖ{Ap��$I$I:QR��v�2$�Z�@�UJ*��$�]<��F��O4IEND�B`�