<,�tEXtComment File Manager

File Manager

Path: /opt/alt/alt-nodejs22/root/usr/include/unicode/

Viewing File: normalizer2.h

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/

#ifndef __NORMALIZER2_H__
#define __NORMALIZER2_H__

/**
 * \file
 * \brief C++ API: New API for Unicode Normalization.
 */

#include "unicode/utypes.h"

#if U_SHOW_CPLUSPLUS_API

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/stringpiece.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm2.h"

U_NAMESPACE_BEGIN

class ByteSink;

/**
 * Unicode normalization functionality for standard Unicode normalization or
 * for using custom mapping tables.
 * All instances of this class are unmodifiable/immutable.
 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
 * The Normalizer2 class is not intended for public subclassing.
 *
 * The primary functions are to produce a normalized string and to detect whether
 * a string is already normalized.
 * The most commonly used normalization forms are those defined in
 * http://www.unicode.org/unicode/reports/tr15/
 * However, this API supports additional normalization forms for specialized purposes.
 * For example, NFKC_Casefold is provided via
 * getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode)
 * and can be used in implementations of UTS #46.
 *
 * Not only are the standard compose and decompose modes supplied,
 * but additional modes are provided as documented in the Mode enum.
 *
 * Some of the functions in this class identify normalization boundaries.
 * At a normalization boundary, the portions of the string
 * before it and starting from it do not interact and can be handled independently.
 *
 * The spanQuickCheckYes() stops at a normalization boundary.
 * When the goal is a normalized string, then the text before the boundary
 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
 *
 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
 * a character is guaranteed to be at a normalization boundary,
 * regardless of context.
 * This is used for moving from one normalization boundary to the next
 * or preceding boundary, and for performing iterative normalization.
 *
 * Iterative normalization is useful when only a small portion of a
 * longer string needs to be processed.
 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
 * (to process only the substring for which sort key bytes are computed).
 *
 * The set of normalization boundaries returned by these functions may not be
 * complete: There may be more boundaries that could be returned.
 * Different functions may return different boundaries.
 * @stable ICU 4.4
 */
class U_COMMON_API Normalizer2 : public UObject {
public:
    /**
     * Destructor.
     * @stable ICU 4.4
     */
    ~Normalizer2();

    /**
     * Returns a Normalizer2 instance for Unicode NFC normalization.
     * Same as getInstance(nullptr, "nfc", UNORM2_COMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFCInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode NFD normalization.
     * Same as getInstance(nullptr, "nfc", UNORM2_DECOMPOSE, errorCode).
     * (NFD is obtained from the "nfc" data with UNORM2_DECOMPOSE; see getInstance().)
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFDInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode NFKC normalization.
     * Same as getInstance(nullptr, "nfkc", UNORM2_COMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFKCInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode NFKD normalization.
     * Same as getInstance(nullptr, "nfkc", UNORM2_DECOMPOSE, errorCode).
     * (NFKD is obtained from the "nfkc" data with UNORM2_DECOMPOSE; see getInstance().)
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFKDInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode toNFKC_Casefold() normalization
     * which is equivalent to applying the NFKC_Casefold mappings and then NFC.
     * See https://www.unicode.org/reports/tr44/#NFKC_Casefold
     *
     * Same as getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFKCCasefoldInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
     * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
     * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
     *
     * Same as getInstance(nullptr, "nfkc_scf", UNORM2_COMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 74
     */
    static const Normalizer2 *
    getNFKCSimpleCasefoldInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance which uses the specified data file
     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
     * and which composes or decomposes text according to the specified mode.
     * Returns an unmodifiable singleton instance. Do not delete it.
     *
     * Use packageName=nullptr for data files that are part of ICU's own data.
     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
     * Use name="nfkc_scf" and UNORM2_COMPOSE for the NFKC_Simple_Casefold variant
     * (see getNFKCSimpleCasefoldInstance()).
     *
     * @param packageName nullptr for ICU built-in data, otherwise application data package name
     * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
     * @param mode normalization mode (compose or decompose etc.)
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 4.4
     */
    static const Normalizer2 *
    getInstance(const char *packageName,
                const char *name,
                UNormalization2Mode mode,
                UErrorCode &errorCode);

    /**
     * Returns the normalized form of the source string.
     * Convenience overload: normalizes into a new local string by calling
     * normalize(src, dest, errorCode) and returns that string by value.
     * @param src source string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return normalized src
     * @stable ICU 4.4
     */
    UnicodeString
    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
        UnicodeString result;
        // Delegate to the pure virtual three-argument overload, which fills result.
        normalize(src, result, errorCode);
        return result;
    }
    /**
     * Writes the normalized form of the source string to the destination string
     * (replacing its contents) and returns the destination string.
     * The source and destination strings must be different objects.
     * @param src source string
     * @param dest destination string; its contents are replaced with normalized src
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
              UErrorCode &errorCode) const = 0;

    /**
     * Normalizes a UTF-8 string and optionally records how source substrings
     * relate to changed and unchanged result substrings.
     *
     * Implemented completely for all built-in modes except for FCD.
     * The base class implementation converts to & from UTF-16 and does not support edits.
     *
     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       Source UTF-8 string.
     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
     *                  sink.Flush() is called at the end.
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents are undefined if any error occurs.
     *                  This function calls edits->reset() first unless
     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 60
     */
    virtual void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                  Edits *edits, UErrorCode &errorCode) const;

    /**
     * Appends the normalized form of the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if the first string was normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, will be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UErrorCode &errorCode) const = 0;
    /**
     * Appends the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if both the strings were normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, should be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    append(UnicodeString &first,
           const UnicodeString &second,
           UErrorCode &errorCode) const = 0;

    /**
     * Gets the decomposition mapping of c.
     * Roughly equivalent to normalizing the String form of c
     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
     * returns false and does not write a string
     * if c does not have a decomposition mapping in this instance's data.
     * This function is independent of the mode of the Normalizer2.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      decomposition mapping, if there is one.
     * @return true if c has a decomposition, otherwise false
     * @stable ICU 4.6
     */
    virtual UBool
    getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;

    /**
     * Gets the raw decomposition mapping of c.
     *
     * This is similar to the getDecomposition() method but returns the
     * raw decomposition mapping as specified in UnicodeData.txt or
     * (for custom data) in the mapping files processed by the gennorm2 tool.
     * By contrast, getDecomposition() returns the processed,
     * recursively-decomposed version of this mapping.
     *
     * When used on a standard NFKC Normalizer2 instance,
     * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
     *
     * When used on a standard NFC Normalizer2 instance,
     * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
     * in this case, the result contains either one or two code points (=1..4 char16_ts).
     *
     * This function is independent of the mode of the Normalizer2.
     * The default implementation returns false.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      raw decomposition mapping, if there is one.
     * @return true if c has a decomposition, otherwise false
     * @stable ICU 49
     */
    virtual UBool
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;

    /**
     * Performs pairwise composition of a & b and returns the composite if there is one.
     *
     * Returns a composite code point c only if c has a two-way mapping to a+b.
     * In standard Unicode normalization, this means that
     * c has a canonical decomposition to a+b
     * and c does not have the Full_Composition_Exclusion property.
     *
     * This function is independent of the mode of the Normalizer2.
     * The default implementation returns a negative value.
     * @param a A (normalization starter) code point.
     * @param b Another code point.
     * @return The non-negative composite code point if there is one; otherwise a negative value.
     * @stable ICU 49
     */
    virtual UChar32
    composePair(UChar32 a, UChar32 b) const;

    /**
     * Gets the combining class of c.
     * The default implementation returns 0
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
     * @param c code point
     * @return c's combining class
     * @stable ICU 49
     */
    virtual uint8_t
    getCombiningClass(UChar32 c) const;

    /**
     * Tests if the string is normalized.
     * Internally, in cases where the quickCheck() method would return "maybe"
     * (which is only possible for the two COMPOSE modes) this method
     * resolves to "yes" or "no" to provide a definitive result,
     * at the cost of doing more work in those cases.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return true if s is normalized
     * @stable ICU 4.4
     */
    virtual UBool
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
    /**
     * Tests if the UTF-8 string is normalized.
     * Internally, in cases where the quickCheck() method would return "maybe"
     * (which is only possible for the two COMPOSE modes) this method
     * resolves to "yes" or "no" to provide a definitive result,
     * at the cost of doing more work in those cases.
     *
     * This works for all normalization modes.
     * It is optimized for UTF-8 for all built-in modes except for FCD.
     * The base class implementation converts to UTF-16 and calls isNormalized().
     *
     * @param s UTF-8 input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return true if s is normalized
     * @stable ICU 60
     */
    virtual UBool
    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;


    /**
     * Tests if the string is normalized.
     * For the two COMPOSE modes, the result could be "maybe" in cases that
     * would take a little more work to resolve definitively.
     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
     * combination of quick check + normalization, to avoid
     * re-checking the "yes" prefix.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return UNormalizationCheckResult
     * @stable ICU 4.4
     */
    virtual UNormalizationCheckResult
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;

    /**
     * Returns the end of the normalized substring of the input string.
     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
     * the substring <code>UnicodeString(s, 0, end)</code>
     * will pass the quick check with a "yes" result.
     *
     * The returned end index is usually one or more characters before the
     * "no" or "maybe" character: The end index is at a normalization boundary.
     * (See the class documentation for more about normalization boundaries.)
     *
     * When the goal is a normalized string and most input strings are expected
     * to be normalized already, then call this method,
     * and if it returns a prefix shorter than the input string,
     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return "yes" span end index
     * @stable ICU 4.4
     */
    virtual int32_t
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;

    /**
     * Tests if the character always has a normalization boundary before it,
     * regardless of context.
     * If true, then the character does not normalization-interact with
     * preceding characters.
     * In other words, a string containing this character can be normalized
     * by processing portions before this character and starting from this
     * character independently.
     * This is used for iterative normalization. See the class documentation for details.
     * @param c character to test
     * @return true if c has a normalization boundary before it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;

    /**
     * Tests if the character always has a normalization boundary after it,
     * regardless of context.
     * If true, then the character does not normalization-interact with
     * following characters.
     * In other words, a string containing this character can be normalized
     * by processing portions up to this character and after this
     * character independently.
     * This is used for iterative normalization. See the class documentation for details.
     * Note that this operation may be significantly slower than hasBoundaryBefore().
     * @param c character to test
     * @return true if c has a normalization boundary after it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;

    /**
     * Tests if the character is normalization-inert.
     * If true, then the character does not change, nor normalization-interact with
     * preceding or following characters.
     * In other words, a string containing this character can be normalized
     * by processing portions before this character and after this
     * character independently.
     * This is used for iterative normalization. See the class documentation for details.
     * Note that this operation may be significantly slower than hasBoundaryBefore().
     * @param c character to test
     * @return true if c is normalization-inert
     * @stable ICU 4.4
     */
    virtual UBool isInert(UChar32 c) const = 0;
};

/**
 * Normalization filtered by a UnicodeSet.
 * Normalizes portions of the text contained in the filter set and leaves
 * portions not contained in the filter set unchanged.
 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
 * This class implements all of (and only) the Normalizer2 API.
 * An instance of this class is unmodifiable/immutable but is constructed and
 * must be destructed by the owner.
 * @stable ICU 4.4
 */
class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
public:
    /**
     * Constructs a filtered normalizer wrapping any Normalizer2 instance
     * and a filter set.
     * Both are aliased and must not be modified or deleted while this object
     * is used.
     * The filter set should be frozen; otherwise the performance will suffer greatly.
     * @param n2 wrapped Normalizer2 instance
     * @param filterSet UnicodeSet which determines the characters to be normalized
     * @stable ICU 4.4
     */
    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
            norm2(n2), set(filterSet) {}  // binds the reference members; no copies are made

    /**
     * Destructor.
     * @stable ICU 4.4
     */
    ~FilteredNormalizer2();

    /**
     * Writes the normalized form of the source string to the destination string
     * (replacing its contents) and returns the destination string.
     * The source and destination strings must be different objects.
     * @param src source string
     * @param dest destination string; its contents are replaced with normalized src
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
              UErrorCode &errorCode) const override;

    /**
     * Normalizes a UTF-8 string and optionally records how source substrings
     * relate to changed and unchanged result substrings.
     *
     * Implemented completely for most built-in modes except for FCD.
     * The base class implementation converts to & from UTF-16 and does not support edits.
     *
     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       Source UTF-8 string.
     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
     *                  sink.Flush() is called at the end.
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents are undefined if any error occurs.
     *                  This function calls edits->reset() first unless
     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 60
     */
    virtual void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                  Edits *edits, UErrorCode &errorCode) const override;

    /**
     * Appends the normalized form of the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if the first string was normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, will be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UErrorCode &errorCode) const override;
    /**
     * Appends the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if both the strings were normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, should be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    append(UnicodeString &first,
           const UnicodeString &second,
           UErrorCode &errorCode) const override;

    /**
     * Gets the decomposition mapping of c.
     * For details see the base class documentation.
     *
     * This function is independent of the mode of the Normalizer2.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      decomposition mapping, if there is one.
     * @return true if c has a decomposition, otherwise false
     * @stable ICU 4.6
     */
    virtual UBool
    getDecomposition(UChar32 c, UnicodeString &decomposition) const override;

    /**
     * Gets the raw decomposition mapping of c.
     * For details see the base class documentation.
     *
     * This function is independent of the mode of the Normalizer2.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      raw decomposition mapping, if there is one.
     * @return true if c has a decomposition, otherwise false
     * @stable ICU 49
     */
    virtual UBool
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override;

    /**
     * Performs pairwise composition of a & b and returns the composite if there is one.
     * For details see the base class documentation.
     *
     * This function is independent of the mode of the Normalizer2.
     * @param a A (normalization starter) code point.
     * @param b Another code point.
     * @return The non-negative composite code point if there is one; otherwise a negative value.
     * @stable ICU 49
     */
    virtual UChar32
    composePair(UChar32 a, UChar32 b) const override;

    /**
     * Gets the combining class of c.
     * For details see the base class documentation.
     * The default (Normalizer2 base class) implementation returns 0
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
     * @param c code point
     * @return c's combining class
     * @stable ICU 49
     */
    virtual uint8_t
    getCombiningClass(UChar32 c) const override;

    /**
     * Tests if the string is normalized.
     * For details see the Normalizer2 base class documentation.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return true if s is normalized
     * @stable ICU 4.4
     */
    virtual UBool
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override;
    /**
     * Tests if the UTF-8 string is normalized.
     * Internally, in cases where the quickCheck() method would return "maybe"
     * (which is only possible for the two COMPOSE modes) this method
     * resolves to "yes" or "no" to provide a definitive result,
     * at the cost of doing more work in those cases.
     *
     * This works for all normalization modes.
     * It is optimized for UTF-8 for all built-in modes except for FCD.
     * The base class implementation converts to UTF-16 and calls isNormalized().
     *
     * @param s UTF-8 input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return true if s is normalized
     * @stable ICU 60
     */
    virtual UBool
    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const override;
    /**
     * Tests if the string is normalized.
     * For details see the Normalizer2 base class documentation.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return UNormalizationCheckResult
     * @stable ICU 4.4
     */
    virtual UNormalizationCheckResult
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override;
    /**
     * Returns the end of the normalized substring of the input string.
     * For details see the Normalizer2 base class documentation.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return "yes" span end index
     * @stable ICU 4.4
     */
    virtual int32_t
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override;

    /**
     * Tests if the character always has a normalization boundary before it,
     * regardless of context.
     * For details see the Normalizer2 base class documentation.
     * @param c character to test
     * @return true if c has a normalization boundary before it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryBefore(UChar32 c) const override;

    /**
     * Tests if the character always has a normalization boundary after it,
     * regardless of context.
     * For details see the Normalizer2 base class documentation.
     * @param c character to test
     * @return true if c has a normalization boundary after it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryAfter(UChar32 c) const override;

    /**
     * Tests if the character is normalization-inert.
     * For details see the Normalizer2 base class documentation.
     * @param c character to test
     * @return true if c is normalization-inert
     * @stable ICU 4.4
     */
    virtual UBool isInert(UChar32 c) const override;
private:
    // Private overload of normalize() that additionally takes a USetSpanCondition.
    // NOTE(review): the definition is not in this header; presumably spanCondition is
    // the initial condition used when spanning src with the filter set -- confirm
    // against the implementation file.
    UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
              USetSpanCondition spanCondition,
              UErrorCode &errorCode) const;

    // Private overload of normalizeUTF8() that takes the source as pointer + length
    // plus a USetSpanCondition.
    // NOTE(review): semantics of spanCondition are not visible here -- confirm
    // against the implementation file.
    void
    normalizeUTF8(uint32_t options, const char *src, int32_t length,
                  ByteSink &sink, Edits *edits,
                  USetSpanCondition spanCondition,
                  UErrorCode &errorCode) const;

    // Private overload with an extra doNormalize flag.
    // NOTE(review): presumably the shared implementation behind the public
    // normalizeSecondAndAppend() (doNormalize true) and append() (doNormalize false)
    // -- confirm against the implementation file.
    UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UBool doNormalize,
                             UErrorCode &errorCode) const;

    // Wrapped normalizer: a reference to the constructor's n2 argument (aliased, not copied).
    const Normalizer2 &norm2;
    // Filter set: a reference to the constructor's filterSet argument (aliased, not copied).
    const UnicodeSet &set;
};

U_NAMESPACE_END

#endif  // !UCONFIG_NO_NORMALIZATION

#endif /* U_SHOW_CPLUSPLUS_API */

#endif  // __NORMALIZER2_H__

��b IDATx��ytVս��ϓ22 �A@�IR��:�h�CiZ[�v��*E��:�W��ũZA� ^d��QeQ ��@ !��j�Z�'�>g�s��V��仿$�|?g�)&x-E�IE�N�T ��;@x�T.i��%-��X��}S�v�S�5�.��r/UHz^�_$-��W"�w��)�Ɗ�/@Z �&IoX��P�$K��}��JzX��:�;`�� &�, ��ŋu�i��,�e��6��m��X� Ե��rKb1�ԗ��)D�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�AD�A�݀!��I*��]R;I�2$eZ#OR��Z��Sr��r�6m��teff�u��*((P��u�'��v{��DIߔ4^�pI��m�'77WEEE;v�Ǝ��4�-��$]'�RI��{��\�I�&�G�� :I��HJ�� DWBB��=�\��W�R�޽m� o$K�(�V9��ABB�.��}��jѢ��v�`^?IO�ȅ}ڶm�G}T#F��J`�5�6��$-��ھ}�F��I�&��v;0(h;��Б�3�8CӧOWf��!�;A i:��F_m��9s&�|�q%=�#��w��Z�p�rrrl�a A� &��P\\��СC�[A#!� �{��o��l��F�} `��E2��}��M��K/��vV��)i��{4Bff�V�\��|ۭX�`��b�@��k��ɶ�@��%i��$K�z��5z�h�mX�[�I�XZ`� '��b%$��r�5��M��4�º��/�l� ԃ��ߖ�xhʔ)�[@=�} K�6IM}^��5k��㏷݆��z ��Η�ÿO��:gdG��B�m��y�T/��@+��Vɶ�纽z񕏵l�.��y�޴i�t뭷zV��0[�Y^>�Ws�qs}�\/�@$�(�T7�f��.��I�nݺi��R$푔n��.�~?H))\Z�RW'M�o�~v� Ov6o��ԃ��x��z��!�S,&�xm/�y�ɞԟ?�'ua��S�ѽ��b�,8�Gל�K��b��o�i�&��3t7Y,��)JJ��c[�n��z��ӳ�d�E��&K�sZ�Lӄ��I?@��&�%ӟ�۶mSMM�њ0��i�ؐSZ�,�|J+��N�� ~�,��0A0!5%Q-��YQQa��3}$_vV��r�f9f?S8`��zD�AD�AD�AD�AD�AD�AD�AD�AD�A�d��q�P��,ت��m��M��mg��1V?�r�S��I꒟��]�u|�l ��R��CyE�f�٢9�j��URb�zt�Ѱ!m5~tG��j�2�D��h�G�*�{H9�)꒟��ר3:(+3\?�/;TU��ݭ��ʴ~S�6lڧ��U��J*�i�$�d�(#=Yݺd�{�,�p|3��B))��q�:��vN0Y.�jk��ק6��;�SɶVzHJJЀ�-u��tѹ�սk��>�QUU��\��޲��~]fF�n��K?��&��ߡ��5��b��=z9�)��^�|u_�k�-[��y%Z��NU6 ��7M�i��:�]ۦt�k��[�n� ��X��(�e6B�b�.��"�8�cۭ|��~�t�e��u��u��w�|��ή��I-5�"��~��U��k��;��Zi�cE�m��N/��:�]�M��> cQ�^�ui�ƞ��??Ң��p�c#TUU�3�U��a��k��Nw�A`��:��Y_V��-8.KKf�R��itv�޲* 9S�6ֿ��j�,Ճ��NOMߤ��]��z�^��f��O�h��|��<�>@Å5��_��/I��u?{S��Y��4h�K��/2��]��4�%i��t�5��q�]�G�G��e�2�%i�R��| ��W��&f�*^]�??��vq[��Lg��E��_��3f}��F�xu~��}q��d�-��ږF�xu~I N>\��;��͗��O��֊�:�̗��W��J@Bh�W=��y��|��Ggwܷ�H�_NY��?��)T��d��i�'?��խw�h�lm�Qi� �!SUU��sw4kӺ�e4rf��x�u�-[n�Ht��MFj}�H�_��u�~w�>)�o�V}�(�T'��e��bʒ�v��3_�[+v�n@Ȭ\S�}��o��t��}w��=��k�H��F�n�xg��S�� 0eޢm~�l��}��u��q��Z�f��F��oZuu��E��g� 
��`z�t~?b�;t�%�>��WTkķ�h��[�2e�G8L��IW�x��,��^\thr�l��^��Ϊ��{��=�ǆ��<}�q��V�@ ��⠨��W��y^��L��F_��>�0��U�k��D�u�ʫu��Cs$)I��v��:�IK��;6ֲ�4{^��6��ե��m+l��3>�݆��uM �9��u��?>�Z��c��}g�~q��h��Kw��ڭ��eFMM�~p�М�uq�ǿz��6T��b@8��@Y�|��jx��]�(^]�gf�}�M�"tG ��-w��.@�vOqh~/�HII��`��S�[l��.��6�nØXL��9�v�U�cOo��B�\�x�o��Ǥ�'�T�&I��Ǎ�Qw��_w�p�v��[�k�m��O�{�w�~�>�#=P1P�ɞ�a�-w��e�:i�Ǐl��H��o�׈��꒟�f9��SzH�?��+s�h�k%F��s:��q��Vh�qY��`��j�vO�'ρ?PyX3�lх��]�˾u��V�{��ݞ]1��,�M�z�YN�W~̈́�joY�n��}��ȚF߾׮mS]��F�� z�+�E��D�xm/��d{�F��{-�W��-�4w�Y��듏:�?��?��_�g�P�f� ^3��e��cg ��ҵs��8R��2�מ�z�@T��A��N��Gj�)��}CNi��/�R~��}��c:5�{��!��Z�HӋӾ��6}T�]��G�]�7W�6^��n �9*,��Y��qOZj��:P?�Q�� DF��L�|��?��-�^��.��Ɵ�7��}f�F�h׶��xe��2P�s��c��z�1��&5\cn��[�=�V��n[��ĶE鎀uˌ��d3G�II ��k�;�l�NmشOuuRVf��BE��]ۣ�eӶu :��X�-[��(��e��r�4��~��LH�i�6:��Ѻ@ԅ��r��ST�0��trk%$Č��0ez��"� *��z�"��T�/X9|8��.��C5F�eg}��C�Q%�͞�ˣ��JvL��/��?�j�^��h��&�9x�F��`�њ�Z��(��&�y��F��&Iݻf�g��#��W��;�3^�{Wo^4�'v�V[[�K'��;+��m��Ӎִ�]AC@��W?1^{�එyh��+^]��f��m��~�i��Ե�]AB@��WTk�̏t��uR�?�l�.O�IH�i�Yy�Զ�]A�ˀ7c��:��q}ힽ��a�f�6��Z~�қm(��+sK4{^�6}T��*UUu�]��n��.��:kx{�:��2�� _m��=�sA�ߤ�U��@?��Z��-V�ކ�е��z왍��Nэ��{|5� pڶn��b� �p-@��sPg]0G7�fy��-��M�{GCF��'%�{�4`��=�$-�Ge\��eU:m�+Z�t�'�W��jO�!O�AF@��i�k&t�݆��ϥ_�� e��}��=]"��Wz��_��.��͜�E3�l�e�W��F�i��h|t��-w��Z��ۍ��-�u��w=�6�YN��{��6|��}��|��*={��Ѽ��n.�S�.��z��1z��j�ۻT��H]��흾� �D�u��D��v��mv��K��.`V]yY�~s�I��@��t?/��ϓ.� ��m�&�["�+��P��?M��z�ovV��ЫG3�-�G��RR��[(!!\�_��,��^��%?�v@��ҵ�ő�� m��`�Y)�te�m8��G��Mx.))A�]Y��i`�V��i��W�`�?�^��~!�S#��^+�ѽ��GZj��?V�ģ�0.))A�꨷��l�z�L�*��]��O��X�r��Y�`DBBL�Oj��{��-M�H'�ii�-ϰ��ok�7^�� )쭡�b��]�UX��S�ְ�mռY��|5��*��c�ֽk��0B��7镹%ڽ��P#8n�Ȏ��q}mJr�23��_>��l��E�5��$i��wu��i�+ ��H�~�F`��IjƵ@�q �\ �@#qG�0"��.�0"� l��`��.�0!� ,�AQ�HN6�q��z�k��KJ�#��o;`X�v2��>,tێJ��J��7Z/*��A��.@f�ف�jM��zk��g��@TvZ�H3Z�xu�6Ra��'%��O��?/d�Q��5�x��Yk��U]��R�ֽk�ق@��Da�S^�RS�ּ�5��|��B��e��HNN�͘p �Hvc�Y�c��C5:��y �#��`�οb��;z��2��.��!��k��r��}g��U��Wk��yZ��n�=�f�� Pv��sn��3�p��~�;4p�˚=�ē~�Nm�I] 
��¾0lH[��_��L��h�sh��_��ғߤ��c_њ�e��c�)��g�7��V�IZ��5��yr�gk̞W��#��IjӪ�v�>��՞��y睝��M�8�[�|�]��\�շ��8�M��6�%�|@P��Z��ڨ�I-m��>=�k��=�'a��iRo�-x�?>Q��.��}`�Ȏ:�Ws�mu� �u�� > �.@,&��;+!!�˱�tﭧD��Q�w��RW\��vF\~Q7�>�s�p�Y�w$��%A~�;~}��6��¾��g�&if_��=��j�,�v+�U��L�1(tW��a��ke��:@Ș>�j�$�Gq�2�t7S?�vL��|��]u�/�� .�(�0�E��6M��k�6�h��iۺ��z�ښ��O��r��i�f�ޱ�xm/��G�x>�� La�l%%��~�{��l�Bs��R4�*��}{�0Z/��t��N�I��ɚ��p�V^#�L�f�:u@k#�RSu�� =��S^��Zy��uR/��.@n�&��΃z~��B=��0eg뺆��#,��Þ�[�B/?�H� uUf�7�y ��W��y}Bw��eg��ל`��Wh�(|��|��`l`.�;�Ws��?��V�@"��c:i�ɍ��L֯��PG�v�6z�c�tM��̠��'��:w��uW��;d��=�;E�v�e��D��}��9�J@B(��0�iհ�b�v�P�1{�\P��&��G�7��D��޴I��y��_��$-Q�jm�~Yrr�&]�C��Dv%b�h|�Yz�n�i_�R;�k��g�}n��JOII��w��yuL}{�Ќǋ�}�:+3Y�?:��W��J/N+Rz��d=�h��b��;d��j͒su��ݔ��@NKMԄ�j��qz��C��5@��y°�h��L��m;*5�ezᕏ��=�ep�� X��L�n?�מ:��r�`��۵�tŤ�Z�|�1�v`�V�뽧��_c��s�ج'�ߤ%o�Tuum�k%%%h�)�u�y]��N�k��[�n� ��'b�2� �l�.=��͜�E%��gf$��[c;�s:�V-�͞W��ߤW�h��-��j�7��]4��=��F-X�]�>��Z�LS�i��[�Y��*�W��e;��Z�a��n(ӇW|e(HNNP��5[= r4tP �&0�<��p�c#��`�v�TNV GFq��v�T�i�*��Ty�a�m�$��ߏWyE�*�V��JKMTfF�w��>'��$-�ؽ�.Ho��.��8��c��"@D�AD�AD�AD�AD�AD�AD�AD�AD�A~�j��*֘,N;Pi�3599�h=��g�o�ضL�g��i��J�5��փy~�}&��Z��d9p֚ e:|��h��L��`��`��b/��d9p�?�fgg+%%�hM��gXo��s�ج��, Ω��Ol0Z��h=x��d��j��L��m��h��ݻ�o��O�[�g_�l�,8a��]�٭+��ӧ��0��$��I�]��c��]:粹:Te��ꢢ"�5a^��K�g�h,&��=��=��՟^��߶�ߢE�ܹS �J}��I%�:8 �IDAT~,�9/ʃPW'M�o�}z��N�ƍ쨓z�Pb��NZ�~�^z�=4m��s��w��g;5�� Y�~��S�VM��RXUյڱ�R��f��?��s�:w �;6�H:�º��i��5��-�maM��&O��3;1I�K�eam�Z�h�͛7+##�v+�c ~u�~ca]�Gn��F'��ټL~��PPP��b��n� v�o��C�4R,ӟ��gg��%�hq}@#M�4IÇ�� O�y^�x�M��Z�x�� )� �yOw@H�k�N˖-Sǎm�b]X@n��+i��͖��!++K3g�d��\�$m�t�$^��Y�f��J��\8PR��F�)77W��א!Cl��$i��:�@@��_o��G�� I{$��#� ��8磌��ŋ9�1A��(�I�m7��֭��>}�ߴ�J�q�7ޗt��^� -[��ԩS�j�*��}��%]&�'� -��ɓ'�ꫯVzz��vB#�;a �7@G��xI��{��j޼�ƌ��.�LÇWBB�7��`O��"I�$/�@R�@eee��@�۷��>}�0��,ɒ2$53Xs��|c��S~��rpTYYY��} kH�c�%��&k��.]��, @��AD�AD�AD�AD�AD�AD�AD�AD�A��@�l��T��<%''�*��L�o�^={��رc5h� %$+CnܸQ3f��ҥK�}�vUVV��s�9G� 
R,�_{�x��ˇ��3��o߾��;TTTd�}��馛��]uuuG��~��i�ԩ��@4��b��n��v�m�vfϞ�/�Peeeq}}za I~,�誫��{UWW뮻��}��_~Y�ƍSMMM��Yχ֝w��aw\�ď�cxꩧt�E��ƍկ_?�۷��5��@�u�?�1�k�N�ׯWzz�/�w�y�>}z�j��3�� k�(�ٺu��q_�Z�v�f̘��:~�AB�Q&�r��|��!��%K��ҥK��g��Ԟ={<_��X-�z� !��C�y�FUU��z~�AB�QIIIjݺ��W�$UXX��D��ٳ��Z~�AB�Q�ƍ�e��c��W�$<(~<�RSS�v�Zu��jjjԧO�Z��Qu��@4� 8��m�&&&j�ԩ��g�$�ď��1h� ͟?_��{�768��@��g �=@�`)))��5o��6m��3��)��ѣ�ƌ��J�;w��ҿUTT��/��K��ZR�{~a=@��0o�<��*狔��i�F��ɶ[�ˎ;T]]��OX@��?��K�.�ۈ�xN ��pppppppppppppppppP��fl߾],�{ｧ��k۶mڿ��o�5B��TӦMӴiӴ|�r�� DB��2e�|�A��n!D��y�'t��k�Ν�[A� $***t�5��'�� "�!�駟��o�a�Dn�Ν:t�֭[g�D��ШQ��0�6q�D��;��@ ��x� M�6�v�(Pii��z�m��Z�4e��w��"@��̴i�x��f��[��~-F�ٱc��&I�Z�2�|��n��!�?$@��{�[��H�T��ɏ�#��@�h��Ȏ��I#�_�m�(F��/6Z3��z��'�\r�,��r��!��;��w2Z3j��=~�G��Y��7��"I�$��i�I.�p��_"��?�p��N`�y��DD��?: ��_�� G�ÿa��b�7J��!B��x@�0�� B��o ��c��G��@`1��C��[@�0��G ��@`0��C��_�u�V1�� a��CX��>�W�` |��`!<��S�`"<��.��`#�c�`�?c��A��C4 �?��c�� p#��~@�0��?:��0��8&��_��M�Q1��J�h#��?��/`��7��;I��q7�aw�Q��A�1Hp ��!�#��<8��/#@�1��U�l7��=�S�=��K.�4Z�?E��_$i��@��!�1�!E4�?��`��P_� ��@��Bă�1��0�#��:��"��a��U�,�x�b��F��Y1� ��[��n|��n� ��#'��v�E��H:`�x��b� ��#��v��D��4��Y ��h��i.i��&��E�Ζ��v#��O� H��4�I�Ŷ��}�:I�k�h��@t��ZR��F��#��(�tXҙ��zZ �?��I��3l7q��@õ��|ۍ�1,G��p�u��Y� ��Ꮿ@h��J��v#�x��xk$ ��v#�9��5��}��_��$��c �S�#��=+��"K�{F��*m7�`#��%�H:NRS�p�6I?��sIՖ{Ap��$I$I:QR��v�2$�Z�@�UJ*��$�]<��F��O4IEND�B`�