from decimal import Decimal

#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------
def to_utf8(
        cp,
        modified=None,
        normal=None,
        extended=None,
):
    """Convert one or more Unicode code points to UTF-8, MUTF-8, or XUTF-8.

function        to_utf8

reverses        un_utf8

purpose         convert a sequence of Unicode code points to a like
                sequence of 8-bit codes encoded as UTF-8, as MUTF-8,
                or as XUTF-8.  the Unicode code points may be given as
                characters or as numbers (int or anything that can be
                converted to int).

arguments       cp        a value or sequence of one or more code points.
                modified  true selects MUTF-8 (modified UTF-8).  None
                          (the default) selects MUTF-8 unless normal
                          or extended is true.
                normal    true selects plain UTF-8.
                extended  true selects XUTF-8 (extended UTF-8).
                if no encoding ends up selected (for example only
                modified=False is given) plain UTF-8 is produced.

returns         a like value or sequence structure with code point
                values replaced with UTF-8, MUTF-8, or XUTF-8.

raises          ValueError if more than one of modified, normal, and
                extended is true, or if a solo number is too large to
                be encoded.
                TypeError if the type of cp is not supported (the exact
                type is tested, so subclasses such as bool are refused).

note            this function also encodes values that are outside the
                set of valid values for Unicode code points including
                values reserved for surrogate pairs used in UTF-16 and
                values exceeding the Unicode limit of 1114111 (0x10ffff).

note            the largest Unicode code point, 1114111 (0x10ffff), is
                encoded with 4 octets.  for UTF-8 and MUTF-8 the largest
                value that gets encoded is 2147483647 (2**31-1) which
                takes 6 octets.  for XUTF-8 the largest value that gets
                encoded is 4398046511103 (2**42-1) which takes 8 octets
                (7 octets begin with 0xFE, 8 octets begin with 0xFF).
                any 32-bit word may be encoded with XUTF-8.

note            a value too large for the selected encoding is left
                unconverted in a list or tuple result.  a solo number
                that is too large raises ValueError because it cannot
                be stored in bytes.

note            this function is only responsible for carrying out the
                conversion logic.  it is not responsible for detecting
                or for any special handling of UTF-16 surrogates.

note            for type dict only its values, not its keys, are
                converted.  the same encoding selection is applied to
                every value.

note            for type str, bytes, or bytearray, each character is
                converted and the [MX]UTF-8 result is returned with
                encoded characters of the same type.  for bytes and
                bytearray, code points are limited to 0 through 255.

note            for types list or tuple, each item is encoded if it can
                be, and the resulting octets are returned concatenated
                within the same type.

note            for types set or frozenset, the members are encoded as
                a list and the resulting octets are collected into the
                same type, so order and duplicate octets are lost.

note            for types int, float, or decimal.Decimal, conversion
                is made only from the whole value.  any fractional value
                is truncated.  any negative value is encoded as the
                single octet 0.  the value 0 is encoded as C0 80 when
                modified UTF-8 (MUTF-8) is in effect.  the caller may
                use -1 to force a 0 into the result.

note            for a solo int, float, or decimal.Decimal, the returned
                type is bytes.

author          Phil D. Howard
                The author may be contacted by decoding the number
                11054987560151472272755686915985840251291393453694611309
                (hint: try converting the number to binary)
"""
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.
    if modified is None:
        # MUTF-8 is the default, but only when the caller did not ask for
        # another encoding; otherwise normal=True / extended=True alone
        # would always collide with the default.
        modified = not (normal or extended)
    modified = bool(modified)
    normal = bool(normal)
    extended = bool(extended)
    if (modified, normal, extended).count(True) > 1:
        raise ValueError('multiple conversion types requested, UTF-8 vs MUTF-8 vs XUTF-8')

    ty = type(cp)
    solo = False
    if ty is dict:
        # recurse with the resolved flags so nested values use the same encoding
        return {k: to_utf8(v, modified, normal, extended) for k, v in cp.items()}
    if ty in (set, frozenset):
        return ty(to_utf8(list(cp), modified, normal, extended))
    if ty is str:
        cp = [ord(x) for x in cp]
    elif ty in (int, float, Decimal):
        cp = [cp]       # make a solo number into a 1-sequence
        ty = bytes      # a solo number is returned as bytes
        solo = True
    elif ty not in (list, tuple, bytes, bytearray):
        raise TypeError(f'unsupported type {ty.__name__!r}')
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.
    # one entry per encoded length (1, 2, 3, ... octets):
    # (number of payload bits that fit, base value of the lead octet)
    forms = [
        (7, 0),         # 0xxxxxxx (ASCII)
        (11, 192),      # 110xxxxx + 1 continuation octet
        (16, 224),      # 1110xxxx + 2 continuation octets
        (21, 240),      # 11110xxx + 3 continuation octets
        (26, 248),      # 111110xx + 4 continuation octets
        (31, 252),      # 1111110x + 5 continuation octets
    ]
    if extended:
        forms += [
            (36, 254),  # 11111110 + 6 continuation octets
            (42, 255),  # 11111111 + 7 continuation octets
        ]
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.
    out = []                                # encoded octets are collected here
    for z in cp:
        z = int(z)                          # int is needed by the bit operations
        if z < 0:
            out.append(0)                   # any negative value encodes a plain 0
        elif modified and z == 0:
            out += (192, 128)               # MUTF-8 encodes 0 as overlong C0 80
        else:
            for n, (bits, lead) in enumerate(forms, start=1):
                if z >> bits == 0:          # z fits in an n-octet encoding
                    shift = 6 * (n - 1)
                    out.append(lead + (z >> shift))
                    while shift:            # continuation octets 10xxxxxx
                        shift -= 6
                        out.append(((z >> shift) & 63) + 128)
                    break
            else:
                # too large for the selected encoding
                if solo:
                    raise ValueError(f'code point {z} is too large to encode')
                out.append(z)               # left unconverted in the result
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.
    if ty is str:
        return ''.join(map(chr, out))
    return ty(out)
#-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.-------.