SleekXMPP/sleekxmpp/thirdparty/suelta/saslprep.py

from __future__ import unicode_literals

import sys
import stringprep
import unicodedata


def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the SASLPrep
    profile of stringprep.

    The steps are applied in order: character mapping, NFKC
    normalization, then the prohibited-output and bidirectional checks.

    :param text: The string to process. Byte strings are accepted only
                 when they are pure ASCII; they are decoded as
                 ``us-ascii`` before processing.
    :param strict: If ``True``, prevent the use of unassigned code points.
    :returns: The mapped and normalized unicode string.
    :raises UnicodeError: If the text contains a prohibited code point,
                          an unassigned code point (when ``strict``),
                          violates the bidirectional rules, or is a
                          byte string with non-ASCII content.
    """

    # ``bytes`` is ``str`` on Python 2, so this covers both major versions.
    if isinstance(text, bytes):
        text = text.decode('us-ascii')

    # Mapping:
    #
    #  -  non-ASCII space characters [StringPrep, C.1.2] that can be
    #     mapped to SPACE (U+0020), and
    #
    #  -  the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #     that can be mapped to nothing.
    mapped = []
    for char in text:
        if stringprep.in_table_c12(char):
            mapped.append(' ')
        elif not stringprep.in_table_b1(char):
            mapped.append(char)

    # Normalization using form KC
    text = unicodedata.normalize('NFKC', ''.join(mapped))

    # Check for bidirectional string: a string that starts with a
    # RandALCat character [StringPrep, D.1] must also end with one.
    first_is_randal = False
    if text:
        first_is_randal = stringprep.in_table_d1(text[0])
        if first_is_randal and not stringprep.in_table_d1(text[-1]):
            raise UnicodeError('Section 6.3 [end]')

    # Prohibited output tables, checked in this order for every character.
    prohibited = (
        (stringprep.in_table_c12, 'In table C.1.2'),
        (stringprep.in_table_c21, 'In table C.2.1'),
        (stringprep.in_table_c22, 'In table C.2.2'),
        (stringprep.in_table_c3, 'In table C.3'),
        (stringprep.in_table_c4, 'In table C.4'),
        (stringprep.in_table_c5, 'In table C.5'),
        (stringprep.in_table_c6, 'In table C.6'),
        (stringprep.in_table_c7, 'In table C.7'),
        (stringprep.in_table_c8, 'In table C.8'),
        (stringprep.in_table_c9, 'In table C.9'),
    )

    # Check for prohibited characters
    for index, char in enumerate(text):
        if strict and stringprep.in_table_a1(char):
            raise UnicodeError('Unassigned Codepoint')
        for in_table, message in prohibited:
            if in_table(char):
                raise UnicodeError(message)
        if index:
            if first_is_randal:
                # A RandALCat string must not contain any LCat
                # character [StringPrep, D.2].
                if stringprep.in_table_d2(char):
                    raise UnicodeError('Section 6.2')
            elif stringprep.in_table_d1(char):
                # A RandALCat character anywhere -- including the very
                # last position -- requires the string to also start
                # with one, which it does not.
                raise UnicodeError('Section 6.3')

    return text
Integrate a modified version of Dave Cridland's Suelta SASL library. 2011-08-04 00:00:51 +00:00
			`from __future__ import unicode_literals`

			`import sys`
			`import stringprep`
			`import unicodedata`


			`def saslprep(text, strict=True):`
			`"""`
			`Return a processed version of the given string, using the SASLPrep`
			`profile of stringprep.`

			`:param text: The string to process, in UTF-8.`
			:param strict: If ``True``, prevent the use of unassigned code points.
			`"""`

			`if sys.version_info < (3, 0):`
			`if type(text) == str:`
			`text = text.decode('us-ascii')`

			`# Mapping:`
			`#`
			`# - non-ASCII space characters [StringPrep, C.1.2] that can be`
			`# mapped to SPACE (U+0020), and`
			`#`
			`# - the 'commonly mapped to nothing' characters [StringPrep, B.1]`
			`# that can be mapped to nothing.`
			`buffer = ''`
			`for char in text:`
			`if stringprep.in_table_c12(char):`
			`buffer += ' '`
			`elif not stringprep.in_table_b1(char):`
			`buffer += char`

			`# Normalization using form KC`
			`text = unicodedata.normalize('NFKC', buffer)`

			`# Check for bidirectional string`
			`buffer = ''`
			`first_is_randal = False`
			`if text:`
			`first_is_randal = stringprep.in_table_d1(text[0])`
			`if first_is_randal and not stringprep.in_table_d1(text[-1]):`
			`raise UnicodeError('Section 6.3 [end]')`

			`# Check for prohibited characters`
			`for x in range(len(text)):`
			`if strict and stringprep.in_table_a1(text[x]):`
			`raise UnicodeError('Unassigned Codepoint')`
			`if stringprep.in_table_c12(text[x]):`
			`raise UnicodeError('In table C.1.2')`
			`if stringprep.in_table_c21(text[x]):`
			`raise UnicodeError('In table C.2.1')`
			`if stringprep.in_table_c22(text[x]):`
			`raise UnicodeError('In table C.2.2')`
			`if stringprep.in_table_c3(text[x]):`
			`raise UnicodeError('In table C.3')`
			`if stringprep.in_table_c4(text[x]):`
			`raise UnicodeError('In table C.4')`
			`if stringprep.in_table_c5(text[x]):`
			`raise UnicodeError('In table C.5')`
			`if stringprep.in_table_c6(text[x]):`
			`raise UnicodeError('In table C.6')`
			`if stringprep.in_table_c7(text[x]):`
			`raise UnicodeError('In table C.7')`
			`if stringprep.in_table_c8(text[x]):`
			`raise UnicodeError('In table C.8')`
			`if stringprep.in_table_c9(text[x]):`
			`raise UnicodeError('In table C.9')`
			`if x:`
			`if first_is_randal and stringprep.in_table_d2(text[x]):`
			`raise UnicodeError('Section 6.2')`
			`if not first_is_randal and \`
			`x != len(text) - 1 and \`
			`stringprep.in_table_d1(text[x]):`
			`raise UnicodeError('Section 6.3')`

			`return text`