view libphobos/src/std/ascii.d @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
line wrap: on
line source

// Written in the D programming language.

/++
    Functions which operate on ASCII characters.

    All of the functions in std._ascii accept Unicode characters but
    effectively ignore them if they're not ASCII. All $(D isX) functions return
    $(D false) for non-ASCII characters, and all $(D toX) functions do nothing
    to non-ASCII characters.

    For functions which operate on Unicode characters, see
    $(MREF std, uni).

$(SCRIPT inhibitQuickIndex = 1;)
$(DIVC quickindex,
$(BOOKTABLE,
$(TR $(TH Category) $(TH Functions))
$(TR $(TD Validation) $(TD
        $(LREF isAlpha)
        $(LREF isAlphaNum)
        $(LREF isASCII)
        $(LREF isControl)
        $(LREF isDigit)
        $(LREF isGraphical)
        $(LREF isHexDigit)
        $(LREF isLower)
        $(LREF isOctalDigit)
        $(LREF isPrintable)
        $(LREF isPunctuation)
        $(LREF isUpper)
        $(LREF isWhite)
))
$(TR $(TD Conversions) $(TD
        $(LREF toLower)
        $(LREF toUpper)
))
$(TR $(TD Constants) $(TD
        $(LREF digits)
        $(LREF fullHexDigits)
        $(LREF hexDigits)
        $(LREF letters)
        $(LREF lowercase)
        $(LREF lowerHexDigits)
        $(LREF newline)
        $(LREF octalDigits)
        $(LREF uppercase)
        $(LREF whitespace)
))
$(TR $(TD Enums) $(TD
        $(LREF LetterCase)
))
))
    References:
        $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
        $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)

    License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
    Authors:   $(HTTP digitalmars.com, Walter Bright) and Jonathan M Davis
    Source:    $(PHOBOSSRC std/_ascii.d)
  +/
module std.ascii;

version (unittest)
{
    // FIXME: When dmd bug #314 is fixed, make these selective.
    import std.meta; // : AliasSeq;
    import std.range; // : chain;
    import std.traits; // : functionAttributes, FunctionAttribute, isSafe;
}


immutable fullHexDigits  = "0123456789ABCDEFabcdef";     /// All hexadecimal digits in both cases: 0 .. 9, A .. F, a .. f
immutable hexDigits      = fullHexDigits[0 .. 16];         /// Uppercase hexadecimal digits: 0 .. 9, A .. F
immutable lowerHexDigits = "0123456789abcdef";           /// Lowercase hexadecimal digits: 0 .. 9, a .. f
immutable digits         = hexDigits[0 .. 10];             /// Decimal digits: 0 .. 9
immutable octalDigits    = digits[0 .. 8];                 /// Octal digits: 0 .. 7
immutable letters        = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; /// All ASCII letters: A .. Z followed by a .. z
immutable uppercase      = letters[0 .. 26];               /// Uppercase letters: A .. Z
immutable lowercase      = letters[26 .. 52];              /// Lowercase letters: a .. z
immutable whitespace     = " \t\v\r\n\f";                /// ASCII _whitespace: space, tab, vertical tab, carriage return, linefeed, form feed

/++
    Letter case specifier.

    A two-valued, $(D bool)-backed enum used to choose between upper case
    and lower case output, e.g. when formatting hexadecimal digits.
  +/
enum LetterCase : bool
{
    upper, /// Upper case letters
    lower  /// Lower case letters
}

///
@safe unittest
{
    import std.conv : to;

    // The same value rendered in base 16 with each letter case.
    assert(to!string(42, 16, LetterCase.lower) == "2a");
    assert(to!string(42, 16, LetterCase.upper) == "2A");
}

///
@system unittest
{
    import std.digest.digest : toHexString;
    import std.digest.hmac : hmac;
    import std.digest.sha : SHA1;
    import std.string : representation;

    auto message = "A very long phrase".representation;
    auto secret = "secret".representation;

    // Compute the HMAC first, then render it with lowercase hex digits.
    auto digest = message.hmac!SHA1(secret);
    const sha1HMAC = digest.toHexString!(LetterCase.lower);
    assert(sha1HMAC == "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5");
}

/// Newline sequence for this system: $(D "\r\n") on Windows and $(D "\n") on
/// Posix. Compilation is rejected on any other platform.
version (Windows)
    immutable newline = "\r\n";
else version (Posix)
    immutable newline = "\n";
else
    static assert(0, "Unsupported OS");


/++
    Tests whether a character is an ASCII letter or decimal digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) lies in one of the ranges 0 .. 9, A .. Z or
    a .. z, $(D false) otherwise.
  +/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    // Everything outside the enclosing '0' .. 'z' span is rejected up front.
    if (c < '0' || c > 'z')
        return false;

    // Inside that span only the three alphanumeric sub-ranges qualify.
    if (c <= '9')
        return true;
    if (c >= 'a')
        return true;
    return 'A' <= c && c <= 'Z';
}

///
@safe pure nothrow @nogc unittest
{
    assert(!isAlphaNum('#'));
    assert( isAlphaNum('1'));
    assert( isAlphaNum('A'));

    // N.B.: non-ASCII Unicode alphanumerics are rejected as well:
    assert(!isAlphaNum('á'));
}

@safe unittest
{
    // Every character of every digit and letter constant is alphanumeric.
    foreach (ch; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase))
    {
        assert(isAlphaNum(ch));
    }

    // No whitespace character is.
    foreach (ch; whitespace)
    {
        assert(!isAlphaNum(ch));
    }
}


/++
    Tests whether a character is an ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in A .. Z or a .. z, $(D false) otherwise.
  +/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Two plain range checks; an optimizer is free to fuse them.
    if ('A' <= c && c <= 'Z')
        return true;
    return 'a' <= c && c <= 'z';
}

///
@safe pure nothrow @nogc unittest
{
    assert(!isAlpha('#'));
    assert(!isAlpha('1'));
    assert( isAlpha('A'));

    // N.B.: non-ASCII Unicode alphabetic characters are rejected as well:
    assert(!isAlpha('á'));
}

@safe unittest
{
    // All letter constants consist solely of alphabetic characters.
    foreach (ch; chain(letters, lowercase, uppercase))
    {
        assert(isAlpha(ch));
    }

    // Digits and whitespace are never alphabetic.
    foreach (ch; chain(digits, octalDigits, whitespace))
    {
        assert(!isAlpha(ch));
    }
}


/++
    Tests whether a character is a lowercase ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in a .. z, $(D false) otherwise.
  +/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    return !(c < 'a' || c > 'z');
}

///
@safe pure nothrow @nogc unittest
{
    assert(!isLower('#'));
    assert(!isLower('A'));
    assert( isLower('a'));

    // N.B.: non-ASCII Unicode letters are rejected whatever their case
    assert(!isLower('Á'));
    assert(!isLower('á'));
}

@safe unittest
{
    // Every entry of the lowercase constant qualifies.
    foreach (ch; lowercase)
    {
        assert(isLower(ch));
    }

    // Digits, uppercase letters and whitespace do not.
    foreach (ch; chain(digits, uppercase, whitespace))
    {
        assert(!isLower(ch));
    }
}


/++
    Tests whether a character is an uppercase ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in A .. Z, $(D false) otherwise.
  +/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    if (c < 'A')
        return false;
    return c <= 'Z';
}

///
@safe pure nothrow @nogc unittest
{
    assert(!isUpper('#'));
    assert(!isUpper('a'));
    assert( isUpper('A'));

    // N.B.: non-ASCII Unicode letters are rejected whatever their case
    assert(!isUpper('Á'));
    assert(!isUpper('á'));
}

@safe unittest
{
    // Every entry of the uppercase constant qualifies.
    foreach (ch; uppercase)
    {
        assert(isUpper(ch));
    }

    // Digits, lowercase letters and whitespace do not.
    foreach (ch; chain(digits, lowercase, whitespace))
    {
        assert(!isUpper(ch));
    }
}


/++
    Tests whether a character is an ASCII decimal digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in 0 .. 9, $(D false) otherwise.
  +/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    return !(c < '0' || c > '9');
}

///
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // The full-width digits are spelled as escapes so that an encoding or
    // normalization pass cannot fold them into the ASCII digits '0' and '4',
    // which isDigit accepts and which would make these assertions fail.
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}

@safe unittest
{
    // Every entry of the digits constant qualifies.
    foreach (ch; digits)
    {
        assert(isDigit(ch));
    }

    // Letters and whitespace do not.
    foreach (ch; chain(letters, whitespace))
    {
        assert(!isDigit(ch));
    }
}


/++
    Tests whether a character is an ASCII base 8 digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in 0 .. 7, $(D false) otherwise.
  +/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    if (c < '0')
        return false;
    return c <= '7';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isOctalDigit('7'));
    assert( isOctalDigit('0'));
    assert(!isOctalDigit('#'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('8'));
}

@safe unittest
{
    // Every entry of the octalDigits constant qualifies.
    foreach (ch; octalDigits)
    {
        assert(isOctalDigit(ch));
    }

    // Letters, the two non-octal decimal digits and whitespace do not.
    foreach (ch; chain(letters, ['8', '9'], whitespace))
    {
        assert(!isOctalDigit(ch));
    }
}


/++
    Tests whether a character is an ASCII base 16 digit of either case.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) lies in one of the ranges 0 .. 9, A .. F or
    a .. f, $(D false) otherwise.
  +/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    // Everything outside the enclosing '0' .. 'f' span is rejected up front.
    if (c < '0' || c > 'f')
        return false;

    // Inside that span only the three hexadecimal sub-ranges qualify.
    if (c <= '9')
        return true;
    if (c >= 'a')
        return true;
    return 'A' <= c && c <= 'F';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isHexDigit('f')); // lowercase hex digits are accepted
    assert( isHexDigit('A'));
    assert( isHexDigit('0'));
    assert(!isHexDigit('#'));
    assert(!isHexDigit('G'));
    assert(!isHexDigit('g'));
}

@safe unittest
{
    // Every entry of the fullHexDigits constant qualifies.
    foreach (ch; fullHexDigits)
    {
        assert(isHexDigit(ch));
    }

    // Letters from the seventh onwards (either case) and whitespace do not.
    foreach (ch; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace))
    {
        assert(!isHexDigit(ch));
    }
}


/++
    Tests whether a character is ASCII whitespace.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is the space, tab, linefeed, vertical tab,
    form feed or carriage return character, $(D false) otherwise.
  +/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
        // '\t' .. '\r' are the consecutive code points 0x09 .. 0x0D.
        case ' ', '\t', '\n', '\v', '\f', '\r':
            return true;
        default:
            return false;
    }
}

///
@safe pure nothrow @nogc unittest
{
    assert( isWhite('\n'));
    assert( isWhite('\t'));
    assert( isWhite(' '));
    assert(!isWhite('#'));
    assert(!isWhite('a'));
    assert(!isWhite('1'));

    // N.B.: non-ASCII Unicode whitespace is only recognized by std.uni.
    static import std.uni;
    assert(!isWhite('\u00A0')); // std.ascii.isWhite
    assert(std.uni.isWhite('\u00A0'));
}

@safe unittest
{
    // Every entry of the whitespace constant qualifies.
    foreach (ch; whitespace)
    {
        assert(isWhite(ch));
    }

    // Digits and letters do not.
    foreach (ch; chain(digits, letters))
    {
        assert(!isWhite(ch));
    }
}


/++
    Tests whether a character is an ASCII control character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is below 0x20 or equal to 0x7F, $(D false)
    otherwise.
  +/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    if (c == 0x7F)
        return true;
    return c < 0x20;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isControl('\n')); // newline is both whitespace and control
    assert( isControl('\022'));
    assert( isControl('\0'));
    assert(!isControl('#'));
    assert(!isControl('a'));
    assert(!isControl('1'));
    assert(!isControl(' '));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u2029'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u0080'));
}

@safe unittest
{
    // The delete character and everything below the space character.
    assert(isControl(0x7F));
    foreach (dchar ch; 0 .. 0x20)
    {
        assert(isControl(ch));
    }

    // Digits, letters and the space character are not control characters.
    foreach (ch; chain(digits, letters, [' ']))
    {
        assert(!isControl(ch));
    }
}


/++
    Tests whether a character is ASCII punctuation.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in the range ! .. ~ and is neither a
    letter nor a digit, $(D false) otherwise. Control characters and
    whitespace lie outside that range and are therefore excluded too.
  +/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    if (c < '!' || c > '~')
        return false;
    return !isAlphaNum(c);
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('+'));
    assert( isPunctuation(','));
    assert( isPunctuation('.'));
    assert( isPunctuation(':'));
    assert( isPunctuation('_'));
    assert( isPunctuation('~'));

    assert(!isPunctuation('\0'));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = en-dash)
}

@safe unittest
{
    // Within ASCII, punctuation is exactly what is left after removing
    // control characters, alphanumerics and the space character.
    foreach (dchar ch; 0 .. 128)
    {
        const excluded = isControl(ch) || isAlphaNum(ch) || ch == ' ';
        assert(isPunctuation(ch) == !excluded);
    }
}


/++
    Tests whether a character is a printable ASCII character other than the
    space character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in the range ! .. ~, $(D false) otherwise.
  +/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    return !(c < '!' || c > '~');
}

///
@safe pure nothrow @nogc unittest
{
    assert( isGraphical('#'));
    assert( isGraphical('a'));
    assert( isGraphical('1'));
    assert(!isGraphical('\0'));
    assert(!isGraphical('\n'));
    assert(!isGraphical(' ')); // whitespace is not graphical

    // N.B.: Unicode graphical characters are not regarded as such.
    assert(!isGraphical('á'));
}

@safe unittest
{
    // Within ASCII, everything except control characters and the space
    // character is graphical.
    foreach (dchar ch; 0 .. 128)
    {
        const excluded = isControl(ch) || ch == ' ';
        assert(isGraphical(ch) == !excluded);
    }
}


/++
    Tests whether a character is a printable ASCII character, the space
    character included.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in the range from the space character up
    to ~, $(D false) otherwise.
  +/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    if (c < ' ')
        return false;
    return c <= '~';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPrintable('#'));
    assert( isPrintable('a'));
    assert( isPrintable('1'));
    assert( isPrintable(' '));  // whitespace is printable
    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized.
    assert(!isPrintable('á'));
}

@safe unittest
{
    // Within ASCII, printable is the exact complement of control.
    foreach (dchar ch; 0 .. 128)
    {
        assert(isPrintable(ch) == !isControl(ch));
    }
}


/++
    Tests whether a character belongs to the ASCII character set.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in the range 0 .. 0x7F, $(D false)
    otherwise.
  +/
pragma(inline, true)
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    return c < 0x80;
}

///
@safe pure nothrow @nogc unittest
{
    assert(!isASCII('á'));
    assert( isASCII('a'));
}

@safe unittest
{
    // The first code point past the ASCII range is rejected ...
    assert(!isASCII(0x80));

    // ... while every code point below it is accepted.
    foreach (dchar ch; 0 .. 0x80)
    {
        assert(isASCII(ch));
    }
}


/++
    Maps an uppercase ASCII letter to its lowercase counterpart.

    Params: c = A character of any type that implicitly converts to $(D dchar).
    For a built-in type, or an enum of a built-in type, the result has type
    $(D Unqual!(OriginalType!C)); for a user-defined type the result is a
    $(D dchar).

    Returns: The lowercase letter matching $(D c) if $(D c) is an uppercase
    ASCII letter; otherwise $(D c) itself, unchanged.
  +/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    alias Base = OriginalType!C;
    static if (isAggregateType!Base)
        alias Result = dchar;
    else
        alias Result = Unqual!Base;

    // Anything that is not A .. Z is passed through, merely converted.
    if (!isUpper(c))
        return cast(Result) c;

    return cast(Result)(cast(Result) c + 'a' - 'A');
}

///
@safe pure nothrow @nogc unittest
{
    assert(toLower('A') == 'a');
    assert(toLower('a') == 'a');
    assert(toLower('#') == '#');

    // N.B.: Non-ASCII Unicode uppercase letters pass through unchanged.
    assert(toLower('Á') == 'Á');
}

@safe pure nothrow unittest
{
    foreach (Ch; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        // Each uppercase letter maps to the lowercase letter at the same index.
        foreach (idx, upper; uppercase)
            assert(toLower(cast(Ch) upper) == lowercase[idx]);

        // Within ASCII, exactly the characters A .. Z are changed.
        foreach (Ch ch; 0 .. 128)
        {
            if ('A' <= ch && ch <= 'Z')
                assert(toLower(ch) != ch);
            else
                assert(toLower(ch) == ch);
        }

        // Values from 128 up to (but excluding) Ch.max are left alone.
        foreach (Ch ch; 128 .. Ch.max)
            assert(toLower(ch) == ch);

        // CTFE
        static assert(toLower(cast(Ch)'A') == 'a');
        static assert(toLower(cast(Ch)'a') == 'a');
    }
}


/++
    Maps a lowercase ASCII letter to its uppercase counterpart.

    Params: c = A value of any type that implicitly converts to $(D dchar).
    For a built-in type, or an enum of a built-in type, the result has type
    $(D Unqual!(OriginalType!C)); for a user-defined type the result is a
    $(D dchar).

    Returns: The uppercase letter matching $(D c) if $(D c) is a lowercase
    ASCII letter; otherwise $(D c) itself, unchanged.
  +/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    alias Base = OriginalType!C;
    static if (isAggregateType!Base)
        alias Result = dchar;
    else
        alias Result = Unqual!Base;

    // Anything that is not a .. z is passed through, merely converted.
    if (!isLower(c))
        return cast(Result) c;

    return cast(Result)(cast(Result) c - ('a' - 'A'));
}

///
@safe pure nothrow @nogc unittest
{
    assert(toUpper('A') == 'A');
    assert(toUpper('a') == 'A');
    assert(toUpper('#') == '#');

    // N.B.: Non-ASCII Unicode lowercase letters pass through unchanged.
    assert(toUpper('á') == 'á');
}

@safe pure nothrow unittest
{
    foreach (Ch; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        // Each lowercase letter maps to the uppercase letter at the same index.
        foreach (idx, lower; lowercase)
            assert(toUpper(cast(Ch) lower) == uppercase[idx]);

        // Within ASCII, exactly the characters a .. z are changed.
        foreach (Ch ch; 0 .. 128)
        {
            if ('a' <= ch && ch <= 'z')
                assert(toUpper(ch) != ch);
            else
                assert(toUpper(ch) == ch);
        }

        // Values from 128 up to (but excluding) Ch.max are left alone.
        foreach (Ch ch; 128 .. Ch.max)
            assert(toUpper(ch) == ch);

        // CTFE
        static assert(toUpper(cast(Ch)'A') == 'A');
        static assert(toUpper(cast(Ch)'a') == 'A');
    }
}


@safe unittest // toUpper and toLower with non-builtin character types
{
    // Structs that implicitly convert to char, wchar and dchar.
    static struct UDC
    {
        char c;
        alias c this;
    }
    static struct UDW
    {
        wchar c;
        alias c this;
    }
    static struct UDD
    {
        dchar c;
        alias c this;
    }

    // Enums whose base type is a built-in character type.
    enum CE :  char { a = 'a', A = 'A' }
    enum WE : wchar { a = 'a', A = 'A' }
    enum DE : dchar { a = 'a', A = 'A' }

    // Enums whose base type is one of the structs above.
    enum UDCE : UDC { a = UDC('a'), A = UDC('A') }
    enum UDWE : UDW { a = UDW('a'), A = UDW('A') }
    enum UDDE : UDD { a = UDD('a'), A = UDD('A') }

    // User-defined types with an implicit conversion to dchar.
    foreach (Wrapper; AliasSeq!(UDC, UDW, UDD))
    {
        // Run time
        assert(toLower(Wrapper('A')) == 'a');
        assert(toLower(Wrapper('a')) == 'a');

        // CTFE
        static assert(toLower(Wrapper('A')) == 'a');
        static assert(toLower(Wrapper('a')) == 'a');
        static assert(toUpper(Wrapper('A')) == 'A');
        static assert(toUpper(Wrapper('a')) == 'A');
    }

    // All six enums, at run time and in CTFE.
    foreach (E; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
    {
        assert(toLower(E.A) == 'a');
        assert(toLower(E.a) == 'a');
        assert(toUpper(E.A) == 'A');
        assert(toUpper(E.a) == 'A');

        static assert(toLower(E.A) == 'a');
        static assert(toLower(E.a) == 'a');
        static assert(toUpper(E.A) == 'A');
        static assert(toUpper(E.a) == 'A');
    }

    // Enums of built-in types: the result has the enum's original type.
    foreach (E; AliasSeq!(CE, WE, DE))
    {
        alias Base = OriginalType!E;
        static assert(is(typeof(toUpper(E.init)) == Base));
        static assert(is(typeof(toLower(E.init)) == Base));
    }

    // Structs and enums of structs: the result is always dchar.
    foreach (U; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
    {
        static assert(is(typeof(toUpper(U.init)) == dchar));
        static assert(is(typeof(toLower(U.init)) == dchar));
    }
}