ru.malik.elaborarer.avt.lexer.pas

Переключить прокрутку окна
Загрузить этот исходный код

{
    Этот исходный код является частью проекта ПВТ-ОО.

    Copyright © 2021 Малик Разработчик

    Это свободная программа: вы можете перераспространять её и/или
    изменять её на условиях Стандартной общественной лицензии GNU в том виде,
    в каком она была опубликована Фондом свободного программного обеспечения;
    либо версии 3 лицензии, либо (по вашему выбору) любой более поздней версии.

    Эта программа распространяется в надежде, что она может быть полезна,
    но БЕЗО ВСЯКИХ ГАРАНТИЙ; даже без неявной гарантии ТОВАРНОГО ВИДА
    или ПРИГОДНОСТИ ДЛЯ ОПРЕДЕЛЁННЫХ ЦЕЛЕЙ. Подробнее см. в Стандартной
    общественной лицензии GNU.

    Вы должны были получить копию Стандартной общественной лицензии GNU
    вместе с этой программой. Если это не так, см.
    <http://www.gnu.org/licenses/>.
}

unit ru.malik.elaborarer.avt.lexer;

{$MODE DELPHI}

interface

uses
    pascalx.lang,
    pascalx.utils,
    ru.malik.elaborarer.avt.programme;

{$ASMMODE INTEL,CALLING REGISTER,TYPEINFO ON}

{%region public }
type
    AVTLexer = class;
    DOCLexer = class;

    { Lexer for AVT source text. The private parse* methods each try to
      recognise one kind of token at a given position of a line and append the
      resulting lexeme to the AVTSource passed in. The methods whose bodies are
      visible in this unit return the unchanged charIndex when the character at
      that position does not start their kind of token. }
    AVTLexer = class(_Object)
    private
        { Table indexed by lexeme code that holds the textual spelling of
          keywords and operators; filled by clinit. }
        class var lexemes: AnsiString_Array1d;
        { Allocates and fills the lexemes table. }
        class procedure clinit(); static;
        { Packs lineIndex into the upper 32 bits and charIndex into the lower
          32 bits of a single 64-bit value. }
        class function coords(lineIndex, charIndex: int): long; static;
    public
        { Returns the spelling stored for the lexeme code, or an empty string
          when the code lies outside the table. }
        class function lexemeToString(lexeme: int): AnsiString; static;
    private
        { When true, parseComment emits comments that open with slash-star-star
          as LITR_DOCUMENTATION lexemes instead of just skipping them.
          NOTE(review): presumably assigned by split from its parameter of the
          same name; the body of split is not visible here. }
        documentationEnabled: boolean;
        { Skips a block comment; returns the packed end position (see coords). }
        function parseComment(source: AVTSource; linesCount, lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): long;
        { Decodes one (possibly escaped) character of a char or string literal;
          returns the packed pair (character code or -1, next index). }
        function parseStringChar(lineLength, charIndex: int; lineChars: PWideChar; c: wchar): long;
        { Parses a char literal enclosed in apostrophes. }
        function parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
        { Parses the longest known operator or punctuation lexeme. }
        function parseOperator(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
        { Parses an integer or floating-point literal, or a lone period. }
        function parseNumeric(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
        { Parses a string literal enclosed in double quotes. }
        function parseString(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
        { NOTE(review): presumably parses identifiers and keywords; the
          implementation is not visible here. }
        function parseName(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    public
        { NOTE(review): presumably the entry point that splits the given source
          into lexemes; the implementation is not visible here. }
        procedure split(source: AVTSource; documentationEnabled: boolean = false);
    end;

    { Second lexer class declared by this unit.
      NOTE(review): judging by the name it tokenizes documentation text; none
      of its method bodies are visible here, so the member notes below are
      assumptions to confirm against the implementation. }
    DOCLexer = class(_Object)
    private
        { Class-level table of lexeme spellings; presumably filled by clinit. }
        class var lexemes: AnsiString_Array1d;
        class procedure clinit(); static;
    private
        { Same parameter layout as the AVTLexer parse* methods: a source, the
          current line length, line index, character index, a pointer to the
          line characters and the character at charIndex. }
        function parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
        function parseText(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    public
        { NOTE(review): presumably the entry point; lineEndingEnabled defaults
          to false. }
        procedure split(source: AVTSource; lineEndingEnabled: boolean = false);
    end;
{%endregion}

implementation

{%region AVTLexer }
    { Builds the class-level lexemes table: allocates 512 entries and stores,
      at the index given by each lexeme constant, the textual spelling of that
      lexeme. Entries that are not assigned here keep whatever value
      AnsiString_Array1d_create leaves in them.
      NOTE(review): presumably called once during unit initialization; the
      call site is not visible here. }
    class procedure AVTLexer.clinit();
    begin
        lexemes := AnsiString_Array1d_create(512);
        { Access and declaration modifiers, type declaration keywords. }
        lexemes[AVT_PRIVATE] := 'private';
        lexemes[AVT_PACKAGE] := 'package';
        lexemes[AVT_PROTECTED] := 'protected';
        lexemes[AVT_PUBLIC] := 'public';
        lexemes[AVT_PUBLISHED] := 'published';
        lexemes[AVT_ABSTRACT] := 'abstract';
        lexemes[AVT_FINAL] := 'final';
        lexemes[AVT_IMPORT] := 'import';
        lexemes[AVT_UNION] := 'union';
        lexemes[AVT_CLASS] := 'class';
        lexemes[AVT_STRUCT] := 'struct';
        lexemes[AVT_SERVICE] := 'service';
        lexemes[AVT_INTERFACE] := 'interface';
        lexemes[AVT_STATIC] := 'static';
        lexemes[AVT_NATIVE] := 'native';
        lexemes[AVT_INTERRUPT] := 'interrupt';
        lexemes[AVT_SYNCHRONIZED] := 'synchronized';
        { Primitive type names, including the 2/4/8 suffixed variants. }
        lexemes[AVT_VOID] := 'void';
        lexemes[AVT_BOOLEAN] := 'boolean';
        lexemes[AVT_CHAR] := 'char';
        lexemes[AVT_REAL] := 'real';
        lexemes[AVT_BYTE] := 'byte';
        lexemes[AVT_BYTE2] := 'byte2';
        lexemes[AVT_BYTE4] := 'byte4';
        lexemes[AVT_BYTE8] := 'byte8';
        lexemes[AVT_SHORT] := 'short';
        lexemes[AVT_SHORT2] := 'short2';
        lexemes[AVT_SHORT4] := 'short4';
        lexemes[AVT_SHORT8] := 'short8';
        lexemes[AVT_INT] := 'int';
        lexemes[AVT_INT2] := 'int2';
        lexemes[AVT_INT4] := 'int4';
        lexemes[AVT_INT8] := 'int8';
        lexemes[AVT_LONG] := 'long';
        lexemes[AVT_LONG2] := 'long2';
        lexemes[AVT_LONG4] := 'long4';
        lexemes[AVT_LONG8] := 'long8';
        lexemes[AVT_FLOAT] := 'float';
        lexemes[AVT_FLOAT2] := 'float2';
        lexemes[AVT_FLOAT4] := 'float4';
        lexemes[AVT_FLOAT8] := 'float8';
        lexemes[AVT_DOUBLE] := 'double';
        lexemes[AVT_DOUBLE2] := 'double2';
        lexemes[AVT_DOUBLE4] := 'double4';
        lexemes[AVT_DOUBLE8] := 'double8';
        { Expression and statement keywords, literal keywords. }
        lexemes[AVT_OPERATOR] := 'operator';
        lexemes[AVT_THROWS] := 'throws';
        lexemes[AVT_SUPER] := 'super';
        lexemes[AVT_THIS] := 'this';
        lexemes[AVT_INSTANCEOF] := 'instanceof';
        lexemes[AVT_WITH] := 'with';
        lexemes[AVT_IF] := 'if';
        lexemes[AVT_ELSE] := 'else';
        lexemes[AVT_SWITCH] := 'switch';
        lexemes[AVT_CASE] := 'case';
        lexemes[AVT_DEFAULT] := 'default';
        lexemes[AVT_DO] := 'do';
        lexemes[AVT_FOR] := 'for';
        lexemes[AVT_WHILE] := 'while';
        lexemes[AVT_BREAK] := 'break';
        lexemes[AVT_CONTINUE] := 'continue';
        lexemes[AVT_RETURN] := 'return';
        lexemes[AVT_THROW] := 'throw';
        lexemes[AVT_TRY_BEGIN] := 'try';
        lexemes[AVT_CATCH] := 'catch';
        lexemes[AVT_FINALLY] := 'finally';
        lexemes[AVT_FALSE] := 'false';
        lexemes[AVT_TRUE] := 'true';
        lexemes[AVT_NEW] := 'new';
        lexemes[AVT_NULL] := 'null';
        { Single-character punctuation. }
        lexemes[CHAR_PARENTH_OPENED] := '(';
        lexemes[CHAR_PARENTH_CLOSED] := ')';
        lexemes[CHAR_EQUALS] := '=';
        lexemes[CHAR_QUESTION] := '?';
        lexemes[CHAR_PERIOD] := '.';
        lexemes[CHAR_COMMA] := ',';
        lexemes[CHAR_COLON] := ':';
        lexemes[CHAR_SEMICOLON] := ';';
        lexemes[CHAR_BRACKET_OPENED] := '[';
        lexemes[CHAR_BRACKET_CLOSED] := ']';
        lexemes[CHAR_CURLY_OPENED] := '{';
        lexemes[CHAR_CURLY_CLOSED] := '}';
        { Increment, decrement, boolean and bitwise operators. }
        lexemes[OPER_INCREMENT] := '++';
        lexemes[OPER_DECREMENT] := '--';
        lexemes[OPER_BOOL_NOT] := '!';
        lexemes[OPER_BOOL_AND] := '&&';
        lexemes[OPER_BOOL_OR] := '||';
        lexemes[OPER_BIT_NOT] := '~';
        lexemes[OPER_BIT_AND] := '&';
        lexemes[OPER_BIT_OR] := '|';
        lexemes[OPER_BIT_XOR] := '^';
        { Scalar arithmetic, shift and comparison operators. }
        lexemes[OPER_SCAL_MUL] := '*';
        lexemes[OPER_SCAL_DIV] := '/';
        lexemes[OPER_SCAL_DIVU] := '//';
        lexemes[OPER_SCAL_REM] := '%';
        lexemes[OPER_SCAL_REMU] := '%%';
        lexemes[OPER_SCAL_ADD] := '+';
        lexemes[OPER_SCAL_SUB] := '-';
        lexemes[OPER_SCAL_SAR] := '>>';
        lexemes[OPER_SCAL_SAL] := '<<';
        lexemes[OPER_SCAL_SHR] := '>>>';
        lexemes[OPER_SCAL_G] := '>';
        lexemes[OPER_SCAL_GE] := '>=';
        lexemes[OPER_SCAL_L] := '<';
        lexemes[OPER_SCAL_LE] := '<=';
        lexemes[OPER_SCAL_E] := '==';
        lexemes[OPER_SCAL_NE] := '!=';
        { Vector operators. }
        lexemes[OPER_VECT_UNPCKL] := '####';
        lexemes[OPER_VECT_UNPCKU] := '^^^^';
        lexemes[OPER_VECT_PACK] := '@@@@';
        lexemes[OPER_VECT_MUL] := '****';
        lexemes[OPER_VECT_DIV] := '////';
        lexemes[OPER_VECT_ADD] := '++++';
        lexemes[OPER_VECT_SUB] := '----';
        lexemes[OPER_VECT_SAR] := '>>>>';
        lexemes[OPER_VECT_SAL] := '<<<<';
        lexemes[OPER_VECT_SHR] := '>>>>>';
        lexemes[OPER_VECT_G] := '|>>|';
        lexemes[OPER_VECT_GE] := '|>=|';
        lexemes[OPER_VECT_L] := '|<<|';
        lexemes[OPER_VECT_LE] := '|<=|';
        lexemes[OPER_VECT_E] := '|==|';
        lexemes[OPER_VECT_NE] := '|!=|';
        lexemes[OPER_VECT_MULS] := '|**|';
        lexemes[OPER_VECT_ADDS] := '|++|';
        lexemes[OPER_VECT_SUBS] := '|--|';
        lexemes[OPER_VECT_MULU] := '#**#';
        lexemes[OPER_VECT_ADDU] := '#++#';
        lexemes[OPER_VECT_SUBU] := '#--#';
        { Compound assignment forms of the bitwise and scalar operators. }
        lexemes[ASGN_BIT_AND] := '&=';
        lexemes[ASGN_BIT_OR] := '|=';
        lexemes[ASGN_BIT_XOR] := '^=';
        lexemes[ASGN_SCAL_MUL] := '*=';
        lexemes[ASGN_SCAL_DIV] := '/=';
        lexemes[ASGN_SCAL_DIVU] := '//=';
        lexemes[ASGN_SCAL_REM] := '%=';
        lexemes[ASGN_SCAL_REMU] := '%%=';
        lexemes[ASGN_SCAL_ADD] := '+=';
        lexemes[ASGN_SCAL_SUB] := '-=';
        lexemes[ASGN_SCAL_SAR] := '>>=';
        lexemes[ASGN_SCAL_SAL] := '<<=';
        lexemes[ASGN_SCAL_SHR] := '>>>=';
        { Compound assignment forms of the vector operators. }
        lexemes[ASGN_VECT_MUL] := '****=';
        lexemes[ASGN_VECT_DIV] := '////=';
        lexemes[ASGN_VECT_ADD] := '++++=';
        lexemes[ASGN_VECT_SUB] := '----=';
        lexemes[ASGN_VECT_SAR] := '>>>>=';
        lexemes[ASGN_VECT_SAL] := '<<<<=';
        lexemes[ASGN_VECT_SHR] := '>>>>>=';
        lexemes[ASGN_VECT_G] := '|>>|=';
        lexemes[ASGN_VECT_GE] := '|>=|=';
        lexemes[ASGN_VECT_L] := '|<<|=';
        lexemes[ASGN_VECT_LE] := '|<=|=';
        lexemes[ASGN_VECT_E] := '|==|=';
        lexemes[ASGN_VECT_NE] := '|!=|=';
        lexemes[ASGN_VECT_MULS] := '|**|=';
        lexemes[ASGN_VECT_ADDS] := '|++|=';
        lexemes[ASGN_VECT_SUBS] := '|--|=';
        lexemes[ASGN_VECT_MULU] := '#**#=';
        lexemes[ASGN_VECT_ADDU] := '#++#=';
        lexemes[ASGN_VECT_SUBU] := '#--#=';
    end;

    { Packs two 32-bit values into one 64-bit value: lineIndex goes into the
      upper 32 bits, charIndex into the lower 32 bits. Callers in this unit
      also use it to return a (character code, next index) pair. }
    class function AVTLexer.coords(lineIndex, charIndex: int): long;
    var
        upperPart: long;
        lowerPart: long;
    begin
        upperPart := long(lineIndex) shl 32;
        { Mask off the sign extension so a negative charIndex cannot spill
          into the upper half. }
        lowerPart := long(charIndex) and $00000000ffffffff;
        result := upperPart or lowerPart;
    end;

    { Returns the spelling stored in the lexemes table for the given lexeme
      code. A code outside the bounds of the table yields an empty string. }
    class function AVTLexer.lexemeToString(lexeme: int): AnsiString;
    begin
        if (lexeme >= 0) and (lexeme < length(lexemes)) then begin
            result := lexemes[lexeme];
        end else begin
            result := '';
        end;
    end;

    { Skips a block comment that opens with slash-star at charIndex of line
      lineIndex and may span several lines of source.

      Returns coords(line, char) of the position right after the closing
      star-slash. If the character at charIndex does not open a comment, the
      input position is returned unchanged. If the last line (linesCount - 1)
      ends before a closing marker is found, the position where scanning
      stopped is returned.

      When documentationEnabled is set and the opener is immediately followed
      by another star, the comment text is collected: every line is trimmed, a
      leading star on continuation lines is dropped, the lines are joined with
      LINE_ENDING, and the result is added to source as a LITR_DOCUMENTATION
      lexeme located at the comment start. }
    function AVTLexer.parseComment(source: AVTSource; linesCount, lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): long;
    var
        wantDocumentation: boolean;
        row: int;
        endLine: int;
        endChar: int;
        currentLine: UnicodeString;
        piece: UnicodeString;
        docText: UnicodeString;
    begin
        { Not the two-character comment opener: report the position unchanged. }
        if (charIndex >= lineLength - 1) or (c <> '/') or (lineChars[charIndex + 1] <> '*') then begin
            result := coords(lineIndex, charIndex);
            exit;
        end;
        wantDocumentation := documentationEnabled and (charIndex < lineLength - 2) and (lineChars[charIndex + 2] = '*');
        endLine := lineIndex;
        endChar := charIndex + 2;
        while true do begin
            if endChar >= lineLength then begin
                { End of the current line: continue on the next one, if any. }
                if endLine >= linesCount - 1 then break;
                inc(endLine);
                endChar := 0;
                currentLine := source.line[endLine];
                lineChars := PWideChar(currentLine);
                lineLength := length(currentLine);
            end;
            if (endChar < lineLength - 1) and (lineChars[endChar] = '*') and (lineChars[endChar + 1] = '/') then begin
                inc(endChar, 2);
                break;
            end;
            inc(endChar);
        end;
        result := coords(endLine, endChar);
        if not wantDocumentation then exit;
        piece := source.line[lineIndex];
        if endLine = lineIndex then begin
            { Single-line comment: take the text between opener and closer. }
            docText := stringTrim(stringCopy(piece, charIndex + 4, endChar - 1));
        end else begin
            docText := stringTrim(stringCopy(piece, charIndex + 4));
            for row := lineIndex + 1 to endLine do begin
                if row = endLine then begin
                    { The last line is cut before the closing marker. }
                    piece := stringTrim(stringCopy(source.line[row], 1, endChar - 1));
                end else begin
                    piece := stringTrim(source.line[row]);
                end;
                if stringStartsWith(UnicodeString('*'), piece) then begin
                    piece := stringTrim(stringCopy(piece, 2));
                end;
                docText := docText + UnicodeString(LINE_ENDING) + piece;
            end;
        end;
        source.addLexemeUnicodeString(lineIndex, charIndex, LITR_DOCUMENTATION, docText);
    end;

    { Decodes one character of a char or string literal body, starting at
      charIndex, where c is the character found at that position.

      The result is built with coords: the upper 32 bits hold the decoded
      character code and the lower 32 bits the index of the first character
      after the decoded one. On failure the upper part is -1 and the lower
      part is the original charIndex. Failure cases: an unescaped apostrophe or
      double quote, a backslash at the end of the line, an unknown escape
      letter, or a backslash-u escape without four hexadecimal digits.

      Recognised escapes after a backslash: 0 b t n f r, double quote,
      apostrophe, backslash, and u followed by four hex digits. }
    function AVTLexer.parseStringChar(lineLength, charIndex: int; lineChars: PWideChar; c: wchar): long;
    var
        remaining: int;
        nibble: int;
        code: int;
        cursor: int;
        escapeChar: wchar;
        hexChar: wchar;
    begin
        { An unescaped quote cannot be a literal character. }
        if (c = '''') or (c = '"') then begin
            result := coords(-1, charIndex);
            exit;
        end;
        { Any character other than the backslash stands for itself. }
        if c <> '\' then begin
            result := coords(int(c), charIndex + 1);
            exit;
        end;
        cursor := charIndex + 1;
        if cursor >= lineLength then begin
            result := coords(-1, charIndex);
            exit;
        end;
        escapeChar := lineChars[cursor];
        if escapeChar <> 'u' then begin
            { Single-letter escapes map directly to one character code. }
            case escapeChar of
            '0': code := $0000;
            'b': code := $0008;
            't': code := $0009;
            'n': code := $000a;
            'f': code := $000c;
            'r': code := $000d;
            '"': code := $0022;
            '''': code := $0027;
            '\': code := $005c;
            else
                result := coords(-1, charIndex);
                exit;
            end;
            result := coords(code, cursor + 1);
            exit;
        end;
        inc(cursor);
        { All four hex digits must fit on the line. }
        if cursor >= lineLength - 3 then begin
            result := coords(-1, charIndex);
            exit;
        end;
        code := 0;
        for remaining := 1 to 4 do begin
            hexChar := lineChars[cursor];
            if (hexChar >= '0') and (hexChar <= '9') then begin
                nibble := int(hexChar) - int('0');
            end else
            if (hexChar >= 'a') and (hexChar <= 'f') then begin
                nibble := int(hexChar) - (int('a') - $0a);
            end else
            if (hexChar >= 'A') and (hexChar <= 'F') then begin
                nibble := int(hexChar) - (int('A') - $0a);
            end else begin
                result := coords(-1, charIndex);
                exit;
            end;
            code := (code shl 4) or nibble;
            inc(cursor);
        end;
        result := coords(code, cursor);
    end;

    { Parses a char literal: an apostrophe, one (possibly escaped) character
      decoded by parseStringChar, and a closing apostrophe.

      Returns charIndex unchanged when c is not an apostrophe. Otherwise adds a
      LITR_CHAR lexeme at (lineIndex, charIndex) to source and returns the
      index just past the closing apostrophe.

      Raises AVTCompilerException ('Error in char literal') when the line ends
      after the opening apostrophe, when the character cannot be decoded, or
      when the closing apostrophe is missing. }
    function AVTLexer.parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    var
        code: int;
        closeIndex: int;
        decoded: long;
    begin
        if c <> '''' then begin
            result := charIndex;
            exit;
        end;
        closeIndex := charIndex + 1;
        if closeIndex < lineLength then begin
            decoded := parseStringChar(lineLength, closeIndex, lineChars, lineChars[closeIndex]);
            { Lower half: index after the character; upper half: its code. }
            closeIndex := int(decoded);
            code := int(decoded shr 32);
            if (code >= 0) and (closeIndex < lineLength) and (lineChars[closeIndex] = '''') then begin
                source.addLexemeChar(lineIndex, charIndex, LITR_CHAR, wchar(code));
                result := closeIndex + 1;
                exit;
            end;
        end;
        raise AVTCompilerException.create('Error in char literal', source.fileName, lineIndex, charIndex);
    end;

    { Parses an operator or punctuation lexeme that starts at charIndex.

      Returns charIndex unchanged when c is not one of the operator symbols.
      Otherwise it takes the maximal run of operator symbols and looks the run
      up in the lexemes table; while no entry matches, the run is shortened by
      one character from the right and the lookup is repeated. The matching
      lexeme is added to source at (lineIndex, charIndex) and the index just
      past the matched text is returned.

      Raises AVTCompilerException ('Unknown operator') when not even the first
      symbol alone matches a table entry. }
    function AVTLexer.parseOperator(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    var
        lexeme: int;
        endIndex: int;
        symbolsCount: int;
        candidate: AnsiString;
        symbols: wchar_Array1d;
    begin
        symbols := stringToWCharArray('.,:;?=()[]{}<>!~&^|+-*/%#@');
        symbolsCount := length(symbols);
        if arrayfindeqfPrimitive(symbols, 0, symbolsCount, c) < 0 then begin
            result := charIndex;
            exit;
        end;
        { Extend the run while the following characters are operator symbols. }
        endIndex := charIndex + 1;
        while endIndex < lineLength do begin
            c := lineChars[endIndex];
            if arrayfindeqfPrimitive(symbols, 0, symbolsCount, c) < 0 then break;
            inc(endIndex);
        end;
        candidate := stringToUTF8(stringCopy(source.line[lineIndex], charIndex + 1, endIndex + 1));
        while true do begin
            for lexeme := 0 to length(lexemes) - 1 do begin
                if lexemes[lexeme] = candidate then begin
                    source.addLexeme(lineIndex, charIndex, lexeme);
                    result := endIndex;
                    exit;
                end;
            end;
            { No match: drop the last symbol and try the shorter spelling. }
            dec(endIndex);
            if endIndex = charIndex then begin
                raise AVTCompilerException.create('Unknown operator', source.fileName, lineIndex, charIndex);
            end;
            candidate := stringCopy(candidate, 1, endIndex - charIndex + 1);
        end;
    end;

    { Parses a numeric literal (or a lone period) that starts at charIndex and
      adds the corresponding lexeme to source at (lineIndex, charIndex).

      Returns charIndex unchanged when c is neither a decimal digit nor a
      period; otherwise returns the index of the first character after the
      consumed text.

      Recognised forms:
        - 0b / 0o / 0x followed by binary, octal or hexadecimal digits, with an
          optional S, I or L suffix. Without a suffix the value must fit in 32
          bits; it is reinterpreted as a signed int and stored as the smallest
          of byte, short or int that holds it. A prefix that is not followed by
          a valid digit yields a byte literal 0 and leaves the prefix letter
          unconsumed.
        - A period that is not followed by a digit yields CHAR_PERIOD.
        - A decimal mantissa with optional fraction and exponent, with an
          optional S, I, L, F, D or R suffix. Without a suffix the literal is a
          real when it has a fraction or an exponent, otherwise the smallest of
          byte, short or int that holds it.

      Raises AVTCompilerException when the value does not fit the requested or
      implied type, or when the exponent exceeds 9999. }
    function AVTLexer.parseNumeric(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    label
        label0,
        label1,
        label2;
    const
        { Thresholds at or above which appending one more digit would overflow
          64 bits (binary, octal, decimal, hexadecimal). }
        BLIMIT = long($8000000000000000);
        OLIMIT = long($2000000000000000);
        DLIMIT = long($1999999999999999);
        XLIMIT = long($1000000000000000);
        { 2^32 and 2^31: first values that no longer fit 32 bits / signed int. }
        ILIMIT = long($0000000100000000);
        SLIMIT = long($0000000080000000);
    var
        rounded: boolean;
        negorder: boolean;
        hasorder: boolean;
        hasover: boolean;
        hasfrac: boolean;
        lenfrac: int;
        order: int;
        digit: int;
        charIndexOrder: int;
        charIndexResult: int;
        unumber: long;
        fnumber: float;
        dnumber: double;
        rnumber: real;
    begin
        if ((c < '0') or (c > '9')) and (c <> '.') then begin
            result := charIndex;
            exit;
        end;
        unumber := 0;
        hasover := false;
        charIndexResult := charIndex + 1;
        { prefix }
        if (charIndexResult < lineLength - 1) and (c = '0') then begin
            case lineChars[charIndexResult] of
            'B', 'b': begin
                c := lineChars[charIndexResult + 1];
                if (c < '0') or (c > '1') then begin
                    { No binary digit after the prefix: emit a plain 0. }
                    source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, 0);
                    result := charIndexResult;
                    exit;
                end;
                inc(charIndexResult);
                repeat
                    if (c >= '0') and (c <= '1') then begin
                        digit := int(c) - int('0');
                    end else begin
                        break;
                    end;
                    if ulongCmp(unumber, BLIMIT) >= 0 then begin
                        hasover := true;
                    end else begin
                        unumber := (unumber shl 1) or digit;
                    end;
                    inc(charIndexResult);
                    if charIndexResult >= lineLength then break;
                    c := lineChars[charIndexResult];
                until false;
            end;
            'O', 'o': begin
                c := lineChars[charIndexResult + 1];
                if (c < '0') or (c > '7') then begin
                    { No octal digit after the prefix: emit a plain 0. }
                    source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, 0);
                    result := charIndexResult;
                    exit;
                end;
                inc(charIndexResult);
                repeat
                    if (c >= '0') and (c <= '7') then begin
                        digit := int(c) - int('0');
                    end else begin
                        break;
                    end;
                    if ulongCmp(unumber, OLIMIT) >= 0 then begin
                        hasover := true;
                    end else begin
                        unumber := (unumber shl 3) or digit;
                    end;
                    inc(charIndexResult);
                    if charIndexResult >= lineLength then break;
                    c := lineChars[charIndexResult];
                until false;
            end;
            'X', 'x': begin
                c := lineChars[charIndexResult + 1];
                if ((c < '0') or (c > '9')) and ((c < 'a') or (c > 'f')) and ((c < 'A') or (c > 'F')) then begin
                    { No hex digit after the prefix: emit a plain 0. }
                    source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, 0);
                    result := charIndexResult;
                    exit;
                end;
                inc(charIndexResult);
                repeat
                    if (c >= '0') and (c <= '9') then begin
                        digit := int(c) - int('0');
                    end else
                    if (c >= 'a') and (c <= 'f') then begin
                        digit := int(c) - (int('a') - $0a);
                    end else
                    if (c >= 'A') and (c <= 'F') then begin
                        digit := int(c) - (int('A') - $0a);
                    end else begin
                        break;
                    end;
                    if ulongCmp(unumber, XLIMIT) >= 0 then begin
                        hasover := true;
                    end else begin
                        unumber := (unumber shl 4) or digit;
                    end;
                    inc(charIndexResult);
                    if charIndexResult >= lineLength then break;
                    c := lineChars[charIndexResult];
                until false;
            end
            else
                { Not a radix prefix: handle as a decimal literal. }
                goto label1;
            end;
            { Optional type suffix of a prefixed integer. }
            if charIndexResult < lineLength then begin
                case c of
                'S', 's': begin
                    if ulongCmp(unumber, ILIMIT) >= 0 then begin
                        raise AVTCompilerException.create('Error in short literal', source.fileName, lineIndex, charIndex);
                    end;
                    { Reinterpret the low 32 bits as a signed value. }
                    unumber := int(unumber);
                    if (unumber < SHORT_MIN_VALUE) or (unumber > SHORT_MAX_VALUE) then begin
                        raise AVTCompilerException.create('Error in short literal', source.fileName, lineIndex, charIndex);
                    end;
                    source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
                end;
                'I', 'i': begin
                    if ulongCmp(unumber, ILIMIT) >= 0 then begin
                        raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
                    end;
                    source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
                end;
                'L', 'l': begin
                    if hasover then begin
                        raise AVTCompilerException.create('Error in long literal', source.fileName, lineIndex, charIndex);
                    end;
                    source.addLexemeLong(lineIndex, charIndex, LITR_LONG, unumber);
                end
                else
                    goto label0;
                end;
                result := charIndexResult + 1;
                exit;
            end;
            label0:
            { Prefixed integer without a suffix: must fit in 32 bits. }
            if ulongCmp(unumber, ILIMIT) >= 0 then begin
                raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
            end;
            unumber := int(unumber);
            if (unumber >= BYTE_MIN_VALUE) and (unumber <= BYTE_MAX_VALUE) then begin
                source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, byte(unumber));
            end else
            if (unumber >= SHORT_MIN_VALUE) and (unumber <= SHORT_MAX_VALUE) then begin
                source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
            end else begin
                source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
            end;
            result := charIndexResult;
            exit;
        end;
        label1:
        { decimal point }
        if c = '.' then begin
            if charIndexResult >= lineLength then begin
                source.addLexeme(lineIndex, charIndex, CHAR_PERIOD);
                result := charIndexResult;
                exit;
            end;
            c := lineChars[charIndexResult];
            if (c < '0') or (c > '9') then begin
                source.addLexeme(lineIndex, charIndex, CHAR_PERIOD);
                result := charIndexResult;
                exit;
            end;
            inc(charIndexResult);
            hasfrac := true;
            lenfrac := 1;
        end else begin
            hasfrac := false;
            lenfrac := 0;
        end;
        { mantissa }
        digit := int(c) - int('0');
        rounded := false;
        unumber := digit;
        while charIndexResult < lineLength do begin
            c := lineChars[charIndexResult];
            if (c >= '0') and (c <= '9') then begin
                digit := int(c) - int('0');
                { Accept the digit only while unumber * 10 + digit still fits
                  in 64 unsigned bits. }
                if (unumber < DLIMIT) and (unumber >= 0) or (unumber = DLIMIT) and (digit < 6) then begin
                    unumber := (unumber * 10) + digit;
                    if hasfrac then inc(lenfrac);
                end else
                if hasfrac then begin
                    { Excess fraction digits are dropped; the first dropped
                      one rounds the mantissa. }
                    if not rounded and (digit >= 5) and (unumber <> -1) then inc(unumber);
                    rounded := true;
                end else begin
                    { Excess integer digits: remember the overflow and raise
                      the decimal exponent by one per dropped digit. }
                    hasover := true;
                    dec(lenfrac);
                end;
            end else
            if c = '.' then begin
                if hasfrac then break;
                hasfrac := true;
            end else begin
                break;
            end;
            inc(charIndexResult);
        end;
        { exponent }
        order := 0;
        hasorder := false;
        if charIndexResult < lineLength then begin
            c := lineChars[charIndexResult];
            if (c = 'E') or (c = 'e') then begin
                negorder := false;
                charIndexOrder := charIndexResult + 1;
                if charIndexOrder < lineLength then begin
                    case lineChars[charIndexOrder] of
                    '-': begin
                        negorder := true;
                        inc(charIndexOrder);
                    end;
                    '+': begin
                        inc(charIndexOrder);
                    end;
                    end;
                end;
                if charIndexOrder < lineLength then begin
                    c := lineChars[charIndexOrder];
                    if (c >= '0') and (c <= '9') then begin
                        { Round only when integer digits were actually dropped
                          (hasover). Without this guard every exact mantissa
                          whose last digit is 5 or more was incremented, so
                          5e1 was read as 60. }
                        if hasover and not rounded and (digit >= 5) and (unumber <> -1) then inc(unumber);
                        repeat
                            if (c >= '0') and (c <= '9') then begin
                                digit := int(c) - int('0');
                                order := (order * 10) + digit;
                            end else begin
                                break;
                            end;
                            if order > 9999 then begin
                                raise AVTCompilerException.create('Error in numeric literal', source.fileName, lineIndex, charIndex);
                            end;
                            inc(charIndexOrder);
                            if charIndexOrder >= lineLength then break;
                            c := lineChars[charIndexOrder];
                        until false;
                        hasorder := true;
                        if negorder then order := -order;
                        charIndexResult := charIndexOrder;
                    end;
                end;
            end;
        end;
        { data processing }
        if (hasfrac or hasorder) and (unumber <> 0) then begin
            { Strip trailing decimal zeros from the mantissa. }
            while ulongRem(unumber, 10) = 0 do begin
                unumber := ulongDiv(unumber, 10);
                dec(lenfrac);
            end;
        end;
        dec(order, lenfrac);
        rnumber := ulongToReal(unumber);
        { Apply the decimal exponent in steps of at most 4932 / -4931. }
        while order >= 4932 do begin
            rnumber := RealValueRepresenter.pow10(rnumber, 4932);
            dec(order, 4932);
        end;
        while order <= -4931 do begin
            rnumber := RealValueRepresenter.pow10(rnumber, -4931);
            dec(order, -4931);
        end;
        rnumber := RealValueRepresenter.pow10(rnumber, order);
        if charIndexResult < lineLength then begin
            case lineChars[charIndexResult] of
            'S', 's': begin
                { Strictly greater: SHORT_MAX_VALUE itself is a valid short,
                  as it already is for prefixed and unsuffixed literals. }
                if hasfrac or hasorder or hasover or (ulongCmp(unumber, SHORT_MAX_VALUE) > 0) then begin
                    raise AVTCompilerException.create('Error in short literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
            end;
            'I', 'i': begin
                { Strictly greater: INT_MAX_VALUE itself is a valid int. }
                if hasfrac or hasorder or hasover or (ulongCmp(unumber, INT_MAX_VALUE) > 0) then begin
                    raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
            end;
            'L', 'l': begin
                { A negative signed view means the value exceeds 2^63 - 1. }
                if hasfrac or hasorder or hasover or (unumber < 0) then begin
                    raise AVTCompilerException.create('Error in long literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeLong(lineIndex, charIndex, LITR_LONG, unumber);
            end;
            'F', 'f': begin
                fnumber := realToFloat(rnumber);
                if floatIsInfinite(fnumber) then begin
                    raise AVTCompilerException.create('Error in float literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeFloat(lineIndex, charIndex, LITR_FLOAT, fnumber);
            end;
            'D', 'd': begin
                dnumber := realToDouble(rnumber);
                if doubleIsInfinite(dnumber) then begin
                    raise AVTCompilerException.create('Error in double literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeDouble(lineIndex, charIndex, LITR_DOUBLE, dnumber);
            end;
            'R', 'r': begin
                if realIsInfinite(rnumber) then begin
                    raise AVTCompilerException.create('Error in real literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeReal(lineIndex, charIndex, LITR_REAL, rnumber);
            end
            else
                goto label2;
            end;
            result := charIndexResult + 1;
            exit;
        end;
        label2:
        { No suffix: the literal kind follows from its shape and magnitude. }
        if hasfrac or hasorder then begin
            if realIsInfinite(rnumber) then begin
                raise AVTCompilerException.create('Error in real literal', source.fileName, lineIndex, charIndex);
            end;
            source.addLexemeReal(lineIndex, charIndex, LITR_REAL, rnumber);
        end else begin
            if ulongCmp(unumber, SLIMIT) >= 0 then begin
                raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
            end;
            if unumber <= BYTE_MAX_VALUE then begin
                source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, byte(unumber));
            end else
            if unumber <= SHORT_MAX_VALUE then begin
                source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
            end else begin
                source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
            end;
        end;
        result := charIndexResult;
    end;

    {
        Tries to read a double-quoted string literal that starts at charIndex.

        If c is not a double quote, nothing is consumed and charIndex is returned
        unchanged. Otherwise the characters up to the closing quote on the same line
        are decoded one at a time through parseStringChar, collected into a growing
        buffer and emitted as a LITR_STRING lexeme; the returned index is the one
        right after the closing quote.

        Raises AVTCompilerException when the literal is not closed before the end of
        the line, when parseStringChar reports a negative character code, or when
        the literal would need more than SHORT_MAX_VALUE characters.
    }
    function AVTLexer.parseString(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    var
        closed: boolean;
        code: int;
        position: int;
        count: int;
        capacity: int;
        decoded: long;
        buffer: wchar_Array1d;
        grown: wchar_Array1d;
    begin
        if c <> '"' then begin
            result := charIndex;
            exit;
        end;
        position := charIndex + 1;
        if position >= lineLength then begin
            raise AVTCompilerException.create('Error in String literal', source.fileName, lineIndex, charIndex);
        end;
        count := 0;
        capacity := $1f;
        buffer := wchar_Array1d_create(capacity);
        closed := false;
        repeat
            c := lineChars[position];
            if c = '"' then begin
                closed := true;
                break;
            end;
            { parseStringChar packs its answer: low 32 bits are the next index, high 32 bits the character code. }
            decoded := parseStringChar(lineLength, position, lineChars, c);
            code := int(decoded shr 32);
            position := int(decoded);
            if code < 0 then begin
                raise AVTCompilerException.create('Error in String literal', source.fileName, lineIndex, charIndex);
            end;
            if count = capacity then begin
                { The capacity follows the sequence 31, 63, 127 and so on, and is not allowed to grow past SHORT_MAX_VALUE. }
                if capacity = SHORT_MAX_VALUE then begin
                    raise AVTCompilerException.create('String literal is too long', source.fileName, lineIndex, charIndex);
                end;
                capacity := (capacity shl 1) + 1;
                grown := wchar_Array1d_create(capacity);
                arraycopyPrimitives(buffer, 0, grown, 0, count);
                buffer := grown;
            end;
            buffer[count] := wchar(code);
            inc(count);
        until position >= lineLength;
        if not closed then begin
            { The line ended before a closing quote was seen. }
            raise AVTCompilerException.create('Error in String literal', source.fileName, lineIndex, charIndex);
        end;
        source.addLexemeUnicodeString(lineIndex, charIndex, LITR_STRING, UnicodeString_create(buffer, 0, count));
        result := position + 1;
    end;

    {
        Tries to read an identifier or keyword that starts at charIndex.

        A name begins with an ASCII letter or underscore and continues with ASCII
        letters, digits and underscores. If c cannot start a name, nothing is
        consumed and charIndex is returned unchanged.

        The scanned word is compared with every entry of the lexemes table. A match
        at AVT_FALSE or AVT_TRUE produces a LITR_BOOLEAN lexeme, any other match
        produces a lexeme whose kind is the matching table index, and a word that is
        not in the table produces a LITR_NAME lexeme. The returned index is the one
        right after the last character of the word.
    }
    function AVTLexer.parseName(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    var
        lexemeIndex: int;
        endIndex: int;
        isKeyword: boolean;
        identifier: AnsiString;
    begin
        if not (((c >= 'A') and (c <= 'Z')) or ((c >= 'a') and (c <= 'z')) or (c = '_')) then begin
            result := charIndex;
            exit;
        end;
        endIndex := charIndex + 1;
        while endIndex < lineLength do begin
            c := lineChars[endIndex];
            if not (((c >= '0') and (c <= '9')) or ((c >= 'A') and (c <= 'Z')) or ((c >= 'a') and (c <= 'z')) or (c = '_')) then begin
                break;
            end;
            inc(endIndex);
        end;
        identifier := stringToUTF8(stringCopy(source.line[lineIndex], charIndex + 1, endIndex + 1));
        isKeyword := false;
        for lexemeIndex := 0 to length(lexemes) - 1 do begin
            if identifier <> lexemes[lexemeIndex] then continue;
            { The two boolean keywords become literals; every other table hit is emitted under its own index. }
            if lexemeIndex = AVT_FALSE then begin
                source.addLexemeBoolean(lineIndex, charIndex, LITR_BOOLEAN, false);
            end else
            if lexemeIndex = AVT_TRUE then begin
                source.addLexemeBoolean(lineIndex, charIndex, LITR_BOOLEAN, true);
            end else begin
                source.addLexeme(lineIndex, charIndex, lexemeIndex);
            end;
            isKeyword := true;
            break;
        end;
        if not isKeyword then begin
            source.addLexemeAnsiString(lineIndex, charIndex, LITR_NAME, identifier);
        end;
        result := endIndex;
    end;

    {
        Breaks the whole source into lexemes.

        Every line is walked left to right. Characters with a code of #$0020 or less
        are skipped. At every other position the recognisers are tried in a fixed
        order: comment, name, string, numeric, operator, character. The first one
        that returns a position beyond the current one wins and scanning resumes
        from the position it returned. If none of them advances, an
        AVTCompilerException with the message 'Illegal character' is raised.

        parseComment is the only recogniser that may move to another line: its
        result carries the line index in the high 32 bits and the character index in
        the low 32 bits, so the current line is reloaded after it succeeds.

        After the last line an AVT_END lexeme is appended at line linesCount,
        character 0. The documentationEnabled argument is stored in the field of the
        same name before scanning starts.
    }
    procedure AVTLexer.split(source: AVTSource; documentationEnabled: boolean);
    var
        current: wchar;
        total: int;
        len: int;
        row: int;
        col: int;
        startCol: int;
        posBefore: long;
        posAfter: long;
        content: UnicodeString;
        chars: PWideChar;
    begin
        self.documentationEnabled := documentationEnabled;
        total := source.length;
        row := 0;
        while row < total do begin
            content := source.line[row];
            chars := PWideChar(content);
            len := length(content);
            col := 0;
            while col < len do begin
                current := chars[col];
                if current <= #$0020 then begin
                    inc(col);
                    continue;
                end;

                { Comments first: they may span lines, so the position is compared as a packed line/character pair. }
                posBefore := coords(row, col);
                posAfter := parseComment(source, total, len, row, col, chars, current);
                if posAfter > posBefore then begin
                    row := int(posAfter shr 32);
                    col := int(posAfter);
                    content := source.line[row];
                    len := length(content);
                    chars := PWideChar(content);
                    continue;
                end;

                { Identifiers and keywords. }
                startCol := col;
                col := parseName(source, len, row, col, chars, current);
                if col > startCol then continue;

                { String literals. }
                startCol := col;
                col := parseString(source, len, row, col, chars, current);
                if col > startCol then continue;

                { Numeric literals. }
                startCol := col;
                col := parseNumeric(source, len, row, col, chars, current);
                if col > startCol then continue;

                { Operators. }
                startCol := col;
                col := parseOperator(source, len, row, col, chars, current);
                if col > startCol then continue;

                { Character literals. }
                startCol := col;
                col := parseCharacter(source, len, row, col, chars, current);
                if col > startCol then continue;

                raise AVTCompilerException.create('Illegal character', source.fileName, row, col);
            end;
            inc(row);
        end;
        source.addLexeme(total, 0, AVT_END);
    end;
{%endregion}

{%region DOCLexer }
    {
        Class initializer: allocates the lexemes table with 64 slots and stores the
        textual spelling of each documentation lexeme under its DOC_* index.
        The DOC_* constants are declared outside this section; slots that are not
        assigned here are left as created by AnsiString_Array1d_create.
        parseCharacter and parseText look lexemes up in this table by comparing
        the scanned text with every entry.
    }
    class procedure DOCLexer.clinit();
    begin
        lexemes := AnsiString_Array1d_create(64);
        { Tags that start with the at-sign. }
        lexemes[DOC_ALINK] := '@link';
        lexemes[DOC_APARAM] := '@param';
        lexemes[DOC_ARETURN] := '@return';
        lexemes[DOC_ATHROWS] := '@throws';
        lexemes[DOC_ASINCE] := '@since';
        lexemes[DOC_ASEE] := '@see';
        { DOC_VECT_* entries: operator spellings of four or five symbols. }
        lexemes[DOC_VECT_PACK] := '@@@@';
        lexemes[DOC_VECT_UNPCKL] := '####';
        lexemes[DOC_VECT_UNPCKU] := '^^^^';
        lexemes[DOC_VECT_MUL] := '****';
        lexemes[DOC_VECT_DIV] := '////';
        lexemes[DOC_VECT_ADD] := '++++';
        lexemes[DOC_VECT_SUB] := '----';
        lexemes[DOC_VECT_SAR] := '>>>>';
        lexemes[DOC_VECT_SAL] := '<<<<';
        lexemes[DOC_VECT_SHR] := '>>>>>';
        lexemes[DOC_VECT_G] := '|>>|';
        lexemes[DOC_VECT_GE] := '|>=|';
        lexemes[DOC_VECT_L] := '|<<|';
        lexemes[DOC_VECT_LE] := '|<=|';
        lexemes[DOC_VECT_E] := '|==|';
        lexemes[DOC_VECT_NE] := '|!=|';
        lexemes[DOC_VECT_MULS] := '|**|';
        lexemes[DOC_VECT_ADDS] := '|++|';
        lexemes[DOC_VECT_SUBS] := '|--|';
        lexemes[DOC_VECT_MULU] := '#**#';
        lexemes[DOC_VECT_ADDU] := '#++#';
        lexemes[DOC_VECT_SUBU] := '#--#';
        { DOC_SCAL_* entries: operator spellings of two or three symbols. }
        lexemes[DOC_SCAL_DIVU] := '//';
        lexemes[DOC_SCAL_REMU] := '%%';
        lexemes[DOC_SCAL_SAR] := '>>';
        lexemes[DOC_SCAL_SAL] := '<<';
        lexemes[DOC_SCAL_SHR] := '>>>';
        lexemes[DOC_SCAL_GE] := '>=';
        lexemes[DOC_SCAL_LE] := '<=';
        { Single-symbol entries. parseCharacter falls back to these when a longer run of symbols matches nothing. }
        lexemes[DOC_PARENTH_OPENED] := '(';
        lexemes[DOC_PARENTH_CLOSED] := ')';
        lexemes[DOC_BRACKET_OPENED] := '[';
        lexemes[DOC_BRACKET_CLOSED] := ']';
        lexemes[DOC_CURLY_OPENED] := '{';
        lexemes[DOC_CURLY_CLOSED] := '}';
        lexemes[DOC_TAG_OPENED] := '<';
        lexemes[DOC_TAG_CLOSED] := '>';
        lexemes[DOC_EXCLAMATION_MARK] := '!';
        lexemes[DOC_VERTICAL_LINE] := '|';
        lexemes[DOC_AMPERSAND] := '&';
        lexemes[DOC_EQUAL] := '=';
        lexemes[DOC_TILDE] := '~';
        lexemes[DOC_POUND_SIGN] := '#';
        lexemes[DOC_CIRCUMFLEX_ACCENT] := '^';
        lexemes[DOC_ASTERISK] := '*';
        lexemes[DOC_SOLIDUS] := '/';
        lexemes[DOC_PERCENT] := '%';
        lexemes[DOC_PLUS] := '+';
        lexemes[DOC_MINUS] := '-';
        lexemes[DOC_COMMA] := ',';
        lexemes[DOC_PERIOD] := '.';
    end;

    {
        Tries to read a punctuation or operator lexeme that starts at charIndex.

        If c is not one of the recognised symbols, nothing is consumed and charIndex
        is returned unchanged. Otherwise the longest run of consecutive symbols is
        taken and looked up in the lexemes table; while no entry matches, the run is
        shortened by one symbol from the right and looked up again. The matching
        entry is emitted as a lexeme whose kind is its table index and whose value
        is the matched text. The returned index is the one right after the matched
        text, so any symbols cut off from the run are scanned again by the caller.
    }
    function DOCLexer.parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    var
        matched: boolean;
        lexemeIndex: int;
        endIndex: int;
        punctuationCount: int;
        candidate: AnsiString;
        punctuation: wchar_Array1d;
    begin
        punctuation := stringToWCharArray('.,=()[]{}<>!~&^|+-*/%#');
        punctuationCount := length(punctuation);
        if arrayfindeqfPrimitive(punctuation, 0, punctuationCount, c) < 0 then begin
            result := charIndex;
            exit;
        end;
        { Extend the run while the following characters are symbols too. }
        endIndex := charIndex + 1;
        while endIndex < lineLength do begin
            c := lineChars[endIndex];
            if arrayfindeqfPrimitive(punctuation, 0, punctuationCount, c) < 0 then break;
            inc(endIndex);
        end;
        candidate := stringToUTF8(stringCopy(source.line[lineIndex], charIndex + 1, endIndex + 1));
        matched := false;
        repeat
            lexemeIndex := 0;
            while lexemeIndex < length(lexemes) do begin
                if candidate = lexemes[lexemeIndex] then begin
                    source.addLexemeUnicodeString(lineIndex, charIndex, lexemeIndex, stringToUTF16(candidate));
                    matched := true;
                    break;
                end;
                inc(lexemeIndex);
            end;
            if not matched then begin
                { No entry has this spelling: drop the last symbol and try the shorter prefix. }
                dec(endIndex);
                candidate := stringCopy(candidate, 1, endIndex - charIndex + 1);
            end;
        until matched;
        result := endIndex;
    end;

    {
        Reads a run of ordinary text that starts at charIndex and emits it as one
        lexeme.

        The character at charIndex is always consumed; the caller has already
        established that it is neither whitespace nor a symbol handled by
        parseCharacter. The run then extends until the end of the line, a character
        not greater than a space, or one of the symbols handled by parseCharacter.

        The kind of the emitted lexeme is:
          - the index of the matching entry, if the run equals an entry of the
            lexemes table;
          - DOC_NAME, if the run consists only of ASCII letters, underscores and
            digits and does not start with a digit;
          - DOC_TEXT otherwise.

        Returns the index right after the last character of the run.
    }
    function DOCLexer.parseText(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
    label
        label0;
    var
        i: int;
        charIndexResult: int;
        symbolsCount: int;
        parsedNameUTF8: AnsiString;
        parsedNameUTF16: UnicodeString;
        symbols: wchar_Array1d;
    begin
        charIndexResult := charIndex + 1;
        symbols := stringToWCharArray('.,=()[]{}<>!~&^|+-*/%#');
        symbolsCount := length(symbols);
        { Bound the scan by lineLength, like the other scanners in this unit, instead of relying on the terminating #0 of the line buffer. }
        while charIndexResult < lineLength do begin
            c := lineChars[charIndexResult];
            if (c <= ' ') or (arrayfindeqfPrimitive(symbols, 0, symbolsCount, c) >= 0) then break;
            inc(charIndexResult);
        end;
        parsedNameUTF16 := stringCopy(source.line[lineIndex], charIndex + 1, charIndexResult + 1);
        parsedNameUTF8 := stringToUTF8(parsedNameUTF16);
        begin
            { A run that is spelled exactly like a table entry is emitted under that entry's index. }
            for i := 0 to length(lexemes) - 1 do if parsedNameUTF8 = lexemes[i] then begin
                source.addLexemeUnicodeString(lineIndex, charIndex, i, parsedNameUTF16);
                goto label0;
            end;
            { Any character that is not a letter or underscore, and is not a digit beyond the first position, makes the run plain text. }
            for i := 1 to length(parsedNameUTF16) do begin
                c := parsedNameUTF16[i];
                if ((c < 'A') or (c > 'Z')) and ((c < 'a') or (c > 'z')) and (c <> '_') and ((i = 1) or (i > 1) and ((c < '0') or (c > '9'))) then begin
                    source.addLexemeUnicodeString(lineIndex, charIndex, DOC_TEXT, parsedNameUTF16);
                    goto label0;
                end;
            end;
            source.addLexemeUnicodeString(lineIndex, charIndex, DOC_NAME, parsedNameUTF16);
        end;
        label0:
        result := charIndexResult;
    end;

    {
        Breaks documentation text into lexemes.

        An empty line produces a DOC_END_OF_PARAGRAPH lexeme. A non-empty line that
        directly follows another non-empty line is preceded by a DOC_END_OF_LINE
        lexeme, but only when lineEndingEnabled is true; that lexeme is positioned
        on the previous line at the index where its scan stopped. No separator is
        produced before the first line or right after a paragraph end.

        Within a line, characters with a code of #$0020 or less are skipped. At every
        other position parseCharacter is tried first and parseText is used when
        parseCharacter does not advance.

        After the last line a DOC_END lexeme is appended at line linesCount,
        character 0.
    }
    procedure DOCLexer.split(source: AVTSource; lineEndingEnabled: boolean);
    var
        paragraphJustEnded: boolean;
        current: wchar;
        total: int;
        len: int;
        row: int;
        col: int;
        startCol: int;
        content: UnicodeString;
        chars: PWideChar;
    begin
        { Starts as true so that the very first line is not preceded by an end-of-line lexeme. }
        paragraphJustEnded := true;
        total := source.length;
        col := 0;
        row := 0;
        while row < total do begin
            content := source.line[row];
            len := length(content);
            if len > 0 then begin
                if (not paragraphJustEnded) and lineEndingEnabled then begin
                    { col still holds the position where scanning of the previous line stopped. }
                    source.addLexeme(row - 1, col, DOC_END_OF_LINE);
                end;
                paragraphJustEnded := false;
                chars := PWideChar(content);
                col := 0;
                while col < len do begin
                    current := chars[col];
                    if current > #$0020 then begin
                        startCol := col;
                        col := parseCharacter(source, len, row, col, chars, current);
                        if col <= startCol then begin
                            col := parseText(source, len, row, col, chars, current);
                        end;
                    end else begin
                        inc(col);
                    end;
                end;
            end else begin
                source.addLexeme(row, 0, DOC_END_OF_PARAGRAPH);
                paragraphJustEnded := true;
            end;
            inc(row);
        end;
        source.addLexeme(total, 0, DOC_END);
    end;
{%endregion}

initialization
    { Run the class initializers of both lexers when the unit is initialized; DOCLexer.clinit fills its lexemes table. }
    AVTLexer.clinit();
    DOCLexer.clinit();

end.