{
Этот исходный код является частью проекта ПВТ-ОО.
Copyright © 2021 Малик Разработчик
Это свободная программа: вы можете перераспространять её и/или
изменять её на условиях Стандартной общественной лицензии GNU в том виде,
в каком она была опубликована Фондом свободного программного обеспечения;
либо версии 3 лицензии, либо (по вашему выбору) любой более поздней версии.
Эта программа распространяется в надежде, что она может быть полезна,
но БЕЗО ВСЯКИХ ГАРАНТИЙ; даже без неявной гарантии ТОВАРНОГО ВИДА
или ПРИГОДНОСТИ ДЛЯ ОПРЕДЕЛЁННЫХ ЦЕЛЕЙ. Подробнее см. в Стандартной
общественной лицензии GNU.
Вы должны были получить копию Стандартной общественной лицензии GNU
вместе с этой программой. Если это не так, см.
<http://www.gnu.org/licenses/>.
}
unit ru.malik.elaborarer.avt.lexer;
{$MODE DELPHI}
interface
uses
pascalx.lang,
pascalx.utils,
ru.malik.elaborarer.avt.programme;
{$ASMMODE INTEL,CALLING REGISTER,TYPEINFO ON}
{%region public }
type
AVTLexer = class;
DOCLexer = class;
{ Lexer for program source text: split walks the lines of an AVTSource and
  stores the lexemes it recognizes back into that source object. }
AVTLexer = class(_Object)
private
{ Table indexed by lexeme code that holds the textual form of keywords,
  punctuation and operators; filled once by clinit. }
class var lexemes: AnsiString_Array1d;
{ Fills the lexemes table; called from the unit initialization section. }
class procedure clinit(); static;
{ Packs two 32-bit values into one long: lineIndex in the upper half,
  charIndex in the lower half. }
class function coords(lineIndex, charIndex: int): long; static;
public
{ Returns the text registered for a lexeme code, or an empty string when the
  code is outside the table. }
class function lexemeToString(lexeme: int): AnsiString; static;
private
{ Copy of the flag passed to split; when set, parseComment stores comments
  that open with a slash and two asterisks as LITR_DOCUMENTATION lexemes. }
documentationEnabled: boolean;
{ The parse* methods below receive the current position and the character c
  found there. parseComment returns the position after the comment packed by
  coords; parseStringChar returns a character code packed together with the
  next char index; the remaining ones return the char index after the parsed
  lexeme, or charIndex unchanged when c does not start such a lexeme. }
function parseComment(source: AVTSource; linesCount, lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): long;
function parseStringChar(lineLength, charIndex: int; lineChars: PWideChar; c: wchar): long;
function parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
function parseOperator(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
function parseNumeric(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
function parseString(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
function parseName(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
public
{ Splits the whole source into lexemes and finishes with an AVT_END lexeme.
  Raises AVTCompilerException on a character that no parser accepts. }
procedure split(source: AVTSource; documentationEnabled: boolean = false);
end;
{ Lexer for documentation text: split walks the lines of an AVTSource and
  stores DOC_* lexemes (tags, symbol runs, names, plain text, paragraph and
  line ends) back into that source object. }
DOCLexer = class(_Object)
private
{ Table indexed by DOC_* lexeme code that holds the textual form of the
  documentation tags and symbols; filled once by clinit. }
class var lexemes: AnsiString_Array1d;
{ Fills the lexemes table; called from the unit initialization section. }
class procedure clinit(); static;
private
{ Parses a run of symbol characters starting at charIndex; returns the char
  index after the stored lexeme, or charIndex when c is not a symbol. }
function parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
{ Parses a word starting at charIndex and stores it as a table lexeme,
  DOC_NAME or DOC_TEXT; returns the char index after the word. }
function parseText(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
public
{ Splits the whole documentation text into lexemes and finishes with a
  DOC_END lexeme. When lineEndingEnabled is set, DOC_END_OF_LINE lexemes are
  emitted between consecutive non-empty lines. }
procedure split(source: AVTSource; lineEndingEnabled: boolean = false);
end;
{%endregion}
implementation
{%region AVTLexer }
{ Builds the static table that maps lexeme codes to their source text.
  The table has 512 slots; slots that are not assigned below keep the value
  that AnsiString_Array1d_create leaves in them (presumably an empty string,
  to be confirmed against pascalx.lang). parseName and parseOperator search
  this table linearly, and lexemeToString reads it by index. }
class procedure AVTLexer.clinit();
begin
lexemes := AnsiString_Array1d_create(512);
{ AVT_* codes: words matched by parseName. }
lexemes[AVT_PRIVATE] := 'private';
lexemes[AVT_PACKAGE] := 'package';
lexemes[AVT_PROTECTED] := 'protected';
lexemes[AVT_PUBLIC] := 'public';
lexemes[AVT_PUBLISHED] := 'published';
lexemes[AVT_ABSTRACT] := 'abstract';
lexemes[AVT_FINAL] := 'final';
lexemes[AVT_IMPORT] := 'import';
lexemes[AVT_UNION] := 'union';
lexemes[AVT_CLASS] := 'class';
lexemes[AVT_STRUCT] := 'struct';
lexemes[AVT_SERVICE] := 'service';
lexemes[AVT_INTERFACE] := 'interface';
lexemes[AVT_STATIC] := 'static';
lexemes[AVT_NATIVE] := 'native';
lexemes[AVT_INTERRUPT] := 'interrupt';
lexemes[AVT_SYNCHRONIZED] := 'synchronized';
lexemes[AVT_VOID] := 'void';
lexemes[AVT_BOOLEAN] := 'boolean';
lexemes[AVT_CHAR] := 'char';
lexemes[AVT_REAL] := 'real';
lexemes[AVT_BYTE] := 'byte';
lexemes[AVT_BYTE2] := 'byte2';
lexemes[AVT_BYTE4] := 'byte4';
lexemes[AVT_BYTE8] := 'byte8';
lexemes[AVT_SHORT] := 'short';
lexemes[AVT_SHORT2] := 'short2';
lexemes[AVT_SHORT4] := 'short4';
lexemes[AVT_SHORT8] := 'short8';
lexemes[AVT_INT] := 'int';
lexemes[AVT_INT2] := 'int2';
lexemes[AVT_INT4] := 'int4';
lexemes[AVT_INT8] := 'int8';
lexemes[AVT_LONG] := 'long';
lexemes[AVT_LONG2] := 'long2';
lexemes[AVT_LONG4] := 'long4';
lexemes[AVT_LONG8] := 'long8';
lexemes[AVT_FLOAT] := 'float';
lexemes[AVT_FLOAT2] := 'float2';
lexemes[AVT_FLOAT4] := 'float4';
lexemes[AVT_FLOAT8] := 'float8';
lexemes[AVT_DOUBLE] := 'double';
lexemes[AVT_DOUBLE2] := 'double2';
lexemes[AVT_DOUBLE4] := 'double4';
lexemes[AVT_DOUBLE8] := 'double8';
lexemes[AVT_OPERATOR] := 'operator';
lexemes[AVT_THROWS] := 'throws';
lexemes[AVT_SUPER] := 'super';
lexemes[AVT_THIS] := 'this';
lexemes[AVT_INSTANCEOF] := 'instanceof';
lexemes[AVT_WITH] := 'with';
lexemes[AVT_IF] := 'if';
lexemes[AVT_ELSE] := 'else';
lexemes[AVT_SWITCH] := 'switch';
lexemes[AVT_CASE] := 'case';
lexemes[AVT_DEFAULT] := 'default';
lexemes[AVT_DO] := 'do';
lexemes[AVT_FOR] := 'for';
lexemes[AVT_WHILE] := 'while';
lexemes[AVT_BREAK] := 'break';
lexemes[AVT_CONTINUE] := 'continue';
lexemes[AVT_RETURN] := 'return';
lexemes[AVT_THROW] := 'throw';
lexemes[AVT_TRY_BEGIN] := 'try';
lexemes[AVT_CATCH] := 'catch';
lexemes[AVT_FINALLY] := 'finally';
{ parseName stores 'false' and 'true' as LITR_BOOLEAN lexemes rather than
  under these two codes. }
lexemes[AVT_FALSE] := 'false';
lexemes[AVT_TRUE] := 'true';
lexemes[AVT_NEW] := 'new';
lexemes[AVT_NULL] := 'null';
{ CHAR_* codes: single punctuation characters. }
lexemes[CHAR_PARENTH_OPENED] := '(';
lexemes[CHAR_PARENTH_CLOSED] := ')';
lexemes[CHAR_EQUALS] := '=';
lexemes[CHAR_QUESTION] := '?';
lexemes[CHAR_PERIOD] := '.';
lexemes[CHAR_COMMA] := ',';
lexemes[CHAR_COLON] := ':';
lexemes[CHAR_SEMICOLON] := ';';
lexemes[CHAR_BRACKET_OPENED] := '[';
lexemes[CHAR_BRACKET_CLOSED] := ']';
lexemes[CHAR_CURLY_OPENED] := '{';
lexemes[CHAR_CURLY_CLOSED] := '}';
{ OPER_* codes: operators without assignment. }
lexemes[OPER_INCREMENT] := '++';
lexemes[OPER_DECREMENT] := '--';
lexemes[OPER_BOOL_NOT] := '!';
lexemes[OPER_BOOL_AND] := '&&';
lexemes[OPER_BOOL_OR] := '||';
lexemes[OPER_BIT_NOT] := '~';
lexemes[OPER_BIT_AND] := '&';
lexemes[OPER_BIT_OR] := '|';
lexemes[OPER_BIT_XOR] := '^';
lexemes[OPER_SCAL_MUL] := '*';
lexemes[OPER_SCAL_DIV] := '/';
lexemes[OPER_SCAL_DIVU] := '//';
lexemes[OPER_SCAL_REM] := '%';
lexemes[OPER_SCAL_REMU] := '%%';
lexemes[OPER_SCAL_ADD] := '+';
lexemes[OPER_SCAL_SUB] := '-';
lexemes[OPER_SCAL_SAR] := '>>';
lexemes[OPER_SCAL_SAL] := '<<';
lexemes[OPER_SCAL_SHR] := '>>>';
lexemes[OPER_SCAL_G] := '>';
lexemes[OPER_SCAL_GE] := '>=';
lexemes[OPER_SCAL_L] := '<';
lexemes[OPER_SCAL_LE] := '<=';
lexemes[OPER_SCAL_E] := '==';
lexemes[OPER_SCAL_NE] := '!=';
{ OPER_VECT_* codes: multi-character operator forms. }
lexemes[OPER_VECT_UNPCKL] := '####';
lexemes[OPER_VECT_UNPCKU] := '^^^^';
lexemes[OPER_VECT_PACK] := '@@@@';
lexemes[OPER_VECT_MUL] := '****';
lexemes[OPER_VECT_DIV] := '////';
lexemes[OPER_VECT_ADD] := '++++';
lexemes[OPER_VECT_SUB] := '----';
lexemes[OPER_VECT_SAR] := '>>>>';
lexemes[OPER_VECT_SAL] := '<<<<';
lexemes[OPER_VECT_SHR] := '>>>>>';
lexemes[OPER_VECT_G] := '|>>|';
lexemes[OPER_VECT_GE] := '|>=|';
lexemes[OPER_VECT_L] := '|<<|';
lexemes[OPER_VECT_LE] := '|<=|';
lexemes[OPER_VECT_E] := '|==|';
lexemes[OPER_VECT_NE] := '|!=|';
lexemes[OPER_VECT_MULS] := '|**|';
lexemes[OPER_VECT_ADDS] := '|++|';
lexemes[OPER_VECT_SUBS] := '|--|';
lexemes[OPER_VECT_MULU] := '#**#';
lexemes[OPER_VECT_ADDU] := '#++#';
lexemes[OPER_VECT_SUBU] := '#--#';
{ ASGN_* codes: each is the matching operator text followed by '='. }
lexemes[ASGN_BIT_AND] := '&=';
lexemes[ASGN_BIT_OR] := '|=';
lexemes[ASGN_BIT_XOR] := '^=';
lexemes[ASGN_SCAL_MUL] := '*=';
lexemes[ASGN_SCAL_DIV] := '/=';
lexemes[ASGN_SCAL_DIVU] := '//=';
lexemes[ASGN_SCAL_REM] := '%=';
lexemes[ASGN_SCAL_REMU] := '%%=';
lexemes[ASGN_SCAL_ADD] := '+=';
lexemes[ASGN_SCAL_SUB] := '-=';
lexemes[ASGN_SCAL_SAR] := '>>=';
lexemes[ASGN_SCAL_SAL] := '<<=';
lexemes[ASGN_SCAL_SHR] := '>>>=';
lexemes[ASGN_VECT_MUL] := '****=';
lexemes[ASGN_VECT_DIV] := '////=';
lexemes[ASGN_VECT_ADD] := '++++=';
lexemes[ASGN_VECT_SUB] := '----=';
lexemes[ASGN_VECT_SAR] := '>>>>=';
lexemes[ASGN_VECT_SAL] := '<<<<=';
lexemes[ASGN_VECT_SHR] := '>>>>>=';
lexemes[ASGN_VECT_G] := '|>>|=';
lexemes[ASGN_VECT_GE] := '|>=|=';
lexemes[ASGN_VECT_L] := '|<<|=';
lexemes[ASGN_VECT_LE] := '|<=|=';
lexemes[ASGN_VECT_E] := '|==|=';
lexemes[ASGN_VECT_NE] := '|!=|=';
lexemes[ASGN_VECT_MULS] := '|**|=';
lexemes[ASGN_VECT_ADDS] := '|++|=';
lexemes[ASGN_VECT_SUBS] := '|--|=';
lexemes[ASGN_VECT_MULU] := '#**#=';
lexemes[ASGN_VECT_ADDU] := '#++#=';
lexemes[ASGN_VECT_SUBU] := '#--#=';
end;
{ Packs two 32-bit values into a single long.
  lineIndex goes into bits 32..63, charIndex into bits 0..31 (masked so that a
  negative charIndex cannot spill into the upper half). Callers unpack with
  int(value shr 32) and int(value). }
class function AVTLexer.coords(lineIndex, charIndex: int): long;
var
    upperHalf: long;
    lowerHalf: long;
begin
    upperHalf := long(lineIndex) shl 32;
    lowerHalf := long(charIndex) and $00000000ffffffff;
    result := upperHalf or lowerHalf;
end;
{ Returns the text stored in the lexemes table for the given code.
  A code below zero or past the end of the table yields an empty string. }
class function AVTLexer.lexemeToString(lexeme: int): AnsiString;
begin
    if (lexeme >= 0) and (lexeme < length(lexemes)) then begin
        result := lexemes[lexeme];
    end else begin
        result := '';
    end;
end;
{ Skips a block comment that starts at (lineIndex, charIndex).
  Returns coords(lineIndex, charIndex) unchanged when the text at that
  position does not open a comment with a slash followed by an asterisk.
  Otherwise returns the packed position just after the closing asterisk and
  slash; if the comment is never closed, the position is the end of the last
  line.
  When documentationEnabled is set and the opener is followed by a second
  asterisk, the comment body is stored as a LITR_DOCUMENTATION lexeme: the
  lines are trimmed, one leading asterisk is removed from every continuation
  line, and the lines are joined with LINE_ENDING. }
function AVTLexer.parseComment(source: AVTSource; linesCount, lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): long;
var
    collectDoc: boolean;
    row: int;
    endLine: int;
    endChar: int;
    scannedLine: UnicodeString;
    piece: UnicodeString;
    docText: UnicodeString;
begin
    { Not a comment opener: report the position unchanged. }
    if (c <> '/') or (charIndex >= lineLength - 1) or (lineChars[charIndex + 1] <> '*') then begin
        result := coords(lineIndex, charIndex);
        exit;
    end;
    collectDoc := documentationEnabled and (charIndex < lineLength - 2) and (lineChars[charIndex + 2] = '*');
    { Scan forward, moving to following lines as needed, until the closer. }
    endLine := lineIndex;
    endChar := charIndex + 2;
    while true do begin
        if endChar >= lineLength then begin
            { End of the current line; stop if it was the last one. }
            if endLine >= linesCount - 1 then break;
            inc(endLine);
            endChar := 0;
            scannedLine := source.line[endLine];
            lineChars := PWideChar(scannedLine);
            lineLength := length(scannedLine);
        end;
        if (endChar < lineLength - 1) and (lineChars[endChar] = '*') and (lineChars[endChar + 1] = '/') then begin
            inc(endChar, 2);
            break;
        end;
        inc(endChar);
    end;
    if collectDoc then begin
        piece := source.line[lineIndex];
        if endLine = lineIndex then begin
            { Opener and closer on the same line. }
            docText := stringTrim(stringCopy(piece, charIndex + 4, endChar - 1));
        end else begin
            docText := stringTrim(stringCopy(piece, charIndex + 4));
            for row := lineIndex + 1 to endLine do begin
                if row = endLine then begin
                    { Last line: keep only the text in front of the closer. }
                    piece := stringTrim(stringCopy(source.line[row], 1, endChar - 1));
                end else begin
                    piece := stringTrim(source.line[row]);
                end;
                if stringStartsWith(UnicodeString('*'), piece) then begin
                    piece := stringTrim(stringCopy(piece, 2));
                end;
                docText := docText + UnicodeString(LINE_ENDING) + piece;
            end;
        end;
        source.addLexemeUnicodeString(lineIndex, charIndex, LITR_DOCUMENTATION, docText);
    end;
    result := coords(endLine, endChar);
end;
{ Decodes one character of a char or string literal starting at charIndex.
  The result is packed with coords: the upper half holds the character code
  and the lower half the index of the next unread character. On failure the
  upper half is -1 and the lower half is the original charIndex.
  Failures: an unescaped single or double quote, a backslash at the end of
  the line, an unknown escape letter, or a \u escape that is not followed by
  four hexadecimal digits within the line. }
function AVTLexer.parseStringChar(lineLength, charIndex: int; lineChars: PWideChar; c: wchar): long;
var
    step: int;
    nibble: int;
    code: int;
    cursor: int;
    escape: wchar;
    hex: wchar;
begin
    { Plain character (no backslash). }
    if c <> '\' then begin
        if (c = '''') or (c = '"') then begin
            result := coords(-1, charIndex);
        end else begin
            result := coords(int(c), charIndex + 1);
        end;
        exit;
    end;
    { Escape sequence: look at the character after the backslash. }
    cursor := charIndex + 1;
    if cursor >= lineLength then begin
        result := coords(-1, charIndex);
        exit;
    end;
    escape := lineChars[cursor];
    if escape <> 'u' then begin
        case escape of
        '0': code := $0000;
        'b': code := $0008;
        't': code := $0009;
        'n': code := $000a;
        'f': code := $000c;
        'r': code := $000d;
        '"': code := $0022;
        '''': code := $0027;
        '\': code := $005c;
        else
            code := -1;
        end;
        if code < 0 then begin
            result := coords(-1, charIndex);
        end else begin
            result := coords(code, cursor + 1);
        end;
        exit;
    end;
    { \u escape: exactly four hexadecimal digits must fit in the line. }
    inc(cursor);
    if cursor >= lineLength - 3 then begin
        result := coords(-1, charIndex);
        exit;
    end;
    code := 0;
    for step := 1 to 4 do begin
        hex := lineChars[cursor];
        if (hex >= '0') and (hex <= '9') then begin
            nibble := int(hex) - int('0');
        end else
        if (hex >= 'a') and (hex <= 'f') then begin
            nibble := int(hex) - int('a') + $0a;
        end else
        if (hex >= 'A') and (hex <= 'F') then begin
            nibble := int(hex) - int('A') + $0a;
        end else begin
            result := coords(-1, charIndex);
            exit;
        end;
        code := (code shl 4) or nibble;
        inc(cursor);
    end;
    result := coords(code, cursor);
end;
{ Parses a char literal enclosed in single quotes.
  Returns charIndex unchanged when c is not a single quote. Otherwise stores a
  LITR_CHAR lexeme and returns the index after the closing quote.
  Raises AVTCompilerException when the line ends right after the opening
  quote, when parseStringChar rejects the content, or when the closing quote
  is missing. }
function AVTLexer.parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
var
    decoded: long;
    code: int;
    cursor: int;
begin
    if c <> '''' then begin
        result := charIndex;
        exit;
    end;
    cursor := charIndex + 1;
    if cursor >= lineLength then begin
        raise AVTCompilerException.create('Error in char literal', source.fileName, lineIndex, charIndex);
    end;
    { Upper half of decoded: character code; lower half: next index. }
    decoded := parseStringChar(lineLength, cursor, lineChars, lineChars[cursor]);
    code := int(decoded shr 32);
    cursor := int(decoded);
    if (code < 0) or (cursor >= lineLength) or (lineChars[cursor] <> '''') then begin
        raise AVTCompilerException.create('Error in char literal', source.fileName, lineIndex, charIndex);
    end;
    source.addLexemeChar(lineIndex, charIndex, LITR_CHAR, wchar(code));
    result := cursor + 1;
end;
{ Parses punctuation or an operator starting at charIndex.
  Returns charIndex unchanged when c is not one of the operator characters.
  Otherwise the whole run of operator characters is taken and shortened from
  the right, one character at a time, until it equals an entry of the lexemes
  table; that entry's index is stored as the lexeme and the index after the
  matched text is returned.
  Raises AVTCompilerException when not even the first character matches. }
function AVTLexer.parseOperator(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
var
    slot: int;
    endIndex: int;
    alphabetSize: int;
    candidate: AnsiString;
    alphabet: wchar_Array1d;
begin
    alphabet := stringToWCharArray('.,:;?=()[]{}<>!~&^|+-*/%#@');
    alphabetSize := length(alphabet);
    if arrayfindeqfPrimitive(alphabet, 0, alphabetSize, c) < 0 then begin
        result := charIndex;
        exit;
    end;
    { Find the end of the run of operator characters. }
    endIndex := charIndex + 1;
    while endIndex < lineLength do begin
        c := lineChars[endIndex];
        if arrayfindeqfPrimitive(alphabet, 0, alphabetSize, c) < 0 then break;
        inc(endIndex);
    end;
    candidate := stringToUTF8(stringCopy(source.line[lineIndex], charIndex + 1, endIndex + 1));
    { Longest match first: drop the last character after every failed lookup. }
    repeat
        for slot := 0 to length(lexemes) - 1 do begin
            if candidate = lexemes[slot] then begin
                source.addLexeme(lineIndex, charIndex, slot);
                result := endIndex;
                exit;
            end;
        end;
        dec(endIndex);
        if endIndex = charIndex then begin
            raise AVTCompilerException.create('Unknown operator', source.fileName, lineIndex, charIndex);
        end;
        candidate := stringCopy(candidate, 1, endIndex - charIndex + 1);
    until false;
end;
{ Parses a numeric literal, or a lone period, starting at charIndex.
  Returns charIndex unchanged when c is neither a decimal digit nor a period;
  otherwise stores one lexeme and returns the index after it.

  Accepted forms:
  - 0b / 0o / 0x followed by binary, octal or hexadecimal digits, with an
    optional S, I or L suffix (either letter case). Without a suffix the low
    32 bits are reinterpreted as a signed int and the smallest of byte, short
    and int that holds the value is chosen.
  - decimal digits with an optional fraction and an optional exponent
    (E or e, optional sign, digits), with an optional S, I, L, F, D or R
    suffix. Without a suffix a literal with a fraction or exponent becomes
    LITR_REAL, any other becomes byte, short or int.
  - a period that is not followed by a digit is stored as CHAR_PERIOD.

  Raises AVTCompilerException when the value does not fit the requested or
  implied type, or when the exponent exceeds 9999.

  Fixes compared with the previous revision:
  - the mantissa is no longer incremented before an exponent when no digit
    had been dropped (5e2 used to be read as 6e2);
  - decimal literals equal to SHORT_MAX_VALUE with suffix S, or equal to
    INT_MAX_VALUE with suffix I, are accepted, matching the unsuffixed and
    the prefixed paths. }
function AVTLexer.parseNumeric(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
label
    label0,
    label1,
    label2;
const
    { Thresholds above which appending one more digit would overflow 64 bits. }
    BLIMIT = long($8000000000000000);
    OLIMIT = long($2000000000000000);
    DLIMIT = long($1999999999999999);
    XLIMIT = long($1000000000000000);
    { First values that no longer fit 32 unsigned bits / 31 bits. }
    ILIMIT = long($0000000100000000);
    SLIMIT = long($0000000080000000);
var
    rounded: boolean;
    negorder: boolean;
    hasorder: boolean;
    hasover: boolean;
    hasfrac: boolean;
    lenfrac: int;
    order: int;
    digit: int;
    charIndexOrder: int;
    charIndexResult: int;
    unumber: long;
    fnumber: float;
    dnumber: double;
    rnumber: real;
begin
    if ((c < '0') or (c > '9')) and (c <> '.') then begin
        result := charIndex;
        exit;
    end;
    unumber := 0;
    hasover := false;
    charIndexResult := charIndex + 1;
    { prefix: needs a zero plus at least two more characters on the line }
    if (charIndexResult < lineLength - 1) and (c = '0') then begin
        case lineChars[charIndexResult] of
        'B', 'b': begin
            c := lineChars[charIndexResult + 1];
            if (c < '0') or (c > '1') then begin
                { No digit after the prefix letter: the zero alone is the literal. }
                source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, 0);
                result := charIndexResult;
                exit;
            end;
            inc(charIndexResult);
            repeat
                if (c >= '0') and (c <= '1') then begin
                    digit := int(c) - int('0');
                end else begin
                    break;
                end;
                if ulongCmp(unumber, BLIMIT) >= 0 then begin
                    hasover := true;
                end else begin
                    unumber := (unumber shl 1) or digit;
                end;
                inc(charIndexResult);
                if charIndexResult >= lineLength then break;
                c := lineChars[charIndexResult];
            until false;
        end;
        'O', 'o': begin
            c := lineChars[charIndexResult + 1];
            if (c < '0') or (c > '7') then begin
                source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, 0);
                result := charIndexResult;
                exit;
            end;
            inc(charIndexResult);
            repeat
                if (c >= '0') and (c <= '7') then begin
                    digit := int(c) - int('0');
                end else begin
                    break;
                end;
                if ulongCmp(unumber, OLIMIT) >= 0 then begin
                    hasover := true;
                end else begin
                    unumber := (unumber shl 3) or digit;
                end;
                inc(charIndexResult);
                if charIndexResult >= lineLength then break;
                c := lineChars[charIndexResult];
            until false;
        end;
        'X', 'x': begin
            c := lineChars[charIndexResult + 1];
            if ((c < '0') or (c > '9')) and ((c < 'a') or (c > 'f')) and ((c < 'A') or (c > 'F')) then begin
                source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, 0);
                result := charIndexResult;
                exit;
            end;
            inc(charIndexResult);
            repeat
                if (c >= '0') and (c <= '9') then begin
                    digit := int(c) - int('0');
                end else
                if (c >= 'a') and (c <= 'f') then begin
                    digit := int(c) - (int('a') - $0a);
                end else
                if (c >= 'A') and (c <= 'F') then begin
                    digit := int(c) - (int('A') - $0a);
                end else begin
                    break;
                end;
                if ulongCmp(unumber, XLIMIT) >= 0 then begin
                    hasover := true;
                end else begin
                    unumber := (unumber shl 4) or digit;
                end;
                inc(charIndexResult);
                if charIndexResult >= lineLength then break;
                c := lineChars[charIndexResult];
            until false;
        end
        else
            { Zero followed by something else: handle as a decimal literal. }
            goto label1;
        end;
        { Optional type suffix of a prefixed literal; c is the character that
          stopped the digit loop. }
        if charIndexResult < lineLength then begin
            case c of
            'S', 's': begin
                if ulongCmp(unumber, ILIMIT) >= 0 then begin
                    raise AVTCompilerException.create('Error in short literal', source.fileName, lineIndex, charIndex);
                end;
                unumber := int(unumber);
                if (unumber < SHORT_MIN_VALUE) or (unumber > SHORT_MAX_VALUE) then begin
                    raise AVTCompilerException.create('Error in short literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
            end;
            'I', 'i': begin
                if ulongCmp(unumber, ILIMIT) >= 0 then begin
                    raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
            end;
            'L', 'l': begin
                if hasover then begin
                    raise AVTCompilerException.create('Error in long literal', source.fileName, lineIndex, charIndex);
                end;
                source.addLexemeLong(lineIndex, charIndex, LITR_LONG, unumber);
            end
            else
                goto label0;
            end;
            result := charIndexResult + 1;
            exit;
        end;
        label0:
        { Prefixed literal without suffix: must fit 32 bits; the bit pattern is
          then read as a signed int. }
        if ulongCmp(unumber, ILIMIT) >= 0 then begin
            raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
        end;
        unumber := int(unumber);
        if (unumber >= BYTE_MIN_VALUE) and (unumber <= BYTE_MAX_VALUE) then begin
            source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, byte(unumber));
        end else
        if (unumber >= SHORT_MIN_VALUE) and (unumber <= SHORT_MAX_VALUE) then begin
            source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
        end else begin
            source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
        end;
        result := charIndexResult;
        exit;
    end;
    label1:
    { leading period }
    if c = '.' then begin
        if charIndexResult >= lineLength then begin
            source.addLexeme(lineIndex, charIndex, CHAR_PERIOD);
            result := charIndexResult;
            exit;
        end;
        c := lineChars[charIndexResult];
        if (c < '0') or (c > '9') then begin
            source.addLexeme(lineIndex, charIndex, CHAR_PERIOD);
            result := charIndexResult;
            exit;
        end;
        inc(charIndexResult);
        hasfrac := true;
        lenfrac := 1;
    end else begin
        hasfrac := false;
        lenfrac := 0;
    end;
    { mantissa: digits are accumulated into unumber as an unsigned 64-bit
      value; lenfrac counts how many accumulated digits belong to the
      fraction and goes negative for integer digits that had to be dropped }
    digit := int(c) - int('0');
    rounded := false;
    unumber := digit;
    while charIndexResult < lineLength do begin
        c := lineChars[charIndexResult];
        if (c >= '0') and (c <= '9') then begin
            digit := int(c) - int('0');
            if (unumber < DLIMIT) and (unumber >= 0) or (unumber = DLIMIT) and (digit < 6) then begin
                unumber := (unumber * 10) + digit;
                if hasfrac then inc(lenfrac);
            end else
            if hasfrac then begin
                { Fraction digit that no longer fits: round once, then ignore. }
                if not rounded and (digit >= 5) and (unumber <> -1) then inc(unumber);
                rounded := true;
            end else begin
                { Integer digit that no longer fits: scale by ten instead. }
                hasover := true;
                dec(lenfrac);
            end;
        end else
        if c = '.' then begin
            if hasfrac then break;
            hasfrac := true;
        end else begin
            break;
        end;
        inc(charIndexResult);
    end;
    { exponent }
    order := 0;
    hasorder := false;
    if charIndexResult < lineLength then begin
        c := lineChars[charIndexResult];
        if (c = 'E') or (c = 'e') then begin
            negorder := false;
            charIndexOrder := charIndexResult + 1;
            if charIndexOrder < lineLength then begin
                case lineChars[charIndexOrder] of
                '-': begin
                    negorder := true;
                    inc(charIndexOrder);
                end;
                '+': begin
                    inc(charIndexOrder);
                end;
                end;
            end;
            if charIndexOrder < lineLength then begin
                c := lineChars[charIndexOrder];
                if (c >= '0') and (c <= '9') then begin
                    { Round only when integer digits were dropped (hasover).
                      Without that guard the last digit, which is already part
                      of unumber, bumped the mantissa: 5e2 was read as 6e2.
                      NOTE(review): this still rounds on the last parsed digit
                      rather than on the first dropped one. }
                    if hasover and not rounded and (digit >= 5) and (unumber <> -1) then inc(unumber);
                    repeat
                        if (c >= '0') and (c <= '9') then begin
                            digit := int(c) - int('0');
                            order := (order * 10) + digit;
                        end else begin
                            break;
                        end;
                        if order > 9999 then begin
                            raise AVTCompilerException.create('Error in numeric literal', source.fileName, lineIndex, charIndex);
                        end;
                        inc(charIndexOrder);
                        if charIndexOrder >= lineLength then break;
                        c := lineChars[charIndexOrder];
                    until false;
                    hasorder := true;
                    if negorder then order := -order;
                    charIndexResult := charIndexOrder;
                end;
            end;
        end;
    end;
    { value computation: strip trailing zeros of the mantissa, fold the
      fraction length into the exponent and scale by powers of ten in steps
      that pow10 is called with }
    if (hasfrac or hasorder) and (unumber <> 0) then begin
        while ulongRem(unumber, 10) = 0 do begin
            unumber := ulongDiv(unumber, 10);
            dec(lenfrac);
        end;
    end;
    dec(order, lenfrac);
    rnumber := ulongToReal(unumber);
    while order >= 4932 do begin
        rnumber := RealValueRepresenter.pow10(rnumber, 4932);
        dec(order, 4932);
    end;
    while order <= -4931 do begin
        rnumber := RealValueRepresenter.pow10(rnumber, -4931);
        dec(order, -4931);
    end;
    rnumber := RealValueRepresenter.pow10(rnumber, order);
    { optional type suffix of a decimal literal }
    if charIndexResult < lineLength then begin
        case lineChars[charIndexResult] of
        'S', 's': begin
            { Strictly greater: SHORT_MAX_VALUE itself is a valid short. }
            if hasfrac or hasorder or hasover or (ulongCmp(unumber, SHORT_MAX_VALUE) > 0) then begin
                raise AVTCompilerException.create('Error in short literal', source.fileName, lineIndex, charIndex);
            end;
            source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
        end;
        'I', 'i': begin
            { Strictly greater: INT_MAX_VALUE itself is a valid int. }
            if hasfrac or hasorder or hasover or (ulongCmp(unumber, INT_MAX_VALUE) > 0) then begin
                raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
            end;
            source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
        end;
        'L', 'l': begin
            if hasfrac or hasorder or hasover or (unumber < 0) then begin
                raise AVTCompilerException.create('Error in long literal', source.fileName, lineIndex, charIndex);
            end;
            source.addLexemeLong(lineIndex, charIndex, LITR_LONG, unumber);
        end;
        'F', 'f': begin
            fnumber := realToFloat(rnumber);
            if floatIsInfinite(fnumber) then begin
                raise AVTCompilerException.create('Error in float literal', source.fileName, lineIndex, charIndex);
            end;
            source.addLexemeFloat(lineIndex, charIndex, LITR_FLOAT, fnumber);
        end;
        'D', 'd': begin
            dnumber := realToDouble(rnumber);
            if doubleIsInfinite(dnumber) then begin
                raise AVTCompilerException.create('Error in double literal', source.fileName, lineIndex, charIndex);
            end;
            source.addLexemeDouble(lineIndex, charIndex, LITR_DOUBLE, dnumber);
        end;
        'R', 'r': begin
            if realIsInfinite(rnumber) then begin
                raise AVTCompilerException.create('Error in real literal', source.fileName, lineIndex, charIndex);
            end;
            source.addLexemeReal(lineIndex, charIndex, LITR_REAL, rnumber);
        end
        else
            goto label2;
        end;
        result := charIndexResult + 1;
        exit;
    end;
    label2:
    { decimal literal without suffix }
    if hasfrac or hasorder then begin
        if realIsInfinite(rnumber) then begin
            raise AVTCompilerException.create('Error in real literal', source.fileName, lineIndex, charIndex);
        end;
        source.addLexemeReal(lineIndex, charIndex, LITR_REAL, rnumber);
    end else begin
        if ulongCmp(unumber, SLIMIT) >= 0 then begin
            raise AVTCompilerException.create('Error in int literal', source.fileName, lineIndex, charIndex);
        end;
        if unumber <= BYTE_MAX_VALUE then begin
            source.addLexemeByte(lineIndex, charIndex, LITR_BYTE, byte(unumber));
        end else
        if unumber <= SHORT_MAX_VALUE then begin
            source.addLexemeShort(lineIndex, charIndex, LITR_SHORT, short(unumber));
        end else begin
            source.addLexemeInt(lineIndex, charIndex, LITR_INT, int(unumber));
        end;
    end;
    result := charIndexResult;
end;
{ Parses a string literal enclosed in double quotes on a single line.
  Returns charIndex unchanged when c is not a double quote. Otherwise stores a
  LITR_STRING lexeme with the decoded characters and returns the index after
  the closing quote.
  Raises AVTCompilerException when the line ends before the closing quote,
  when parseStringChar rejects a character, or when the literal would need
  more than SHORT_MAX_VALUE characters. }
function AVTLexer.parseString(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
var
    closed: boolean;
    code: int;
    cursor: int;
    used: int;
    capacity: int;
    decoded: long;
    buffer: wchar_Array1d;
    grown: wchar_Array1d;
begin
    if c <> '"' then begin
        result := charIndex;
        exit;
    end;
    cursor := charIndex + 1;
    if cursor >= lineLength then begin
        raise AVTCompilerException.create('Error in String literal', source.fileName, lineIndex, charIndex);
    end;
    used := 0;
    capacity := $1f;
    buffer := wchar_Array1d_create(capacity);
    closed := false;
    repeat
        c := lineChars[cursor];
        if c = '"' then begin
            closed := true;
            break;
        end;
        { Upper half of decoded: character code; lower half: next index. }
        decoded := parseStringChar(lineLength, cursor, lineChars, c);
        cursor := int(decoded);
        code := int(decoded shr 32);
        if code < 0 then begin
            raise AVTCompilerException.create('Error in String literal', source.fileName, lineIndex, charIndex);
        end;
        if used = capacity then begin
            { Buffer full: grow from 2^k - 1 to 2^(k+1) - 1 slots. }
            if capacity = SHORT_MAX_VALUE then begin
                raise AVTCompilerException.create('String literal is too long', source.fileName, lineIndex, charIndex);
            end;
            capacity := (capacity shl 1) + 1;
            grown := wchar_Array1d_create(capacity);
            arraycopyPrimitives(buffer, 0, grown, 0, used);
            buffer := grown;
        end;
        buffer[used] := wchar(code);
        inc(used);
    until cursor >= lineLength;
    if not closed then begin
        { Reached the end of the line without a closing quote. }
        raise AVTCompilerException.create('Error in String literal', source.fileName, lineIndex, charIndex);
    end;
    source.addLexemeUnicodeString(lineIndex, charIndex, LITR_STRING, UnicodeString_create(buffer, 0, used));
    result := cursor + 1;
end;
{ Parses an identifier or keyword starting at charIndex.
  Returns charIndex unchanged when c is not an ASCII letter or underscore.
  Otherwise the word (letters, digits, underscores) is looked up in the
  lexemes table: 'false' and 'true' become LITR_BOOLEAN lexemes, any other
  table hit is stored under its table index, and a word that is not in the
  table is stored as LITR_NAME. Returns the index after the word. }
function AVTLexer.parseName(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
var
    slot: int;
    endIndex: int;
    word: AnsiString;
begin
    if ((c < 'A') or (c > 'Z')) and ((c < 'a') or (c > 'z')) and (c <> '_') then begin
        result := charIndex;
        exit;
    end;
    { Find the end of the word. }
    endIndex := charIndex + 1;
    while endIndex < lineLength do begin
        c := lineChars[endIndex];
        if ((c < '0') or (c > '9')) and ((c < 'A') or (c > 'Z')) and ((c < 'a') or (c > 'z')) and (c <> '_') then break;
        inc(endIndex);
    end;
    word := stringToUTF8(stringCopy(source.line[lineIndex], charIndex + 1, endIndex + 1));
    result := endIndex;
    for slot := 0 to length(lexemes) - 1 do begin
        if word = lexemes[slot] then begin
            case slot of
            AVT_FALSE:
                source.addLexemeBoolean(lineIndex, charIndex, LITR_BOOLEAN, false);
            AVT_TRUE:
                source.addLexemeBoolean(lineIndex, charIndex, LITR_BOOLEAN, true);
            else
                source.addLexeme(lineIndex, charIndex, slot);
            end;
            exit;
        end;
    end;
    { Not a table entry: an ordinary name. }
    source.addLexemeAnsiString(lineIndex, charIndex, LITR_NAME, word);
end;
{ Splits every line of source into lexemes.
  Characters with a code up to #$0020 are skipped. At every other position
  the parsers are tried in this order: comment, name, string, numeric,
  operator, character; the first one that advances the position wins.
  A comment may end on a later line, in which case scanning continues there.
  After the last line an AVT_END lexeme is stored at (linesCount, 0).
  Raises AVTCompilerException with 'Illegal character' when no parser
  advances. }
procedure AVTLexer.split(source: AVTSource; documentationEnabled: boolean);
var
    current: wchar;
    total: int;
    lineLen: int;
    row: int;
    col: int;
    before: int;
    afterComment: long;
    lineText: UnicodeString;
    chars: PWideChar;
begin
    self.documentationEnabled := documentationEnabled;
    total := source.length;
    row := 0;
    while row < total do begin
        lineText := source.line[row];
        chars := PWideChar(lineText);
        lineLen := length(lineText);
        col := 0;
        while col < lineLen do begin
            current := chars[col];
            if current <= #$0020 then begin
                inc(col);
                continue;
            end;
            { A comment moves the position forward, possibly to another line. }
            afterComment := parseComment(source, total, lineLen, row, col, chars, current);
            if afterComment > coords(row, col) then begin
                row := int(afterComment shr 32);
                col := int(afterComment);
                lineText := source.line[row];
                chars := PWideChar(lineText);
                lineLen := length(lineText);
                continue;
            end;
            { Try each parser only while the previous one did not advance. }
            before := col;
            col := parseName(source, lineLen, row, col, chars, current);
            if col <= before then begin
                before := col;
                col := parseString(source, lineLen, row, col, chars, current);
            end;
            if col <= before then begin
                before := col;
                col := parseNumeric(source, lineLen, row, col, chars, current);
            end;
            if col <= before then begin
                before := col;
                col := parseOperator(source, lineLen, row, col, chars, current);
            end;
            if col <= before then begin
                before := col;
                col := parseCharacter(source, lineLen, row, col, chars, current);
            end;
            if col <= before then begin
                raise AVTCompilerException.create('Illegal character', source.fileName, row, col);
            end;
        end;
        inc(row);
    end;
    source.addLexeme(total, 0, AVT_END);
end;
{%endregion}
{%region DOCLexer }
{ Builds the static table that maps DOC_* lexeme codes to their text.
  The table has 64 slots. parseCharacter and parseText search it linearly. }
class procedure DOCLexer.clinit();
begin
lexemes := AnsiString_Array1d_create(64);
{ Tags; they start with '@', which is not a symbol character, so they are
  matched by parseText. }
lexemes[DOC_ALINK] := '@link';
lexemes[DOC_APARAM] := '@param';
lexemes[DOC_ARETURN] := '@return';
lexemes[DOC_ATHROWS] := '@throws';
lexemes[DOC_ASINCE] := '@since';
lexemes[DOC_ASEE] := '@see';
lexemes[DOC_VECT_PACK] := '@@@@';
{ Multi-character symbol runs matched by parseCharacter. }
lexemes[DOC_VECT_UNPCKL] := '####';
lexemes[DOC_VECT_UNPCKU] := '^^^^';
lexemes[DOC_VECT_MUL] := '****';
lexemes[DOC_VECT_DIV] := '////';
lexemes[DOC_VECT_ADD] := '++++';
lexemes[DOC_VECT_SUB] := '----';
lexemes[DOC_VECT_SAR] := '>>>>';
lexemes[DOC_VECT_SAL] := '<<<<';
lexemes[DOC_VECT_SHR] := '>>>>>';
lexemes[DOC_VECT_G] := '|>>|';
lexemes[DOC_VECT_GE] := '|>=|';
lexemes[DOC_VECT_L] := '|<<|';
lexemes[DOC_VECT_LE] := '|<=|';
lexemes[DOC_VECT_E] := '|==|';
lexemes[DOC_VECT_NE] := '|!=|';
lexemes[DOC_VECT_MULS] := '|**|';
lexemes[DOC_VECT_ADDS] := '|++|';
lexemes[DOC_VECT_SUBS] := '|--|';
lexemes[DOC_VECT_MULU] := '#**#';
lexemes[DOC_VECT_ADDU] := '#++#';
lexemes[DOC_VECT_SUBU] := '#--#';
lexemes[DOC_SCAL_DIVU] := '//';
lexemes[DOC_SCAL_REMU] := '%%';
lexemes[DOC_SCAL_SAR] := '>>';
lexemes[DOC_SCAL_SAL] := '<<';
lexemes[DOC_SCAL_SHR] := '>>>';
lexemes[DOC_SCAL_GE] := '>=';
lexemes[DOC_SCAL_LE] := '<=';
{ Single symbols. Every character of the symbol set used by parseCharacter
  has an entry here, which is what ends its shortening loop. }
lexemes[DOC_PARENTH_OPENED] := '(';
lexemes[DOC_PARENTH_CLOSED] := ')';
lexemes[DOC_BRACKET_OPENED] := '[';
lexemes[DOC_BRACKET_CLOSED] := ']';
lexemes[DOC_CURLY_OPENED] := '{';
lexemes[DOC_CURLY_CLOSED] := '}';
lexemes[DOC_TAG_OPENED] := '<';
lexemes[DOC_TAG_CLOSED] := '>';
lexemes[DOC_EXCLAMATION_MARK] := '!';
lexemes[DOC_VERTICAL_LINE] := '|';
lexemes[DOC_AMPERSAND] := '&';
lexemes[DOC_EQUAL] := '=';
lexemes[DOC_TILDE] := '~';
lexemes[DOC_POUND_SIGN] := '#';
lexemes[DOC_CIRCUMFLEX_ACCENT] := '^';
lexemes[DOC_ASTERISK] := '*';
lexemes[DOC_SOLIDUS] := '/';
lexemes[DOC_PERCENT] := '%';
lexemes[DOC_PLUS] := '+';
lexemes[DOC_MINUS] := '-';
lexemes[DOC_COMMA] := ',';
lexemes[DOC_PERIOD] := '.';
end;
{ Parses a run of symbol characters in documentation text.
  Returns charIndex unchanged when c is not a symbol character. Otherwise the
  whole run is taken and shortened from the right, one character at a time,
  until it equals an entry of the lexemes table; the entry's index is stored
  as the lexeme code together with the matched text, and the index after the
  matched text is returned. }
function DOCLexer.parseCharacter(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
var
    slot: int;
    endIndex: int;
    alphabetSize: int;
    candidate: AnsiString;
    alphabet: wchar_Array1d;
begin
    alphabet := stringToWCharArray('.,=()[]{}<>!~&^|+-*/%#');
    alphabetSize := length(alphabet);
    if arrayfindeqfPrimitive(alphabet, 0, alphabetSize, c) < 0 then begin
        result := charIndex;
        exit;
    end;
    { Find the end of the run of symbol characters. }
    endIndex := charIndex + 1;
    while endIndex < lineLength do begin
        c := lineChars[endIndex];
        if arrayfindeqfPrimitive(alphabet, 0, alphabetSize, c) < 0 then break;
        inc(endIndex);
    end;
    candidate := stringToUTF8(stringCopy(source.line[lineIndex], charIndex + 1, endIndex + 1));
    { Longest match first: drop the last character after every failed lookup. }
    repeat
        for slot := 0 to length(lexemes) - 1 do begin
            if candidate = lexemes[slot] then begin
                source.addLexemeUnicodeString(lineIndex, charIndex, slot, stringToUTF16(candidate));
                result := endIndex;
                exit;
            end;
        end;
        dec(endIndex);
        candidate := stringCopy(candidate, 1, endIndex - charIndex + 1);
    until false;
end;
{ Parses one word of documentation text starting at charIndex.
  The word runs up to the next character that is a space or control
  character (code up to ' '), a symbol character, or the end of the line.
  It is stored as:
  - the matching table code when the word equals an entry of lexemes;
  - DOC_NAME when it consists of ASCII letters, digits and underscores and
    does not start with a digit;
  - DOC_TEXT otherwise.
  Returns the index after the word, which is always greater than charIndex.

  The scan is bounded by lineLength. The previous revision ignored that
  parameter and stopped only on the NUL terminator of the line buffer; the
  result is the same for NUL-terminated lines. }
function DOCLexer.parseText(source: AVTSource; lineLength, lineIndex, charIndex: int; lineChars: PWideChar; c: wchar): int;
var
    i: int;
    charIndexResult: int;
    symbolsCount: int;
    parsedNameUTF8: AnsiString;
    parsedNameUTF16: UnicodeString;
    symbols: wchar_Array1d;
begin
    symbols := stringToWCharArray('.,=()[]{}<>!~&^|+-*/%#');
    symbolsCount := length(symbols);
    { The character at charIndex always belongs to the word. }
    charIndexResult := charIndex + 1;
    while charIndexResult < lineLength do begin
        c := lineChars[charIndexResult];
        if (c <= ' ') or (arrayfindeqfPrimitive(symbols, 0, symbolsCount, c) >= 0) then break;
        inc(charIndexResult);
    end;
    parsedNameUTF16 := stringCopy(source.line[lineIndex], charIndex + 1, charIndexResult + 1);
    parsedNameUTF8 := stringToUTF8(parsedNameUTF16);
    result := charIndexResult;
    { Known tag or symbol word. }
    for i := 0 to length(lexemes) - 1 do begin
        if parsedNameUTF8 = lexemes[i] then begin
            source.addLexemeUnicodeString(lineIndex, charIndex, i, parsedNameUTF16);
            exit;
        end;
    end;
    { Any character that cannot appear in a name makes the word plain text;
      a digit is accepted everywhere except in the first position. }
    for i := 1 to length(parsedNameUTF16) do begin
        c := parsedNameUTF16[i];
        if ((c < 'A') or (c > 'Z')) and ((c < 'a') or (c > 'z')) and (c <> '_') and ((i = 1) or (i > 1) and ((c < '0') or (c > '9'))) then begin
            source.addLexemeUnicodeString(lineIndex, charIndex, DOC_TEXT, parsedNameUTF16);
            exit;
        end;
    end;
    source.addLexemeUnicodeString(lineIndex, charIndex, DOC_NAME, parsedNameUTF16);
end;
{ Splits documentation text into lexemes.
  An empty line produces a DOC_END_OF_PARAGRAPH lexeme. Between two
  consecutive non-empty lines a DOC_END_OF_LINE lexeme is produced when
  lineEndingEnabled is set; it is positioned on the previous line at the
  index where scanning of that line stopped. Within a line, characters with a
  code up to #$0020 are skipped and every other position is handed to
  parseCharacter and, if that does not advance, to parseText.
  After the last line a DOC_END lexeme is stored at (linesCount, 0). }
procedure DOCLexer.split(source: AVTSource; lineEndingEnabled: boolean);
var
    paragraphStart: boolean;
    current: wchar;
    total: int;
    lineLen: int;
    row: int;
    col: int;
    before: int;
    lineText: UnicodeString;
    chars: PWideChar;
begin
    { True at the start of the text and after every empty line. }
    paragraphStart := true;
    total := source.length;
    col := 0;
    for row := 0 to total - 1 do begin
        lineText := source.line[row];
        lineLen := length(lineText);
        if lineLen <= 0 then begin
            source.addLexeme(row, 0, DOC_END_OF_PARAGRAPH);
            paragraphStart := true;
        end else begin
            if paragraphStart then begin
                paragraphStart := false;
            end else
            if lineEndingEnabled then begin
                { col still holds the final index of the previous line. }
                source.addLexeme(row - 1, col, DOC_END_OF_LINE);
            end;
            chars := PWideChar(lineText);
            col := 0;
            while col < lineLen do begin
                current := chars[col];
                if current <= #$0020 then begin
                    inc(col);
                end else begin
                    before := col;
                    col := parseCharacter(source, lineLen, row, col, chars, current);
                    if col <= before then begin
                        col := parseText(source, lineLen, row, col, chars, current);
                    end;
                end;
            end;
        end;
    end;
    source.addLexeme(total, 0, DOC_END);
end;
{%endregion}
initialization
{ Fill the static lexeme tables of both lexers during unit initialization,
  before any lexer method can be called. }
AVTLexer.clinit();
DOCLexer.clinit();
end.