colorful rat Ratfactor.com > Dave's Repos

meow5

A stack-based pure inlining concatenative programming language written in NASM assembly
git clone http://ratfactor.com/repos/meow5/meow5.git

meow5/meow5.asm

Download raw file: meow5.asm

; +--------------------------------------------------------+
; |  >o.o<  Meow5: A very conCATenative language           |
; +--------------------------------------------------------+
;
; Syntax: NASM (Intel operand order).
; Target: 32-bit x86 Linux, syscalls via int 0x80.

; Meow5 Constants
%assign INPUT_SIZE 1024      ; size of input buffer
%assign COMPILE    00000001b ; flag: can be compiled
%assign IMMEDIATE  00000010b ; flag: can be called
%assign RUNCOMP    00000100b ; flag: runs in comp mode

; Linux Constants
%assign STDIN     0
%assign STDOUT    1
%assign STDERR    2
%assign SYS_EXIT  1
%assign SYS_READ  3
%assign SYS_WRITE 4
%assign SYS_OPEN  5
%assign SYS_CLOSE 6

; TODO add SYS_CREATE 8

; ----------------------------------------------------------
; BSS - reserved space
; ----------------------------------------------------------
section .bss
mode:         resb 4    ; IMMEDIATE or COMPILE
var_radix:    resb 4    ; decimal=10, hex=16, etc.
input_file:   resb 4    ; input file desc. (STDIN, etc.)
last:         resb 4    ; Pointer to last def tail
here:         resb 4    ; Will point to compile_area
free:         resb 4    ; Will point to data_area
stack_start:  resb 4    ; Will point to first stack addr
token_buffer: resb 32   ; For get_token
name_buffer:  resb 32   ; For 'def' (copy of token)
compile_area: resb 4096 ; We inline ("compile") here!
data_area:    resb 1024 ; All variables go here!

input_buffer:     resb INPUT_SIZE ; input from user (or file?)
input_buffer_end: resb 4  ; current last addr of input
input_buffer_pos: resb 4  ; current position in input
input_eof:        resb 4  ; flag 1=EOF reached

; Return address for immediate mode execution only
return_addr:  resb 4    ; To "push/pop" return stack

; ----------------------------------------------------------
; MACROS!
; ----------------------------------------------------------

; PRINTSTR "Foo bar."
; Writes a literal string to STDOUT. All general registers
; are preserved (pusha/popa).
%macro PRINTSTR 1
    ; Param is the string to print - put it in data section
    ; with macro-local label %%mystr. No need for null
    ; termination.
    %strlen mystr_len %1    ; and get length for later
    section .data
        %%mystr: db %1
    ; now the executable part
    section .text
        pusha               ; preserve all registers
        ; Print the string
        mov ebx, STDOUT
        mov edx, mystr_len
        mov ecx, %%mystr
        mov eax, SYS_WRITE
        int 0x80
        popa                ; restore all registers
%endmacro ; PRINTSTR

; Macro to compile in a number printing diagnostic message.
;   DEBUG "<string>", <valid second param of MOV>
; Prints the string, then the value as 8 hex digits and a
; newline. eax, ebx, ecx and edx are saved and restored.
%macro DEBUG 2
    PRINTSTR %1
    ; Make this safe to plop absolutely anywhere
    ; by pushing the 4 registers used.
    push eax ;A
    push ebx ;B
    push ecx ;C
    push edx ;D
    ; Second param is the source expression for this
    ; MOV instruction - we'll print this value as a
    ; 32bit (4 byte, dword, 8 digit) hex num.
    ; The MOV happens after the four pushes above, so an
    ; esp-relative operand sees esp lowered by 16.
    mov eax, %2
    ; Now build the ASCII string of the hex value in stack
    ; scratch space. 9 bytes are needed (8 digits +
    ; newline); the loop index runs 8..1, so byte 0 is
    ; "wasted". We reserve 12 bytes so esp stays dword
    ; aligned, and we reserve it properly (sub esp) rather
    ; than writing below esp, where a signal delivered
    ; mid-macro could overwrite the string.
    sub esp, 12             ; reserve scratch space
    mov ebx, esp            ; ebx = scratch base
    mov ecx, 8              ; counter - 8 characters
%%digit_loop:
    mov edx, eax
    and edx, 0x0f           ; just keep lowest 4 bits
    cmp edx, 9              ; bigger than 9?
    jg  %%af                ; yes, print 'a'-'f'
    add edx, '0'            ; no, turn it into ascii number
    jmp %%continue
%%af:
    add edx, 'a'-10         ; because 10 is 'a'...
%%continue:
    mov byte [ebx + ecx], dl ; store character
    ror eax, 4              ; rotate 4 bits
    dec ecx                 ; update counter
    jnz %%digit_loop        ; loop
    ; Print hex number string
    mov byte [ebx + 9], 0x0A ; add newline
    lea ecx, [ebx + 1]      ; because ecx went 8...1
    mov ebx, STDOUT
    mov edx, 9              ; 8 hex digits + newline
    mov eax, SYS_WRITE
    int 0x80
    add esp, 12             ; release scratch space
    ; Restore all registers. (Reverse order)
    pop edx ;D
    pop ecx ;C
    pop ebx ;B
    pop eax ;A
%endmacro ; DEBUG print

%macro CALLDEF 1 ; takes label/addr of def to call
    ; For faking call/ret to def as if 'twas a function
    ; within assembly while creating the meow5 executable.
    ; The "return address" is stored in return_addr (not on
    ; the stack); the def's RETURN_CODE jumps back through it.
    ; Note that '%%return_to' is a macro-local label.
    mov dword [return_addr], %%return_to ; CALLDEF
    jmp %1                               ; CALLDEF
%%return_to:                             ; CALLDEF
%endmacro

%macro STARTDEF 1 ; takes name of def to make
    ; Start a definition
    %1:
%endmacro

; Jump back to whatever address CALLDEF left in return_addr.
; Clobbers eax.
%macro RETURN_CODE 0
    mov eax, [return_addr] ; RETURN
    jmp eax                ; RETURN
%endmacro

%macro ENDDEF 3
    ; End a definition with a tail, etc.
    ; params:
    ;   %1 - name for label (must be NASM-safe)
    ;   %2 - string name for find
    ;   %3 - 32 bits of flags
    ; Here ends the machine code for the def:
    end_%1:
    ; If we've called this def in immediate mode (CALLDEF),
    ; this returns us to the caller. It sits after end_%1,
    ; so it is outside the recorded code length and won't
    ; be inlined - it won't get in the way of the flow of
    ; "compiled" code.
    RETURN_CODE
    tail_%1:
        dd LAST_DEF_TAIL    ; 32b address, linked list
        %define LAST_DEF_TAIL tail_%1
        dd (end_%1 - %1)    ; 32b length of def machine code
        dd (tail_%1 - %1)   ; 32b distance from tail to start
        dd %3               ; 32b flags for def
        db %2, 0            ; xxb null-terminated name string
%endmacro

; Memory offsets for each item in tail:
%define T_CODE_LEN    4
%define T_CODE_OFFSET 8
%define T_FLAGS       12
%define T_NAME        16

; ----------------------------------------------------------
; TEXT - executable program - starting with defs
; ----------------------------------------------------------
section .text

; Keep track of def addresses for linked list.
; We start at 0 (null pointer) to indicate end of list.
%define LAST_DEF_TAIL 0

; (status) exit
; Ends the process; the exit status is taken from the stack.
%macro EXIT_CODE 0
    mov eax, SYS_EXIT
    pop ebx                 ; param1: exit code
    int 0x80
%endmacro
STARTDEF exit
    EXIT_CODE
ENDDEF exit, "exit", (COMPILE | IMMEDIATE)

; Gets length of null-terminated string.
; Pops the string address and pushes the number of bytes
; before the terminator. Clobbers eax, ecx.
%macro STRLEN_CODE 0
    pop eax                 ; eax = string address
    xor ecx, ecx            ; ecx = length so far
    jmp %%probe
%%advance:
    inc ecx                 ; not the terminator, count it
%%probe:
    cmp byte [eax + ecx], 0 ; at the null terminator?
    jne %%advance           ; no, keep scanning
    push ecx                ; return len
%endmacro
STARTDEF strlen ; (straddr) strlen (straddr len)
    STRLEN_CODE
ENDDEF strlen, "strlen", (IMMEDIATE | COMPILE)

; Prints a string by address and length.
; Stack on entry: address below, length on top. Both are
; consumed and the bytes are written to STDOUT.
%macro LEN_PRINT_CODE 0
    pop edx                 ; length (top of stack)
    pop ecx                 ; string address
    mov eax, SYS_WRITE      ; syscall number
    mov ebx, STDOUT         ; destination file descriptor
    int 0x80                ; interrupt to linux!
%endmacro

; Prints a null-terminated string by address on stack.
%macro PRINT_CODE 0
    push dword [esp]        ; duplicate addr: one copy for
                            ; strlen, one for the write
    STRLEN_CODE             ; (after: straddr, len)
    LEN_PRINT_CODE
%endmacro
STARTDEF print ; (straddr) print ()
    PRINT_CODE
ENDDEF print, "print", (IMMEDIATE | COMPILE)

; (tail_addr) inline ()
; Copies a def's machine code to 'here' and advances 'here'
; past the copy. The code start is found by subtracting the
; tail's stored offset from the tail address.
; Clobbers eax, ebx, ecx, esi, edi.
; NOTE(review): rep movsb copies upward only while the
; direction flag is clear - presumably a 'cld' runs at
; startup; that is not visible from here.
%macro INLINE_CODE 0
    pop esi                        ; param1: tail of def to inline
    mov ebx, [esi + T_CODE_OFFSET] ; distance from tail back to code
    mov eax, [esi + T_CODE_LEN]    ; number of bytes to copy
    mov edi, [here]                ; destination
    sub esi, ebx                   ; esi = start of def's code
    mov ecx, eax                   ; rep count
    rep movsb                      ; copy ecx bytes [esi] -> [edi]
    mov [here], edi                ; movsb advanced edi for us
%endmacro
STARTDEF inline
    INLINE_CODE
ENDDEF inline, "inline", (IMMEDIATE)

; Given a tail addr, leaves def's flags AND the tail addr.
; Clobbers eax, ebp.
%macro GET_FLAGS_CODE 0
    mov ebp, [esp]           ; peek at tail addr (not popped)
    mov eax, [ebp + T_FLAGS] ; fetch the flags dword
    push eax                 ; flags now sit above the tail addr
%endmacro
STARTDEF get_flags ; (tail_addr) get_flags (tailaddr flags)
    GET_FLAGS_CODE
ENDDEF get_flags, "get_flags", (IMMEDIATE | COMPILE)

; Consumes def flags, leaves truthy/falsy if RUNCOMP
; flag existed. (Non-zero is true!)
%macro IS_RUNCOMP_CODE 0
    pop eax                 ; param: flags
    and eax, RUNCOMP        ; AND mask to leave truthy/falsy
    push eax
%endmacro
STARTDEF is_runcomp ; (flags) is_runcomp (true/false)
    IS_RUNCOMP_CODE
ENDDEF is_runcomp, "is_runcomp", (IMMEDIATE | COMPILE)

; (name_straddr) find (tail_addr | 0)
; Walks the dictionary linked list backwards from 'last' and
; returns the tail of the first def whose flags include the
; current mode bit and whose name matches exactly. Pushes 0
; when nothing matches.
%macro FIND_CODE 0
    pop ebp ; param1 - start of def string to find
    ; in-def register use:
    ;   al  - to-find name character being checked
    ;   ebx - start of dict def's name string
    ;   ecx - byte offset counter (each string character)
    ;   edx - dictionary list pointer
    ; search backwards from last def
    mov edx, [last]
%%test_def:
    test edx, edx           ; a null pointer (0) is end of list
    je  %%not_found
    ; First, see if this def is for the mode we're
    ; currently in (IMMEDIATE vs COMPILE):
    mov eax, [mode]
    and eax, [edx + T_FLAGS] ; is the mode bit set in def tail?
    jz  %%try_next_def      ; bit wasn't set to match this mode
    ; Now we'll compare name to find vs this dictionary name
    ; (ebp vs ebx) byte-by-byte until a mismatch or one hits
    ; a 0 terminator first. Only having all correct letters
    ; AND hitting 0 at the same time is a match.
    lea ebx, [edx + T_NAME] ; set dict. def name pointer
    xor ecx, ecx            ; reset byte offset counter
%%compare_names_loop:
    mov al, [ebp + ecx]     ; get next to-find name byte
    cmp al, [ebx + ecx]     ; compare with next dict def byte
    jne %%try_next_def      ; found a mismatch!
    test al, al             ; both hit 0 terminator at same time
    je  %%found_it
    inc ecx
    jmp %%compare_names_loop
%%try_next_def:
    mov edx, [edx]          ; follow the tail! (linked list)
    jmp %%test_def
%%not_found:
    push 0                  ; return 0 to indicate not found
    jmp %%done
%%found_it:
    push edx                ; return pointer to tail of dictionary def
%%done:
%endmacro
STARTDEF find
    FIND_CODE
ENDDEF find, "find", (IMMEDIATE)

; Gets input from a file, filling input_buffer and resetting
; input_buffer_pos. Sets input_eof when read returns 0 (EOF)
; or a negative value (error). All registers are preserved.
%macro GET_INPUT_CODE 0
    pusha                   ; preserve all reg
    ; Fill input buffer via linux 'read' syscall
    mov ebx, [input_file]   ; file descriptor (default STDIN)
    mov ecx, input_buffer   ; buffer for read
    mov edx, INPUT_SIZE     ; max bytes to read
    mov eax, SYS_READ       ; linux syscall 'read'
    int 0x80                ; syscall interrupt!
    cmp eax, 0              ; 0=EOF, negative=error
    jg  %%got_input
    mov dword [input_eof], 1 ; set EOF reached
    xor eax, eax            ; treat an error as "0 bytes read" so
                            ; the end pointer below never lands
                            ; before the start of the buffer
%%got_input:
    lea ebx, [input_buffer + eax]       ; end of current input
    mov dword [input_buffer_end], ebx   ; save it
    mov dword [input_buffer_pos], input_buffer ; reset pos
    popa                    ; restore all reg
%endmacro
STARTDEF get_input
    GET_INPUT_CODE
ENDDEF get_input, "get_input", (IMMEDIATE | COMPILE)

; Comments in source! Eats input until newline (0x0a).
; The newline itself is left in the input.
STARTDEF comment
.reset:
    mov esi, [input_buffer_pos] ; set input index
    cmp dword [input_eof], 1
    je  .done               ; we hit eof at some point, we're done
    mov ebx, [input_buffer_end] ; store for comparison
.check:
    cmp esi, ebx            ; have we hit end pos?
    jb  .continue           ; no, keep going (addresses: unsigned)
    GET_INPUT_CODE          ; yes, get some
    jmp .reset              ; got more input, reset and continue
.continue:
    mov al, [esi]           ; input addr + position index
    cmp al, 0               ; end of input (null terminator)?
    je  .done               ; yes, return
    cmp al, 0x0a            ; newline?
    je  .done               ; yup, done with comment line
    inc esi                 ; 'eat' input
    jmp .check              ; loop
.done:
    mov [input_buffer_pos], esi ; save input index
ENDDEF comment, "#", (IMMEDIATE | COMPILE | RUNCOMP)

; Skips any characters space (0x20) and below from the input
; buffer, refilling the buffer as needed. Stops at a null
; byte, at EOF, or at the first byte above 0x20.
; NOTE: uses plain '.' local labels (.reset .check .continue
; .done), so it can appear only once per def.
%macro EAT_SPACES_CODE 0
.reset:
    mov esi, [input_buffer_pos] ; set input index
    cmp dword [input_eof], 1
    je  .done               ; we hit eof at some point, we're done
    mov ebx, [input_buffer_end] ; store for comparison
.check:
    cmp esi, ebx            ; have we hit end pos?
    jb  .continue           ; no, keep going (addresses: unsigned)
    GET_INPUT_CODE          ; yes, get some
    jmp .reset              ; got more input, reset and continue
.continue:
    mov al, [esi]           ; input addr + position index
    cmp al, 0               ; end of input (null terminator)?
    je  .done               ; yes, return
    cmp al, 0x20            ; anything space and below?
    ja  .done               ; nope, we're done. Unsigned compare:
                            ; bytes >= 0x80 (e.g. UTF-8) are NOT
                            ; whitespace.
    inc esi                 ; 'eat' space by advancing input
    jmp .check              ; loop
.done:
    mov [input_buffer_pos], esi ; save input index
%endmacro
STARTDEF eat_spaces
    EAT_SPACES_CODE
ENDDEF eat_spaces, "eat_spaces", (IMMEDIATE | COMPILE)

; token_buffer is reserved as 32 bytes in .bss: room for 31
; characters plus the null terminator.
%assign TOKEN_MAX_LEN 31

; Gets a space-separated "token" of input.
; Pushes the address of a null-terminated string
; (token_buffer), or 0 if the next character is already a
; separator. Characters beyond TOKEN_MAX_LEN are consumed
; but dropped, so an overlong token can no longer overflow
; token_buffer.
; NOTE(review): if EOF is hit while refilling the buffer,
; NOTHING is pushed (not even 0) and any partial token is
; discarded. Callers presumably check input_eof; confirm
; against the main loop before changing this.
; NOTE: uses plain '.' local labels, so it can appear only
; once per def.
%macro GET_TOKEN_CODE 0
    mov esi, [input_buffer_pos] ; input source index
    mov edi, token_buffer       ; destination index
.get_char:
    cmp esi, [input_buffer_end] ; need to get more input?
    jb  .skip_read          ; no, keep going
    GET_INPUT_CODE          ; yes, get some
    cmp dword [input_eof], 1
    je  .return             ; we hit eof, we're done
    mov esi, [input_buffer_pos] ; reset source index
.skip_read:
    mov al, [esi]           ; input addr + position index
    cmp al, 0x20            ; end of token (space or lower)?
    jbe .end_of_token       ; yes. Unsigned compare: bytes
                            ; >= 0x80 are part of the token.
    cmp edi, token_buffer + TOKEN_MAX_LEN ; buffer full?
    jae .get_token_advance  ; yes, drop this character
    mov byte [edi], al      ; write character
    inc edi                 ; next destination
.get_token_advance:
    inc esi                 ; next source
    jmp .get_char
.end_of_token:
    cmp edi, token_buffer   ; did we write anything?
    ja  .return_token       ; yes, push the token addr
    push dword 0            ; no, push 0 ("no token")
    jmp .return
.return_token:
    mov [input_buffer_pos], esi ; save position
    mov byte [edi], 0       ; null-terminate token str
    push dword token_buffer ; return str address
.return:
%endmacro
STARTDEF get_token
    GET_TOKEN_CODE
ENDDEF get_token, "get_token", (IMMEDIATE)

; Copy null-terminated string (terminator included).
; No length limit is applied to the destination.
%macro COPYSTR_CODE 0
    pop edi                 ; dest
    pop esi                 ; source
    xor ecx, ecx            ; index
%%copy_char:
    mov al, [esi + ecx]     ; from source
    mov [edi + ecx], al     ; to dest
    inc ecx
    test al, al             ; hit terminator?
    jnz %%copy_char
%endmacro
STARTDEF copystr ; (sourceaddr, destaddr) copystr ()
    COPYSTR_CODE
ENDDEF copystr, "copystr", (IMMEDIATE | COMPILE)

; Starts a new definition: switches to COMPILE mode, copies
; the next token into name_buffer, and leaves the current
; 'here' on the stack as the start address of the new def.
STARTDEF def
    mov dword [mode], COMPILE
    ; get name from next token and store it...
    EAT_SPACES_CODE
    GET_TOKEN_CODE          ; leaves source addr for copystr
    push name_buffer        ; dest
    COPYSTR_CODE            ; copy name into name_buffer
    ; copy the here pointer so we have the start address
    ; of the def
    mov eax, [here]
    push eax                ; leave 'here' on stack - the start of the def
ENDDEF def, "def", (IMMEDIATE)

; This exists just so we can inline it at the end of
; definitions with the semicolon (;) def
STARTDEF return
    RETURN_CODE
ENDDEF return, "return", (IMMEDIATE)

; Does what ENDDEF macro does, but into memory at runtime.
; Expects the start address of the new def's machine code on
; the stack and the def's name in name_buffer.
%macro SEMICOLON_CODE 0
    ; End of Machine Code
    ; 'here' currently points to the end of the new def's
    ; machine code. We need to save that.
    mov eax, [here]
    push eax                ; push end of machine code to stack
    ; Return Code
    ; Inline 'return' before the tail to allow our new
    ; def to be callable in immediate mode.
    ; (Future improvement: Don't include this if this is
    ; not an immediate-capable def!)
    push tail_return        ; push what to inline on stack
    INLINE_CODE             ; inline the 'return' machine code
    ; Start of Tail
    ; The above inline will have advanced 'here' again.
    mov eax, [here]         ; Current 'here' position
    mov ecx, eax            ; another copy, for tail start calc
    ; Link previous def 'last'
    mov ebx, [last]         ; get prev tail pointer 'last'
    mov [eax], ebx          ; link it here
    mov [last], eax         ; and store this tail as new 'last'
    add eax, 4              ; advance 'here' 4 bytes
    ; Store length of new def's machine code
    pop ebx                 ; end of machine code addr pushed above
    pop edx                 ; start of machine code addr (pushed
                            ; by 'def' or 'var')
    sub ebx, edx            ; calc length of machine code
    mov [eax], ebx
    add eax, 4              ; advance 'here' 4 bytes
    ; Store distance from start of tail to start of machine
    ; code.
    sub ecx, edx            ; tail - start of mc
    mov [eax], ecx
    add eax, 4              ; advance 'here' 4 bytes
    ; Store flags
    ; NOTE: Temporarily hard-coded value!
    mov dword [eax], (IMMEDIATE | COMPILE)
    add eax, 4              ; advance 'here' 4 bytes
    push eax                ; save a copy of 'here'
    ; Store name string (null-terminated)
    push name_buffer        ; source
    push eax                ; destination
    COPYSTR_CODE            ; copy name into tail
    ; Call strlen so we know how much string name we
    ; wrote to the tail:
    push name_buffer
    STRLEN_CODE
    pop ebx                 ; get string len pushed by STRLEN_CODE
    pop eax                 ; get saved 'here' position
    add eax, ebx            ; advance 'here' by that amt
    inc eax                 ; plus one for the null
    ; Store here in 'here'
    mov [here], eax
    ; return us to immediate mode now that we're done
    mov dword [mode], IMMEDIATE
%endmacro
STARTDEF semicolon
    SEMICOLON_CODE
ENDDEF semicolon, ";", (COMPILE | RUNCOMP)

; Takes an addr and number from stack, writes string
; representation (not null-terminated) of number to the
; address and returns number of bytes (characters) written.
; The number is treated as unsigned and converted using
; var_radix. Clobbers eax, ebx, ecx, edx, ebp.
%macro NUM2STR_CODE 0
    pop ebp                 ; address of string destination
    pop eax                 ; number
    xor ecx, ecx            ; counter of digit characters
    mov ebx, [var_radix]
%%divide_next:
    xor edx, edx            ; div actually divides edx:eax / ebx!
    div ebx                 ; eax / ebx = eax, remainder in edx
    cmp edx, 9              ; digit bigger than 9? (radix allows a-z)
    jg  %%toalpha           ; yes, convert to 'a'-'z'
    add edx, '0'            ; no, convert to '0'-'9'
    jmp %%store_char
%%toalpha:
    add edx, ('a'-10)       ; to convert 10 to 'a'
%%store_char:
    push edx                ; put on stack (pop later to reverse order)
    inc ecx
    test eax, eax           ; are we done converting?
    jne %%divide_next       ; no, loop
    mov eax, ecx            ; yes, store counter as return value
    xor ecx, ecx            ; now we'll count up
%%store_next:
    pop edx                 ; popping to reverse order
    mov [ebp + ecx], dl     ; store ONE byte per digit (a dword
                            ; store here would write 3 bytes past
                            ; the end of the string)
    inc ecx
    cmp ecx, eax            ; are we done storing?
    jl  %%store_next
    push eax                ; return num chars written
%endmacro
STARTDEF num2str ; (num addr -- bytes_written)
    NUM2STR_CODE
ENDDEF num2str, "num2str", (IMMEDIATE | COMPILE)

; To be called when we're in a quoted string (a start quote
; was found). Puts string's address on the stack.
;
; Immediate mode: the string is copied to 'free' memory and
; its address is pushed now.
; Compile mode: a 5-byte CALL rel32 is written at 'here',
; followed by the string. At runtime the CALL pushes its
; return address (= the string's address) and jumps over
; the string.
;
; Escapes: \\ -> '\', \$ -> '$', \n -> newline.
; NOTE(review): any other escaped character ends the string
; as if it were the closing quote, and the character after
; a backslash is read without checking for the end of the
; input buffer.
; NOTE(review): if EOF is hit before the closing quote in
; compile mode, the saved 'here' pushed below is left on the
; stack.
STARTDEF quote
    mov esi, [input_buffer_pos] ; string source
    inc esi                 ; move past initial quote '"'

    ; In immediate mode, we'll write the string to free
    ; memory. In compile mode, we'll change edi to point
    ; to "here" where the def is being compiled instead.
    mov edi, [free]

    cmp dword [mode], COMPILE ; compile mode?
    jne .copy_char          ; no, skip it

    ; Setup destination for "compiled" string
    mov edi, [here]
    push edi                ; save position for 'call' opcode
    add edi, 5              ; leave space for 'call' opcode

    ; copy string (and handle escapes) to wherever edi points!
.copy_char:
    cmp esi, [input_buffer_end] ; need to get more input?
    jb  .skip_read          ; no, keep going
    GET_INPUT_CODE          ; yes, get some
    cmp dword [input_eof], 1
    je  .quote_done         ; we hit eof, we're done
    mov esi, [input_buffer_pos] ; reset source index
.skip_read:
    mov al, [esi]           ; get char from source
    cmp al, '"'             ; look for endquote
    je  .end_quote
    cmp al, '\'             ; escape sequence
    je  .insert_esc
.store_char:
    mov [edi], al           ; copy char to destination
    inc esi                 ; next source char
    inc edi                 ; next destination pos
    jmp .copy_char          ; loop
.insert_esc:
    ; read the next character to determine what to do:
    inc esi
    mov al, [esi]
    cmp al, '\'             ; literal backslash
    je  .store_char
    cmp al, '$'             ; literal $
    je  .store_char
    cmp al, 'n'             ; newline
    jne .end_quote          ; unknown escape: ends the string
    mov al, 0x0a
    jmp .store_char
.end_quote:
    lea eax, [esi + 1]      ; get next input position
    mov [input_buffer_pos], eax ; save it
    mov [edi], byte 0       ; terminate str null
    ; Done copying string to either "free" memory or
    ; to the "here" compiled def area.

    ; Check again if we're in immediate or compile mode
    cmp dword [mode], IMMEDIATE
    je  .finish_immediate

    ; do compile mode stuff
    inc edi                 ; step past the null terminator
    mov [here], edi         ; save new "here"
    pop edx                 ; back to destination for opcode
    mov byte [edx], 0xE8    ; i386 CALL relative
    sub edi, edx            ; calc length of string into edi
    sub edi, 5              ; and subtract the opcode len!
    mov dword [edx+1], edi  ; is jump length
    jmp .end_if

    ; else
.finish_immediate:
    ; do immediate stuff
    push dword [free]       ; push string addr
    lea eax, [edi + 1]      ; calc next "free" space
    mov [free], eax         ; save it

.end_if:
    EAT_SPACES_CODE         ; advance to next token
.quote_done:
ENDDEF quote, 'quote', (IMMEDIATE | COMPILE)

; Attempts to parse num from string using radix.
; Doesn't handle negative sign. Leaves just 0
; (false) on stack if not successful.
; Digits are '0'-'9', then 'A'-'Z' or 'a'-'z' for 10-35.
; Every digit must be strictly less than var_radix.
; NOTE: uses plain '.' local labels, so it can appear only
; once per def.
%macro STR2NUM_CODE 0 ; (str_addr -- [num] success)
    pop ebp                 ; address of input token
    xor eax, eax            ; result
    xor ebx, ebx            ; char conversion (only bl is written,
                            ; so ebx == bl zero-extended)
    xor ecx, ecx            ; char counter/pointer
    mov edx, [var_radix]
.next_char:
    mov bl, [ebp + ecx]     ; put char in bl
    cmp bl, 0               ; null terminator?
    je  .return_num         ; yup, return value
    inc ecx
    ; Multiply the current value by the radix to prepare for
    ; the next, less significant digit. If we're starting
    ; out, the current value is 0, which is no problem.
    imul eax, edx
    cmp bl, '0'             ; ASCII less than '0' is invalid
    jl  .error              ; (signed: also rejects bytes >= 0x80)
    cmp bl, '9'             ; is it '0'-'9'?
    jg  .try_upper          ; no, try 'A'-'Z'
    sub bl, '0'             ; yes, convert ASCII '0' to 0
    jmp .add_value
.try_upper:
    cmp bl, 'A'
    jl  .error
    cmp bl, 'Z'
    jg  .try_lower
    sub bl, ('A'-10)        ; it's uppercase, convert 'A' to 10
    jmp .add_value
.try_lower:
    cmp bl, 'a'             ; '[' .. '`' sit between 'Z' and 'a'
    jl  .error              ; and are not digits
    cmp bl, 'z'
    jg  .error
    sub bl, ('a'-10)        ; it's lowercase, convert 'a' to 10
.add_value:
    ; Make sure the digit is within the radix: valid digits
    ; are 0 .. radix-1, so digit == radix is an error too.
    cmp ebx, edx            ; edx has radix
    jae .error              ; digit >= radix
    add eax, ebx            ; ebx has converted char's value
    jmp .next_char          ; loop
.error:
    push 0                  ; failure code (false)
    jmp .str2num_done
.return_num:
    test ecx, ecx           ; did we actually get any chars?
    je  .error              ; no, empty token string! error
    push eax                ; push number
    push 1                  ; success (true)
.str2num_done:
%endmacro
STARTDEF str2num ; (str_addr -- [num] success)
    STR2NUM_CODE
ENDDEF str2num, 'str2num', (IMMEDIATE | COMPILE)

; (n) radix ()  - set the number base used by num2str/str2num
%macro RADIX_CODE 0
    pop eax
    mov [var_radix], eax
%endmacro
STARTDEF radix
    RADIX_CODE
ENDDEF radix, 'radix', (IMMEDIATE | COMPILE)
STARTDEF hex
    mov dword [var_radix], 16
ENDDEF hex, 'hex', (IMMEDIATE | COMPILE)
STARTDEF oct
    mov dword [var_radix], 8
ENDDEF oct, 'oct', (IMMEDIATE | COMPILE)
STARTDEF bin
    mov dword [var_radix], 2
ENDDEF bin, 'bin', (IMMEDIATE | COMPILE)
STARTDEF decimal
    mov dword [var_radix], 10
ENDDEF decimal, 'decimal', (IMMEDIATE | COMPILE)

; Reads the next token and parses it as a number.
; Immediate mode: the value is left on the stack.
; Compile mode: a PUSH imm32 of the value is written at
; 'here'. A token that cannot be parsed is a fatal error.
STARTDEF number
    GET_TOKEN_CODE
    STR2NUM_CODE
    pop eax                 ; return value from str2num
    test eax, eax           ; did it fail?
    je  .invalid_number
    cmp dword [mode], COMPILE
    jne .done               ; IMMEDIATE mode: just keep the value
                            ; on the stack and keep going!
    ; like 'quote' and 'var', this writes a raw x86
    ; opcode to push an immediate value on the stack
    ; at runtime
    pop eax                 ; get number from stack
    mov edx, [here]         ; compile code here
    mov byte [edx], 0x68    ; i386 opcode for PUSH imm32
    mov dword [edx + 1], eax ; the number literal
    add edx, 5              ; update here
    mov [here], edx
    jmp .done
.invalid_number:
    ; If we got here, there was a token that started with a
    ; digit, but could not be parsed as a number. We're
    ; defining that as a fatal error.
    PRINTSTR 'Error parsing "'
    push token_buffer
    PRINT_CODE              ; inline print rather than CALLDEF:
                            ; CALLDEF's relative jmp would point
                            ; somewhere else once this def's code
                            ; has been inlined at another address
    PRINTSTR `" as a number.\n`
    push dword 1            ; definite non-zero exit status
    EXIT_CODE
.done:
ENDDEF number, 'number', (IMMEDIATE | COMPILE)

; Call with num to be printed on the stack.
; The digits are built in 'free' space, which is used as
; scratch only ('free' itself is not advanced).
%macro PRINTNUM_CODE 0
    mov eax, [free]         ; use free space temporarily
    push eax                ; addr for num2str
    NUM2STR_CODE            ; leaves length of string
    pop ebx
    mov eax, [free]
    push eax                ; addr
    push ebx                ; len
    LEN_PRINT_CODE
%endmacro
STARTDEF printnum
    PRINTNUM_CODE
ENDDEF printnum, 'printnum', (IMMEDIATE | COMPILE)

; ([numN ...] straddr) print$ ()
; Prints a null-terminated string, replacing each '$' with a
; number popped from the stack (printed in the current
; radix).
; NOTE: uses plain '.' local labels, so it can appear only
; once per def.
%macro PRINT_FMT_CODE 0
    pop esi                 ; string addr from stack is source pointer
    xor ecx, ecx            ; length of string to print
.examine_char:
    mov al, [esi + ecx]     ; get next char
    cmp al, '$'
    je  .print_num
    cmp al, 0               ; regular end of string!
    je  .print_the_rest
    inc ecx                 ; neither, keep going
    jmp .examine_char
.print_num:
    ; first print the string segment before the num
    pop eax                 ; get number to print from stack
    push esi                ; str addr (save a copy)
    push ecx                ; str len (save a copy)
    push eax                ; num to print
    push esi                ; str addr
    push ecx                ; str len
    LEN_PRINT_CODE
    PRINTNUM_CODE           ; print number from stack
    pop ecx                 ; restore str len
    pop esi                 ; restore str addr
    ; reset string to *after* the '$' placeholder and
    ; keep printing
    lea esi, [esi + ecx + 1]
    xor ecx, ecx
    jmp .examine_char
.print_the_rest:
    ; now we just need to print a "normal" string at
    ; the end, so push the start address and print!
    push esi                ; print just needs start address
    PRINT_CODE
%endmacro
STARTDEF print_fmt
    PRINT_FMT_CODE
ENDDEF print_fmt, 'print$', (IMMEDIATE | COMPILE)

; Same as print$, followed by a newline.
STARTDEF say
    PRINT_FMT_CODE
    mov eax, [free]         ; 'free' space as 1-byte scratch
    mov byte [eax], 0xa     ; '\n'
    push eax                ; addr of string
    push 1                  ; length to print
    LEN_PRINT_CODE
ENDDEF say, 'say', (IMMEDIATE | COMPILE)

; Given a mode (dword) on the stack, prints the matching
; modes (immediate/compile/runcomp).
; PRINTSTR preserves all registers, so eax survives between
; the three tests.
%macro PRINTMODE_CODE 0
    pop eax                 ; get mode dword
    mov ebx, eax
    and ebx, IMMEDIATE
    jz  %%try_compile
    PRINTSTR 'IMMEDIATE '
%%try_compile:
    mov ebx, eax
    and ebx, COMPILE
    jz  %%try_runcomp
    PRINTSTR 'COMPILE '
%%try_runcomp:
    mov ebx, eax
    and ebx, RUNCOMP
    jz  %%done
    PRINTSTR 'RUNCOMP '
%%done:
%endmacro
STARTDEF printmode
    PRINTMODE_CODE
ENDDEF printmode, 'printmode', (IMMEDIATE | COMPILE)

; Prints every dword from [stack_start] down to the current
; top of stack, space separated, followed by a newline.
%macro PRINTSTACK_CODE 0
    mov ecx, [stack_start]
    sub ecx, esp            ; difference between start and current
%%dword_loop:
    cmp ecx, 0              ; gone past the top of stack?
    jl  %%done              ; yup, done
    mov eax, [esp + ecx]    ; no, print this value
    push ecx                ; preserve
    push eax                ; print this value
    PRINTNUM_CODE
    PRINTSTR " "
    pop ecx                 ; restore
    sub ecx, 4              ; reduce stack index by dword
    jmp %%dword_loop
%%done:
    PRINTSTR `\n`
%endmacro
STARTDEF printstack
    PRINTSTACK_CODE
ENDDEF printstack, 'ps', (IMMEDIATE | COMPILE)

; Takes def tail addr, prints meta-info (from tail)
; and prints a hex dump of the def's machine code.
; Example: inspect foo
; The def name is read from the next input token. If there
; is no token, or no def with that name exists for the
; current mode, a message is printed instead of
; dereferencing a null tail pointer.
%macro INSPECT_CODE 0
    EAT_SPACES_CODE
    GET_TOKEN_CODE          ; get address of next token's string
    pop eax
    test eax, eax           ; 0 = no token available
    jz  %%not_found
    push eax                ; put the name back for find
    FIND_CODE               ; get tail of def matching token
    pop esi                 ; get tail addr
    test esi, esi           ; 0 = no such def
    jz  %%not_found
    ; name
    lea eax, [esi + T_NAME]
    push esi                ; preserve tail addr
    push eax
    PRINT_CODE
    PRINTSTR ": "
    ; length of machine code
    pop esi                 ; restore tail addr
    mov eax, [esi + T_CODE_LEN]
    push esi                ; preserve tail addr
    push eax                ; num to be stringified
    PRINTNUM_CODE
    PRINTSTR " bytes "
    ; flags
    pop esi                 ; restore tail
    mov eax, [esi + T_FLAGS]
    push esi                ; preserve tail addr
    push eax
    PRINTMODE_CODE
    PRINTSTR `\n `
    ; Now do a hex dump of the machine code for
    ; this word!
    pop esi                 ; get tail addr
    mov ecx, [esi + T_CODE_LEN]    ; len of code
    mov eax, [esi + T_CODE_OFFSET] ; offset of code start
    sub esi, eax            ; esi is now start addr of code
    add ecx, esi            ; ecx is now end addr of code
    mov ebx, [var_radix]    ; save current radix
    mov dword [var_radix], 16 ; set to hex
    push ebx
%%byte_loop:
    cmp ecx, esi            ; end addr reached?
    je  %%dump_done         ; yup
    movzx eax, byte [esi]   ; no, print this byte. Zero-extend:
                            ; the whole of eax is printed, so
                            ; stale upper bits must not leak in.
    push ecx                ; preserve end addr
    push eax                ; print this value
    PRINTNUM_CODE
    PRINTSTR " "
    pop ecx                 ; restore end addr
    inc esi                 ; move to next addr
    jmp %%byte_loop
%%dump_done:
    PRINTSTR `\n`
    pop ebx
    mov dword [var_radix], ebx ; restore previous radix
    jmp %%finished
%%not_found:
    PRINTSTR `Could not find def to inspect.\n`
%%finished:
%endmacro
STARTDEF inspect
    INSPECT_CODE
ENDDEF inspect, 'inspect', (IMMEDIATE)

; Print all def names, most recently defined first.
; NOTE(review): assumes 'last' is non-null (at least one def
; exists).
STARTDEF all_names
    mov esi, [last]         ; tail addr of last def defined
.print_loop:
    lea eax, [esi + T_NAME] ; name
    mov esi, [esi]          ; get prev tail pointer
    push esi                ; preserve it
    push eax                ; name for print
    PRINT_CODE
    PRINTSTR " "            ; space between names
    pop esi                 ; restore tail
    test esi, esi           ; done?
    jne .print_loop         ; nope, keep going!
    PRINTSTR `\n`
ENDDEF all_names, 'all', (IMMEDIATE)

; (a b) + (a+b)
STARTDEF add
    pop eax
    pop ebx
    add eax, ebx
    push eax
ENDDEF add, '+', (IMMEDIATE | COMPILE)

; (a b) - (a-b)
STARTDEF sub
    pop ebx
    pop eax
    sub eax, ebx
    push eax
ENDDEF sub, '-', (IMMEDIATE | COMPILE)

; (a b) * (a*b)
STARTDEF mul
    pop eax
    pop ebx
    imul eax, ebx
    push eax
ENDDEF mul, '*', (IMMEDIATE | COMPILE)

; (a b) / (remainder quotient)   - signed division
STARTDEF div
    pop ebx                 ; divisor
    pop eax                 ; dividend
    cdq                     ; sign-extend eax into edx:eax. idiv
                            ; divides edx:eax, so zeroing edx gave
                            ; wrong results for negative dividends.
    idiv ebx
    push edx                ; remainder
    push eax                ; answer (quotient)
ENDDEF div, '/', (IMMEDIATE | COMPILE)

; (n) inc (n+1)
STARTDEF inc
    pop ecx
    inc ecx
    push ecx
ENDDEF inc, 'inc', (IMMEDIATE | COMPILE)

; (n) dec (n-1)
STARTDEF dec
    pop ecx
    dec ecx
    push ecx
ENDDEF dec, 'dec', (IMMEDIATE | COMPILE)

; (a) pop ()
STARTDEF popstack
    pop eax
ENDDEF popstack, 'pop', (IMMEDIATE | COMPILE)

; (a) dup (a a)
STARTDEF dupstack
    pop eax
    push eax
    push eax
ENDDEF dupstack, 'dup', (IMMEDIATE | COMPILE)

; Immediate mode 'if' (?) skips whatever token follows
; if the "condition" on the stack is false (0).
STARTDEF if
    pop eax
    test eax, eax           ; zero?
    jnz .continue_for_true
    EAT_SPACES_CODE
    GET_TOKEN_CODE
    pop eax                 ; throw away token!
.continue_for_true:
ENDDEF if, '?', (IMMEDIATE)

; Compile mode 'if' (?) skips the next def's code.
; So we bake in a conditional jump over the next
; def's inlined code!
; Emits: pop eax / test eax,eax / jz <over def> / <def code>
; The next token MUST name a def; if it is missing or
; unknown we report it and exit rather than dereferencing a
; null tail pointer.
STARTDEF if_compiled
    EAT_SPACES_CODE
    GET_TOKEN_CODE          ; MUST be a def
    pop eax
    test eax, eax           ; 0 = no token available
    jz  .if_no_def
    push eax                ; put the name back for find
    FIND_CODE               ; get tail of def matching token
    pop esi                 ; tail of def to inline
    test esi, esi           ; 0 = no such def
    jz  .if_no_def
    mov eax, [esi + T_CODE_LEN] ; get len of code
    push esi                ; put tail back on stack for inline
    mov edx, [here]         ; compile code here
    mov byte [edx], 0x58    ; i386 opcode: pop eax
    mov byte [edx+1], 0x85  ; i386 opcode: test eax
    mov byte [edx+2], 0xc0  ;   (continued)
    mov byte [edx+3], 0x0f  ; i386 opcode: jz
    mov byte [edx+4], 0x84  ;   (continued)
    mov dword [edx+5], eax  ; jump distance = length of def's code
    add edx, 9
    mov [here], edx
    INLINE_CODE
    jmp .if_finished
.if_no_def:
    PRINTSTR `Error: '?' must be followed by a def name.\n`
    push dword 1            ; exit status
    EXIT_CODE
.if_finished:
ENDDEF if_compiled, '?', (COMPILE | RUNCOMP)

; Compile mode "loop" keeps replaying a def so long as the
; top of the stack contains "true" (non-zero)
;
;      [jz][...def...][jmp]
;       ^ |           | ^
;       +-|-----B-----+ |
;         +------A------+
;
;   A = len(def) + len(jmp)    forward jz, taken when false
;   B = A + len(test+jz)       backward jmp to the loop start
;
; As with '?', a missing or unknown def name is reported and
; the program exits.
STARTDEF loop_compiled
    %assign LEN_BEFORE 10   ; pop/push/test/jz rel32
    %assign LEN_AFTER  5    ; jmp rel32
    EAT_SPACES_CODE
    GET_TOKEN_CODE          ; MUST be a def
    pop eax
    test eax, eax           ; 0 = no token available
    jz  .loop_no_def
    push eax                ; put the name back for find
    FIND_CODE               ; get tail of def matching token
    pop esi                 ; tail of def to inline
    test esi, esi           ; 0 = no such def
    jz  .loop_no_def
    mov eax, [esi + T_CODE_LEN] ; get len of code
    add eax, LEN_AFTER      ; add length of unconditional jump at end
    push eax                ; preserve length of def for end
    push esi                ; put tail back on stack for inline
    ; Compile in the conditional jump over the def
    ; (Note that unlike "?" (if), it leaves test value on stack!)
    mov edx, [here]         ; compile code here
    mov byte [edx], 0x58    ; i386 opcode: pop eax
    mov byte [edx+1], 0x50  ; i386 opcode: push eax
    mov byte [edx+2], 0x85  ; i386 opcode: test eax
    mov byte [edx+3], 0xc0  ;   (continued)
    mov byte [edx+4], 0x0f  ; i386 opcode: jz
    mov byte [edx+5], 0x84  ;   (continued)
    mov dword [edx+6], eax  ; distance to jump
    add edx, LEN_BEFORE
    mov [here], edx
    INLINE_CODE
    ; Compile in the unconditional jump to the start
    pop eax                 ; retrieve length of def plus "after"
    add eax, LEN_BEFORE     ; now add the "before" len
    neg eax                 ; negate the jump (go back!)
    mov edx, [here]         ; compile code here
    mov byte [edx], 0xe9    ; i386 opcode: jmp
    mov dword [edx+1], eax  ; distance to jump
    add edx, LEN_AFTER
    mov [here], edx
    jmp .loop_finished
.loop_no_def:
    PRINTSTR `Error: 'loop?' must be followed by a def name.\n`
    push dword 1            ; exit status
    EXIT_CODE
.loop_finished:
ENDDEF loop_compiled, 'loop?', (COMPILE | RUNCOMP)

; 'var' reserves a new space in memory (4 bytes for now) and
; creates a new def that puts the ADDRESS of that memory on
; the stack when it is called.
; The new def's body is a single PUSH imm32; SEMICOLON_CODE
; then writes its tail and switches mode back to IMMEDIATE.
STARTDEF var
    mov dword [mode], COMPILE
    ; get name from next token and store it...
    EAT_SPACES_CODE
    GET_TOKEN_CODE
    push name_buffer        ; dest
    COPYSTR_CODE            ; copy name into name_buffer
    mov eax, [free]         ; get current free space addr
    mov edx, [here]         ; compile var code here
    push edx                ; save it for semicolon!
    mov byte [edx], 0x68    ; i386 opcode for PUSH imm32
    mov dword [edx + 1], eax ; address of var space
    add edx, 5              ; update here
    mov [here], edx
    add eax, 4              ; update free pointer
    mov [free], eax
    SEMICOLON_CODE
ENDDEF var, 'var', (IMMEDIATE | COMPILE)

; (value addr) set ()
STARTDEF setvar
    pop edi                 ; address from stack
    pop eax                 ; value from stack
    mov [edi], eax          ; set it!
ENDDEF setvar, 'set', (IMMEDIATE | COMPILE)

; (addr) get (value)
STARTDEF getvar
    pop esi                 ; address from stack
    mov eax, [esi]          ; get it!
    push eax                ; put it on the stack
ENDDEF getvar, 'get', (IMMEDIATE | COMPILE)


; *******************************************
; * ELF HEADER!                             *
; *******************************************
; This is defined in meow5's program data. It
; gets filled in with the correct values for
; whatever def we're "compiling" via:
;
;     elf <def name>
;
section .data
%assign elf_va 0x08048000 ; elf virt mem start address
elf_header:
    ; **********************************************
    ; ELF Identification (16 bytes)
    db 0x7F,'ELF'   ; Magic String
    db 1            ; "File class" 32 bit
    db 1            ; "Data encoding" 1=LSB (x86 little endian)
    db 1            ; "File version" ELF version (1="current")
    times 9 db 0    ; padding (to fill up 16 bytes)
    ; Fields following the identification bytes
    dw 2            ; type - 2="Executable file"
    dw 3            ; machine - 3="Intel 80386"
    dd 1            ; version - 1="Current"
    ; For reasons I do not understand, I had not had any
    ; luck setting this program entry to the beginning of
    ; the virtual memory start address and loading the file
    ; starting immediately after the ELF header where the
    ; def's machine code was written. It always segfaulted.
    ;
    ; Update: I still don't *really* get that when working
    ; just the ELF documentation, but I've accepted that
    ; Linux mmaps a file to an entire page at a time, so
    ; there are certain alignment-y rules at play here. We
    ; don't fight it.
1146 dd elf_va + elf_size ; entry - execution start address 1147 dd phdr1 - elf_header ; phoff - bytes to program header 1148 dd 0 ; shoff - 0 for no section header 1149 dd 0 ; flags - processor-specific flags 1150 dw hdr_size ; ehsize - this header bytes, see below 1151 dw phdr_size ; phentsize - program header size 1152 dw 1 ; phnum - program header count 1153 dw 0 ; shentsize - section header size (none) 1154 dw 0 ; shnum - section header count 1155 dw 0 ; shstrndx - section header offset 1156 hdr_size equ $ - elf_header ; calulate elf header size 1157 1158 ; ********************************************** 1159 ; Program Header 1 - The one and only. 1160 phdr1: 1161 dd 1 ; type of program header (1=PT_LOAD) 1162 dd 0 ; offset in file of data to load 1163 dd elf_va ; target memory start address 1164 dd elf_va ; target physical memory, same 1165 prog_bytes1: 1166 dd 0 ; bytes to load from file (placeholder) 1167 prog_bytes2: 1168 dd 0 ; bytes of memory to allocate (placeholder) 1169 dd 7 ; flags: 1=exec, 2=write, 4=read (7=RWX) 1170 dd 0 ; Memory alignment 1171 ; ********************************************** 1172 1173 ; Calculate program header size. Only need to do this 1174 ; once. All program headers are the same size. (And now 1175 ; there's just one again. So that's doubly true.) 1176 phdr_size equ $ - phdr1 ; calculate program header size 1177 1178 ; End of entire ELF header. 1179 ; Calculate size of ELF header, used above. 1180 elf_size equ $ - elf_header 1181 1182 temp_test_string: db 'HELLO WORLD' ; 11 bytes test 1183 1184 section .text 1185 1186 STARTDEF elf 1187 EAT_SPACES_CODE 1188 GET_TOKEN_CODE ; get address of next token's string 1189 FIND_CODE ; get tail of def matching token 1190 pop esi ; addr of tail (and keep in esi the whole time) 1191 mov eax, [esi + T_CODE_LEN] ; get len of code 1192 1193 ; Overwrite the placeholder program size values in the 1194 ; ELF program header with this def's code size. 
1195 add eax, elf_size 1196 mov [prog_bytes1], eax ; file 1197 mov [prog_bytes2], eax ; memory 1198 1199 ; We can create and open at the same time! 1200 ; From open(2) man page: 1201 ; "A call to creat() is equivalent to calling open() 1202 ; with flags equal to O_CREAT|O_WRONLY|O_TRUNC." 1203 ; I got the flags by searching all of /usr/include and 1204 ; finding /usr/include/asm-generic/fcntl.h 1205 ; That yielded (along with bizarre comment "not fcntl"): 1206 ; #define O_CREAT 00000100 1207 ; #define O_WRONLY 00000001 1208 ; #define O_TRUNC 00001000 1209 ; which are apparently octal values??? (NOT binary) 1210 ; Hence this flag value for 'open': 1211 mov ecx, (0100o | 0001o | 1000o) 1212 ; ebx contains null-terminated def name (FIND_CODE) 1213 mov edx, 755o ; mode (permissions) 1214 mov eax, SYS_OPEN 1215 int 80h ; now eax will contain the new file desc. 1216 push ebx ; SAVE def tail address for end 1217 ; Write ELF header 1218 mov edx, elf_size ; bytes to write 1219 mov ecx, elf_header ; source address 1220 mov ebx, eax ; the fd for writing (opened/created above) 1221 mov eax, SYS_WRITE 1222 int 80h 1223 ; 1224 ; Write def (executable program segment) 1225 mov edx, [esi + T_CODE_LEN] ; bytes to write 1226 mov eax, [esi + T_CODE_OFFSET] ; for source addr 1227 mov ecx, esi ; tail addr 1228 sub ecx, eax ; source addr (code offset from tail) 1229 mov eax, SYS_WRITE ; ebx still has fd 1230 int 80h 1231 ; Write our memory (for data program segment) 1232 ; mov edx, 11 ; bytes to write 1233 ; mov ecx, temp_test_string ; for source addr 1234 ; mov eax, SYS_WRITE ; ebx still has fd 1235 ; int 80h 1236 ; Close file (ebx still has fd) 1237 mov eax, SYS_CLOSE 1238 int 80h 1239 ; File name pushed above. Display message about 1240 ; having written file. 
1241 PRINTSTR 'Wrote to "' 1242 PRINT_CODE ; print fname from stack (see "SAVE" above) 1243 PRINTSTR `".\n` 1244 ENDDEF elf, 'elf', (IMMEDIATE) 1245 1246 ; ---------------------------------------------------------- 1247 ; PROGRAM START! 1248 ; ---------------------------------------------------------- 1249 global _start 1250 _start: 1251 cld ; use increment order for certain cmds 1252 1253 ; Start in immediate mode - execute def immediately! 1254 mov dword [mode], IMMEDIATE 1255 1256 ; Default to input file descriptor STDIN. We can change 1257 ; this to make get_input read from different sources. 1258 mov dword [input_file], STDIN 1259 1260 ; Here points to the current spot where we're going to 1261 ; inline ("compile") the next def 1262 mov dword [here], compile_area 1263 1264 ; Free points to the next free space in the data area 1265 ; where all variables and non-stack data goes. 1266 mov dword [free], data_area 1267 1268 ; Store the first stack address so we can reference it 1269 ; later (such as printing contents of stack). Subtract 4 1270 ; so that we mark the *next* position as the first (sich 1271 ; it's the first position to which we'll push a value). 1272 lea eax, [esp - 4] 1273 mov [stack_start], eax 1274 1275 ; Store last tail for dictionary searches (note that 1276 ; find just happens to be the last def defined in the 1277 ; dictionary at the moment). 1278 mov dword [last], LAST_DEF_TAIL 1279 1280 ; In order to signal that we need to read input on 1281 ; start, set both the current read index and the end 1282 ; location to the start of the buffer. Currently, the 1283 ; 'eat_spaces' def will see that and read more input. 1284 mov dword [input_buffer_pos], input_buffer 1285 mov dword [input_buffer_end], input_buffer 1286 mov dword [input_eof], 0 ; EOF flag 1287 1288 ; Start off parsing and printing numbers as decimals. 1289 mov dword [var_radix], 10 1290 1291 1292 ; ---------------------------------------------------------- 1293 ; Interpreter! 
1294 ; ---------------------------------------------------------- 1295 get_next_token: 1296 mov eax, [input_eof] 1297 CALLDEF eat_spaces ; skip whitespace 1298 cmp dword [input_eof], 1 ; end of input? 1299 je .end_of_input ; yes, time to die 1300 ; Get the next character in the input stream to see what 1301 ; it is. Check for end of input, quotes, and numbers. 1302 mov esi, [input_buffer_pos] ; source 1303 mov al, [esi] ; first char 1304 .try_quote: 1305 cmp al, '"' ; next char a quote? 1306 jne .try_num ; nope, continue 1307 CALLDEF quote ; yes, get string, leaves addr 1308 jmp get_next_token 1309 .try_num: 1310 cmp al, '0' 1311 jl .try_token ; nope! 1312 cmp al, '9' 1313 jg .try_token ; nope! 1314 CALLDEF number ; parse number, leaves value 1315 jmp get_next_token 1316 .try_token: 1317 CALLDEF get_token 1318 pop eax ; get_token returns address or 0 1319 cmp eax, 0 1320 je .end_of_input ; all out of tokens! 1321 push token_buffer ; for find 1322 CALLDEF find ; find token, returns tail addr 1323 pop eax ; find's return value 1324 cmp eax, 0 ; did find fail? 1325 je .token_not_found ; yup 1326 push eax ; find successful, put result back 1327 cmp dword [mode], IMMEDIATE 1328 je .exec_def 1329 ; We're in compile mode... 1330 CALLDEF get_flags 1331 CALLDEF is_runcomp 1332 pop eax ; get result 1333 cmp eax, 0 ; if NOT equal, def was RUNCOMP 1334 jne .exec_def ; yup, RUNCOMP 1335 CALLDEF inline ; nope, "compile" it. 1336 jmp get_next_token 1337 .exec_def: 1338 ; Run current def in immediate mode! 1339 ; We currently have the tail of a found def. 1340 pop ebx ; addr of def tail left on stack by 'find' 1341 mov eax, [ebx + T_CODE_OFFSET] 1342 sub ebx, eax ; set to start of def's machine code 1343 CALLDEF ebx ; call def with that addr (via reg) 1344 jmp get_next_token 1345 .end_of_input: 1346 push 0 ; exit status 1347 CALLDEF exit 1348 .token_not_found: 1349 ; Putting strings together this way is quite painful... 
1350 ; "Could not find def "foo" while looking in <mode> mode." 1351 PRINTSTR 'Could not find def "' 1352 push token_buffer 1353 CALLDEF print 1354 PRINTSTR '" while looking in ' 1355 mov eax, [mode] 1356 push eax 1357 PRINTMODE_CODE 1358 PRINTSTR ` mode.\n` 1359 jmp get_next_token