1 ; +--------------------------------------------------------+
2 ; | >o.o< Meow5: A very conCATenative language |
3 ; +--------------------------------------------------------+
4
; Meow5 Constants
%assign INPUT_SIZE  1024        ; size of input buffer in bytes

; Flag bits. They are stored in the flags dword of every def tail and
; the same values are written to [mode] to select the current mode.
%assign COMPILE     (1 << 0)    ; flag: can be compiled
%assign IMMEDIATE   (1 << 1)    ; flag: can be called
%assign RUNCOMP     (1 << 2)    ; flag: runs in comp mode
10
; Linux Constants

; File descriptors (placed in ebx for the read/write syscalls below)
%assign STDIN       0x00
%assign STDOUT      0x01
%assign STDERR      0x02

; Syscall numbers (placed in eax before 'int 0x80')
%assign SYS_EXIT    0x01
%assign SYS_READ    0x03
%assign SYS_WRITE   0x04
%assign SYS_OPEN    0x05
%assign SYS_CLOSE   0x06

; TODO add SYS_CREATE 8
22
23 ; ----------------------------------------------------------
24 ; BSS - reserved space
25 ; ----------------------------------------------------------
section .bss

; --- interpreter state (one dword each) ---
mode:             resd 1        ; IMMEDIATE or COMPILE
var_radix:        resd 1        ; decimal=10, hex=16, etc.
input_file:       resd 1        ; input file desc. (STDIN, etc.)
last:             resd 1        ; pointer to the tail of the last def
here:             resd 1        ; will point into compile_area
free:             resd 1        ; will point into data_area
stack_start:      resd 1        ; will point to first stack addr

; --- working buffers ---
token_buffer:     resb 32       ; filled by get_token
name_buffer:      resb 32       ; for 'def' (copy of token)
compile_area:     resb 4096     ; defs are inlined ("compiled") here
data_area:        resb 1024     ; all variables go here

; --- input handling ---
input_buffer:     resb INPUT_SIZE ; raw input from user (or file)
input_buffer_end: resd 1        ; address just past the last valid input byte
input_buffer_pos: resd 1        ; address of the next unread input byte
input_eof:        resd 1        ; flag: 1 = EOF reached

; Return address for immediate mode execution only
return_addr:      resd 1        ; written by CALLDEF, read by RETURN_CODE
46
47 ; ----------------------------------------------------------
48 ; MACROS!
49 ; ----------------------------------------------------------
50
; PRINTSTR "Foo bar."
; Writes a string literal to STDOUT. All general-purpose registers are
; preserved (pusha/popa).
;   %1 - the string literal to print (no null terminator needed)
%macro PRINTSTR 1
    ; Length of the literal, computed at assembly time. Note that
    ; 'mystr_len' is an ordinary (global) single-line macro that is
    ; simply redefined by every expansion.
    %strlen mystr_len %1
    ; Store the text in .data under the macro-local label %%mystr.
    ; The primitive [section] form does not update __SECT__, so the
    ; __SECT__ line below switches back to whatever section the caller
    ; was assembling into (previously this was hard-coded to .text).
    [section .data]
%%mystr:    db %1
    __SECT__
    ; now the executable part
    pusha                       ; preserve all registers
    mov     ebx, STDOUT         ; fd to write to
    mov     edx, mystr_len      ; number of bytes
    mov     ecx, %%mystr        ; address of the text
    mov     eax, SYS_WRITE
    int     0x80
    popa                        ; restore all registers
%endmacro ; PRINTSTR
70
; Macro to compile in a number-printing diagnostic message.
; DEBUG "<string>", <valid second param of MOV>
;   %1 - label text, printed first via PRINTSTR
;   %2 - any valid source operand for 'mov eax, ...'; its value is
;        printed as 8 hex digits followed by a newline.
; eax, ebx, ecx and edx are saved and restored. The flags are saved
; and restored around the hex formatting (the PRINTSTR syscall that
; runs before that is not covered by this save).
; NOTE: %2 is evaluated after four dwords have been pushed, so an
; esp-relative operand must add 16 to its offset.
%macro DEBUG 2
    PRINTSTR %1                 ; preserves all registers itself
    push    eax ;A
    push    ebx ;B
    push    ecx ;C
    push    edx ;D
    ; Fetch the value to print before any register is reused below.
    mov     eax, %2
    pushfd                      ; the and/cmp/add/dec below clobber flags
    ; Build the ASCII text in a real stack allocation. i386 has no
    ; protected area below esp, so the buffer must not live under it.
    ; 12 bytes: byte 0 unused, bytes 1..8 digits, byte 9 newline.
    sub     esp, 12
    mov     ebx, esp            ; ebx = base of scratch buffer
    mov     ecx, 8              ; counter - 8 characters, filled 8..1
%%digit_loop:
    mov     edx, eax
    and     edx, 0x0f           ; just keep lowest 4 bits
    cmp     edx, 9              ; bigger than 9?
    jg      %%af                ; yes, print 'a'-'f'
    add     edx, '0'            ; no, turn it into an ascii digit
    jmp     %%continue
%%af:
    add     edx, 'a'-10         ; because 10 is 'a'...
%%continue:
    mov     byte [ebx + ecx], dl ; store character
    ror     eax, 4              ; next nibble (8 rotations restore eax)
    dec     ecx
    jnz     %%digit_loop
    ; Print hex number string
    mov     byte [ebx + 9], 0x0A ; add newline
    lea     ecx, [ebx + 1]      ; text starts at index 1 (ecx went 8...1)
    mov     ebx, STDOUT
    mov     edx, 9              ; 8 hex digits + newline
    mov     eax, SYS_WRITE
    int     0x80
    lea     esp, [esp + 12]     ; release scratch (lea leaves flags alone)
    popfd
    ; Restore all registers. (Reverse order)
    pop     edx ;D
    pop     ecx ;C
    pop     ebx ;B
    pop     eax ;A
%endmacro ; DEBUG print
122
; CALLDEF <label/addr of def>
; Fakes a call/ret to a def from hand-written assembly: the address of
; the instruction following the jump is stored in [return_addr], which
; is where RETURN_CODE jumps back to.
%macro CALLDEF 1
    mov     dword [return_addr], %%resume_here  ; CALLDEF
    jmp     %1                                  ; CALLDEF
%%resume_here:                                  ; CALLDEF (macro-local)
%endmacro
131
; STARTDEF <name>
; Opens a definition by emitting the label that marks the first byte
; of its machine code. Pair with ENDDEF using the same name.
%macro STARTDEF 1
%1:
%endmacro
136
; Jumps to the address stored in [return_addr] (see CALLDEF).
; Clobbers eax.
%macro RETURN_CODE 0
    mov     eax, dword [return_addr]    ; RETURN
    jmp     eax                         ; RETURN
%endmacro
141
; ENDDEF <label>, <name string>, <flags>
; Ends a definition and emits its "tail" (metadata record).
;   %1 - name used for the labels (must be NASM-safe)
;   %2 - name string that 'find' matches against
;   %3 - 32 bits of flags
%macro ENDDEF 3
; The machine code of the def stops here:
end_%1:
    ; When the def was entered in immediate mode, this returns to
    ; the caller. It sits after end_%1, so it is not counted in the
    ; code length below and does not get in the way of the flow of
    ; "compiled" code.
    RETURN_CODE
tail_%1:
    dd LAST_DEF_TAIL            ; +0  link to previous tail (linked list)
    %define LAST_DEF_TAIL tail_%1
    dd (end_%1 - %1)            ; +4  length of def machine code
    dd (tail_%1 - %1)           ; +8  distance from tail back to code start
    dd %3                       ; +12 flags for def
    db %2, 0                    ; +16 null-terminated name string
%endmacro
162
; Byte offsets of the fields in a def tail, relative to the tail
; address. (The link to the previous tail is at offset 0.)
%define T_CODE_LEN      4       ; length of the def's machine code
%define T_CODE_OFFSET   8       ; distance from tail back to code start
%define T_FLAGS         12      ; flags dword
%define T_NAME          16      ; null-terminated name string
168
169 ; ----------------------------------------------------------
170 ; TEXT - executable program - starting with defs
171 ; ----------------------------------------------------------
section .text

; LAST_DEF_TAIL always names the tail of the most recently assembled
; def; ENDDEF stores it as the link field and then redefines it.
; The initial 0 (null pointer) marks the end of the linked list.
%define LAST_DEF_TAIL 0
177
; Terminates the process. ( exit_code -- )
%macro EXIT_CODE 0
    pop     ebx                 ; param1: exit code
    mov     eax, SYS_EXIT
    int     0x80
%endmacro
STARTDEF exit
    EXIT_CODE
ENDDEF exit, "exit", (COMPILE | IMMEDIATE)
186
; Gets length of null-terminated string.
; Pops the string address and pushes the length (the address is not
; left on the stack). Clobbers eax, ecx.
%macro STRLEN_CODE 0
    pop     eax                     ; string address
    mov     ecx, 0                  ; byte counter, ends up as the length
%%scan:
    cmp     byte [eax + ecx], 0     ; null terminator?
    je      %%at_terminator         ; yes, done
    inc     ecx                     ; no, continue
    jmp     %%scan
%%at_terminator:
    push    ecx                     ; return len
%endmacro
STARTDEF strlen ; (straddr -- len)
    STRLEN_CODE
ENDDEF strlen, "strlen", (IMMEDIATE | COMPILE)
202
; Prints a string by address and length to STDOUT. ( addr len -- )
; Clobbers eax, ebx, ecx, edx.
%macro LEN_PRINT_CODE 0
    pop     edx                 ; length (top of stack)
    pop     ecx                 ; string address
    mov     ebx, STDOUT         ; write destination file
    mov     eax, SYS_WRITE      ; syscall
    int     0x80                ; interrupt to linux!
%endmacro
211
; Prints a null-terminated string whose address is on the stack.
; The address is duplicated so STRLEN_CODE can consume one copy and
; LEN_PRINT_CODE the other.
%macro PRINT_CODE 0
    pop     eax
    push    eax                 ; copy for the write
    push    eax                 ; copy for strlen (consumed first)
    STRLEN_CODE                 ; (after: straddr, len)
    LEN_PRINT_CODE
%endmacro
STARTDEF print ; (straddr) print ()
    PRINT_CODE
ENDDEF print, "print", (IMMEDIATE | COMPILE)
223
; Copies ("inlines") the machine code of a def to [here] and advances
; [here] past the copy. ( tail_addr -- )
; Clobbers eax, ebx, ecx, esi, edi.
%macro INLINE_CODE 0
    pop     esi                         ; param1: tail of def to inline
    mov     edi, [here]                 ; destination
    mov     eax, [esi + T_CODE_LEN]     ; length of the def's code
    mov     ebx, [esi + T_CODE_OFFSET]  ; distance from tail back to code
    sub     esi, ebx                    ; esi = start of code (source)
    mov     ecx, eax                    ; byte count for rep movsb
    rep movsb                           ; copy ecx bytes [esi] -> [edi]
    ;add [here], eax                    ; (not needed, see next line)
    mov     [here], edi                 ; movsb already advanced edi
%endmacro
STARTDEF inline
    INLINE_CODE
ENDDEF inline, "inline", (IMMEDIATE)
238
; Given a tail addr on the stack, pushes that def's flags on top of
; it; the tail addr itself stays in place. Clobbers eax, ebp.
%macro GET_FLAGS_CODE 0
    mov     ebp, [esp]              ; peek at tail addr (no pop)
    mov     eax, [ebp + T_FLAGS]    ; read the flags dword
    push    eax
%endmacro
STARTDEF get_flags ; (tail_addr) get_flags (tailaddr flags)
    GET_FLAGS_CODE
ENDDEF get_flags, "get_flags", (IMMEDIATE | COMPILE)
248
; Consumes a flags dword and leaves a truthy/falsy value telling
; whether the RUNCOMP bit was set. (Non-zero is true: the result is
; the masked bit itself, not 1.)
%macro IS_RUNCOMP_CODE 0
    pop     eax                 ; param: flags
    and     eax, RUNCOMP        ; keep only the RUNCOMP bit
    push    eax
%endmacro
STARTDEF is_runcomp ; (flags) is_runcomp (true/false)
    IS_RUNCOMP_CODE
ENDDEF is_runcomp, "is_runcomp", (IMMEDIATE | COMPILE)
259
; Looks up a def by name. ( name_str_addr -- tail_addr | 0 )
; Walks the linked list of tails starting at [last]. Only defs whose
; flags share a bit with the current [mode] are considered.
; Register use:
;   al  - to-find name character being checked
;   ebx - start of the dictionary def's name string
;   ecx - byte offset into both strings
;   edx - current tail in the list
;   ebp - start of the name to find
%macro FIND_CODE 0
    pop     ebp                     ; param1 - name string to find
    mov     edx, [last]             ; search backwards from last def
%%check_entry:
    cmp     edx, 0                  ; a null pointer (0) is end of list
    je      %%no_match
    ; Skip defs that are not available in the current mode
    ; (IMMEDIATE vs COMPILE):
    mov     eax, [mode]
    and     eax, [edx + T_FLAGS]    ; is the mode bit set in the tail?
    cmp     eax, 0
    jz      %%next_entry            ; no, def is not for this mode
    ; Compare the two names byte-by-byte. A match requires every
    ; byte to be equal up to and including the 0 terminator.
    lea     ebx, [edx + T_NAME]     ; dictionary def's name
    mov     ecx, 0                  ; reset byte offset counter
%%compare_byte:
    mov     al, [ebp + ecx]         ; next to-find name byte
    cmp     al, [ebx + ecx]         ; same as the dictionary byte?
    jne     %%next_entry            ; mismatch
    cmp     al, 0                   ; both hit the terminator together
    je      %%match
    inc     ecx
    jmp     %%compare_byte
%%next_entry:
    mov     edx, [edx]              ; follow the link to the previous tail
    jmp     %%check_entry
%%no_match:
    push    0                       ; return 0 to indicate not found
    jmp     %%finished
%%match:
    push    edx                     ; return pointer to the def's tail
%%finished:
%endmacro
STARTDEF find
    FIND_CODE
ENDDEF find, "find", (IMMEDIATE)
305
; Gets input from a file, filling input_buffer and resetting
; input_buffer_pos to the start of the buffer.
; Reads up to INPUT_SIZE bytes from the descriptor in [input_file].
; Afterwards input_buffer_end is the address just past the bytes
; read. If the read returns 0 (EOF) or a negative value (error),
; input_eof is set to 1 and the buffer is left empty
; (input_buffer_end == input_buffer).
; All registers are preserved (pusha/popa).
%macro GET_INPUT_CODE 0
    pusha                               ; preserve all reg
    ; Fill input buffer via linux 'read' syscall
    mov     ebx, [input_file]           ; file descriptor
    mov     ecx, input_buffer           ; buffer for read
    mov     edx, INPUT_SIZE             ; max bytes to read
    mov     eax, SYS_READ               ; linux syscall 'read'
    int     0x80                        ; syscall interrupt!
    cmp     eax, 0                      ; 0=EOF, negative=error
    jg      %%normal
    mov     dword [input_eof], 1        ; set EOF reached
    ; A raw syscall reports errors as a negative number in eax.
    ; Treat that as "no bytes" so the end pointer computed below
    ; never lands before the start of the buffer.
    mov     eax, 0
%%normal:
    lea     ebx, [input_buffer + eax]   ; end of current input
    mov     dword [input_buffer_end], ebx ; save it
    mov     dword [input_buffer_pos], input_buffer ; reset pos
    popa                                ; restore all reg
%endmacro
STARTDEF get_input
    GET_INPUT_CODE
ENDDEF get_input, "get_input", (IMMEDIATE | COMPILE)
332
; Comments in source! Eats input up to (but not including) the next
; newline (0x0a). Also stops at a 0 byte or once EOF has been flagged.
; Refills the input buffer when it runs dry.
STARTDEF comment
.reload:
    mov     esi, [input_buffer_pos]     ; current read position
    cmp     dword [input_eof], 1
    je      .finished                   ; EOF was hit at some point
    mov     ebx, [input_buffer_end]     ; end marker for comparison
.scan:
    cmp     esi, ebx                    ; have we hit end pos?
    jl      .examine                    ; no, look at this byte
    GET_INPUT_CODE                      ; yes, read more input
    jmp     .reload                     ; pick up the new pos/end
.examine:
    mov     al, [esi]                   ; byte at read position
    cmp     al, 0                       ; null terminator?
    je      .finished
    cmp     al, 0x0a                    ; newline?
    je      .finished                   ; comment line is over
    inc     esi                         ; 'eat' input
    jmp     .scan
.finished:
    mov     [input_buffer_pos], esi     ; save read position
ENDDEF comment, "#", (IMMEDIATE | COMPILE | RUNCOMP)
356
; Skips input bytes that compare (signed) less than or equal to a
; space (0x20), refilling the input buffer as needed. Stops at a 0
; byte or once EOF has been flagged.
; NOTE: the labels below are ordinary local labels, not macro-local,
; so this macro can be expanded only once per def.
; NOTE(review): because the compare is signed, bytes >= 0x80 are also
; skipped - confirm that is intended.
%macro EAT_SPACES_CODE 0
.reset:
    mov     esi, [input_buffer_pos]     ; current read position
    cmp     dword [input_eof], 1
    je      .done                       ; EOF was hit at some point
    mov     ebx, [input_buffer_end]     ; end marker for comparison
.check:
    cmp     esi, ebx                    ; have we hit end pos?
    jl      .continue                   ; no, keep going
    GET_INPUT_CODE                      ; yes, read more input
    jmp     .reset                      ; pick up the new pos/end
.continue:
    mov     al, [esi]                   ; byte at read position
    cmp     al, 0                       ; null terminator?
    je      .done
    cmp     al, 0x20                    ; space or below?
    jg      .done                       ; no, first token byte found
    inc     esi                         ; 'eat' it
    jmp     .check
.done:
    mov     [input_buffer_pos], esi     ; save read position
%endmacro
STARTDEF eat_spaces
    EAT_SPACES_CODE
ENDDEF eat_spaces, "eat_spaces", (IMMEDIATE | COMPILE)
383
; Gets a space-separated "token" of input.
; Pushes the address of token_buffer (holding the null-terminated
; token) or 0 if the next input byte already ends a token.
; token_buffer is 32 bytes: at most 31 characters are stored; any
; further characters of an over-long token are consumed from the
; input but dropped, so the buffer can never be overrun.
; NOTE: if EOF is hit while refilling the buffer, this jumps straight
; to .return and pushes nothing.
; NOTE: the labels are ordinary local labels, not macro-local, so
; this macro can be expanded only once per def.
; Clobbers eax, esi, edi.
%macro GET_TOKEN_CODE 0
    mov     esi, [input_buffer_pos]     ; input source index
    mov     edi, token_buffer           ; destination index
.get_char:
    cmp     esi, [input_buffer_end]     ; need to get more input?
    jl      .skip_read                  ; no, keep going
    GET_INPUT_CODE                      ; yes, get some
    cmp     dword [input_eof], 1
    je      .return                     ; we hit eof, we're done
    mov     esi, [input_buffer_pos]     ; reset source index
.skip_read:
    mov     al, [esi]                   ; next input byte
    cmp     al, 0x20                    ; end of token (space or lower)?
    jle     .end_of_token               ; yes
    inc     esi                         ; consume the input byte
    cmp     edi, token_buffer + 31      ; only room left for the null?
    jae     .get_char                   ; yes, drop this character
    mov     byte [edi], al              ; write character
    inc     edi                         ; next destination
    jmp     .get_char
.end_of_token:
    cmp     edi, token_buffer           ; did we write anything?
    jg      .return_token               ; yes, push the token addr
    push    dword 0                     ; no, push 0 ("no token")
    jmp     .return
.return_token:
    mov     [input_buffer_pos], esi     ; save position
    mov     byte [edi], 0               ; null-terminate token str
    push    dword token_buffer          ; return str address
.return:
%endmacro
STARTDEF get_token
    GET_TOKEN_CODE
ENDDEF get_token, "get_token", (IMMEDIATE)
422
; Copy null-terminated string, terminator included.
; Pops the destination first, then the source. No length limit is
; applied. Clobbers eax, ecx, esi, edi.
%macro COPYSTR_CODE 0
    pop     edi                 ; dest
    pop     esi                 ; source
    mov     ecx, 0              ; index into both strings
%%next_byte:
    mov     al, [esi + ecx]     ; from source
    mov     [edi + ecx], al     ; to dest
    inc     ecx
    cmp     al, 0               ; was that the terminator?
    jnz     %%next_byte         ; no, keep copying
%endmacro
STARTDEF copystr ; (sourceaddr, destaddr) copystr ()
    COPYSTR_CODE
ENDDEF copystr, "copystr", (IMMEDIATE | COMPILE)
438
; Begins a new definition: switches to COMPILE mode, copies the next
; token into name_buffer as the new def's name and leaves the current
; [here] on the stack as the start address of the def's code.
STARTDEF def
    mov     dword [mode], COMPILE
    ; Fetch the name from the next token and keep a copy of it...
    EAT_SPACES_CODE
    GET_TOKEN_CODE              ; leaves source addr for copystr
    push    name_buffer         ; dest
    COPYSTR_CODE                ; token -> name_buffer
    ; ...then remember where the def's machine code begins.
    mov     eax, [here]
    push    eax                 ; start-of-def address stays on the stack
ENDDEF def, "def", (IMMEDIATE)
451
; A def containing nothing but RETURN_CODE. It exists so that the
; semicolon (;) def can inline it at the end of new definitions.
STARTDEF return
    RETURN_CODE
ENDDEF return, "return", (IMMEDIATE)
457
; Does what the ENDDEF macro does, but into memory at runtime.
; Expects the def's start address on the stack (left there by 'def')
; and the def's name in name_buffer. Writes the return code and the
; tail at [here], links the tail into the list via [last], advances
; [here] past the tail and switches back to IMMEDIATE mode.
%macro SEMICOLON_CODE 0
    ; --- End of machine code ---
    ; 'here' currently points to the end of the new def's machine
    ; code. Remember it for the length calculation.
    mov     eax, [here]
    push    eax                 ; end-of-code address

    ; --- Return code ---
    ; Inline 'return' before the tail so the new def can be called
    ; in immediate mode.
    ; (Future improvement: Don't include this if this is
    ; not an immediate-capable def!)
    push    tail_return         ; what to inline
    INLINE_CODE                 ; advances 'here' again

    ; --- Tail: link field ---
    mov     eax, [here]         ; tail starts at the new 'here'
    mov     ecx, eax            ; second copy for the offset calc
    mov     ebx, [last]         ; previous tail
    mov     [eax], ebx          ; link to it
    mov     [last], eax         ; this tail is the new 'last'
    add     eax, 4              ; next field

    ; --- Tail: length of machine code ---
    pop     ebx                 ; end-of-code address pushed above
    pop     edx                 ; start-of-code address pushed by 'def'
    sub     ebx, edx            ; length = end - start
    mov     [eax], ebx
    add     eax, 4              ; next field

    ; --- Tail: distance from tail to start of machine code ---
    sub     ecx, edx            ; tail - start of code
    mov     [eax], ecx
    add     eax, 4              ; next field

    ; --- Tail: flags ---
    ; NOTE: Temporarily hard-coded value!
    mov     dword [eax], (IMMEDIATE | COMPILE)
    add     eax, 4              ; next field
    push    eax                 ; keep address of the name field

    ; --- Tail: name string ---
    push    name_buffer         ; source
    push    eax                 ; destination
    COPYSTR_CODE                ; copy name into tail
    ; Measure the name so we know how far the tail extends:
    push    name_buffer
    STRLEN_CODE
    pop     ebx                 ; name length
    pop     eax                 ; address of the name field
    add     eax, ebx            ; skip the characters
    inc     eax                 ; plus one for the null

    ; --- Done ---
    mov     [here], eax         ; 'here' now follows the tail
    mov     dword [mode], IMMEDIATE ; back to immediate mode
%endmacro
STARTDEF semicolon
    SEMICOLON_CODE
ENDDEF semicolon, ";", (COMPILE | RUNCOMP)
519
; Takes an addr and number from stack, writes string
; representation (not null-terminated) of number to the
; address and returns number of bytes (characters) written.
; The number is treated as unsigned and converted using the base in
; [var_radix]; digits above 9 are written as 'a', 'b', ...
; Exactly the returned number of bytes is written at the address.
; Clobbers eax, ebx, ecx, edx, ebp.
; NOTE(review): a radix of 0 faults in 'div' and a radix of 1 never
; terminates - callers are presumably expected to keep radix >= 2.
%macro NUM2STR_CODE 0
    pop     ebp                 ; address of string destination
    pop     eax                 ; number
    mov     ecx, 0              ; counter of digit characters
    mov     ebx, [var_radix]
%%divide_next:
    mov     edx, 0              ; div divides edx:eax, so clear the top half
    div     ebx                 ; eax = quotient, edx = remainder (digit)
    cmp     edx, 9              ; digit bigger than 9? (radix allows a-z)
    jg      %%toalpha           ; yes, convert to 'a'-'z'
    add     edx, '0'            ; no, convert to '0'-'9'
    jmp     %%store_char
%%toalpha:
    add     edx, ('a'-10)       ; to convert 10 to 'a'
%%store_char:
    push    edx                 ; digits come out least-significant first;
    inc     ecx                 ; the stack is used to reverse them
    cmp     eax, 0              ; are we done converting?
    jne     %%divide_next       ; no, loop
    mov     eax, ecx            ; yes, digit count is the return value
    mov     ecx, 0              ; now we'll count up
%%store_next:
    pop     edx                 ; popping to reverse order
    mov     byte [ebp + ecx], dl ; store ONE byte per character
    inc     ecx
    cmp     ecx, eax            ; are we done storing?
    jl      %%store_next
    push    eax                 ; return num chars written
%endmacro
STARTDEF num2str ; (num addr -- bytes_written)
    NUM2STR_CODE
ENDDEF num2str, "num2str", (IMMEDIATE | COMPILE)
555
; To be called when we're in a quoted string (a start quote
; was found). Puts string's address on the stack.
;
; Immediate mode: the string is copied to [free], its address is
; pushed and [free] is advanced past the string.
; Compile mode: the string is copied into the def being compiled at
; [here], preceded by a 5-byte 'CALL rel32' whose displacement is the
; length of the string (terminator included). At runtime that CALL
; jumps over the string and, as a side effect, pushes the address of
; the byte after the CALL - which is the string's address.
;
; Escapes: '\n' inserts a newline (0x0a). A backslash before any
; other character inserts that character literally (so '\\', '\$'
; and '\"' give a backslash, a dollar sign and a double quote).
;
; NOTE(review): the character after a backslash is read without
; checking whether the input buffer needs a refill.
; NOTE(review): if EOF is hit before the closing quote, this jumps
; straight to .quote_done; in compile mode the saved opcode position
; is then still on the stack.
STARTDEF quote
    mov     esi, [input_buffer_pos]     ; string source
    inc     esi                         ; move past initial quote '"'

    ; In immediate mode, we'll write the string to free
    ; memory. In compile mode, we'll change edi to point
    ; to "here" where the def is being compiled instead.
    mov     edi, [free]

    cmp     dword [mode], COMPILE       ; compile mode?
    jne     .copy_char                  ; no, skip it

    ; Setup destination for "compiled" string
    mov     edi, [here]
    push    edi                         ; save position for 'call' opcode
    add     edi, 5                      ; leave space for 'call' opcode

    ; copy string (and handle escapes) to wherever edi points!
.copy_char:
    cmp     esi, [input_buffer_end]     ; need to get more input?
    jl      .skip_read                  ; no, keep going
    GET_INPUT_CODE                      ; yes, get some
    cmp     dword [input_eof], 1
    je      .quote_done                 ; we hit eof, we're done
    mov     esi, [input_buffer_pos]     ; reset source index
.skip_read:
    mov     al, [esi]                   ; get char from source
    cmp     al, '"'                     ; look for endquote
    je      .end_quote
    cmp     al, '\'                     ; escape sequence
    je      .insert_esc
    mov     [edi], al                   ; copy char to destination
    inc     esi                         ; next source char
    inc     edi                         ; next destination pos
    jmp     .copy_char                  ; loop
.insert_esc:
    ; The character after the backslash decides what is inserted.
    inc     esi
    mov     al, [esi]
    cmp     al, 'n'                     ; newline escape?
    jne     .store_escaped              ; no, insert the char itself
    mov     al, 0xa                     ; yes, insert a newline
.store_escaped:
    mov     [edi], al
    inc     esi
    inc     edi
    jmp     .copy_char
.end_quote:
    lea     eax, [esi + 1]              ; get next input position
    mov     [input_buffer_pos], eax     ; save it
    mov     [edi], byte 0               ; terminate str null
    ; Done copying string to either "free" memory or
    ; to the "here" compiled def area.

    ; Check again if we're in immediate or compile mode
    cmp     dword [mode], IMMEDIATE
    je      .finish_immediate

    ; do compile mode stuff
    inc     edi                         ; step past the terminator
    mov     [here], edi                 ; save new "here"
    pop     edx                         ; back to destination for opcode
    mov     byte [edx], 0xE8            ; i386 CALL relative
    sub     edi, edx                    ; opcode + string length
    sub     edi, 5                      ; and subtract the opcode len!
    mov     dword [edx+1], edi          ; is jump length
    jmp     .end_if

    ; else
.finish_immediate:
    ; do immediate stuff
    push    dword [free]                ; push string addr
    lea     eax, [edi + 1]              ; calc next "free" space
    mov     [free], eax                 ; save it

.end_if:
    EAT_SPACES_CODE                     ; advance to next token
.quote_done:
ENDDEF quote, 'quote', (IMMEDIATE | COMPILE)
650
; Attempts to parse num from string using radix.
; Doesn't handle negative sign. Leaves just 0
; (false) on stack if not successful.
; On success pushes the number and then 1 (true).
; Digits are '0'-'9', then 'A'-'Z' or 'a'-'z' for values 10 and up;
; every digit value must be strictly less than [var_radix]. An empty
; string is an error. Overflow of the 32-bit result is not detected.
; NOTE: the labels are ordinary local labels, not macro-local, so
; this macro can be expanded only once per def.
; Clobbers eax, ebx, ecx, edx, ebp.
%macro STR2NUM_CODE 0 ; (str_addr -- [num] success)
    pop     ebp                 ; address of input token
    mov     eax, 0              ; result
    mov     ebx, 0              ; char conversion (only bl is written below)
    mov     ecx, 0              ; char counter/pointer
    mov     edx, [var_radix]
.next_char:
    mov     bl, [ebp + ecx]     ; put char in bl
    cmp     bl, 0               ; null terminator?
    je      .return_num         ; yup, return value
    inc     ecx
    ; Multiply the current value by the radix to prepare for
    ; the next, less significant digit. If we're starting
    ; out, the current value is 0, which is no problem.
    imul    eax, edx
    cmp     bl, '0'             ; ASCII less than '0' is invalid
    jl      .error
    cmp     bl, '9'             ; is it '0'-'9'?
    jg      .try_upper          ; no, try 'A'-'Z'
    sub     bl, '0'             ; yes, convert ASCII '0' to 0
    jmp     .add_value
.try_upper:
    cmp     bl, 'A'             ; between '9' and 'A' is invalid
    jl      .error
    cmp     bl, 'Z'
    jg      .try_lower
    sub     bl, ('A'-10)        ; it's uppercase, convert 'A' to 10
    jmp     .add_value
.try_lower:
    cmp     bl, 'a'             ; between 'Z' and 'a' is invalid
    jl      .error
    cmp     bl, 'z'
    jg      .error
    sub     bl, ('a'-10)        ; it's lowercase, convert 'a' to 10
.add_value:
    ; A valid digit is 0 .. radix-1. The upper bits of ebx are still
    ; zero, so the full registers can be compared (unsigned).
    cmp     ebx, edx
    jae     .error              ; digit >= radix
    add     eax, ebx            ; add the digit's value
    jmp     .next_char          ; loop
.error:
    push    0                   ; failure code (false)
    jmp     .str2num_done
.return_num:
    cmp     ecx, 0              ; did we actually get any chars?
    je      .error              ; no, empty token string! error
    push    eax                 ; push number
    push    1                   ; success (true)
.str2num_done:
%endmacro
STARTDEF str2num ; (str_addr -- [num] success)
    STR2NUM_CODE
ENDDEF str2num, 'str2num', (IMMEDIATE | COMPILE)
706
; Sets the number base used by num2str/str2num. ( radix -- )
%macro RADIX_CODE 0
    pop     eax
    mov     [var_radix], eax
%endmacro
STARTDEF radix
    RADIX_CODE
ENDDEF radix, 'radix', (IMMEDIATE | COMPILE)

; Shortcuts that select a fixed number base. ( -- )
STARTDEF hex
    mov     dword [var_radix], 16
ENDDEF hex, 'hex', (IMMEDIATE | COMPILE)

STARTDEF oct
    mov     dword [var_radix], 8
ENDDEF oct, 'oct', (IMMEDIATE | COMPILE)

STARTDEF bin
    mov     dword [var_radix], 2
ENDDEF bin, 'bin', (IMMEDIATE | COMPILE)

STARTDEF decimal
    mov     dword [var_radix], 10
ENDDEF decimal, 'decimal', (IMMEDIATE | COMPILE)
726
; Reads the next token and parses it as a number in the current radix.
; Immediate mode: the number is left on the stack.
; Compile mode: a 'PUSH imm32' of the number is written at [here].
; A token that cannot be parsed is a fatal error: a message is
; printed and the process exits with status 1.
STARTDEF number
    GET_TOKEN_CODE
    STR2NUM_CODE
    pop     eax                     ; success flag from str2num
    cmp     eax, 0                  ; did it fail?
    je      .invalid_number
    cmp     dword [mode], COMPILE
    je      .compile_number
    ; We got number in IMMEDIATE mode, so just keep the
    ; value on the stack and keep going!
    jmp     .done
.compile_number:
    ; like 'quote' and 'var', this writes a raw x86
    ; opcode to push an immediate value on the stack
    ; at runtime
    pop     eax                     ; get number from stack
    mov     edx, [here]             ; compile the push here
    mov     byte [edx], 0x68        ; i386 opcode for PUSH imm32
    mov     dword [edx + 1], eax    ; the number literal
    add     edx, 5                  ; opcode + imm32
    mov     [here], edx
    jmp     .done
.invalid_number:
    ; The token could not be parsed as a number. We're
    ; defining that as a fatal error.
    PRINTSTR 'Error parsing "'
    push    token_buffer
    CALLDEF print
    PRINTSTR `" as a number.\n`
    ; EXIT_CODE pops its status from the stack, so supply one
    ; explicitly instead of exiting with whatever happens to be there.
    push    dword 1
    EXIT_CODE
.done:
ENDDEF number, 'number', (IMMEDIATE | COMPILE)
761
; Prints the number on top of the stack in the current radix.
; ( num -- )
; The text is built at [free], which is used as temporary scratch
; space only ([free] itself is not advanced).
%macro PRINTNUM_CODE 0
    mov     eax, [free]         ; scratch destination for the digits
    push    eax                 ; addr param for num2str
    NUM2STR_CODE                ; leaves length of string
    pop     ebx                 ; ebx = length
    mov     eax, [free]
    push    eax                 ; addr
    push    ebx                 ; len
    LEN_PRINT_CODE
%endmacro
STARTDEF printnum
    PRINTNUM_CODE
ENDDEF printnum, 'printnum', (IMMEDIATE | COMPILE)
777
; Prints a null-terminated string, replacing every '$' in it with a
; number popped from the stack (printed in the current radix).
; The string address is popped first; one number is popped each time
; a '$' is reached.
; NOTE: the labels are ordinary local labels, not macro-local, so
; this macro can be expanded only once per def.
%macro PRINT_FMT_CODE 0
    pop     esi                 ; start of the segment being scanned
    mov     ecx, 0              ; length of the segment so far
.examine_char:
    mov     al, [esi + ecx]     ; get next char
    cmp     al, '$'
    je      .print_num
    cmp     al, 0               ; regular end of string!
    je      .print_the_rest
    inc     ecx                 ; neither, keep going
    jmp     .examine_char
.print_num:
    ; Print the segment before the '$', then the number.
    pop     eax                 ; number to print
    push    esi                 ; segment addr (saved copy)
    push    ecx                 ; segment len  (saved copy)
    push    eax                 ; num for PRINTNUM_CODE
    push    esi                 ; addr for LEN_PRINT_CODE
    push    ecx                 ; len  for LEN_PRINT_CODE
    LEN_PRINT_CODE
    PRINTNUM_CODE               ; print number from stack
    pop     ecx                 ; restore segment len
    pop     esi                 ; restore segment addr
    ; Continue scanning just *after* the '$' placeholder.
    lea     esi, [esi + ecx + 1]
    mov     ecx, 0
    jmp     .examine_char
.print_the_rest:
    ; What remains is a plain null-terminated string.
    push    esi                 ; print just needs start address
    PRINT_CODE
%endmacro
STARTDEF print_fmt
    PRINT_FMT_CODE
ENDDEF print_fmt, 'print$', (IMMEDIATE | COMPILE)
815
; Like 'print$', but prints a newline afterwards. The newline byte is
; staged at [free] (used as scratch; [free] is not advanced).
STARTDEF say
    PRINT_FMT_CODE
    mov     eax, [free]
    mov     byte [eax], 0x0a    ; '\n'
    push    eax                 ; addr of string
    push    1                   ; length to print
    LEN_PRINT_CODE
ENDDEF say, 'say', (IMMEDIATE | COMPILE)
824
; Given a mode/flags dword on the stack, prints the name of every
; flag that is set in it (IMMEDIATE, COMPILE, RUNCOMP - in that
; order, each followed by a space). ( flags -- )
; Clobbers eax, ebx.
%macro PRINTMODE_CODE 0
    pop     eax                 ; get mode dword
    mov     ebx, eax
    and     ebx, IMMEDIATE
    jz      %%check_compile
    push    eax                 ; save
    PRINTSTR 'IMMEDIATE '
    pop     eax                 ; restore
%%check_compile:
    mov     ebx, eax
    and     ebx, COMPILE
    jz      %%check_runcomp
    push    eax                 ; save
    PRINTSTR 'COMPILE '
    pop     eax                 ; restore
%%check_runcomp:
    mov     ebx, eax
    and     ebx, RUNCOMP
    jz      %%finished
    push    eax                 ; save
    PRINTSTR 'RUNCOMP '
    pop     eax                 ; restore
%%finished:
%endmacro
STARTDEF printmode
    PRINTMODE_CODE
ENDDEF printmode, 'printmode', (IMMEDIATE | COMPILE)
854
; Prints the dwords between the address in [stack_start] and the
; current esp (both ends included), starting at [stack_start] and
; walking towards esp. Each value is printed in the current radix
; followed by a space; a newline ends the line. The stack itself is
; not changed.
%macro PRINTSTACK_CODE 0
    mov     ecx, [stack_start]
    sub     ecx, esp            ; byte offset of stack_start above esp
%%next_dword:
    cmp     ecx, 0              ; offset gone below esp?
    jl      %%finished          ; yup, done
    mov     eax, [esp + ecx]    ; no, fetch this value
    push    ecx                 ; preserve offset
    push    eax                 ; param for PRINTNUM_CODE
    PRINTNUM_CODE
    PRINTSTR " "
    pop     ecx                 ; restore offset
    sub     ecx, 4              ; step one dword towards esp
    jmp     %%next_dword
%%finished:
    PRINTSTR `\n`
%endmacro
STARTDEF printstack
    PRINTSTACK_CODE
ENDDEF printstack, 'ps', (IMMEDIATE | COMPILE)
875
; Reads the next token, looks up the def with that name and prints
; its meta-info (from the tail) followed by a hex dump of the def's
; machine code.
; Example: inspect foo
; Output: "<name>: <code length> bytes <modes>", then the code bytes
; in hex. The code length is printed in the current radix.
; If no def with that name is visible in the current mode, a short
; message is printed instead.
; NOTE(review): a missing token (get_token pushing 0 or, at EOF,
; nothing) is not handled here.
%macro INSPECT_CODE 0
    EAT_SPACES_CODE
    GET_TOKEN_CODE              ; get address of next token's string
    FIND_CODE                   ; get tail of def matching token (or 0)
    pop     esi                 ; get tail addr
    cmp     esi, 0              ; did find come up empty?
    jne     %%found
    PRINTSTR `inspect: def not found\n`
    jmp     %%finished
%%found:
    ; name
    lea     eax, [esi + T_NAME]
    push    esi                 ; preserve tail addr
    push    eax
    PRINT_CODE
    PRINTSTR ": "
    ; length of machine code
    pop     esi                 ; restore tail addr
    mov     eax, [esi + T_CODE_LEN]
    push    esi                 ; preserve tail addr
    push    eax                 ; param 1: num to be stringified
    PRINTNUM_CODE
    PRINTSTR " bytes "
    ; flags
    pop     esi                 ; restore tail
    mov     eax, [esi + T_FLAGS]
    push    esi                 ; preserve tail addr
    push    eax
    PRINTMODE_CODE
    PRINTSTR `\n `
    ; Now do a hex dump of the machine code for this def!
    pop     esi                 ; get tail addr
    mov     ecx, [esi + T_CODE_LEN]     ; len of code
    mov     eax, [esi + T_CODE_OFFSET]  ; offset of code start
    sub     esi, eax            ; esi is now start addr of code
    add     ecx, esi            ; ecx is now end addr of code
    mov     dword ebx, [var_radix]      ; save current radix
    mov     dword [var_radix], 16       ; set to hex
    push    ebx
%%byte_loop:
    cmp     ecx, esi            ; end addr reached?
    je      %%done              ; yup
    ; Zero-extend the byte: the whole of eax is printed, so stale
    ; upper bits would corrupt the dumped value.
    movzx   eax, byte [esi]
    push    ecx                 ; preserve end addr
    push    eax                 ; print this value
    PRINTNUM_CODE
    PRINTSTR " "
    pop     ecx                 ; restore end addr
    inc     esi                 ; move to next addr
    jmp     %%byte_loop
%%done:
    PRINTSTR `\n`
    pop     ebx
    mov     dword [var_radix], ebx      ; restore previous radix
%%finished:
%endmacro
STARTDEF inspect
    INSPECT_CODE
ENDDEF inspect, 'inspect', (IMMEDIATE)
931
; Print all def names, newest first, separated by spaces and followed
; by a newline. Walks the linked list of tails starting at [last]
; until a null link is reached.
STARTDEF all_names
    mov     esi, [last]             ; tail addr of last def defined
.next_name:
    lea     eax, [esi + T_NAME]     ; this def's name
    mov     esi, [esi]              ; step to the previous tail now...
    push    esi                     ; ...and keep it safe while printing
    push    eax                     ; name for print
    PRINT_CODE
    PRINTSTR " "                    ; space between names
    pop     esi                     ; previous tail
    cmp     esi, 0                  ; end of list?
    jne     .next_name              ; nope, keep going!
    PRINTSTR `\n`
ENDDEF all_names, 'all', (IMMEDIATE)
947
; ( a b -- a+b )
STARTDEF add
    pop     eax                 ; b
    pop     ebx                 ; a
    add     eax, ebx
    push    eax                 ; sum
ENDDEF add, '+', (IMMEDIATE | COMPILE)
954
; ( a b -- a-b )
STARTDEF sub
    pop     ebx                 ; b (top of stack)
    pop     eax                 ; a
    sub     eax, ebx
    push    eax                 ; difference
ENDDEF sub, '-', (IMMEDIATE | COMPILE)
961
; ( a b -- a*b )   Only the low 32 bits of the product are kept.
STARTDEF mul
    pop     eax                 ; b
    pop     ebx                 ; a
    imul    eax, ebx
    push    eax                 ; product
ENDDEF mul, '*', (IMMEDIATE | COMPILE)
968
; Signed division. ( a b -- remainder quotient )
; The quotient ends up on top of the stack, the remainder below it.
; NOTE: a divisor of 0 raises the CPU's divide error.
STARTDEF div
    pop     ebx                 ; b: divisor (top of stack)
    pop     eax                 ; a: dividend
    cdq                         ; sign-extend eax into edx:eax, as idiv
                                ; requires (a zeroed edx gives wrong
                                ; results for negative dividends)
    idiv    ebx
    push    edx                 ; remainder
    push    eax                 ; answer (quotient)
ENDDEF div, '/', (IMMEDIATE | COMPILE)
977
; ( n -- n+1 )
STARTDEF inc
    pop     ecx
    inc     ecx
    push    ecx
ENDDEF inc, 'inc', (IMMEDIATE | COMPILE)
983
; ( n -- n-1 )
STARTDEF dec
    pop     ecx
    dec     ecx
    push    ecx
ENDDEF dec, 'dec', (IMMEDIATE | COMPILE)
989
; Discards the top stack value. ( x -- )   Clobbers eax.
STARTDEF popstack
    pop     eax
ENDDEF popstack, 'pop', (IMMEDIATE | COMPILE)
993
; Duplicates the top stack value. ( x -- x x )   Clobbers eax.
STARTDEF dupstack
    pop     eax
    push    eax                 ; original
    push    eax                 ; copy
ENDDEF dupstack, 'dup', (IMMEDIATE | COMPILE)
999
; Immediate mode 'if' (?): pops the "condition" from the stack and,
; if it is false (0), reads the following token and throws it away
; so that it never runs. A true (non-zero) condition does nothing.
STARTDEF if
    pop     eax                 ; condition
    test    eax, eax            ; zero?
    jnz     .continue_for_true  ; non-zero: let the next token run
    EAT_SPACES_CODE
    GET_TOKEN_CODE
    pop     eax                 ; throw away token!
.continue_for_true:
ENDDEF if, '?', (IMMEDIATE)
1011
; Compile mode 'if' (?): makes the next def conditional.
; Reads the next token, finds its def and writes at [here]:
;     pop eax / test eax, eax / jz <over the def>    (9 bytes)
; followed by the def's inlined code. The jz displacement is the
; def's code length, so a false (0) condition skips exactly the
; inlined code at runtime.
; NOTE(review): the result of the lookup is not checked - the token
; MUST name a def that 'find' can see.
STARTDEF if_compiled
    EAT_SPACES_CODE
    GET_TOKEN_CODE                  ; MUST be a def
    FIND_CODE                       ; get tail of def matching token
    pop     esi                     ; tail of def to inline
    mov     eax, [esi + T_CODE_LEN] ; length of its code
    push    esi                     ; tail goes back for INLINE_CODE
    mov     edx, [here]             ; compile code here
    mov     byte [edx],   0x58      ; i386 opcode: pop eax
    mov     byte [edx+1], 0x85      ; i386 opcode: test eax, eax
    mov     byte [edx+2], 0xc0      ;   (ModRM byte)
    mov     byte [edx+3], 0x0f      ; i386 opcode: jz rel32
    mov     byte [edx+4], 0x84      ;   (second opcode byte)
    mov     dword [edx+5], eax      ; displacement = length of def's code
    add     edx, 9
    mov     [here], edx
    INLINE_CODE
ENDDEF if_compiled, '?', (COMPILE | RUNCOMP)
1033
; Compile mode "loop" keeps replaying a def so long as the
; top of the stack contains "true" (non-zero). Emitted layout:
;
;   start: pop eax / push eax / test eax, eax / jz end  (LEN_BEFORE)
;          ...inlined def...
;          jmp start                                    (LEN_AFTER)
;   end:
;
;   A = len(def) + LEN_AFTER     forward distance for the jz
;   B = A + LEN_BEFORE           backward distance for the jmp
;
; If the token that follows is not a known def, an error is
; printed and nothing is compiled.
STARTDEF loop_compiled
%assign LEN_BEFORE 10           ; pop + push + test + jz rel32
%assign LEN_AFTER 5             ; jmp rel32
    EAT_SPACES_CODE
    GET_TOKEN_CODE              ; MUST be a def
    FIND_CODE                   ; get tail of def matching token
    pop     esi                 ; tail of def to inline (0 = not found)
    test    esi, esi
    jz      .loop_def_not_found ; never read a length through a null tail
    mov     eax, [esi + T_CODE_LEN] ; get len of code
    add     eax, LEN_AFTER      ; add length of unconditional jump at end
    push    eax                 ; preserve distance A for the end
    push    esi                 ; put tail back on stack for inline
    ; Compile in the conditional jump over the def
    ; (Note that unlike "?" (if), it leaves test value on stack!)
    mov     edx, [here]         ; compile code here
    mov     byte [edx], 0x58    ; i386 opcode: pop eax
    mov     byte [edx+1], 0x50  ; i386 opcode: push eax
    mov     byte [edx+2], 0x85  ; i386 opcode: test eax, eax
    mov     byte [edx+3], 0xc0  ;   (continued)
    mov     byte [edx+4], 0x0f  ; i386 opcode: jz rel32
    mov     byte [edx+5], 0x84  ;   (continued)
    mov     dword [edx+6], eax  ; distance to jump (A)
    add     edx, LEN_BEFORE
    mov     [here], edx
    INLINE_CODE
    ; Compile in the unconditional jump to the start
    pop     eax                 ; retrieve length of def plus "after" (A)
    add     eax, LEN_BEFORE     ; now add the "before" len (B)
    neg     eax                 ; negate the jump (go back!)
    mov     edx, [here]         ; compile the jmp here
    mov     byte [edx], 0xe9    ; i386 opcode: jmp rel32
    mov     dword [edx+1], eax  ; distance to jump
    add     edx, LEN_AFTER
    mov     [here], edx
    jmp     .loop_done
.loop_def_not_found:
    PRINTSTR 'Could not find def "'
    push    token_buffer        ; the token we just failed to look up
    PRINT_CODE
    PRINTSTR `" for "loop?".\n`
.loop_done:
ENDDEF loop_compiled, 'loop?', (COMPILE | RUNCOMP)
1076
; 'var' claims the next 4 bytes of the data area and compiles a
; new def, named by the following token, whose code is a single
; "push imm32" of that address. Calling the new def therefore
; leaves the ADDRESS of the variable on the stack.
STARTDEF var
    mov     dword [mode], COMPILE
    ; The next token is the variable's name; keep a copy of it.
    EAT_SPACES_CODE
    GET_TOKEN_CODE
    push    name_buffer         ; destination for the copy
    COPYSTR_CODE
    ; Emit the 5-byte "push <address>" at the compile position.
    mov     edx, [here]
    push    edx                 ; start of the new def, for semicolon
    mov     eax, [free]         ; address the variable will live at
    mov     byte [edx], 0x68    ; i386 opcode: push imm32
    mov     dword [edx + 1], eax
    ; Reserve the variable's 4 bytes.
    add     eax, 4
    mov     [free], eax
    ; Step the compile position past the emitted instruction.
    add     edx, 5
    mov     [here], edx
    SEMICOLON_CODE
ENDDEF var, 'var', (IMMEDIATE | COMPILE)
1098
STARTDEF setvar
    ; Store a value at an address: the address is on top of the
    ; stack and the value is beneath it. Both are consumed.
    pop     edi                 ; destination address
    pop     dword [edi]         ; pop the value straight into memory
ENDDEF setvar, 'set', (IMMEDIATE | COMPILE)
1104
STARTDEF getvar
    ; Replace the address on top of the stack with the 4-byte
    ; value stored at that address.
    pop     eax                 ; address to read from
    push    dword [eax]         ; push the value found there
ENDDEF getvar, 'get', (IMMEDIATE | COMPILE)
1110
1111
1112 ; *******************************************
1113 ; * ELF HEADER! *
1114 ; *******************************************
1115 ; This is defined in meow5's program data. It
1116 ; gets filled in with the correct values for
1117 ; whatever def we're "compiling" via:
1118 ;
1119 ; elf <def name>
1120 ;
section .data
%assign elf_va 0x08048000       ; virtual address the file is loaded at
elf_header:
    ; ------------------------------------------------
    ; ELF identification (16 bytes)
    db 0x7F, 'E', 'L', 'F'      ; magic number
    db 1                        ; file class: 1 = 32 bit
    db 1                        ; data encoding: 1 = LSB (x86 little endian)
    db 1                        ; file version: 1 = current
    times 9 db 0                ; padding (fills out the 16 bytes)
    ; ------------------------------------------------
    ; Remaining fields of the ELF file header
    dw 2                        ; type    - 2 = executable file
    dw 3                        ; machine - 3 = Intel 80386
    dd 1                        ; version - 1 = current
    ; entry - execution start address.
    ; The one program header below loads the file starting at file
    ; offset 0 to address elf_va, so these headers occupy the first
    ; elf_size bytes in memory and the def's machine code begins
    ; right after them. (Pointing the entry at elf_va itself and
    ; loading only the code that follows the header always
    ; segfaulted; Linux mmaps the file a whole page at a time, so
    ; we load the headers too rather than fight it.)
    dd elf_va + elf_size
    dd phdr1 - elf_header       ; phoff     - bytes to program header
    dd 0                        ; shoff     - 0, no section headers
    dd 0                        ; flags     - processor-specific flags
    dw hdr_size                 ; ehsize    - size of this header, see below
    dw phdr_size                ; phentsize - program header size
    dw 1                        ; phnum     - program header count
    dw 0                        ; shentsize - section header size (none)
    dw 0                        ; shnum     - section header count
    dw 0                        ; shstrndx  - section name table index (none)
hdr_size equ $ - elf_header     ; size of the ELF file header alone

    ; ------------------------------------------------
    ; Program header 1 - the one and only.
phdr1:
    dd 1                        ; type: 1 = PT_LOAD
    dd 0                        ; offset in file of the data to load
    dd elf_va                   ; target virtual address
    dd elf_va                   ; target physical address, same
prog_bytes1:
    dd 0                        ; bytes to load from file (placeholder)
prog_bytes2:
    dd 0                        ; bytes of memory to allocate (placeholder)
    dd 4 | 2 | 1                ; flags: 4=read, 2=write, 1=exec (7=RWX)
    dd 0                        ; memory alignment
    ; ------------------------------------------------

; Size of one program header. All program headers are the same
; size, and there is only one, so this is computed just once.
phdr_size equ $ - phdr1

; Size of everything above (file header + program header); used
; for the entry address.
elf_size equ $ - elf_header

temp_test_string: db 'HELLO WORLD' ; 11 bytes test
1183
1184 section .text
1185
; elf <def name>
; Writes the named def's machine code, preceded by elf_header, to
; a new executable file named after the def. Prints an error and
; writes nothing if the def is unknown or the file cannot be
; opened. (Results of the write and close calls are not checked.)
STARTDEF elf
    EAT_SPACES_CODE
    GET_TOKEN_CODE              ; get address of next token's string
    FIND_CODE                   ; get tail of def matching token
    pop     esi                 ; tail addr (0 = not found); kept in esi throughout
    test    esi, esi
    jz      .elf_def_not_found  ; never read a length through a null tail
    mov     eax, [esi + T_CODE_LEN] ; get len of code

    ; Overwrite the placeholder program size values in the
    ; ELF program header with this def's code size.
    add     eax, elf_size
    mov     [prog_bytes1], eax  ; file
    mov     [prog_bytes2], eax  ; memory

    ; We can create and open at the same time!
    ; From open(2) man page:
    ;   "A call to creat() is equivalent to calling open()
    ;   with flags equal to O_CREAT|O_WRONLY|O_TRUNC."
    ; The flag values come from /usr/include/asm-generic/fcntl.h
    ; and are OCTAL (not binary):
    ;   #define O_CREAT   00000100
    ;   #define O_WRONLY  00000001
    ;   #define O_TRUNC   00001000
    mov     ecx, (0100o | 0001o | 1000o)
    ; ebx contains null-terminated def name (FIND_CODE)
    mov     edx, 755o           ; mode (permissions)
    mov     eax, SYS_OPEN
    int     80h                 ; eax = new file desc., or negative on error
    push    ebx                 ; SAVE file name for the message at the end
    test    eax, eax
    js      .elf_open_failed    ; negative return value: open failed
    ; Write ELF header
    mov     edx, elf_size       ; bytes to write
    mov     ecx, elf_header     ; source address
    mov     ebx, eax            ; the fd for writing (opened/created above)
    mov     eax, SYS_WRITE
    int     80h
    ; Write def (executable program segment)
    mov     edx, [esi + T_CODE_LEN]     ; bytes to write
    mov     eax, [esi + T_CODE_OFFSET]  ; for source addr
    mov     ecx, esi            ; tail addr
    sub     ecx, eax            ; source addr (code offset from tail)
    mov     eax, SYS_WRITE      ; ebx still has fd
    int     80h
    ; Write our memory (for data program segment)
    ; mov edx, 11 ; bytes to write
    ; mov ecx, temp_test_string ; for source addr
    ; mov eax, SYS_WRITE ; ebx still has fd
    ; int 80h
    ; Close file (ebx still has fd)
    mov     eax, SYS_CLOSE
    int     80h
    ; File name pushed above. Display message about
    ; having written file.
    PRINTSTR 'Wrote to "'
    PRINT_CODE                  ; print fname from stack (see "SAVE" above)
    PRINTSTR `".\n`
    jmp     .elf_done
.elf_open_failed:
    PRINTSTR 'Could not open "'
    PRINT_CODE                  ; print fname from stack (see "SAVE" above)
    PRINTSTR `" for writing.\n`
    jmp     .elf_done
.elf_def_not_found:
    PRINTSTR 'Could not find def "'
    push    token_buffer        ; the token we just failed to look up
    PRINT_CODE
    PRINTSTR `" to write as ELF.\n`
.elf_done:
ENDDEF elf, 'elf', (IMMEDIATE)
1245
1246 ; ----------------------------------------------------------
1247 ; PROGRAM START!
1248 ; ----------------------------------------------------------
global _start
_start:
    cld                         ; string instructions count upward

    ; Remember where the stack begins so it can be referenced
    ; later (such as printing the contents of the stack). We
    ; record esp - 4, the *next* slot, since that is the first
    ; position a value will be pushed to.
    mov     eax, esp
    sub     eax, 4
    mov     [stack_start], eax

    ; Interpreter state: begin in immediate mode (defs execute as
    ; soon as they are read), and parse and print numbers as
    ; decimals.
    mov     dword [mode], IMMEDIATE
    mov     dword [var_radix], 10

    ; Memory pointers: 'here' is where the next def gets inlined
    ; ("compiled"); 'free' is the next unused spot in the data
    ; area, where variables and other non-stack data go.
    mov     dword [here], compile_area
    mov     dword [free], data_area

    ; Dictionary searches start from the tail of the last def.
    mov     dword [last], LAST_DEF_TAIL

    ; Input: read from STDIN by default (change input_file to read
    ; from another source). Setting both the read position and the
    ; end marker to the start of the buffer signals that there is
    ; no input yet; 'eat_spaces' will see that and read some.
    mov     dword [input_file], STDIN
    mov     dword [input_buffer_pos], input_buffer
    mov     dword [input_buffer_end], input_buffer
    mov     dword [input_eof], 0 ; EOF flag: not reached
1290
1291
1292 ; ----------------------------------------------------------
1293 ; Interpreter!
1294 ; ----------------------------------------------------------
; Main interpreter loop. Each pass skips whitespace, peeks at the
; next input character, and dispatches: a '"' starts a string, a
; decimal digit starts a number, and anything else is looked up
; as a def name. A found def is executed (immediate mode, or a
; RUNCOMP def in compile mode) or inlined (compile mode).
get_next_token:
    ; NOTE(review): nothing visible below reads this value before
    ; eax is overwritten - confirm eat_spaces does not need it.
    mov     eax, [input_eof]
    CALLDEF eat_spaces              ; skip whitespace
    cmp     dword [input_eof], 1    ; ran out of input?
    je      .end_of_input           ; yes: exit
    ; Peek at the next character to decide what kind of thing
    ; comes next.
    mov     esi, [input_buffer_pos]
    mov     al, [esi]               ; first char
.try_quote:
    cmp     al, '"'                 ; start of a string?
    jne     .try_num
    CALLDEF quote                   ; read the string, leaves its addr
    jmp     get_next_token
.try_num:
    ; A number starts with a character in '0'..'9'.
    cmp     al, '0'
    jb      .try_token              ; below '0': not a digit
    cmp     al, '9'
    ja      .try_token              ; above '9': not a digit
    CALLDEF number                  ; parse the number, leaves its value
    jmp     get_next_token
.try_token:
    CALLDEF get_token
    pop     eax                     ; get_token returns address or 0
    test    eax, eax
    jz      .end_of_input           ; all out of tokens!
    push    token_buffer            ; name for find to look up
    CALLDEF find                    ; returns tail addr, or 0
    pop     eax                     ; find's return value
    test    eax, eax
    jz      .token_not_found        ; 0: no def by that name
    push    eax                     ; found: put the tail back
    cmp     dword [mode], IMMEDIATE
    je      .exec_def
    ; We're in compile mode: RUNCOMP defs still get executed,
    ; everything else gets inlined.
    CALLDEF get_flags
    CALLDEF is_runcomp
    pop     eax                     ; non-zero means RUNCOMP
    test    eax, eax
    jnz     .exec_def
    CALLDEF inline                  ; "compile" it
    jmp     get_next_token
.exec_def:
    ; Run the found def right now. Its machine code starts
    ; T_CODE_OFFSET bytes before its tail.
    pop     ebx                     ; tail left on the stack by 'find'
    sub     ebx, [ebx + T_CODE_OFFSET] ; ebx = start of the def's code
    CALLDEF ebx                     ; call def with that addr (via reg)
    jmp     get_next_token
.end_of_input:
    push    0                       ; exit status
    CALLDEF exit
.token_not_found:
    ; Assemble the message piece by piece:
    ; "Could not find def "foo" while looking in <mode> mode."
    PRINTSTR 'Could not find def "'
    push    token_buffer
    CALLDEF print
    PRINTSTR '" while looking in '
    mov     eax, [mode]
    push    eax
    PRINTMODE_CODE
    PRINTSTR ` mode.\n`
    jmp     get_next_token