1 ; +----------------------------------------------------------------------------+
2 ; | Dave's NASM port of JONESFORTH |
3 ; +----------------------------------------------------------------------------+
4 ;
5 ; This port will have explanitory comments in my own words.
6 ;
7 ; For the full "JONESFORTH experience", read the original source files which
8 ; you should find in this repo at:
9 ;
10 ; jonesforth/jonesforth.S
11 ; jonesforth/jonesforth.f
12 ;
13 %assign NASMJF_VERSION 48 ; One more than JONESFORTH
14
15 ; Guide to register use:
16 ; esi - Next Forth word address pointer
17 ; ebp - Return stack pointer ("RSP")
18 ; esp - THE STACK (aka "parameter stack") pointer
19 ;
20 ; A Forth system is composed of words. Words are like functions: they contain
21 ; a series of instructions. Those instructions are actually just a list of
22 ; pointers to other words. So it's words all the way down.
23 ;
24 ; Well, actually, nothing really useful happens until you get to one of the
25 ; base words that is written in machine code. These "code words" are the low
26 ; level primitives that provide the "bootstrapping" fuctionality for all of
27 ; the other words built on top of them.
28 ;
29 ; Whether a word is a regular word or a code word, it has the same basic
30 ; structure, starting with a header:
31 ;
32 ; Here's one called "FOO":
33 ;
34 ; Word Header (For *all* word types)
35 ; +------------------+
36 ; <----0x8C5FCD8 | Pointer to the previous word
37 ; +------------------+
38 ; | 3 | Name length (3 for "FOO") & flags (none)
39 ; +------------------+
40 ; | 'F' |
41 ; +------------------+
42 ; | 'O' |
43 ; +------------------+
44 ; | 'O' |
45 ; +------------------+
46 ;
47 ; After the header, a code word looks like this:
48 ;
49 ; ... <--- Header ends here, word body begins
50 ; Code Word
51 ; +------------------+
52 ; +---0x80490A3 | Pointer to the machine code that follows!
53 ; | +------------------+
54 ; +-->D4 88 F8 02 | Machine code!
55 ; +------------------+
56 ; | A8 0F 98 C3 | More machine code...
57 ; +------------------+
58 ; ...
59 ; +------------------+
60 ; | NEXT |
61 ; +------------------+
62 ;
63 ;
64 ; Which seems weird and pointless (why not just start executing the machine
65 ; code directly?) until we look at a regular word that is not a code word
66 ; made of machine code. A regular word looks like this:
67 ;
68 ; ... <--- Header ends here, word body begins
69 ; Regular Word
70 ; +------------------+
71 ; <----0x80490A3 | Pointer to special *interpreter code word*
72 ; +------------------+
73 ; <----0x804A2F0 | Address of _another_ word's code word
74 ; +------------------+
75 ; <----0x804A2F0 | And another...
76 ; +------------------+
77 ; ...
78 ; +------------------+
79 ; | EXIT |
80 ; +------------------+
81 ;
82 ; Regular words use the "EXIT" word, a return stack, and the ebp register to
83 ; do the same thing. And here's the fun part: EXIT ends with NEXT!
84 ;
85 ; What the regular words and "code words" have in common is that they both
86 ; start (after the header) with a pointer that points to machine code to be
87 ; executed. Also known as the *interpreter code word*. For many regular words,
88 ; the interpreter code word will be:
89 ;
90 ; DOCOL
91 ;
92 ; The interpreter code word executes the rest of the current word by
93 ; incrementing the instruction pointer (esi) and calling the NEXT macro.
94 ;
95 ; This is called "indirect threaded code" because of the second level of
96 ; pointer indirection.
97 ;
98 ; It may be helpful to summarize at this point:
99 ;
100 ; |==============|=============|============|===========================|
101 ; | Type of word | Starts with | Ends with | Which uses |
102 ; |--------------|-------------|------------|---------------------------|
103 ; | Regular word | Ptr to code | EXIT ptr | esi, main data memory |
104 ; | Codeword | Ptr to self | NEXT macro | ebp ("RSP"), return stack |
105 ; |==============|=============|============|===========================|
106 ;
107 ; Also, let's visualize the layout of a code word and regular word side-by side:
108 ;
109 ; Code Word Regular Word
110 ; +------------------+ +------------------+
111 ; | Link pointer | | Link pointer |
112 ; +------------------+ +------------------+
113 ; | Name/flags | | Name/flags |
114 ; +------------------+ +------------------+ +------------+
115 ; | Pointer to code | ---+ | Pointer to DOCOL | ---> | DOCOL |
116 ; +------------------+ | +------------------+ +------------+
117 ; | <machine code> |<---+ | Pointer to word | <--- | NEXT |
118 ; +------------------+ +------------------+ +------------+
119 ; | <machine code> | | Pointer to EXIT |
120 ; +------------------+ +------------------+ +------------+
121 ; | NEXT | | Pointer to EXIT | ---> + EXIT |
122 ; +------------------+ +------------------+ +------------+
123 ; | NEXT |
124 ; +------------+
125 ;
126 ; Without further ado, here's the next macro:
127 ;
128 ; +----------------------------------------------------------------------------+
129 ; | The NEXT Macro |
130 ; +----------------------------------------------------------------------------+
131 ; Register esi is the instruction pointer. NEXT puts the pointer it's pointing
132 ; to into register eax and advances esi
133 ;
134 ; +------------------+ NEXT:
135 ; <----0x8000000 | <-- esi * eax = 0x8000000
136 ; +------------------+ +--- * esi points to next pointer
137 ; <----0x8AAAAAA | <---------+ * jump to address in eax
138 ; +------------------+
139 ;
140 ; The only thing that keeps this whole thing moving is the fact that *every*
141 ; word ends in NEXT. There is no other mechanism propelling this threaded
142 ; interpreter forward.
143 %macro NEXT 0
144 lodsd ; NEXT: Load from memory into eax, inc esi to point to next word.
145 jmp [eax] ; Jump to whatever code we're now pointing at.
146 %endmacro
147
148 ; That's a lot of stuff pointing at stuff.
149
150 ; By the way, the thing that makes Forth so hard to understand isn't all the
151 ; little details. It's the fact that *none of it makes sense in pieces*. Only
152 ; with the entire puzzle together in your head can you comprehend the machine.
153
154 ; +----------------------------------------------------------------------------+
155 ; | Return stack PUSH/POP macros |
156 ; +----------------------------------------------------------------------------+
157 ; The ebp register will be the return stack pointer ("RSP")
158 ; The PUSHRSP and POPRSP macros handle pushing registers onto stack memory.
159 ; The return stack is used to
160 ; (NASM macros use placeholders %1, %2, etc. as sequential params to substitute
161 ; into the machine code verbatim.)
162 %macro PUSHRSP 1
163 lea ebp, [ebp-4] ; "load effective address" of next stack position
164 mov [ebp], %1 ; "push" the register value to the address at ebp
165 %endmacro
166 %macro POPRSP 1
167 mov %1, [ebp]
168 lea ebp, [ebp+4]
169 %endmacro
170
171 ; +----------------------------------------------------------------------------+
172 ; | System Call Numbers |
173 ; +----------------------------------------------------------------------------+
174 ; JONESFORTH uses an external include file which you may not have. I'm just
175 ; gonna hardcode them here. I can't imagine these changing often.
176 ; (I found them in Linux source in file arch/x86/include/asm/unistd_32.h)
177 %assign __NR_exit 1
178 %assign __NR_open 5
179 %assign __NR_close 6
180 %assign __NR_read 3
181 %assign __NR_write 4
182 %assign __NR_creat 8
183 %assign __NR_brk 45
184
185 ; +----------------------------------------------------------------------------+
186 ; | Return stack and main memory - initial memory allocations |
187 ; +----------------------------------------------------------------------------+
188 ; The BSS section is for uninitialized storage space. We'll reserve bytes (resb)
189 ; and make labels so we can refer to these addresses later. The following are
190 ; reserved:
191 ; * buffer - storage for user input
192 ; * return stack - addresses of words so EXIT can return to them
193 ; * emit scratch - just a 4-byte bit of memory to store characters to print
194 SECTION .bss
195 %define buffer_size 4096
196 return_stack: resb 8192
197 return_stack_top: resb 4
198 buffer: resb buffer_size
199 emit_scratch: resb 4 ; (note: JF had this in .data as .space 1)
200
201 ; +----------------------------------------------------------------------------+
202 ; | A label used as a pointer to the first word that will be executed |
203 ; +----------------------------------------------------------------------------+
204 ; We need to be able to indirectly address the first word because that's how
205 ; NEXT works - it can only jump to an address _in memory_. So we'll use this
206 ; cold_start label pointing to memory containing the address of the first
207 ; codeword by putting it in the esi register and then calling NEXT. And yes,
208 ; that's right, in standard Forth convention, we start with "QUIT"!
209 SECTION .data
210 cold_start: dd QUIT ; we need a way to indirectly address the first word
211
212 ; +----------------------------------------------------------------------------+
213 ; | "LOADJF" |
214 ; +----------------------------------------------------------------------------+
215 ; I load the rest of the interpreter - a Forth source file - upon startup.
216 ;
217 ; This is a major difference with my port. Search for 'LOADJF' in this file to
218 ; see all of the places where I made changes or additions to support this.
219 ;
220 ; This path is relative by default to make it easy to run 'nasmjf' from
221 ; the repo dir. But you can set it to an absolute path to allow running
222 ; from any location.
223 jfsource: db "jonesforth/jonesforth.f", 0h ; LOADJF path, null-terminated
224 jfsource_end: db 0h ; LOADJF null-terminated string
225
226
227 ; +----------------------------------------------------------------------------+
228 ; | Program entry point - start the interpreter! |
229 ; +----------------------------------------------------------------------------+
230 ; Now begins the real program. There's some housekeeping to do and almost all
231 ; of it is setting up memory and pointers to memory.
232 SECTION .text
233 global _start
234
235 _start:
236 ; Clear the "direction flag" which means the string instructions
237 ; (such as LODSD) work in increment order instead of decrement.
238 cld
239
240 ; Save the current value of the stack pointer to S0. This is the first
241 ; variable we've seen that is available in Forth. You can examine and change
242 ; the value of this variable in the interpreter!
243 mov [var_SZ], esp
244
245 ; We will use ebp to keep track of the return stack, used by EXIT
246 ; to return to the previous word when the current one is finished.
247 mov ebp, return_stack_top
248
249 ; Now allocate main memory for Forth dictionary and data!
250 ; First, we get the start address of the "break", which is where
251 ; the data segment starts. Then we request a break that is at a new
252 ; address N bytes larger. The OS does it and now we've got more
253 ; memory available to us!
254 ;
255 ; Note that brk returns the current break address on failure, so
256 ; the first call we make with 0 in ebx is a way of making brk fail
257 ; on purpose! Most examples on the web scrupulously avoid explaining
258 ; this.
259 ;
260 ; We store the start, end, and current "position" in this main
261 ; memory in variables. HERE is particularly important!
262 xor ebx, ebx
263 mov eax, __NR_brk ; syscall brk
264 int 0x80
265 mov [var_HERE], eax ; eax has start addr of data segment
266 mov [var_CSTART], eax ; store info: start address of data segment
267 add eax, 0x16000 ; add our desired number of bytes to break addr
268 mov ebx, eax ; reserve memory by setting this new break addr
269 mov [var_CEND], eax ; store info: end address of data segment
270 mov eax, __NR_brk ; syscall brk again
271 int 0x80
272
273 ; "LOADJF" Process jonesforth.f upon startup. Open the file.
274 ; Then store the file descriptor (fd) so we can make the interpreter
275 ; read from the file rather than from STDIN.
276 mov ecx, 0 ; LOADJF read only flag for open
277 mov ebx, jfsource ; LOADJF address of string path for open
278 mov eax, __NR_open ; LOADJF open syscall
279 int 80h ; LOADJF fd now in eax
280 cmp eax, 0 ; LOADJF fd < 0 is an error!
281 jl .loadjf_open_fail
282 mov [read_from_fd], eax ; LOADJF store fd and tell KEY to read from this
283
284 ; Now "prime the pump" for the NEXT macro by sticking an indirect
285 ; address in esi. NEXT will jump to whatever's stored there.
286 ; Housekeeping stuff is over. The interpreter will start running now.
287 mov esi, cold_start
288 NEXT ; Start Forthing!
289
290 ; Handle failure of LOADJF!
291 ; I could have avoided a lot of code if I just exited when opening
292 ; the jonesforth.f file fails. But I thought it was important to make
293 ; a proper error message that was as helpful as possible. Because
294 ; if it fails, it's going to be _very_ confusing. I'm sure it will
295 ; happen to me years from now when I revisit this. And I don't want
296 ; to be confused!
297 .loadjf_open_fail: ; LOADJF
298 ; For each of these write syscalls:
299 ; ebx = stderr fd
300 ; ecx = start address of string
301 ; edx = length of string
302 ; Print first half of error message
303 mov ebx, 2 ; LOADJF
304 mov ecx, loadjf_fail_msg ; LOADJF
305 mov edx, (loadjf_fail_msg_end - loadjf_fail_msg)
306 mov eax, __NR_write ; LOADJF
307 int 80h ; LOADJF
308 ; Print jonesforth source path
309 mov ebx, 2 ; LOADJF
310 mov ecx, jfsource ; LOADJF
311 mov edx, (jfsource_end - jfsource)
312 mov eax, __NR_write ; LOADJF
313 int 80h ; LOADJF
314 ; Print second half of error message
315 mov ebx, 2 ; LOADJF
316 mov ecx, loadjf_fail_msg2 ; LOADJF
317 mov edx, (loadjf_fail_msg_end2 - loadjf_fail_msg2)
318 mov eax, __NR_write ; LOADJF
319 int 80h ; LOADJF
320 mov ebx, 1 ; LOADJF exit code and fall through to exit
321
322 ; Exit program.
323 ; I define this here so the above LOADJF failure can fall through into it.
324 ; But it is also called when the user ends input (Ctrl+d) in the normal use
325 ; of the interpreter.
326 exit_with_grace_and_beauty: ; (don't forget to set ebx to exit code)
327 mov eax,__NR_exit ; syscall: exit
328 int 0x80 ; invoke syscall
329
330 ; +----------------------------------------------------------------------------+
331 ; | |
332 ; | Part Two: Words! |
333 ; | |
334 ; +----------------------------------------------------------------------------+
335 ; Everything from here on out is Forth bootstrapping itself as a series of word
336 ; definitions - first in machine code (written in assembly), then as words
337 ; defined as lists of addresses of other words. Lastly, as text in the Forth
338 ; language!
339 ;
340 ; +----------------------------------------------------------------------------+
341 ; | Forth DOCOL implementation |
342 ; +----------------------------------------------------------------------------+
343 ; This is the "interpreter" word - it is used at the beginning of "normal" Forth
344 ; words (composed of other words, not machine code). All DOCOL does is gets the
345 ; esi register pointed at the first word address and starts the NEXT macro.
346 ; (See my ASCII art boxes at the top of this document. Search for "DOCOL".)
347 ;
348 ; Note that esi doesn't contain the address of the next word to run. Instead,
349 ; it contains the next address that will *point to* the next word to run!
350 DOCOL:
351 PUSHRSP esi ; push esi on to the "RSP" return stack
352 add eax, 4 ; eax currently points to DOCOL (me!), point to next addr
353 mov esi, eax ; Load the next word pointer into esi
354 NEXT
355
356 ; +----------------------------------------------------------------------------+
357 ; | Word header flags |
358 ; +----------------------------------------------------------------------------+
359 ; These are bits that can be ANDed together to indicate special properties of
360 ; the word:
361 ; * IMMED - an "immediate" word runs in compile mode
362 ; * HIDDEN - a word is usually hidden while it's being compiled!
363 ; * LENMASK - to save space (!), the word name length is combined with flags
364 %assign F_IMMED 0x80
365 %assign F_HIDDEN 0x20
366 %assign F_LENMASK 0x1f
367
368 ; Link holds address of last word defined (to make linked list)
369 ; (NASM Note: Must be %define rather than %assign or we'll run afoul
370 ; assigning the name_label address below.)
371 %define link 0 ; null link - beginning of the linked list
372
373 ; +----------------------------------------------------------------------------+
374 ; | DEFWORD and DEFCODE macros |
375 ; +----------------------------------------------------------------------------+
376 ; As mentioned in the beginning, there are two kinds of words in Forth:
377 ;
378 ; 1. Code words are pure machine language
379 ; 2. Regular words are defined as a series of pointers to other words
380 ;
381 ; Both start with a header: link, name length + flags, name.
382 ;
383 ; After the header, both start with an address: a pointer to code to be
384 ; executed immediatly when we run the word.
385 ;
386 ; The big difference is that a code word points to its own instructions, while
387 ; a regular word points to DOCOL, the "interpreter word", which sets the esi
388 ; register and uses NEXT to execute the rest of the word definition.
389 ;
390 ; Refer again to the ASCII art boxes a the top of this file to see how the two
391 ; types of words are laid out in memory.
392 ;
393 ; The following assembler macros help us create both types of words from
394 ; within assembly.
395 ;
396 ; The two macros are very similar. But notice how DEFWORD begins the body
397 ; of the word after the header with the address of DOCOL.
398 ;
399 ; Define a regular word. Create header from name and flags, then start the
400 ; word body with the address of DOCOL.
401 %macro DEFWORD 3 ; 1=name 2=label 3=flags
402 %strlen namelen %1 ; NASM calculates this for us!
403 SECTION .data
404 align 4 ; Everything is aligned on 4 byte boundaries.
405
406 ; Start of the word header
407 ; ------------------------
408 global name_%2 ; name_<label> for use in assembly
409 name_%2:
410 dd link ; link the previous word's addr
411 %define link name_%2 ; store *my* link addr for next time
412 db %3 + namelen ; flags + namelen (packed into byte)
413 db %1 ; name string ("FOO")
414 align 4
415
416 ; Start of the word body
417 ; ----------------------
418 global %2 ; <label> for use in assembly
419 %2:
420 dd DOCOL ; Pointer to DOCOL code word that will execute the
421 ; word pointer that will follow the use of this macro.
422 %endmacro
423
424 ; Define a code word. Create header from name and flags, then start the
425 ; word body with the next address after itself. See comments in DEFWORD
426 ; above for explanation of the header portion.
427 %macro DEFCODE 3 ; 1=name 2=label 3=flags
428 %strlen namelen %1
429 SECTION .data
430 align 4
431
432 ; Start of the word header
433 ; ------------------------
434 global name_%2
435 name_%2:
436 dd link
437 %define link name_%2 ; store this link addr for next time
438 db %3 + namelen ; flags + namelen
439 db %1 ; name string
440 align 4
441
442 ; Start of the word body
443 ; ----------------------
444 global %2
445 %2:
446 dd code_%2 ; The address of the label that follows...
447 align 4
448
449 SECTION .text ; Assembly intructions (machine code) will follow.
450 global code_%2
451 code_%2:
452 ; Whatever follows the use of this macro is the machine code
453 ; definition of the code word. We can execute this word directly
454 ; in assembly by jumping to this label. We can "compile it" into
455 ; a regular word with the body label (%2). And like all words,
456 ; we can execute it in Forth using it's string name.
457 %endmacro
458
459 ; +----------------------------------------------------------------------------+
460 ; +----------------------------------------------------------------------------+
461 ; What follow are 9 regular words and 130 code words. Only some fraction of the
462 ; code words are really be required here. They're just more efficient if they're
463 ; implemented in assembly rather than Forth itself.
464
465 ; +----------------------------------------------------------------------------+
466 ; | QUIT: the "outer interpreter" |
467 ; +----------------------------------------------------------------------------+
468 ; At the top of this file, I describe the shape of word definitions and the
469 ; "interpreter word" and the NEXT and EXIT mechanisms. Now we can take a gander
470 ; at the outer main loop that really holds all of this together and makes this
471 ; Forth an actual interpreter in the sense most of us expect.
472 ;
473 ; You'll notice that QUIT contains neither a NEXT nor EXIT. This is the outer
474 ; loop ; and a NEXT will eventually bring us back here, where an unconditional
475 ; loop ; will keep looking for input and executing it.
476 ;
477 ; (And yes, "QUIT" is a bizarre name for this word.)
478 ;
479 ; I think this might be a helpful way to think of the nested nature of QUIT
480 ; and the two types of words:
481 ;
482 ; QUIT (INTERPRET)
483 ; * regular word
484 ; DOCOL
485 ; NEXT
486 ; * regular word
487 ; DOCOL (codeword
488 ; NEXT
489 ; * code word
490 ; <machine code>
491 ; NEXT
492 ; * code word
493 ; <machine code>
494 ; NEXT
495 ; EXIT
496 ; NEXT
497 ; EXIT
498 ; NEXT
499 ; QUIT (BRANCH -8 back to INTERPRET for more)
500 ;
501 ; Notice how every code word ends in a NEXT and every regular word ends in
502 ; an EXIT, which also has a NEXT to go to back up the call stack.
503 ;
504 ; And when those words are done, the next address to execute happens to
505 ; be the unconditional branch in QUIT that starts the "outer interpreter"
506 ; loop all over again.
507 ;
508 ; Here's the definition of QUIT. Notice how much easier it is to write than
509 ; to describe!
510 DEFWORD "QUIT",QUIT,0
511 dd R0 ; push R0 (addr of top of return stack)
512 dd RSPSTORE ; store R0 in return stack pointer (ebp)
513 dd INTERPRET ; interpret the next word
514 dd BRANCH,-8 ; and loop (indefinitely)
515
516 ; +----------------------------------------------------------------------------+
517 ; | EXIT |
518 ; +----------------------------------------------------------------------------+
519 ; And here's EXIT. Look at how tiny this is! This ends every regular word by
520 ; popping the "return address" pushed by DOCOL when the word began.
521 DEFCODE "EXIT",EXIT,0
522 POPRSP esi ; pop return stack into esi
523 NEXT
524
525 ; +----------------------------------------------------------------------------+
526 ; | The Forth Interpreter words |
527 ; +----------------------------------------------------------------------------+
528 ; The following three words contain some pretty beefy assembly code. They get
529 ; input, split it into words, find the word definitions, and execute them:
530 ;
531 ; KEY - Buffers input from STDIN (or a file)
532 ; WORD - Calls KEY, gets a whitespace-delimited "word" of text
533 ; INTERPRET - Calls WORD, looks up words in dictionary, attempts to
534 ; handle literal number values, and executes the results.
535 ;
536 ; Now, here they are in the opposite order:
537 ;
538 ; +----------------------------------------------------------------------------+
539 ; | INTERPRET |
540 ; +----------------------------------------------------------------------------+
541 ; Get's "word" of input (that term is overloaded here) and determines what to
542 ; do with it.
543 DEFCODE "INTERPRET",INTERPRET,0
544 call _WORD ; Returns %ecx = length, %edi = pointer to word.
545
546 ; Is it in the dictionary?
547 xor eax,eax ; back from _WORD...zero eax
548 mov [interpret_is_lit], eax ; 0 means not a literal number (yet)
549 call _FIND ; Returns %eax = pointer to header or 0 if not found.
550 test eax,eax ; Found?
551 jz .try_literal
552
553 ; In the dictionary. Is it an IMMEDIATE codeword?
554 mov edi,eax ; edi = dictionary entry YES WE HAVE MATCHED A WORD!!!
555 mov al,[edi+4] ; Get name+flags.
556 push ax ; Just save it for now.
557 call _TCFA ; Convert dictionary entry (in %edi) to codeword pointer.
558 pop ax
559 and al,F_IMMED ; is IMMED flag set?
560 mov eax,edi
561 jnz .execute ; If IMMED, jump straight to executing.
562 jmp .check_state
563
564 .try_literal: ; (1) Not in the dictionary (not a word) so assume it's a literal number.
565 inc byte [interpret_is_lit] ; DID NOT MATCH a word, trying literal number
566 call _NUMBER ; Returns the parsed number in %eax, %ecx > 0 if error
567 test ecx,ecx
568 jnz .parse_error
569 mov ebx,eax
570 mov eax,LIT ; The word is now LIT
571
572 .check_state: ; (2) Are we compiling or executing?
573 mov edx,[var_STATE]
574 test edx,edx
575 jz .execute ; Jump if executing.
576
577 ; Compiling - just append the word to the current dictionary definition.
578 call _COMMA
579 mov ecx,[interpret_is_lit] ; Was it a literal?
580 test ecx,ecx
581 jz .go_next ; nope, done
582 mov eax,ebx ; Yes, so LIT is followed by a number.
583 call _COMMA
584 .go_next: ; (3)
585 NEXT
586
587 .execute: ; (4) Executing - run it!
588 mov ecx,[interpret_is_lit] ; Literal?
589 test ecx,ecx ; Literal?
590 jnz .do_literal
591
592 ; Not a literal, execute it now. This never returns, but the codeword will
593 ; eventually call NEXT which will reenter the loop in QUIT.
594 jmp [eax]
595
596 .do_literal: ; (5) Executing a literal, which means push it on the stack.
597 push ebx
598 NEXT
599
600 .parse_error: ; (6) Parse error (not a known word or a number in the current BASE).
601 ; Print an error message followed by up to 40 characters of context.
602 mov ebx,2 ; 1st param: stderr
603 mov ecx,errmsg ; 2nd param: error message
604 mov edx,(errmsgend - errmsg) ; 3rd param: length of string
605 mov eax,__NR_write ; write syscall
606 int 80h
607
608 mov ecx,[currkey] ; the error occurred just before currkey position
609 mov edx,ecx
610 sub edx,buffer ; edx = currkey - buffer (length in buffer before currkey)
611 cmp edx,40 ; if >= 40, then print only 40 characters
612 jle .print_error
613 mov edx,40
614 .print_error: ; (7)
615 sub ecx,edx ; ecx = start of area to print, edx = length
616 mov eax,__NR_write ; write syscall
617 int 80h
618
619 mov ecx,errmsgnl ; newline
620 mov edx,1 ; 1 char long
621 mov eax,__NR_write ; write syscall
622 int 80h
623 NEXT
624
625 ; +----------------------------------------------------------------------------+
626 ; | WORD |
627 ; +----------------------------------------------------------------------------+
628 ; Return a Forth string: an address and length (unlike C strings, we don't end
629 ; with a sentinel NUL.) This should perhaps be called "token".
630 DEFCODE "WORD",FWORD,0 ; Note changed nasm reserved keyword WORD to FWORD!
631 call _WORD
632 push edi ; push base address
633 push ecx ; push length
634 NEXT
635 _WORD:
636 ; Search for first non-blank character. Also skip \ comments.
637 .skip_non_words:
638 call _KEY ; get next key, returned in %eax
639 cmp al,'\' ; start of a comment?
640 je .skip_comment ; if so, skip the comment
641 cmp al,' ' ; compare to ASCII space (0x20)
642 jbe .skip_non_words ; Is space or lower, keep scanning
643
644 ; now we've reached a word - start storing the chars
645 mov edi,word_buffer ; put addr to word return buffer in edi (used by stosb)
646 .collect_word:
647 stosb ; add character to return buffer (8 bits from al)
648 call _KEY ; get next key, returned in %al
649 cmp al,' ' ; compare to ASCII space (0x20)
650 ja .collect_word ; Is higher than space, keep collecting
651
652 ; return word buffer addr and length...
653 sub edi, word_buffer ; calculate the length of the word
654 mov ecx, edi ; return it
655 mov edi, word_buffer ; return start address of the word
656 ret
657 .skip_comment: ; skip \ comment to end of current line
658 call _KEY
659 cmp al,`\n` ; eol? (escapes okay in backtick strings in nasm)
660 jne .skip_comment
661 jmp .skip_non_words
662
663 SECTION .data
664 word_buffer:
665 times 32 db 0x0 ; 32 bytes of buffer for word names
666 SECTION .text
667
668 ; +----------------------------------------------------------------------------+
669 ; | KEY |
670 ; +----------------------------------------------------------------------------+
671 ; This should really be called "char" because it gets a character of input, not
672 ; a "key". It's easy to imagine the historical implementation fitting the name.
673 DEFCODE "KEY",KEY,0
674 call _KEY
675 push eax ; push return value on stack
676 NEXT
677 _KEY:
678 mov ebx, [currkey]
679 cmp ebx, [bufftop]
680 jge .get_more_input
681 xor eax, eax
682 mov al, [ebx] ; get next key from input buffer
683
684 .continue_with_key:
685 inc ebx
686 mov [currkey], ebx ; increment currkey
687 ret
688
689 .get_more_input: ; Use read(2) to fetch more input
690 mov ebx, [read_from_fd] ; LOADJF 1st param: input file (STDIN when getting user input)
691 ;xor ebx,ebx ; 1st param: stdin
692 mov ecx,buffer ; 2nd param: buffer
693 mov [currkey],ecx
694 mov edx,buffer_size ; 3rd param: max length
695 mov eax,__NR_read ; syscall: read
696 int 0x80 ; syscall!
697 test eax,eax ; If %eax <= 0, then exit.
698 jbe .eof
699 add ecx,eax ; buffer+%eax = bufftop
700 mov [bufftop],ecx
701 jmp _KEY
702
703 .eof: ; Error or end of input
704 cmp dword [read_from_fd], 0 ; LOADJF If we were reading from STDIN (0)...
705 je .eof_stdin ; LOADJF ...then exit the program normally.
706 mov ebx, [read_from_fd] ; LOADJF Otherwise, close the file.
707 mov eax, __NR_close ; LOADJF
708 int 80h
709 mov dword [read_from_fd], 0 ; LOADJF Change the read-from fd to STDIN.
710 jmp .get_more_input ; LOADJF And continue reading!
711 .eof_stdin: ; Exit peacefully!
712 xor ebx,ebx ; set ebx to exit with no error (0)
713 jmp exit_with_grace_and_beauty
714
715
716 ; +----------------------------------------------------------------------------+
717 ; | Some Forth primitives |
718 ; +----------------------------------------------------------------------------+
719 ; TICK (or single quote: ') gets the address of the word
720 ; that matches the next word of input text. Uses the same
721 ; lodsd trick as LIT to grab the next word of input without
722 ; executing it. Only works while in compile state. (: ... ;)
723 ; It's not an immediate word, so it executes at run time,
724 ; which is why we end up with the address of the next word
725 ; (which was matched at compile time) to put on the stack!
726 DEFCODE "'",TICK,0
727 lodsd ; Moves value at esi to eax, esi++
728 push eax ; Push address on the stack
729 NEXT
730
731 ; BRANCH is the simplest possible way to loop - it always
732 ; moves the word pointer by the amount in the next value
733 ; pointed to by esi! It's helpful to see how LIT works because
734 ; it's a similar premise - the value after BRANCH isn't a
735 ; word address, it's the amount to add to esi.
736 ; To branch/loop back to a previous instruction, you provide
737 ; a negative offset.
738 ; esi currently points at the offset number.
739 DEFCODE "BRANCH",BRANCH,0
740 add esi, [esi] ; add the offset to the instruction pointer
741 NEXT
742
743 ; 0BRANCH is the same thing, but with a condition: it only
744 ; jumps if the top of the stack is zero.
745 DEFCODE "0BRANCH",ZBRANCH,0
746 pop eax
747 test eax, eax ; top of stack is zero?
748 jz code_BRANCH ; if so, jump back to BRANCH
749 lodsd ; or skip the offset (esi to eax, esi++)
750 NEXT
751
752 ; Another primitive - this one is used to implement the string
753 ; words in Forth (." and S"). I'll just port it for now, then
754 ; test it later.
755 ; The lodsd "trick" (see also LIT) to load the next 4 bytes of
756 ; memory from the address at the current instruction pointer
757 ; (esi) into eax and then increment esi to skip over it so
758 ; NEXT doesnt try to execute it.
759 DEFCODE "LITSTRING",LITSTRING,0
760 lodsd ; get the length of the string into eax
761 push esi ; push the address of the start of the string
762 push eax ; push it on the stack
763 add esi, eax ; skip past the string
764 add esi, 3 ; but round up to next 4 byte boundary
765 and esi, ~3
766 NEXT
767
768 ; Same deal here - another primitive. This one uses a Linux syscall
769 ; to print a string.
770 DEFCODE "TELL",TELL,0
771 mov ebx, 1 ; 1st param: stdout
772 pop edx ; 3rd param: length of string
773 pop ecx ; 2nd param: address of string
774 mov eax,__NR_write ; write syscall
775 int 80h
776 NEXT
777
778 ; Turn a dictionary pointer into a codeword pointer.
779 ; This is where we use the stored length of the word name
780 ; to skip to the beginning of the code.
781 DEFCODE ">CFA",TCFA,0
782 pop edi
783 call _TCFA
784 push edi
785 NEXT
786 _TCFA:
787 xor eax,eax
788 add edi,4 ; Skip link pointer.
789 mov al,[edi] ; Load flags+len into %al.
790 inc edi ; Skip flags+len byte.
791 and al,F_LENMASK ; Just the length, not the flags.
792 add edi,eax ; Skip the name.
793 add edi,3 ; The codeword is 4-byte aligned:
794 and edi,~3 ; Add ...00000011 and mask ...11111100.
795 ret ; For more, see log06.txt in this repo.
796
797 ; Turn a dictionary pointer into a "data" pointer.
798 ; Data simply being the word addresses immediately
799 ; following the codeword (4 bytes later).
800 DEFWORD ">DFA",TDFA,0
801 dd TCFA ; get codeword address
802 dd INCR4 ; advance 4 bytes
803 dd EXIT ; return from this word
804
805 ; parse numeric literal from input using BASE as radix
806 DEFCODE "NUMBER",NUMBER,0
807 pop ecx ; length of string
808 pop edi ; start address of string
809 call _NUMBER
810 push eax ; parsed number
811 push ecx ; number of unparsed characters (0 = no error)
812 NEXT
813 _NUMBER:
814 xor eax,eax
815 xor ebx,ebx
816
817 test ecx,ecx ; trying to parse a zero-length string is an error, but returns 0
818 jz .return
819
820 mov edx, [var_BASE] ; get BASE (in dl)
821
822 ; Check if first character is '-'.
823 mov bl,[edi] ; bl = first character in string
824 inc edi
825 push eax ; push 0 on stack
826 cmp bl,'-' ; negative number?
827 jnz .convert_char
828 pop eax
829 push ebx ; push non-0 on stack, indicating negative
830 dec ecx
831 jnz .next_char
832 pop ebx ; error: string is only '-'.
833 mov ecx,1
834 ret
835
836 .next_char: ; (1) Loop reading digits.
837 imul eax,edx ; eax *= BASE
838 mov bl,[edi] ; bl = next character in string
839 inc edi
840
841 .convert_char: ; (2) Convert 0-9, A-Z to a number 0-35.
842 sub bl,'0' ; < '0'?
843 jb .negate
844 cmp bl,10 ; <= '9'?
845 jb .compare_base
846 sub bl,17 ; < 'A'? (17 is 'A'-'0')
847 jb .negate
848 add bl,10
849
850 .compare_base: ; (3)
851 cmp bl,dl ; >= BASE?
852 jge .negate
853
854 ; add it to eax and loop.
855 add eax,ebx
856 dec ecx
857 jnz .next_char
858
859 .negate: ; (4) Negate the result if first character was '-' (saved on the stack).
860 pop ebx
861 test ebx,ebx
862 jz .return
863 neg eax
864
865 .return: ;(5)
866 ret
867
868 ; esi always points to the next thing. Usually this is
869 ; the next word. But in this case, it's the literal value
870 ; to push onto the stack.
871 DEFCODE "LIT",LIT,0
872 lodsd ; loads the value at esi into eax, increments esi
873 push eax ; push the literal number on to stack
874 NEXT
875
876 ; Before this, we'll have called _WORD which pushed (returned):
877 ; ecx = length
878 ; edi = start of word (addr)
879 DEFCODE "FIND",FIND,0
880 pop ecx ; length of word
881 pop edi ; buffer with word
882 call _FIND
883 push eax ; push address of dict entry (or null) as return val
884 NEXT
885 _FIND:
886 push esi ; _FIND! Save esi, we'll use this reg for string comparison
887
888 ; Now we start searching backwards through the dictionary for this word.
889 mov edx,[var_LATEST] ; LATEST points to name header of the latest word in the dictionary
890 .test_word:
891 test edx,edx ; NULL pointer? (end of the linked list)
892 je .not_found
893
894 ; First compare the length expected and the length of the word.
895 ; Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
896 ; this won't pick the word (the length will appear to be wrong).
897 xor eax,eax
898 mov al, [edx+4] ; al = flags+length field
899 and al,(F_HIDDEN|F_LENMASK) ; al = length, but including hidden bit!
900 cmp cl,al ; Length is the same?
901 jne .prev_word ; nope, try prev
902
903 ; Compare the strings in detail.
904 push ecx ; Save the length
905 push edi ; Save the address (repe cmpsb will move this pointer)
906 lea esi,[edx+5] ; Dictionary string we are checking against.
907 repe cmpsb ; Compare the strings.
908 pop edi
909 pop ecx
910 jne .prev_word ; nope, try prev
911
912 ; The strings are the same - return the header pointer in eax
913 pop esi
914 mov eax,edx
915 ret ; Found!
916
917 .prev_word:
918 mov edx,[edx] ; Move back through the link field to the previous word
919 jmp .test_word ; loop, test prev word
920
921 .not_found:
922 pop esi
923 xor eax,eax ; Return zero to indicate not found (aka null ptr)
924 ret
925
926 ; CREATE makes words! Specifically, the header portion of words.
927 DEFCODE "CREATE",CREATE,0
928 pop ecx ; length of word name
929 pop ebx ; address of word name
930
931 ; link pointer
932 mov edi, [var_HERE] ; the address of the header
933 mov eax, [var_LATEST] ; get link pointer
934 stosd ; and store it in the header.
935
936 ; Length byte and the word itself.
937 mov al, cl ; Get the length.
938 stosb ; Store the length/flags byte.
939 push esi
940 mov esi, ebx ; esi = word
941 rep movsb ; Copy the word
942 pop esi
943 add edi, 3 ; Align to next 4 byte boundary. See TCFA
944 and edi, ~3
945
946 ; Update LATEST and HERE.
947 mov eax, [var_HERE]
948 mov [var_LATEST], eax
949 mov [var_HERE], edi
950 NEXT
951
952 ; COMMA (,)
953 ; This is a super primitive word used to compile words. It puts the
954 ; currently-pushed value from the stack to the position pointed to
955 ; by HERE and increments HERE to the next 4 bytes.
956 DEFCODE ",",COMMA,0
957 pop eax ; Code pointer to store.
958 call _COMMA
959 NEXT
960 _COMMA:
961 mov edi, [var_HERE]
962 cmp edi, [var_CSTART]
963 jl .oops
964 cmp edi, [var_CEND]
965 jg .oops
966 stosd ; puts the value in eax at edi, increments edi
967 mov [var_HERE], edi
968 ret
969 .oops:
970 nop
971
972 ; LBRAC and RBRAC ([ and ])
973 ; Simply toggle the STATE variable (0=immediate, 1=compile)
974 ; So:
975 ; <compile mode> [ <immediate mode> ] <compile mode>
976 ;
977 ; Note that LBRAC has the immediate flag set because otherwise
978 ; it would get compiled rather than switch modes then and there.
979 DEFCODE "[",LBRAC,F_IMMED
980 xor eax, eax
981 mov [var_STATE], eax ; Set STATE to 0 (immediate)
982 NEXT
983
984 DEFCODE "]",RBRAC,0
985 mov [var_STATE], word 1 ; Set STATE to 1 (compile)
986 NEXT
987
988 ; HIDDEN toggles the hidden flag for the dictionary entry
989 ; at the address on the stack
990 DEFCODE "HIDDEN",HIDDEN,0
991 pop edi ; Dictionary entry, first byte is link
992 add edi, 4 ; Move to name/flags byte.
993 xor [edi], word F_HIDDEN ; Toggle the HIDDEN bit in place.
994 NEXT
995
996
997 ; +----------------------------------------------------------------------------+
998 ; | COLON and SEMICOLON: The Compiler! |
999 ; +----------------------------------------------------------------------------+
1000 ; COLON (:) creates the new word header and starts compile mode
1001 ; It also sets the new definition to hidden so the word isn't
1002 ; discovered while it is being compiled.
1003 DEFWORD ":",COLON,0
1004 dd FWORD ; Get the name of the new word
1005 dd CREATE ; CREATE the dictionary entry / header
1006 dd LIT, DOCOL, COMMA ; Append DOCOL (the codeword).
1007 dd LATEST, FETCH, HIDDEN ; Make the word hidden while it's being compiled.
1008 ;dd LATEST, HIDDEN ; Make the word hidden while it's being compiled.
1009 dd RBRAC ; Go into compile mode.
1010 dd EXIT ; Return from the function.
1011
1012 ; SEMICOLON (;) is an immediate word (F_IMMED) and it ends compile
1013 ; mode and unhides the word entry being compiled.
1014 DEFWORD ";",SEMICOLON,F_IMMED
1015 dd LIT, EXIT, COMMA ; Append EXIT (so the word will return).
1016 dd LATEST, FETCH, HIDDEN ; Unhide word now that it's been compiled.
1017 ;dd LATEST, HIDDEN ; Unhide word now that it's been compiled.
1018 dd LBRAC ; Go back to IMMEDIATE mode.
1019 dd EXIT ; Return from the function.
1020
1021 ; EMIT just displays a character of output from the stack.
1022 ; It doesnt attempt to be efficient at all (no buffering, etc.)
1023 DEFCODE "EMIT",EMIT,0
1024 pop eax
1025 call _EMIT
1026 NEXT
1027 _EMIT:
1028 mov [emit_scratch], al ; put char to print at scratch space
1029 mov ebx, 1 ; syscall param 1: stdout
1030 mov ecx, emit_scratch ; syscall param 2: address to print
1031 mov edx, 1 ; syscall param 3: length in bytes to print
1032 mov eax, __NR_write ; syscall 'write'
1033 int 0x80 ; request syscall!
1034 ret
1035
1036 ; DOT (temporary definiion) displays ascii decimal represention
1037 ; of numbers. Based on "echoi" proc written as part of asmtutor.com
1038 ; The real dot will be written as pure Forth later.
1039 DEFCODE ".",DOT,0
1040 pop eax
1041 call _DOT
1042 NEXT
1043 _DOT:
1044 push esi ; preserve
1045 mov ecx, 0 ; counter of digits to print at the end
1046 .divideloop:
1047 inc ecx
1048 mov edx, 0
1049 mov esi, 10
1050 idiv esi ; divide eax by this
1051 add edx, 48 ; convert remainder to ascii digit
1052 push edx ; push on stack to be echoed later (for correct order)
1053 ; what's clever about pushing the ascii digits onto the
1054 ; stack is that we can use the stack memory as our
1055 ; buffer by using the stack pointer at esp in our
1056 ; syscall to print the digits
1057 cmp eax, 0 ; are we done?
1058 jnz .divideloop
1059 mov esi, ecx ; printing... we use ecx for syscall, so count down with esi
1060 .printloop:
1061 ; arguably, I should be making use of EMIT...but this is all temporary
1062 ; anyway so I'm just going to inline the syscall to print the digits...
1063 dec esi
1064 mov ebx, 1 ; syscall param 1: stdout
1065 mov ecx, esp ; syscall param 2: address to print
1066 mov edx, 1 ; syscall param 3: length in bytes to print
1067 mov eax, __NR_write ; syscall 'write'
1068 int 0x80 ; request syscall!
1069 pop eax ; next digit
1070 cmp esi, 0 ; are we done?
1071 jnz .printloop
1072 pop esi ;restore our word address pointer
1073 ret
1074
1075 ; PRINTWORD
1076 ; Super killer debugging word! Prints the name of the word pointed to
1077 ; on the stack. Example: LATEST PRINTWORD
1078 DEFCODE "PRINTWORD",PRINTWORD,0
1079 pop eax
1080 call _PRINTWORD
1081 NEXT
1082 _PRINTWORD:
1083 mov edx,eax ; stack had addr of header of dictionary word
1084 xor eax,eax ; zero out all of eax
1085 mov al, [edx+4] ; al = flags+length field
1086 and al, F_LENMASK ; al = just length of name
1087 add edx,5 ; move pointer to name string
1088 mov ebx,1 ; 1st param: stdout
1089 mov ecx,edx ; 2nd param: address to print
1090 mov edx,eax ; 3rd param: length of string
1091 mov eax,__NR_write ; write syscall
1092 int 80h
1093
1094 ; +----------------------------------------------------------------------------+
1095 ; | Stack manipulation words |
1096 ; +----------------------------------------------------------------------------+
1097
1098 ; drop top of stack
1099 DEFCODE "DROP",DROP,0
1100 pop eax
1101 NEXT
1102
1103 ; swap top two elements
1104 DEFCODE "SWAP",SWAP,0
1105 pop eax
1106 pop ebx
1107 push eax
1108 push ebx
1109 NEXT
1110
1111 ; duplicate element on top of stack
1112 DEFCODE "DUP",DUP,0
1113 mov eax, [esp]
1114 push eax
1115 NEXT
1116
1117 ; duplicate second element of stack to top
1118 DEFCODE "OVER",OVER,0
1119 mov eax, [esp+4]
1120 push eax
1121 NEXT
1122
1123 ; rotate the top three items on stack (ABC -> BCA)
1124 DEFCODE "ROT",ROT,0
1125 pop eax
1126 pop ebx
1127 pop ecx
1128 push ebx
1129 push eax
1130 push ecx
1131 NEXT
1132
1133 ; reverse rotate top three items on stack (ABC -> CAB)
1134 DEFCODE "-ROT",NROT,0
1135 pop eax
1136 pop ebx
1137 pop ecx
1138 push eax
1139 push ecx
1140 push ebx
1141 NEXT
1142
1143 ; drop top two elements from stack
1144 DEFCODE "2DROP",TWODROP,0
1145 pop eax
1146 pop eax
1147 NEXT
1148
1149 ; duplicate top two elements on stack
1150 DEFCODE "2DUP",TWODUP,0
1151 mov eax, [esp]
1152 mov ebx, [esp + 4]
1153 push ebx
1154 push eax
1155 NEXT
1156
1157 ; swap top two pairs (ABCD -> CDAB)
1158 DEFCODE "2SWAP",TWOSWAP,0
1159 pop eax
1160 pop ebx
1161 pop ecx
1162 pop edx
1163 push ebx
1164 push eax
1165 push edx
1166 push ecx
1167 NEXT
1168
1169 ; duplicate top element on stack if it's non-zero
1170 DEFCODE "?DUP",QDUP,0
1171 mov eax, [esp]
1172 test eax, eax
1173 jz .skip
1174 push eax
1175 .skip:
1176 NEXT
1177
1178
1179 ; +----------------------------------------------------------------------------+
1180 ; | Math words |
1181 ; +----------------------------------------------------------------------------+
1182
1183 DEFCODE "1+",INCR,0
1184 inc dword [esp] ; increment top of stack
1185 NEXT
1186
1187 DEFCODE "1-",DECR,0
1188 dec dword [esp] ; decrement top of stack
1189 NEXT
1190
1191 DEFCODE "4+",INCR4,0
1192 add dword [esp], 4 ; add 4 to top of stack
1193 NEXT
1194
1195 DEFCODE "4-",DECR4,0
1196 sub dword [esp], 4 ; subtract 4 from top of stack
1197 NEXT
1198
1199 DEFCODE "+",ADD,0
1200 pop eax ; get top of stack
1201 add [esp], eax ; and add it to next word on stack
1202 NEXT
1203
1204 DEFCODE "-",SUB,0
1205 pop eax ; get top of stack
1206 sub [esp], eax ; and subtract it from next word on stack
1207 NEXT
1208
1209 DEFCODE "*",MUL,0
1210 pop eax
1211 pop ebx
1212 imul eax, ebx
1213 push eax ; ignore overflow
1214 NEXT
1215
1216 ; In JonesFORTH, /MOD is defined in asm. / and MOD will
1217 ; be defined later in FORTH. This is because i386 idiv
1218 ; gives us both the quotient and remainder.
1219 DEFCODE "/MOD",DIVMOD,0
1220 xor edx, edx
1221 pop ebx
1222 pop eax
1223 idiv ebx
1224 push edx ; push remainder
1225 push eax ; push quotient
1226 NEXT
1227
1228 ; +----------------------------------------------------------------------------+
1229 ; | Comparison/conditional words |
1230 ; +----------------------------------------------------------------------------+
1231
1232 DEFCODE "=",EQU,0 ; top two values are equal?
1233 pop eax
1234 pop ebx
1235 cmp eax, ebx
1236 sete al ; sete sets operand (al) to 1 if cmp was true
1237 movzx eax, al ; movzx moves the value, then fills in zeros
1238 push eax ; push answer on stack
1239 NEXT
1240
1241 DEFCODE "<>",NEQU,0 ; top two words are not equal?
1242 pop eax
1243 pop ebx
1244 cmp eax, ebx
1245 setne al
1246 movzx eax, al
1247 push eax
1248 NEXT
1249
1250 DEFCODE "<",LT,0
1251 pop eax
1252 pop ebx
1253 cmp ebx, eax
1254 setl al
1255 movzx eax, al
1256 push eax
1257 NEXT
1258
1259 DEFCODE ">",GT,0
1260 pop eax
1261 pop ebx
1262 cmp ebx, eax
1263 setg al
1264 movzx eax, al
1265 push eax
1266 NEXT
1267
1268 DEFCODE "<=",LE,0
1269 pop eax
1270 pop ebx
1271 cmp ebx, eax
1272 setle al
1273 movzx eax, al
1274 push eax
1275 NEXT
1276
1277 DEFCODE ">=",GE,0
1278 pop eax
1279 pop ebx
1280 cmp ebx, eax
1281 setge al
1282 movzx eax, al
1283 push eax
1284 NEXT
1285
1286 DEFCODE "0=",ZEQU,0 ; top of stack equals 0?
1287 pop eax
1288 test eax,eax
1289 setz al
1290 movzx eax, al
1291 push eax
1292 NEXT
1293
1294 DEFCODE "0<>",ZNEQU,0 ; top of stack not 0?
1295 pop eax
1296 test eax,eax
1297 setnz al
1298 movzx eax, al
1299 push eax
1300 NEXT
1301
1302 DEFCODE "0<",ZLT,0 ; greater than zero
1303 pop eax
1304 test eax,eax
1305 setl al
1306 movzx eax, al
1307 push eax
1308 NEXT
1309
1310 DEFCODE "0>",ZGT,0 ; less than zero
1311 pop eax
1312 test eax,eax
1313 setg al
1314 movzx eax, al
1315 push eax
1316 NEXT
1317
1318 DEFCODE "0<=",ZLE,0
1319 pop eax
1320 test eax,eax
1321 setle al
1322 movzx eax,al
1323 push eax
1324 NEXT
1325
1326 DEFCODE "0>=",ZGE,0
1327 pop eax
1328 test eax,eax
1329 setge al
1330 movzx eax,al
1331 push eax
1332 NEXT
1333
1334 ; +----------------------------------------------------------------------------+
1335 ; | Bitwise logic words |
1336 ; +----------------------------------------------------------------------------+
1337
1338 DEFCODE "AND",AND,0
1339 pop eax
1340 and [esp],eax
1341 NEXT
1342
1343 DEFCODE "OR",OR,0
1344 pop eax
1345 or [esp],eax
1346 NEXT
1347
1348 DEFCODE "XOR",XOR,0
1349 pop eax
1350 xor [esp], eax
1351 NEXT
1352
1353 DEFCODE "INVERT",INVERT,0
1354 not dword [esp]
1355 NEXT
1356
1357 ; +----------------------------------------------------------------------------+
1358 ; | Primitive memory words |
1359 ; +----------------------------------------------------------------------------+
1360
1361 DEFCODE "!",STORE,0
1362 pop ebx ; address to store at
1363 pop eax ; data to store there
1364 mov [ebx], eax
1365 NEXT
1366
1367 DEFCODE "@",FETCH,0
1368 pop ebx ; address to fetch
1369 mov eax, [ebx] ; fetch it
1370 push eax ; push value onto stack
1371 NEXT
1372
1373 DEFCODE "+!",ADDSTORE,0
1374 pop ebx ; address
1375 pop eax ; the amount to add
1376 add [ebx], eax
1377 NEXT
1378
1379 DEFCODE "-!",SUBSTORE,0
1380 pop ebx ; address
1381 pop eax ; the amount to subtract
1382 sub [ebx], eax
1383 NEXT
1384
1385 ; Primitive byte-oriented operations are like the above 32-bit
1386 ; operations, but work on 8 bits. x86 has instructions for this
1387 ; so we can define these.
1388 DEFCODE "C!",STOREBYTE,0
1389 pop ebx ; address to store at
1390 pop eax ; data to store there
1391 mov [ebx], al
1392 NEXT
1393
1394 DEFCODE "C@",FETCHBYTE,0
1395 pop ebx ; address to fetch
1396 xor eax, eax ; clear the register
1397 mov al, [ebx] ; grab a byte
1398 push eax
1399 NEXT
1400
1401 DEFCODE "C@C!",CCOPY,0 ; byte copy
1402 mov ebx, [esp+4] ; source address
1403 mov al, [ebx] ; source byte
1404 pop edi ; destination address
1405 stosb ; copy to destination
1406 push edi ; increment destination address
1407 inc byte [esp+4] ; increment source address
1408 NEXT
1409
1410 DEFCODE "CMOVE",CMOVE,0 ; copy n bytes
1411 mov edx, esi ; preserve esi
1412 pop ecx ; length
1413 pop edi ; destination address
1414 pop esi ; source address
1415 rep movsb ; copy source to destination
1416 mov esi, edx ; restore esi
1417 NEXT
1418
1419 ; +----------------------------------------------------------------------------+
1420 ; | Return stack manipulation words |
1421 ; +----------------------------------------------------------------------------+
1422 ; ebp is the return stack pointer (RSP)
1423 ; In traditional Forth implementations, you're encouraged to put temporary
1424 ; values on the return stack (and you'd better not forget to clean up after
1425 ; yourself! Can you imagine proposing that to someone today? You'd be burned
1426 ; at the stake as a heretic!
1427
1428 DEFCODE ">R",TOR,0 ; move value from param stack to return stack
1429 pop eax
1430 PUSHRSP eax
1431 NEXT
1432
1433 DEFCODE "R>",FROMR,0 ; move value from return stack to param stack
1434 POPRSP eax
1435 push eax
1436 NEXT
1437
1438 DEFCODE "RSP@",RSPFETCH,0 ; get the actual address RSP points to
1439 push ebp
1440 NEXT
1441
1442 DEFCODE "RSP!",RSPSTORE,0 ; set the address RSP points to
1443 pop ebp
1444 NEXT
1445
1446 DEFCODE "RDROP",RDROP,0 ; move RSP to "pop" value and throw it away
1447 add ebp, 4
1448 NEXT
1449
1450 ; +----------------------------------------------------------------------------+
1451 ; | Param stack manipulation words |
1452 ; +----------------------------------------------------------------------------+
1453 ; esp is the param (or "data" or "main") stack pointer (DSP)
1454
1455 DEFCODE "DSP@",DSPFETCH,0
1456 mov eax, esp
1457 push eax
1458 NEXT
1459
1460 DEFCODE "DSP!",DSPSTORE,0
1461 pop esp
1462 NEXT
1463
1464 ; +----------------------------------------------------------------------------+
1465 ; | Misc words needed for interpreter/compiler |
1466 ; +----------------------------------------------------------------------------+
1467
1468 DEFCODE "IMMEDIATE",IMMEDIATE,F_IMMED ; makes latest word immediate
1469 mov edi, [var_LATEST] ; addr of LATEST word.
1470 add edi, 4 ; Point to name/flags byte.
1471 xor byte [edi], F_IMMED ; Toggle the IMMED bit.
1472 NEXT
1473
1474 DEFWORD "HIDE",HIDE,0
1475 dd FWORD ; Get the word (after HIDE).
1476 dd FIND ; Look up in the dictionary.
1477 dd HIDDEN ; Set F_HIDDEN flag.
1478 dd EXIT ; Return.
1479
1480 DEFCODE "CHAR",CHAR,0
1481 call _WORD ; Returns %ecx = length, %edi = pointer to word.
1482 xor eax,eax
1483 mov al,[edi] ; Get the first character of the word.
1484 push eax ; Push it onto the stack.
1485 NEXT
1486
1487 DEFCODE "EXECUTE",EXECUTE,0
1488 pop eax ; Get xt into %eax
1489 jmp [eax] ; and jump to it. After xt runs its NEXT will
1490 ; continue executing the current word.
1491
1492 DEFCODE "SYSCALL3",SYSCALL3,0
1493 pop eax ; System call number (see <asm/unistd.h>)
1494 pop ebx ; First parameter.
1495 pop ecx ; Second parameter
1496 pop edx ; Third parameter
1497 int 80h
1498 push eax ; Result (negative for -errno)
1499 NEXT
1500
1501 DEFCODE "SYSCALL2",SYSCALL2,0
1502 pop eax ; System call number (see <asm/unistd.h>)
1503 pop ebx ; First parameter.
1504 pop ecx ; Second parameter
1505 int 80h
1506 push eax ; Result (negative for -errno)
1507 NEXT
1508
1509 DEFCODE "SYSCALL1",SYSCALL1,0
1510 pop eax ; System call number (see <asm/unistd.h>)
1511 pop ebx ; First parameter.
1512 int 80h
1513 push eax ; Result (negative for -errno)
1514 NEXT
1515
1516 DEFCODE "SYSCALL0",SYSCALL0,0
1517 pop eax ; System call number (see <asm/unistd.h>)
1518 int 80h
1519 push eax ; Result (negative for -errno)
1520 NEXT
1521
1522 ; +----------------------------------------------------------------------------+
1523 ; | Forth constants |
1524 ; +----------------------------------------------------------------------------+
1525 ;
1526 ; VERSION Is the current version of this FORTH.
1527 ; R0 The address of the top of the return stack.
1528 ; DOCOL Pointer to DOCOL.
1529 ; F_IMMED The IMMEDIATE flag's actual value.
1530 ; F_HIDDEN The HIDDEN flag's actual value.
1531 ; F_LENMASK The length mask in the flags/len byte.
1532 ; SYS_* and the numeric codes of various Linux syscalls
1533 ;
1534 ; Check it out! A const is just a word that pushes a value!
1535 %macro DEFCONST 4 ; 1=name 2=label 3=flags 4=value
1536 DEFCODE %1,%2,%3
1537 push %4
1538 NEXT
1539 %endmacro
1540
1541 DEFCONST "VERSION",VERSION,0,NASMJF_VERSION
1542 DEFCONST "R0",R0,0,return_stack_top
1543 DEFCONST "DOCOL",__DOCOL,0,DOCOL
1544 DEFCONST "F_IMMED",__F_IMMED,0,F_IMMED
1545 DEFCONST "F_HIDDEN",__F_HIDDEN,0,F_HIDDEN
1546 DEFCONST "F_LENMASK",__F_LENMASK,0,F_LENMASK
1547
1548 DEFCONST "SYS_EXIT",SYS_EXIT,0,__NR_exit
1549 DEFCONST "SYS_OPEN",SYS_OPEN,0,__NR_open
1550 DEFCONST "SYS_CLOSE",SYS_CLOSE,0,__NR_close
1551 DEFCONST "SYS_READ",SYS_READ,0,__NR_read
1552 DEFCONST "SYS_WRITE",SYS_WRITE,0,__NR_write
1553 DEFCONST "SYS_CREAT",SYS_CREAT,0,__NR_creat
1554 DEFCONST "SYS_BRK",SYS_BRK,0,__NR_brk
1555
1556 DEFCONST "O_RDONLY",__O_RDONLY,0,0
1557 DEFCONST "O_WRONLY",__O_WRONLY,0,1
1558 DEFCONST "O_RDWR",__O_RDWR,0,2
1559 DEFCONST "O_CREAT",__O_CREAT,0,0100
1560 DEFCONST "O_EXCL",__O_EXCL,0,0200
1561 DEFCONST "O_TRUNC",__O_TRUNC,0,01000
1562 DEFCONST "O_APPEND",__O_APPEND,0,02000
1563 DEFCONST "O_NONBLOCK",__O_NONBLOCK,0,04000
1564
1565 ; +----------------------------------------------------------------------------+
1566 ; | Forth built-in variables |
1567 ; +----------------------------------------------------------------------------+
1568 ;
1569 ; STATE Is the interpreter executing code (0) or compiling a word (non-zero)?
1570 ; LATEST Points to the latest (most recently defined) word in the dictionary.
1571 ; HERE Points to the next free byte of memory. When compiling, compiled words go here.
1572 ; S0 Stores the address of the top of the parameter stack.
1573 ; BASE The current base for printing and reading numbers.
1574 ;
1575 ; A variable is a word that leaves its *address* on the stack. Use '@' and '!' to
1576 ; read or write a *value* at that address.
1577 %macro DEFVAR 4 ; 1=name 2=label 3=flags 4=value
1578 DEFCODE %1,%2,%3
1579 push dword var_%2
1580 NEXT
1581 section .data
1582 align 4
1583 var_%2: ; Give it an asm label. Example: var_SZ for 'S0'
1584 dd %4 ; note dd to reserve a "double" (4b)
1585 %endmacro
1586
1587 DEFVAR "STATE",STATE,0,0
1588 DEFVAR "HERE",HERE,0,0
1589 DEFVAR "S0",SZ,0,0
1590 DEFVAR "BASE",BASE,0,10
1591 DEFVAR "CSTART",CSTART,0,0
1592 DEFVAR "CEND",CEND,0,0
1593 DEFVAR "READFROM",READFROM,0,read_from_fd ; LOADJF - make available to Forth???
1594 DEFVAR "LATEST",LATEST,0,name_LATEST ; points to last word defined...which will just
1595 ; happen to be self. We'll see if this works.
1596
1597 ; +----------------------------------------------------------------------------+
1598 ; | Data section - reserve memory for interpreter use |
1599 ; +----------------------------------------------------------------------------+
1600 ;
1601 ; db - "define byte(s)"
1602 ; dd - "define double" (4 bytes)
1603 ;
1604 SECTION .data
1605 align 4
1606 currkey: dd 0 ; Current place in input buffer (next character to read).
1607 bufftop: dd 0 ; Last valid data in input buffer + 1.
1608 interpret_is_lit: dd 0 ; 1 means "reading a literal"
1609 read_from_fd: dd 0 ; 0=STDIN, etc.
1610 errmsg: db "PARSE ERROR: "
1611 errmsgend:
1612 errmsgnl: db `\n`
1613 loadjf_fail_msg: db "ERROR Could not open '" ; LOADJF
1614 loadjf_fail_msg_end: ; LOADJF
1615 loadjf_fail_msg2: db "'." ; LOADJF
1616 db `\n` ; LOADJF
1617 loadjf_fail_msg_end2: ; LOADJF