Retro Programming: Itsy Forth: Implementing the Primitives

Sunday, 15 April 2012

Itsy Forth: Implementing the Primitives

Itsy Forth is a tiny subset of the Forth programming language. So far we've looked at the Forth outer interpreter, inner interpreter and dictionary. This time we'll define the words required to complete the interpreter.

Peek and Poke

The Forth words to read and write memory are @ and !:

@ - ( addr -- x ) read x from addr
! - ( x addr -- ) store x at addr
c@ - ( addr -- char ) read char from addr

( before -- after ) shows the contents of the stack before and after the word executes. Here's how @, c@ and ! are implemented. Remember we're keeping the top element of the data stack in the bx register.

        primitive '@',fetch
        mov bx,word[bx]
        jmp next

        primitive '!',store 
        pop word[bx]
        pop bx
        jmp next

        primitive 'c@',c_fetch
        mov bl,byte[bx]
        mov bh,0
        jmp next

Manipulating the Stack

drop - ( x -- ) remove x from the stack
dup - ( x -- x x ) add a copy of x to the stack
swap - ( x y -- y x ) exchange x and y
rot - ( x y z -- y z x ) rotate x, y and z

        primitive 'drop',drop
        pop bx
        jmp next

        primitive 'dup',dupe
        push bx
        jmp next

        primitive 'swap',swap
        pop ax
        push bx
        xchg ax,bx
        jmp next

        primitive 'rot',rote
        pop dx
        pop ax
        push dx
        push bx
        xchg ax,bx
        jmp next

Flow Control

if, else, then, begin and again all compile to branch or 0branch.

0branch - ( x -- ) jump if x is zero
branch - ( -- ) unconditional jump
execute - ( xt -- ) call the word at xt
exit - ( -- ) return from the current word

The destination address for the jump is compiled in the cell straight after the branch or 0branch instruction. execute stores the return address on the return stack and exit removes it.

        primitive '0branch',zero_branch
        lodsw
        test bx,bx
        jne zerob_z
        xchg ax,si
zerob_z pop bx
        jmp next

        primitive 'branch',branch
        mov si,word[si]
        jmp next

        primitive 'execute',execute
        mov di,bx
        pop bx
        jmp word[di]

        primitive 'exit',exit
        mov si,word[bp]
        inc bp
        inc bp
        jmp next

Variables and Constants

tib - ( -- addr ) address of the input buffer
#tib - ( -- addr ) number of characters in the input buffer
>in - ( -- addr ) next character in input buffer
state - ( -- addr ) true = compiling, false = interpreting
dp - ( -- addr ) first free cell in the dictionary
base - ( -- addr ) number base
last - ( -- addr ) the last word to be defined

        constant 'tib',t_i_b,32768

        variable '#tib',number_t_i_b,0

        variable '>in',to_in,0

        variable 'state',state,0

        variable 'dp',dp,freemem

        variable 'base',base,10

        variable 'last',last,final

        ; execution token for constants
doconst push bx
        mov bx,word[di+2]
        jmp next

        ; execution token for variables
dovar   push bx
        lea bx,[di+2]
        jmp next

Compilation

, - ( x -- ) compile x to the current definition
c, - ( char -- ) compile char to the current definition
lit - ( -- ) push the value in the cell straight after lit

        primitive ',',comma
        mov ax,word[val_dp]
        xchg ax,bx
        add word[val_dp],2
        mov word[bx],ax
        pop bx
        jmp next

        primitive 'c,',c_comma
        mov ax,word[val_dp]
        xchg ax,bx
        inc word[val_dp]
        mov byte[bx],al
        pop bx
        jmp next

        primitive 'lit',lit
        push bx
        lodsw
        xchg ax,bx
        jmp next

Maths / Logic

+ - ( x y -- z) calculate z=x+y then return z
= - ( x y -- flag ) return true if x=y

        primitive '+',plus
        pop ax
        add bx,ax
        jmp next

        primitive '=',equals
        pop ax
        sub bx,ax
        sub bx,1
        sbb bx,bx
        jmp next

Handling Strings

count - ( addr -- addr2 len ) addr contains a counted string. Return the address of the first character and the string's length
>number - ( double addr len -- double2 addr2 len2 ) convert string to number

addr contains a string of len characters which >number attempts to convert to a number using the current number base. >number returns the portion of the string which can't be converted, if any. If you're a Forth purist this is where Itsy starts to get ugly :-)

        primitive 'count',count
        inc bx
        push bx
        mov bl,byte[bx-1]
        mov bh,0
        jmp next

        primitive '>number',to_number
        pop di
        pop cx
        pop ax
to_numl test bx,bx
        je to_numz
        push ax
        mov al,byte[di]
        cmp al,'a'
        jc to_nums
        sub al,32
to_nums cmp al,'9'+1
        jc to_numg
        cmp al,'A'
        jc to_numh
        sub al,7
to_numg sub al,48
        mov ah,0
        cmp al,byte[val_base]
        jnc to_numh
        xchg ax,dx
        pop ax
        push dx
        xchg ax,cx
        mul word[val_base]
        xchg ax,cx
        mul word[val_base]
        add cx,dx
        pop dx
        add ax,dx
        dec bx
        inc di
        jmp to_numl
to_numz push ax
to_numh push cx
        push di
        jmp next

Terminal Input / Output

accept - ( addr len -- len2 ) read a string from the terminal
emit - ( char -- ) display char on the terminal
word - ( char -- addr ) parse the next word in the input buffer

accept reads a string of characters from the terminal. The string is stored at addr and can be up to len characters long. accept returns the actual length of the string.
word reads the next word from the terminal input buffer, delimited by char. The address of a counted string is returned. The string length will be 0 if the input buffer is empty.

        primitive 'accept',accept
        pop di
        xor cx,cx
acceptl call getchar
        cmp al,8
        jne acceptn
        jcxz acceptb
        call outchar
        mov al,' '
        call outchar
        mov al,8
        call outchar
        dec cx
        dec di
        jmp acceptl
acceptn cmp al,13
        je acceptz
        cmp cx,bx
        jne accepts
acceptb mov al,7
        call outchar
        jmp acceptl
accepts stosb
        inc cx
        call outchar
        jmp acceptl
acceptz jcxz acceptb
        mov al,13
        call outchar
        mov al,10
        call outchar
        mov bx,cx
        jmp next

getchar mov ah,7
        int 021h
        mov ah,0
        ret

outchar xchg ax,dx
        mov ah,2
        int 021h
        ret

        primitive 'word',word
        mov di,word[val_dp]
        push di
        mov dx,bx
        mov bx,word[val_t_i_b]
        mov cx,bx
        add bx,word[val_to_in]
        add cx,word[val_number_t_i_b]
wordf   cmp cx,bx
        je wordz
        mov al,byte[bx]
        inc bx
        cmp al,dl
        je wordf
wordc   inc di
        mov byte[di],al
        cmp cx,bx
        je wordz
        mov al,byte[bx]
        inc bx
        cmp al,dl
        jne wordc
wordz   mov byte[di+1],32
        mov ax,word[val_dp]
        xchg ax,di
        sub ax,di
        mov byte[di],al
        sub bx,word[val_t_i_b]
        mov word[val_to_in],bx
        pop bx
        jmp next

        primitive 'emit',emit
        xchg ax,bx
        call outchar
        pop bx
        jmp next

Searching the Dictionary

find - ( addr -- addr2 flag ) look up word in the dictionary

find looks in the Forth dictionary for the word in the counted string at addr. One of the following will be returned:

flag = 0, addr2 = counted string - if word not found
flag = 1, addr2 = call address if word is immediate
flag = -1, addr2 = call address if word is not immediate

        primitive 'find',find
        mov di,val_last
findl   push di
        push bx
        mov cl,byte[bx]
        mov ch,0
        inc cx
findc   mov al,byte[di+2]
        and al,07Fh
        cmp al,byte[bx]
        je findm
        pop bx
        pop di
        mov di,word[di]
        test di,di
        jne findl
findnf  push bx
        xor bx,bx
        jmp next
findm   inc di
        inc bx
        loop findc
        pop bx
        pop di
        mov bx,1
        inc di
        inc di
        mov al,byte[di]
        test al,080h
        jne findi
        neg bx
findi   and ax,31
        add di,ax
 inc di
        push di
        jmp next

Initialisation

abort - ( -- ) initialise Itsy then jump to interpret

abort initialises the stacks and a few variables before running the outer interpreter. When Itsy first runs it jumps to abort to set up the system.

        primitive 'abort',abort
        xor ax,ax
        mov word[val_state],ax
        mov word[val_to_in],ax
        mov word[val_number_t_i_b],ax
        xchg ax,bp
        mov sp,-256
        mov si,xt_interpret+2
        jmp next

Up and Running?

Itsy is now around 900 bytes and it's time to give the interpreter a quick test run:

Everything seems to be working fine. Next we'll define the compiler so we can continue building Itsy from the Itsy prompt :-)

24 comments:

Mentifex15 April 2012 at 14:08
Great to see a Forth resource like this one that I saw on Reddit.
ReplyDelete
Replies
Unknown15 April 2012 at 22:40
Well done John. I bet it was a great feeling when the interpreter output "Itsy". Keep up the good work.
ReplyDelete
Replies
Anonymous15 April 2012 at 23:35
Could you please provide an example in Visual Basic .NET. It is not a very good practice to use assembly language in high level software.
ReplyDelete
Replies
nlandsberg16 April 2012 at 07:45
Feel free to post the complete source code for Itsy Forth :)
ReplyDelete
Replies
Anonymous17 April 2012 at 04:19
Thanks for these posts - fascinating!

How did you get this up and running when (unless I've missed it) you don't yet have definitions of the control strucutres (IF, AGAIN) used by the outer interpreter? Did you hand-compile the 0BRANCH calls in the outer interpreter?
ReplyDelete
Replies
eventi20 April 2012 at 08:31
Thanks for these posts - I had forgotten how much fun Forth and asm are :)

Can't count be implemented with extant primitives?

: count ( addr -- addr2 len ) dup 1 + swap c@ ;
ReplyDelete
Replies
Rick (Ratty)4 May 2012 at 17:05
It's been a very long time since I've done any programming. This does look like a good language to learn.
ReplyDelete
Replies
error107928 May 2012 at 15:36
hi................
fantastic post .I really impressed to see your way of explaining codes .can you tell me what to do to learn the assembly language.
ReplyDelete
Replies
Mike Adams17 August 2012 at 14:32
Hi John,

I believe there's a minor bug in Itsy Forth's >number routine. If it completes successfully, it returns four words on the top of the stack, just as you describe in your article: the converted double-precision number, the leftover string pointer, and a 0 that's left after counting through the string. However, if it fails due to a non-numeric string, it does not push both words of the double back onto the stack; it only pushes the high-order word, followed by the string pointer and the non-zero
counter value. Three words rather than four. This isn't a problem for interpret, which almost immediately calls abort and clears the stack out anyway, but for somebody else who might try to use >number independently, this could be the source of some serious hair-ripping frustration. It looks to me like it would be easy to fix this simply by changing the "jc to_numh" line to read "jc to_numz".

Mike
ReplyDelete
Replies
Anonymous6 July 2013 at 04:53
A Question. If we changed the getchar and outchar to bios calls instead of int 21h
would itsy forth then run as a standalone system (ie no operating system)?
ReplyDelete
Replies
BruceMcF6 June 2014 at 14:05
Note that if you have NAND R> and >R you can get the ANS Forth CORE logic words and most of the rest of the CORE stack manipulation words.

nand is ( x y -- z=NOT{x&y} )

: invert ( x -- y=NOT{x} ) dup nand ;
: and ( x y -- z=x&y ) nand invert ;
: over ( x y -- x y x ) >r dup r> swap ;
: 2dup ( x y -- x y x y ) over over ;
: or ( x y -- z=x|y ) invert swap invert nand ;
: xor ( x y -- z=x^y ) 2dup nand dup >r nand swap r> nand nand ;

XOR is involved enough that I'd go with at least >R R> NAND XOR

We also have a subtraction definition available from the above:

: - ( x y -- z=x-y ) invert 1 + + ;

You can get >R and R> NAND and XOR for the cost of three additional primitives rather than four, since they allow you to make ROT a Forth definition:

: rot ( x y z -- y z x ) >r swap r> swap ;
ReplyDelete
Replies
BruceMcF6 June 2014 at 14:10
Add NAND XOR >R and R> and you can synthesize the rest of the CORE logic and much of the stack manipulation. You can even synthesize ROT so that's three extra primitives rather than four:

: rot ( x y z -- y z x ) >r swap r> swap ;
: over ( x y -- x y x ) >r dup r> swap ;
: 2dup ( x y -- x y x y ) over over ;

: invert ( x -- z=NOT{x} ) dup nand ;
; and ( x y -- z=A&B ) nand invert ;
: or ( x y -- z=A|B ) invert swap invert nand ;
: negate ( x -- -x ) invert 1 + ;
: - ( x y z=x-y) negate + ;

ReplyDelete
Replies
BruceMcF7 June 2014 at 16:16
May as well add some more thoughts on extending this through to a Standard ANS Forth (at least as far as ANS Forth CORE).

CORE and CORE EXT R operations:

Core: R> >R R@
Core EXT: 2R> 2>R 2R@
Not Standard but common factor: RDROP

If you have R> and >R than you can define the rest ... doing the rest in primitives is therefore a speed vs space tradeoff, not a functional requirement:

: R@ ( -- x | R: x -- x ) R> DUP R> ;
: 2R> ( -- x y | R: x y -- ) R> R> SWAP ;
: 2>R ( x y -- | R: -- x y ) SWAP >R >R ;
: 2R@ ( -- x | R: x -- x ) R> DUP R> DUP ROT >R >R ;

If you have 0< and 0= and use NAND to get your CORE bitwise logic, you can get the boolean operators 0> < = >

: 0> ( x -- fl ) DUP 0= SWAP 0< NAND ;
: < ( x y -- fl ) - 0< ;
: = ( x y -- fl ) - 0= ;
: > ( x y -- fl ) - 0> ;
: ABS ( x -- |x| ) dup 0< if negate then ;

+! can just be:
: +! ( x a -- ) dup >r @ + R> ! ;

1+ CHAR+ 2+ and CELL+ are just

: 1+ ( x -- y ) 1 + ;
: CHAR+ ( ca1 -- ca2 ) 1+ ;
: 2+ ( x -- y ) 2 + ;
: CELL+ ( a1 -- a2 ) 2+ ;

Then 2! and 2@ are (high word first is ANS standard):

: 2! ( x y a -- ) dup >r ! r> 2+ ! ;
: 2@ ( a -- x y ) dup >r cell+ @ r> @ ;

You need 2* and 2/ which then lets you define CELLS with 2* for 16-bit and 2* 2* for 32-bit. CHARS is just a no-op if using ASCII or an 8bit code page:

: CHARS ( x -- y ) ;

* / */ MOD /MOD M* can all be defined from UM/MOD and UM* and D>S S>D from 0< NEGATE 0 and -1 (defined -1 as a constant if negatives are not yet in the INTERPRET routine) ~ definitions omitted here, but they are mostly sign manipulation with 0< and XOR

Whether hardware division is floored or symmetric ~ that is, whether UM/MOD is FM/MOD or SM/REM ~ the other can be derived from the result, and using the hardware integer divide then correcting is both more compact and quicker than writing a division routine in Forth.

?DUP is simply
: ?dup ( x -- 0 | x x ) dup if dup then ;

EVALUATE looks like it would be tricky, but it can be done with your interpret by adding a SOURCE-ID variable and testing it in your INTERPRET. Pushing the current TIB #TIB >IN and SOURCE-ID onto the return stack, store the evaluated string address and count, set SOURCE-ID to -1 >IN to 0 and call INTERPRET, then restore them.

: evaluate ( ca u -- )
#tib @ >r tib @ >in @ >r source-id @ >r
#tib ! tib ! 0 >in ! -1 source-id ! interpreter
r> source-id ! r> >in ! r> tib ! r> #tib ! ;

Extend INTERPRETER so that the test for whether to refill the buffer is:

...
#tib @ >in @ =
if
source-id @ if exit then
tib 50 accept #tib ! 0 >in !
then
...

And the cold start so that 0 is stored in source-id. Now the outer interpreter will loop endlessly for source-id of 0 but will return when called for a single input line by EVALUATE.

If you have a DO LOOP, LSHIFT and RSHIFT can be defined from 2* and 2/

MAX and MIN are:

: max ( x y -- x | y ) 2dup < if swap then drop ;
: min ( x y -- x | y ) 2dup < INVERT if swap then drop ;

UMAX and UMIN similarly, but they require U<.

x U< y ... same as < if both are the same sign
x U< y ... same as ... y 0< ... if they are different signs

: U< ( ux uy -- fl ) \ defined by using signed < and 0<
2dup 0< r> 0< XOR if swap drop 0< else < then ;

.... though the hardware primitive of subtracting them and setting the flag based on whether there was an overflow carry can easily be shorter, and if so, implement it as a primitive.
ReplyDelete
Replies

Add comment

Note: only a member of this blog may post a comment.