home bbs files messages ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.lang.forth      Forth programmers eat a lot of Bratwurst      117,927 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 117,842 of 117,927   
   peter to Paul Rubin   
   Re: EuroForth 2025 preliminary proceedin   
   19 Jan 26 23:26:35   
   
   From: peter.noreply@tin.it   
      
   On Fri, 16 Jan 2026 23:10:24 -0800   
   Paul Rubin  wrote:   
      
   > Hans Bezemer  writes:   
   > > 5. I added GCC extension support to 4tH in version 3.62.0. At the   
   > > time, it improved performance by about 25%. By accident I found out   
   > > that was no longer true. switch() based was faster. I didn't know   
   > > there had been changes in that regard to GCC.   
   >   
   > If you mean the goto *a feature, these days you might try using tail   
   > calls instead.  GCC and LLVM both now support a musttail attribute that   
   > ensures this optimization, or signals a compile-time error if it can't.   
   >   
   > https://lwn.net/Articles/1033373/   
      
   I got interested in understanding how tail calls could improve on   
   computed gotos. So I took the first five "opcodes" from the VM in   
   NTF64/LXF64 to compare the generated asm.   
   The VM was written from the beginning in X64 assembler (13 years ago).   
   4 years ago I also implemented the VM in C to simplify porting to ARM64.   
   At that time the asm version was about 10% faster than the generated   
   C code; today the speed is about the same. C compilers have improved.   
   It was implemented using computed gotos, using the following macro   
   as the nesting code ending each "opcode":   
      
   #define RELOAD()  code=*ip++; goto *jmp_table[code]	   
      
   for the tail call version it was changed to   
      
   RELOAD() opcode func=(opcode)tbl[*ip++]; __attribute__((musttail))   
                    return func(ip, tbl, TOP, FTOP, sp, rp, fp, lp)   
      
   (line broken to be readable)   
      
   The noop "opcode" has just the nesting and produces the following code:   
      
   	movzx	r9d, byte ptr [rcx]   
   	inc	rcx   
   	jmp	qword ptr [rax + 8*r9]   
      
   and for the tailcall version   
      
   	movzx	eax, byte ptr [r12]   
   	inc	r12   
   	mov	rax, qword ptr [r13 + 8*rax]   
   	rex64 jmp	rax   
      
   both compiled with   
   clang -S -Wall -O2 -masm=intel -o vm8test3.asm vm8tail.c   
      
   As I suspected the code is practically identical!   
      
   It also turns out that the musttail attribute is not necessary.   
   It will generate a tail call anyway. The difference is that with   
   musttail it will report an error if it cannot do the tail call.   
      
   Much more important is the __attribute__((preserve_none)) before   
   each function. This indicates that more registers will be used to pass   
   parameters. As seen above I pass 8 parameters to each function and   
   they need to be in registers to match the assembler-written code.   
   This is done automatically in the goto version as everything is in   
   one function there.   
      
   In the end it is more how you like to write your VM, as one function   
   or one for each "opcode".   
      
   Unfortunately GCC does not recognize preserve_none and uses the stack   
   for some parameters.   
      
   Here is my test code   
      
   // VM8 C variant using computed goto   
      
   #include    
      
   #define UNS8  unsigned char   
   #define INT64 long long int   
   #define UNS64 unsigned long long int   
      
   #define RELOAD()  code=*ip++; goto *jmp_table[code]	   
      
   void VM8(UNS8 *ip, UNS64 *sp, UNS64 *rp, double *fp, UNS64 *lp ) {   
      
   const static void* jmp_table[] = {	   
   	&&noop,   
   	&&swap,   
   	&&rot,   
   	&&eqzero,   
   	&&negate,   
   };   
      
   	UNS8 code=*ip;   
   	UNS64 tmp;   
   	UNS64 TOP=*sp++;   
   //	double FTOP=*fp++;   
      
   	RELOAD();   
   	   
      
   	noop: 			// do nothing   
   		RELOAD();   
   	swap: 			//  swap   
           tmp=sp[0];   
   		sp[0]=TOP;   
   		TOP=tmp;   
   		RELOAD();   
   	rot: 			//  rot   
   		tmp=TOP;   
   		TOP=sp[1];   
   		sp[1]=sp[0];   
   		sp[0]=tmp;   
   		RELOAD();   
   	eqzero: 		//  0=   
   		TOP=-(TOP==0);   
   		RELOAD();   
   	negate:  		// negate   
   		TOP=-TOP;   
   		RELOAD();   
   	   
   	   
   } //vm8   
      
      
   And here is the tail call version. Sorry for the long lines!   
      
   // VM8 C variant using tailcalls   
      
   #include    
      
   #define UNS8  unsigned char   
   #define INT64 long long int   
   #define UNS64 unsigned long long int   
      
      
   typedef  __attribute__((preserve_none)) void (*opcode) (UNS8*, UNS64*, UNS64,   
   double, UNS64*, UNS64*, double*, UNS64*);   
      
   #define RELOAD() opcode func=(opcode)tbl[*ip++]; __attribute__((musttail))   
   return func(ip, tbl, TOP, FTOP, sp, rp, fp, lp)	   
      
   #define FUNC   __attribute__((preserve_none)) void   
      
   FUNC	noop(UNS8 *ip, UNS64 *tbl, UNS64 TOP, double FTOP, UNS64 *sp, UNS64 *rp,   
   double *fp, UNS64 *lp )  			// do nothing   
           {   
   		RELOAD();   
           }   
      
   FUNC	swap(UNS8 *ip, UNS64 *tbl, UNS64 TOP, double FTOP, UNS64 *sp, UNS64 *rp,   
   double *fp, UNS64 *lp )  			//  swap   
   		{UNS64 tmp;   
           tmp=sp[0];   
   		sp[0]=TOP;   
   		TOP=tmp;   
   		RELOAD();}   
      
   FUNC	rot(UNS8 *ip, UNS64 *tbl, UNS64 TOP, double FTOP, UNS64 *sp, UNS64 *rp,   
   double *fp, UNS64 *lp )  			//  rot   
   		{UNS64 tmp=TOP;   
   		TOP=sp[1];   
   		sp[1]=sp[0];   
   		sp[0]=tmp;   
   		RELOAD();}   
      
   FUNC	eqzero(UNS8 *ip, UNS64 *tbl, UNS64 TOP, double FTOP, UNS64 *sp, UNS64   
   *rp, double *fp, UNS64 *lp )  		//  0=   
   		{TOP=-(TOP==0);   
   		RELOAD();}   
      
   FUNC	negate(UNS8 *ip, UNS64 *tbl, UNS64 TOP, double FTOP, UNS64 *sp, UNS64   
   *rp, double *fp, UNS64 *lp )   		// negate   
   		{TOP=-TOP;   
   		RELOAD();}   
      
   // Handler table for the tail-call VM.  VM8() passes its address along as
   // tbl, and tbl is indexed with the opcode byte, so the order of the entries
   // defines the opcode numbering (0 = noop ... 4 = negate).
   opcode jmp_table[]={	   
   	noop,   
   	swap,   
   	rot,   
   	eqzero,   
   	negate,   
   };   
      
      
      
   void VM8(UNS8 *ip, UNS64 *sp, UNS64 *rp, double *fp, UNS64 *lp ) {   
      
      
       UNS64 *tbl=(UNS64*)&jmp_table;   
       UNS64 TOP=*sp++;   
       double FTOP=*fp++;   
      
      
       opcode func=(opcode)tbl[*ip++];   
       func( ip, tbl, TOP, FTOP, sp, rp, fp, lp);   
      
   }	   
   	   
    //vm8   
      
   BR   
   Peter   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca