... darkrealms ...

Forums before death by AOL, social media and spammers... "We can't have nice things"
comp.lang.asm.x86
Ahh, the lost art of x86 assembly
4,675 messages
[ << oldest | < older | list | newer > | newest >> ]
Message 3,490 of 4,675
Bart to Rick C. Hodgin
Re: Fast Fizz Buzz program
21 Jul 18 18:30:09
   From: bc@nospicedham.freeuk.com   
      
   On 18/07/2018 23:44, Rick C. Hodgin wrote:   
   > On 07/18/2018 04:20 PM, Rod Pemberton wrote:   
   >> Here's the corrected and bitmask extended C version of Rick's:   
   >>   
   >> #include    
   >> #include    
   >> #include    
   >>   
   >> void fizz(uint32_t num) {printf("Fizz\n");}   
   >> void buzz(uint32_t num) {printf("Buzz\n");}   
   >> void fizzbuzz(uint32_t num) {printf("FizzBuzz\n");}   
   >> void other(uint32_t num) {printf("%d\n",num);}   
   >>   
   >> struct STargets {void (*func)(uint32_t num);};   
   >> struct STargets funcs[4]={{other},{fizz},{buzz},{fizzbuzz}};   
   >>   
   >> uint64_t three_data[3]={0x4924924924924924,0x2492492492492492,   
   >>                          0x9249249249249249};   
   >> uint64_t five_data[5]={0x0842108421084210,0x1084210842108421,   
   >>                         0x2108421084210842,0x4210842108421084,   
   >>                         0x8421084210842108};   
   >>   
   >> int main(int argc, char* argv[])   
   >> {   
   >>    uint32_t lnI;   
   >>    uint64_t saved_data;   
   >>   
   >> /* WARNING bitmasks start at value one */   
   >> /* bitmasks must be shifted for lnI != 1 */   
   >>   
   >>    for(lnI=1;lnI<=1000000;++lnI)   
   >>    {   
   >>      funcs[(three_data[0]&0x1)+(2*(five_data[0]&0x1))].func(lnI);   
   >>   
   >>      saved_data=three_data[0];   
   >>      three_data[0]=(three_data[0]>>1)|((three_data[1]&1)<<63);   
   >>      three_data[1]=(three_data[1]>>1)|((three_data[2]&1)<<63);   
   >>      three_data[2]=(three_data[2]>>1)|((saved_data&1)<<63);   
   >>   
   >>      saved_data=five_data[0];   
   >>      five_data[0]=(five_data[0]>>1)|((five_data[1]&1)<<63);   
   >>      five_data[1]=(five_data[1]>>1)|((five_data[2]&1)<<63);   
   >>      five_data[2]=(five_data[2]>>1)|((five_data[3]&1)<<63);   
   >>      five_data[3]=(five_data[3]>>1)|((five_data[4]&1)<<63);   
   >>      five_data[4]=(five_data[4]>>1)|((saved_data&1)<<63);   
   >>      }   
   >>   
   >> return(0);   
   >> }   
   >   
   > There's a simpler version that's been posted now on comp.lang.c.   
   > It is written by Ben Bacarisse, altered slightly here by me.  I   
   > have not tested it, but you can see it here and see if it's any   
   > faster using less values, and less complexity:   
   >   
   >      #include    
   >   
   >      void fizz(void)         { printf("fizz\n");      }   
   >      void buzz(void)         { printf("buzz\n");      }   
   >      void fizzbuzz(void)     { printf("fizzbuzz\n");  }   
   >      void other(void)        { printf("%d\n", num);   }   
   >   
   >      void (*funcs[4])(int) = { other, fizz, buzz, fizzbuzz };   
   >   
   >      int num;   
   >   
   >      int main(void)   
   >      {   
   >           int threes = 1 << 2, fives = 1 << 4;   
   >   
   >           for (num = 1; num <= 100; ++num) {   
   >                funcs[(threes & 1) + (2 * (fives & 1))]();   
   >   
   >                threes = (threes >> 1) | ((threes & 1) << 2);   
   >                fives  = (fives  >> 1) | ((fives  & 1) << 4);   
   >          }   
   >      }   
   >   
      
   Maybe you /should/ have tested it, because it doesn't compile!   
      
   But I ran Ben's original version under gcc-O3, and it was the same speed   
   as any other C compiler with or without optimisation.   
      
   In fact, it was slightly slower than a version I did using interpreted   
   byte-code. Which sort of shows the pointlessness of optimising a program   
   like this, even with ultra-slow console i/o eliminated.   
      
   (Tested with N increased to 1 million, and redirecting output to a file   
   for the C version, writing (in one go) to a file for the interpreted   
   version [see after sig]. Both measured by calling clock() at entry and   
   exit from program, and both run under Windows.)   
      
   Saving modulo operations also seems pointless when you still use a "%d"   
   format which usually involves divide and remainder ops to turn a binary   
   number into text.   
      
   It might be a little more interesting if the task is reduced to one of   
   of filling a text buffer full of numbers, fizz and buzz (some 6.3MB to   
   be filled when N is 1 million and using '\n' separators, or a possible   
   7.3MB on Windows if using cr,lf).   
      
   With i/o eliminated, changing the algorithms might then make a bigger   
   difference, and you would expect optimised C to do better than   
   interpreted byte-code.   
      
   --   
   bart   
      
      
   proc fb2=   
        s:=""   
        n:=1 million   
        for i to n do   
            a:=i rem 3   
            b:=i rem 5   
            if a+b=0 then   
                s+:="FizzBuzz\n"   
            elsif a=0 then   
                s+:="Fizz\n"   
            elsif b=0 then   
                s+:="Buzz\n"   
            else   
                s+:=tostr(i)   
                s+:=10   
            fi   
        od   
      
        writestrfile("output",s)   
   end   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)
[ << oldest | < older | list | newer > | newest >> ]