home bbs files messages ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.arch      Apparently more than just beeps & boops      131,241 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 130,541 of 131,241   
   Michael S to Thomas Koenig   
   Re: Tonights Tradeoff   
   17 Dec 25 12:02:12   
   
   From: already5chosen@yahoo.com   
      
   On Tue, 16 Dec 2025 17:51:28 -0000 (UTC)   
   Thomas Koenig  wrote:   
      
   > Michael S  schrieb:   
   >   
   > > Today I tested the speed of the gcc implementation of Decimal128
   > > (BID-encoded, of course) on an Intel Core i7-14700.
   > > Average time in nsec:   
   > > op     Add Sub Mul Div   
   > > P-Core  33  33  86  76   
   > > E-Core  46  48 121 108   
   > >   
   > > Counter-intuitively, division is faster than multiplication.
   > > And both appear much slower than necessary.   
   >   
   > Interesting.  Could you provide the benchmark used?   
      
   // tb.cpp   
   #include <stdint.h>
   #include <stdio.h>
   #include <time.h>
   #include <algorithm>
   #include <random>
   #include <vector>
      
   extern "C" {   
   void uut(void*, const void*, const void*);   
   };   
      
   static inline   
   uint64_t umulh(uint64_t a,uint64_t b) {   
     return uint64_t(((unsigned __int128)a * b)>>64);   
   }   
      
   int main(int , char** )   
   {   
     const int N_PAIRS = 1000000;   
     const int N_ITER  = 17;   
     typedef unsigned __int128 u128;   
     std::vector src(N_PAIRS*2);   
     std::mt19937_64 prng(1);   
     const unsigned EXP_BIAS = 6143;   
     const unsigned EXP_SHIFT = 113;   
     for (int i = 0; i < N_PAIRS*2; ++i) {   
       // generate pseudo-random number in range [1e33:1e34-1]   
       const uint64_t RNG_LO  = (long long)1e17;   
       const uint64_t RNG_HI  = (long long)9e16;   
       const uint64_t BASE_HI = (long long)1e16;   
       uint64_t lo = umulh(prng(), RNG_LO); // [0:1e17-1]   
       uint64_t hi = umulh(prng(), RNG_HI) + BASE_HI;  // [1e16:1e17-1]   
       u128 val = (u128)hi*RNG_LO + lo;   
       unsigned exp = EXP_BIAS + umulh(prng(), 50) - 25;   
       const u128 exp_val = (u128)exp << EXP_SHIFT;   
       src[i] = val | exp_val;   
     }   
      
     std::vector dt(N_ITER);   
     for (int it = 0; it < N_ITER; ++it) {   
       struct timespec t0;   
       clock_gettime(CLOCK_MONOTONIC, &t0);   
       u128 dummy1 = 0;   
       const u128* pSrc = src.data();   
       for (int i = 0; i < N_PAIRS; ++i) {   
         u128 rat;   
         uut(&rat, &pSrc[i*2+0], &pSrc[i*2+1]);   
         dummy1 ^= rat;   
       }   
       if (dummy1 == 42)   
         printf("Blue Moon\n");   
       struct timespec t1;   
       clock_gettime(CLOCK_MONOTONIC, &t1);   
       dt[it] = (t1.tv_sec - t0.tv_sec)*(long long)(1e9) + (long   
     long)t1.tv_nsec - (long long)t0.tv_nsec; }   
     // find median   
     std::nth_element(&dt[0], &dt[N_ITER/2], &dt[N_ITER]);   
     long long dt_med = dt[N_ITER/2];   
     printf("%.1f nsec\n", (double)dt_med / N_PAIRS);   
      
     return 0;   
   }   
   // end tb.cpp   
      
      
   // gcc_dec128add.c   
   #include    
      
   void uut(void* pRes, const void* pA, const void* pB)   
   {   
     _Decimal128 a, b, res;   
     memcpy(&a, pA, sizeof(a));   
     memcpy(&b, pB, sizeof(b));   
     res = a + b;   
     memcpy(pRes, &res, sizeof(res));   
   }   
   // end gcc_dec128add.c   
      
   // gcc_dec128sub.c   
   #include    
      
   void uut(void* pRes, const void* pA, const void* pB)   
   {   
     _Decimal128 a, b, res;   
     memcpy(&a, pA, sizeof(a));   
     memcpy(&b, pB, sizeof(b));   
     res = a - b;   
     memcpy(pRes, &res, sizeof(res));   
   }   
   // end gcc_dec128sub.c   
      
   // gcc_dec128mul.c   
   #include    
      
   void uut(void* pRes, const void* pA, const void* pB)   
   {   
     _Decimal128 a, b, res;   
     memcpy(&a, pA, sizeof(a));   
     memcpy(&b, pB, sizeof(b));   
     res = a * b;   
     memcpy(pRes, &res, sizeof(res));   
   }   
   // end gcc_dec128mul.c   
      
   // gcc_dec128div.c   
   #include    
      
   void uut(void* pRat, const void* pNum, const void* pDen)   
   {   
     _Decimal128 den, num, rat;   
     memcpy(&num, pNum, sizeof(num));   
     memcpy(&den, pDen, sizeof(den));   
     rat = num / den;   
     memcpy(pRat, &rat, sizeof(rat));   
   }   
   // end gcc_dec128div.c   
      
      
   Build script   
   COPT="-O2 -Wall -march=haswell -mtune=skylake"   
   mkdir -p obj   
   mkdir -p out   
   g++ -c $COPT tb.cpp -o obj/tb.o   
   gcc -c $COPT gcc_dec128add.c -o obj/gcc_dec128add.o   
   gcc -c $COPT gcc_dec128sub.c -o obj/gcc_dec128sub.o   
   gcc -c $COPT gcc_dec128mul.c -o obj/gcc_dec128mul.o   
   gcc -c $COPT gcc_dec128div.c -o obj/gcc_dec128div.o   
   g++ -s obj/tb.o obj/gcc_dec128add.o -o out/tst_add.exe   
   g++ -s obj/tb.o obj/gcc_dec128sub.o -o out/tst_sub.exe   
   g++ -s obj/tb.o obj/gcc_dec128mul.o -o out/tst_mul.exe   
   g++ -s obj/tb.o obj/gcc_dec128div.o -o out/tst_div.exe   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca