home bbs files messages ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.lang.asm.x86      Ahh, the lost art of x86 assembly      4,675 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 4,484 of 4,675   
   Bonita Montero to All   
   Performance of denormal numbers   
   17 Sep 22 16:35:18   
   
   XPost: comp.lang.c++   
   From: Bonita.Montero@nospicedham.gmail.com   
      
   I wanted to check whether denormal numbers are still slower on
   modern CPUs. Intel introduced the DAZ / FTZ bits with SSE1 because
   denormals used to be handled in microcode:
      
   #include    
   #include    
   #include    
   #include    
   #include    
   #include    
      
   // Pull the std and std::chrono names into scope for the short benchmark below.
   using namespace std;
   using namespace chrono;

   // Implemented in the MASM listing (?denScale@@YA_K_K_N@Z): multiplies a start
   // value by 0.5 in chains of 52 dependent mulsd instructions and returns
   // rounds * 52. A non-zero `den` selects the DEN constant as the start value,
   // zero selects ONE.
   uint64_t denScale( uint64_t rounds, bool den );
      
   int main()   
   {   
   	auto bench = []( bool den ) -> double   
   	{   
   		constexpr uint64_t ROUNDS = 25'000'000;   
   		auto start = high_resolution_clock::now();   
   		int64_t nScale = denScale( ROUNDS, den );   
   		return (double)duration_cast(   
   high_resolution_clock::now() - start ).count() / nScale;   
   	};   
   	double   
   		tDen = bench( true ),   
   		tNorm = bench( false ),   
   		rel = tDen / tNorm - 1;   
   	cout << tDen << endl;   
   	cout << tNorm << endl;   
   	cout << trunc( 100 * 10 * rel + 0.5 ) / 10 << "%" << endl;   
   }   
      
   MASM code:   
      
   PUBLIC ?denScale@@YA_K_K_N@Z   
      
   ; Bit patterns of the IEEE-754 double values used by denScale.
   CONST SEGMENT
   DEN DQ 00008000000000000h   ; exponent field 0, top mantissa bit set: subnormal 2^-1023
   ONE DQ 03FF0000000000000h   ; 1.0
   P5  DQ 03fe0000000000000h   ; 0.5
   CONST ENDS
      
   _TEXT SEGMENT   
   ?denScale@@YA_K_K_N@Z PROC   
   	xor     rax, rax   
   	test    rcx, rcx   
   	jz      byeBye   
   	mov     r8, ONE   
   	mov     r9, DEN   
   	test    dl, dl   
   	cmovnz  r8, r9   
   	movq    xmm1, P5   
   	mov     rax, rcx   
   loopThis:   
   	movq    xmm0, r8   
   REPT 52   
   	mulsd   xmm0, xmm1   
   ENDM   
   	sub     rcx, 1   
   	jae     loopThis   
   	mov     rdx, 52   
   	mul     rdx   
   byeBye:   
   	ret   
   ?denScale@@YA_K_K_N@Z ENDP   
   _TEXT ENDS   
   END   
      
   On my PC, normal numbers have a 25% higher throughput.
   Feel free to post your results as well.
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca