[   home   |   bbs   |   files   |   messages   ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.lang.asm.x86      Ahh, the lost art of x86 assembly      4,675 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 3,058 of 4,675   
   aen@spamtrap.com to All   
   cycles   
   10 Nov 17 10:30:20   
   
   Hi!   
      
   I'm trying to find out how many cycles this subroutine takes on a   
   Nehalem.  According to the output it's 12 cycles.   
   The data in the comments is from Agner Fog's tables.   
   The algorithm is from Donald Knuth's TAOCP Vol. 2.   
      
   Thoughts, comments?   
      
   .intel_syntax noprefix   
   # as -gdwarf2 -o posting.o posting.asm   
   # gcc -static -o posting posting.o   
   # TSCStart: read the time-stamp counter and push it on the stack.   
   #   rdtsc returns the counter split across edx:eax; the shl/or pair   
   #   merges the two halves into a single 64-bit value in rax.   
   #   Out:      start count stored at [rsp] (rsp is lowered by 8 and stays   
   #             that way until TSCEnd releases the slot)   
   #   Clobbers: rax, rdx, flags   
   .macro TSCStart   
     rdtsc   
     shl     rdx,32   
     or      rax,rdx   
     push    rax   
   .endm # TSCStart   
      
   # TSCEnd: read the time-stamp counter again and compute the elapsed ticks.   
   #   Expects the start count pushed by TSCStart to be on top of the stack.   
   #   Out:      rax = current count - start count; the 8-byte stack slot   
   #             is released   
   #   Clobbers: rdx, flags   
   .macro TSCEnd   
     rdtsc   
     shl     rdx,32   
     or      rax,rdx   
     sub     rax,[rsp]   
     add     rsp,8   
   .endm # TSCEnd   
      
             .data   
   # printf format used by main to print the elapsed tick count as an   
   # unsigned 64-bit decimal followed by a newline.   
   format:   .string   "%llu\n"   
      
     .text   
     .globl  main   
   main: ;   _start:   nop   
     sub     rsp,8   
      
     mov     rbx,1000000   
     TSCStart   
                                   # fus p015 p0 p1 p5 p2 p3 p4 lat reci   
   1:mov     rdi,0x1234567890123456 # 1   1   x  x  x            1  0.33   
     call    bcd2bin               #  2   2         1     1  1       2   
     dec     rbx                   #  1   1   x  x  x            1  0.33   
     jnz     1b                    #  1   1         1            0   2   
   # 12 cylces   
     TSCEnd   
     mov     rdi,offset format   
     mov     rsi,rax   
     call    printf   
      
   ExitProg:   
     mov     rdi,0   
     mov     rax,60   
     syscall   
      
             .data   
   # Constant tables read by bcd2bin; step k of that routine uses the pair   
   # (m_k, c_k).   
   #   c_k: 64-bit binary fractions (value / 2^64).  A 64x64 `mul` by c_k   
   #        leaves floor(rax * fraction) in rdx.   
   #   m_k: masks selecting the upper half of every 8-, 16-, 32- and 64-bit   
   #        group of the operand, respectively.   
             .align    16   
   c_1:      .quad     0x6000000000000000  # 1 - 10/16   
   c_2:      .quad     0x9c00000000000000  # 1 - 10^2/16^2   
   c_3:      .quad     0xd8f0000000000000  # 1 - 10^4/16^4   
   c_4:      .quad     0xfa0a1f0000000000  # 1 - 10^8/16^8   
      
   m_1:      .quad     0xf0f0f0f0f0f0f0f0   
   m_2:      .quad     0xff00ff00ff00ff00   
   m_3:      .quad     0xffff0000ffff0000   
   m_4:      .quad     0xffffffff00000000   
      
     .text   
     .align  16   
   #-----------------------------------------------------------------------   
   # bcd2bin   
   # In:    rdi = 64-bit input value (the caller in this file passes   
   #              0x1234567890123456, i.e. 16 packed decimal digits)   
   # Out:   rax = converted value   
   # Clobb: rdx, rdi, flags   
   # Reads: m_1..m_4 and c_1..c_4   
   #   
   # Four identical steps, k = 1..4.  Each step masks the upper half of   
   # every group with m_k, multiplies by the fraction c_k (the high 64 bits   
   # of the product land in rdx) and subtracts that from the running value.   
   # With c_k = 1 - 10^n/16^n this turns each group hi*16^n + lo into   
   # hi*10^n + lo, for n = 1, 2, 4, 8.  The last step subtracts into rax so   
   # the result ends up in the return register.   
   #   
   # NOTE(review): the [m_k]/[c_k] operands are not written RIP-relative;   
   # presumably this relies on the non-PIE `gcc -static` link shown in the   
   # build comment -- confirm before building as PIE.   
   #-----------------------------------------------------------------------   
                                 # fus p015 p0 p1 p5 p2 p3 p4 l reci   
   bcd2bin:   
     mov     rax,rdi             #  1   1   x  x  x           1 0.33   
     and     rax,qword ptr [m_1] #  1   1   x  x  x  1           1   
   # 6 cycles   
     mul     qword ptr [c_1]     #  3   2   2        1        3  2   
     sub     rdi,rdx             #  1   1   x  x  x           1 0.33   
      
     mov     rax,rdi             #  1   1   x  x  x           1 0.33   
     and     rax,qword ptr [m_2] #  1   1   x  x  x  1           1   
   # 7 cycles   
     mul     qword ptr [c_2]     #  3   2   2        1        3  2   
     sub     rdi,rdx             #  1   1   x  x  x           1 0.33   
      
     mov     rax,rdi             #  1   1   x  x  x           1 0.33   
     and     rax,qword ptr [m_3] #  1   1   x  x  x  1           1   
   # 8 cycles   
     mul     qword ptr [c_3]     #  3   2   2        1        3  2   
     sub     rdi,rdx             #  1   1   x  x  x           1 0.33   
      
     mov     rax,rdi             #  1   1   x  x  x           1 0.33   
     and     rax,qword ptr [m_4] #  1   1   x  x  x  1           1   
   # 9 cycles   
     mul     qword ptr [c_4]     #  3   2   2        1        3  2   
     mov     rax,rdi             #  1   1   x  x  x           1 0.33   
     sub     rax,rdx             #  1   1   x  x  x           1 0.33   
      
     ret                         #  1   1         1  1           2   
      
     .end   
   --   
   aen   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca