Forums before death by AOL, social media and spammers... "We can't have nice things"
|    comp.lang.asm.x86    |    Ahh, the lost art of x86 assembly    |    4,675 messages    |
[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]
|    Message 3,058 of 4,675    |
|    aen@spamtrap.com to All    |
|    cycles    |
|    10 Nov 17 10:30:20    |
      Hi!              I'm trying to find out how many cycles this subroutine takes on a       Nehalem. According to the output it's 12 cycles.       The data in the comments is from Agner Fog's tables.       The algorithm is from Donald Knuth's TAOCP Vol. 2.              Thoughts, comments?              .intel_syntax noprefix       # as -gdwarf2 -o posting.o posting.asm       # gcc -static -o posting posting.o       .macro TSCStart        rdtsc        shl rdx,32        or rax,rdx        push rax       .endm # TSCStart              .macro TSCEnd        rdtsc        shl rdx,32        or rax,rdx        sub rax,[rsp]        add rsp,8       .endm # TSCEnd               .data       format: .string "%llu\n"               .text        .globl main       main: ; _start: nop        sub rsp,8               mov rbx,1000000        TSCStart        # fus p015 p0 p1 p5 p2 p3 p4 lat reci       1:mov rdi,0x1234567890123456 # 1 1 x x x 1 0.33        call bcd2bin # 2 2 1 1 1 2        dec rbx # 1 1 x x x 1 0.33        jnz 1b # 1 1 1 0 2       # 12 cylces        TSCEnd        mov rdi,offset format        mov rsi,rax        call printf              ExitProg:        mov rdi,0        mov rax,60        syscall               .data        .align 16       c_1: .quad 0x6000000000000000 # 1 - 10/16       c_2: .quad 0x9c00000000000000 # 1 - 10^2/16^2       c_3: .quad 0xd8f0000000000000 # 1 - 10^4/16^4       c_4: .quad 0xfa0a1f0000000000 # 1 - 10^8/16^8              m_1: .quad 0xf0f0f0f0f0f0f0f0       m_2: .quad 0xff00ff00ff00ff00       m_3: .quad 0xffff0000ffff0000       m_4: .quad 0xffffffff00000000               .text        .align 16        # fus p015 p0 p1 p5 p2 p3 p4 l reci       bcd2bin:        mov rax,rdi # 1 1 x x x 1 0.33        and rax,qword ptr [m_1] # 1 1 x x x 1 1       # 6 cycles        mul qword ptr [c_1] # 3 2 2 1 3 2        sub rdi,rdx # 1 1 x x x 1 0-33               mov rax,rdi # 1 1 x x x 1 0.33        and rax,qword ptr [m_2] # 1 1 x x x 1 1       # 7 cycles        mul qword ptr [c_2] # 3 2 2 1 3 2        sub rdi,rdx # 1 1 x x x 1 0.33               mov rax,rdi # 1 1 x x x 1 0.33        and rax,qword ptr [m_3] # 1 1 x x x 1 1       # 8 cycles        mul qword ptr [c_3] # 3 2 2 1 3 2        sub rdi,rdx # 1 1 x x x 1 0.33               mov rax,rdi # 1 1 x x x 1 0.33        and rax,qword ptr [m_4] # 1 1 x x x 1 1       # 9 cycles        mul qword ptr [c_4] # 3 2 2 1 3 2        mov rax,rdi # 1 1 x x x 1 0.33        sub rax,rdx # 1 1 x x x 1 0.33               ret # 1 1 1 1 2               .end       --       aen              --- SoupGate-Win32 v1.05        * Origin: you cannot sedate... all the things you hate (1:229/2)    |
[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]
(c) 1994, bbs@darkrealms.ca