Forums before death by AOL, social media and spammers... "We can't have nice things"
|    comp.lang.asm.x86    |    Ahh, the lost art of x86 assembly    |    4,675 messages    |
[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]
|    Message 3,313 of 4,675    |
|    Bernhard Schornak to John    |
|    Re: Hex 2 Bin (1/2)    |
|    20 Mar 18 23:28:09    |
      From: schornak@nospicedham.web.de              Kerr-Mudd,John wrote:                     > I want something with far less jmps!                     I obviously mismatched the direction in my first post. The other way       around is about *15 times* slower, because there are a lot of things       to sort out to get a proper result.              This one requires a bunch of lookup tables:              CVTCUT 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF       0xFF 0xFF        0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF       0x00 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF       0xFF 0x00 0x00        0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF       0xFF 0xFF 0x00              CVTCUT() is used to mask out bytes beyond the trailing zero.       
CVTxx 16 consecutive bytes with content xx.              These LUTs are used in multiple functions, so the "extra space" (528       bytes, including other characters not used here) pays off for me.                      /*        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        hex2D hexadecimal ASCII string -> DWORD        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        -> RCX EA source        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        <- RAX dword (8 digits)        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        BUFFER 0        0123456789ABCDEF               "-16 byte string-"        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        average latency ~ 115 clock cycles (1st call), 96 clock cycles       (2nd and up)        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        */        .p2align 5,,31        .globl _hex2D        .def _hex2D; .scl 2; .type 32; .endef        _hex2D:subq $0xF8, %rsp        movdqu 0x00(%rcx), %xmm0        pxor %xmm1, %xmm1        movq _BNR(%rip), %rax # RAX = BNR        pcmpeqb %xmm0, %xmm1        movq %r8, 0x68(%rsp)        movq %r9, 0x70(%rsp)        movq %rdx, 0x78(%rsp)        pmovmskb %xmm1, %r8d        movq %rbp, 0x80(%rsp)        movq %rbx, 0x88(%rsp)        movdqa %xmm9, 0x90(%rsp)        movdqa %xmm8, 0xA0(%rsp)        movdqa %xmm7, 0xB0(%rsp)        bsf %r8d, %r8d # R14 = 1st zero       P_0        movdqa %xmm6, 0xC0(%rsp)        movdqa %xmm5, 0xD0(%rsp)        movdqa %xmm4, 0xE0(%rsp)        movq %rcx, 0xF0(%rsp)        leaq CVTCUT(%rax), %rbx # R13 = LUT cut        shlq %r8 # R08 = index 0        cmove %r8d, %eax # zero =>       RES = 0        je 1f        movdqa CVT_60(%rax), %xmm9 # X09 = 6060...60        pand 0x00(%rbx, %r8, 8), %xmm0 # cut off      
 trailing garbage        movdqa CVT_20(%rax), %xmm4 # X04 = 2020...20        movdqa %xmm0, %xmm1 # X01 = TMP_0        movdqa CVT_30(%rax), %xmm5 # X05 = 3030...30        pcmpgtb %xmm9, %xmm1 # X01 = mask 0        movdqa CVT_39(%rax), %xmm6 # X06 = 3939...39        pand %xmm4, %xmm1 # X01 = diff 0        movdqa CVT_40(%rax), %xmm7 # X07 = 4040...40        psubb %xmm1, %xmm0 # X00 = temp res 0        movdqa CVT_46(%rax), %xmm8 # X08 = 4646...46        movdqa %xmm0, %xmm1 # X01 = TMP_0        pcmpgtb %xmm8, %xmm1 # X01 = mask > "F"        pand %xmm0, %xmm1 # X01 = erase > "F"        psubb %xmm1, %xmm0 # X00 = temp res 0        movdqa %xmm0, %xmm1 # X01 = TMP_0.0        movdqa %xmm0, %xmm8 # X08 = TMP_0.1        pcmpgtb %xmm6, %xmm1 # X01 = mask 0.0 >       "9"        pcmpgtb %xmm7, %xmm8 # X08 = mask 0.1 >       "@"        movdqa CVT_07(%rax), %xmm7 # X07 = 0707...07        pxor %xmm8, %xmm1 # X01 = mask 0        pand %xmm0, %xmm1 # X01 = cut 0        psubb %xmm1, %xmm0 # X00 = temp res 0        pand %xmm7, %xmm8 # X08 = diff 0        psubb %xmm8, %xmm0 # X00 = temp res 0        psubb %xmm5, %xmm0 # X00 = res - "0"        movl $0x08, %ebp # RBP = words              [continued in next message]              --- SoupGate-Win32 v1.05        * Origin: you cannot sedate... all the things you hate (1:229/2)    |
[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]
(c) 1994, bbs@darkrealms.ca