home bbs files messages ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.lang.asm.x86      Ahh, the lost art of x86 assembly      4,675 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 3,313 of 4,675   
   Bernhard Schornak to John   
   Re: Hex 2 Bin (1/2)   
   20 Mar 18 23:28:09   
   
   From: schornak@nospicedham.web.de   
      
   Kerr-Mudd,John wrote:   
      
      
   > I want something with far less jmps!   
      
      
   I obviously mismatched the direction in my first post. The other way   
   around is about *15 times* slower, because there are a lot of things   
   to sort out to get a proper result.   
      
   This one requires a bunch of lookup tables:   
      
   CVTCUT   0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF   
   0xFF 0xFF   
             0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0x00   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF   
   0x00 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF   
   0xFF 0x00 0x00   
             0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF   
   0xFF 0xFF 0x00   
      
   CVTCUT() is used to mask out bytes beyond the trailing zero.   
   CVTxx    16 consecutive bytes with content xx.   
      
   These LUTs are used in multiple functions, so the "extra space" (528   
   bytes, including other characters not used here) pays off for me.   
      
      
               /*   
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
                 hex2D    hexadecimal ASCII string -> DWORD   
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
                 -> RCX   EA source   
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
                 <- RAX   dword (8 digits)   
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
                 BUFFER   0   
                          0123456789ABCDEF   
      
                         "-16 byte string-"   
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
                 average latency ~ 115 clock cycles (1st call), 96 clock cycles   
   (2nd and up)   
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
               */   
               .p2align   5,,31   
               .globl     _hex2D   
               .def       _hex2D; .scl 2; .type 32; .endef   
        _hex2D:subq       $0xF8,               %rsp   
               movdqu     0x00(%rcx),          %xmm0   
               pxor       %xmm1,               %xmm1   
               movq       _BNR(%rip),          %rax            # RAX = BNR   
               pcmpeqb    %xmm0,               %xmm1   
               movq       %r8,                 0x68(%rsp)   
               movq       %r9,                 0x70(%rsp)   
               movq       %rdx,                0x78(%rsp)   
               pmovmskb   %xmm1,               %r8d   
               movq       %rbp,                0x80(%rsp)   
               movq       %rbx,                0x88(%rsp)   
               movdqa     %xmm9,               0x90(%rsp)   
               movdqa     %xmm8,               0xA0(%rsp)   
               movdqa     %xmm7,               0xB0(%rsp)   
               bsf        %r8d,                %r8d            # R14 = 1st zero   
   P_0   
               movdqa     %xmm6,               0xC0(%rsp)   
               movdqa     %xmm5,               0xD0(%rsp)   
               movdqa     %xmm4,               0xE0(%rsp)   
               movq       %rcx,                0xF0(%rsp)   
               leaq       CVTCUT(%rax),        %rbx            # R13 = LUT cut   
               shlq       %r8                                  # R08 = index 0   
               cmove      %r8d,                %eax            #       zero  =>   
   RES = 0   
               je         1f   
               movdqa     CVT_60(%rax),        %xmm9           # X09 = 6060...60   
               pand       0x00(%rbx, %r8, 8),  %xmm0           #       cut off   
   trailing garbage   
               movdqa     CVT_20(%rax),        %xmm4           # X04 = 2020...20   
               movdqa     %xmm0,               %xmm1           # X01 = TMP_0   
               movdqa     CVT_30(%rax),        %xmm5           # X05 = 3030...30   
               pcmpgtb    %xmm9,               %xmm1           # X01 = mask 0   
               movdqa     CVT_39(%rax),        %xmm6           # X06 = 3939...39   
               pand       %xmm4,               %xmm1           # X01 = diff 0   
               movdqa     CVT_40(%rax),        %xmm7           # X07 = 4040...40   
               psubb      %xmm1,               %xmm0           # X00 = temp res 0   
               movdqa     CVT_46(%rax),        %xmm8           # X08 = 4646...46   
               movdqa     %xmm0,               %xmm1           # X01 = TMP_0   
               pcmpgtb    %xmm8,               %xmm1           # X01 = mask  > "F"   
               pand       %xmm0,               %xmm1           # X01 = erase > "F"   
               psubb      %xmm1,               %xmm0           # X00 = temp res 0   
               movdqa     %xmm0,               %xmm1           # X01 = TMP_0.0   
               movdqa     %xmm0,               %xmm8           # X08 = TMP_0.1   
               pcmpgtb    %xmm6,               %xmm1           # X01 = mask 0.0 >   
   "9"   
               pcmpgtb    %xmm7,               %xmm8           # X08 = mask 0.1 >   
   "@"   
               movdqa     CVT_07(%rax),        %xmm7           # X07 = 0707...07   
               pxor       %xmm8,               %xmm1           # X01 = mask 0   
               pand       %xmm0,               %xmm1           # X01 = cut  0   
               psubb      %xmm1,               %xmm0           # X00 = temp res 0   
               pand       %xmm7,               %xmm8           # X08 = diff 0   
               psubb      %xmm8,               %xmm0           # X00 = temp res 0   
               psubb      %xmm5,               %xmm0           # X00 = res - "0"   
               movl       $0x08,               %ebp            # RBP = words   
      
   [continued in next message]   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca