Forums before death by AOL, social media and spammers... "We can't have nice things"
|    comp.lang.c    |    Meh, in C you gotta define EVERYTHING    |    243,242 messages    |
[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]
|    Message 242,036 of 243,242    |
|    Bonita Montero to All    |
|    Re: Unicode...    |
|    22 Nov 25 14:10:46    |
   
   From: Bonita.Montero@gmail.com   
      
   This code, which uses AVX-512BW and BMI1, is 13.5 times faster than
   yours on my Zen 4 PC.
      
   size_t utf8Width2( const char *s )   
      
   {   
    __m512i const   
    ZERO = _mm512_setzero_si512(),   
    ONE_MASK = _mm512_set1_epi8( (char)0x80 ),   
    ONE_HEAD = ZERO,   
    TWO_MASK = _mm512_set1_epi8( (char)0xE0 ),   
    TWO_HEAD = _mm512_set1_epi8( (char)0xC0 ),   
    THREE_MASK = _mm512_set1_epi8( (char)0xF0 ),   
    THREE_HEAD = _mm512_set1_epi8( (char)0xE0 ),   
    FOUR_MASK = _mm512_set1_epi8( (char)0xF8 ),   
    FOUR_HEAD = _mm512_set1_epi8( (char)0xF0 );   
    uintptr_t   
    begin = (uintptr_t)s,   
    base = begin & -64;   
    s = (char *)base;   
    size_t count = 0;   
    __m512i chunk;   
    uint64_t nzMask;   
    auto doChunk = [&]() L_FORCEINLINE   
    {   
    uint64_t   
    one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   ONE_MASK ), ONE_HEAD ) & nzMask,   
    two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   TWO_MASK ), TWO_HEAD ) & nzMask,   
    three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   THREE_MASK ), THREE_HEAD ) & nzMask,   
    four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   FOUR_MASK ), FOUR_HEAD ) & nzMask;   
    count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) +   
   _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );   
    };   
    chunk = _mm512_loadu_si512( s );   
    unsigned head = (unsigned)(begin - base);   
    nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO ) >> head;   
    unsigned ones = countr_one( nzMask );   
    nzMask &= ones < 64 ? (1ull << ones) - 1 : -1;   
    nzMask <<= head;   
    doChunk();   
    if( (int64_t)nzMask >= 0 )   
    return count;   
    for( ; ; )   
    {   
    s += 64;   
    chunk = _mm512_loadu_si512( s );   
    nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO );   
    ones = countr_one( nzMask );   
    nzMask = ones < 64 ? (1ull << ones) - 1 : -1;   
    if( !nzMask )   
    break;   
    doChunk();   
    }   
    return count;   
   }   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   
|
[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]
(c) 1994, bbs@darkrealms.ca