[ home bbs files messages ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.lang.c      Meh, in C you gotta define EVERYTHING      243,242 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 242,036 of 243,242   
   Bonita Montero to All   
   Re: Unicode...   
   22 Nov 25 14:10:46   
   
   From: Bonita.Montero@gmail.com   
      
   This code, using AVX-512BW and BMI1, is 13.5 times faster than yours on my
   Zen 4 PC.
      
   size_t utf8Width2( const char *s )   
      
   {   
        __m512i const   
            ZERO = _mm512_setzero_si512(),   
            ONE_MASK = _mm512_set1_epi8( (char)0x80 ),   
            ONE_HEAD = ZERO,   
            TWO_MASK = _mm512_set1_epi8( (char)0xE0 ),   
            TWO_HEAD = _mm512_set1_epi8( (char)0xC0 ),   
            THREE_MASK = _mm512_set1_epi8( (char)0xF0 ),   
            THREE_HEAD = _mm512_set1_epi8( (char)0xE0 ),   
            FOUR_MASK = _mm512_set1_epi8( (char)0xF8 ),   
            FOUR_HEAD = _mm512_set1_epi8( (char)0xF0 );   
        uintptr_t   
            begin = (uintptr_t)s,   
            base = begin & -64;   
        s = (char *)base;   
        size_t count = 0;   
        __m512i chunk;   
        uint64_t nzMask;   
        auto doChunk = [&]() L_FORCEINLINE   
        {   
            uint64_t   
                one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   ONE_MASK ), ONE_HEAD ) & nzMask,   
                two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   TWO_MASK ), TWO_HEAD ) & nzMask,   
                three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   THREE_MASK ), THREE_HEAD ) & nzMask,   
                four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   FOUR_MASK ), FOUR_HEAD ) & nzMask;   
            count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) +   
   _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );   
        };   
        chunk = _mm512_loadu_si512( s );   
        unsigned head = (unsigned)(begin - base);   
        nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO ) >> head;   
        unsigned ones = countr_one( nzMask );   
        nzMask &= ones < 64 ? (1ull << ones) - 1 : -1;   
        nzMask <<= head;   
        doChunk();   
        if( (int64_t)nzMask >= 0 )   
            return count;   
        for( ; ; )   
        {   
            s += 64;   
            chunk = _mm512_loadu_si512( s );   
            nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO );   
            ones = countr_one( nzMask );   
            nzMask = ones < 64 ? (1ull << ones) - 1 : -1;   
            if( !nzMask )   
                break;   
            doChunk();   
        }   
        return count;   
   }   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca