[   home   |   bbs   |   files   |   messages   ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.lang.c      Meh, in C you gotta define EVERYTHING      243,242 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 242,038 of 243,242   
   bart to Bonita Montero   
   Re: Unicode...   
   22 Nov 25 13:38:27   
   
   From: bc@freeuk.com   
      
   On 22/11/2025 13:10, Bonita Montero wrote:   
   > This code with AVX512BW and BMI1 is 13,5 times faster than yours on my   
   > Zen4-PC.   
   >   
   > size_t utf8Width2( const char *s )   
   >   
   > {   
   >      __m512i const   
   >          ZERO = _mm512_setzero_si512(),   
   >          ONE_MASK = _mm512_set1_epi8( (char)0x80 ),   
   >          ONE_HEAD = ZERO,   
   >          TWO_MASK = _mm512_set1_epi8( (char)0xE0 ),   
   >          TWO_HEAD = _mm512_set1_epi8( (char)0xC0 ),   
   >          THREE_MASK = _mm512_set1_epi8( (char)0xF0 ),   
   >          THREE_HEAD = _mm512_set1_epi8( (char)0xE0 ),   
   >          FOUR_MASK = _mm512_set1_epi8( (char)0xF8 ),   
   >          FOUR_HEAD = _mm512_set1_epi8( (char)0xF0 );   
   >      uintptr_t   
   >          begin = (uintptr_t)s,   
   >          base = begin & -64;   
   >      s = (char *)base;   
   >      size_t count = 0;   
   >      __m512i chunk;   
   >      uint64_t nzMask;   
   >      auto doChunk = [&]() L_FORCEINLINE   
   >      {   
   >          uint64_t   
   >              one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   > ONE_MASK ), ONE_HEAD ) & nzMask,   
   >              two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   > TWO_MASK ), TWO_HEAD ) & nzMask,   
   >              three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   > THREE_MASK ), THREE_HEAD ) & nzMask,   
   >              four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,   
   > FOUR_MASK ), FOUR_HEAD ) & nzMask;   
   >          count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) +   
   > _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );   
   >      };   
   >      chunk = _mm512_loadu_si512( s );   
   >      unsigned head = (unsigned)(begin - base);   
   >      nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO ) >> head;   
   >      unsigned ones = countr_one( nzMask );   
   >      nzMask &= ones < 64 ? (1ull << ones) - 1 : -1;   
   >      nzMask <<= head;   
   >      doChunk();   
   >      if( (int64_t)nzMask >= 0 )   
   >          return count;   
   >      for( ; ; )   
   >      {   
   >          s += 64;   
   >          chunk = _mm512_loadu_si512( s );   
   >          nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO );   
   >          ones = countr_one( nzMask );   
   >          nzMask = ones < 64 ? (1ull << ones) - 1 : -1;   
   >          if( !nzMask )   
   >              break;   
   >          doChunk();   
   >      }   
   >      return count;   
   > }   
      
      
   Doesn't compile, even after I add suitable *intrin headers.   
      
   I took out L_FORCEINLINE (not recognised); added std:: to countr_one,   
   but it still gave me errors like this:   
      
   C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h: In   
   lambda function:   
   C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:   
   error: inlining failed in call to 'always_inline' 'long long int   
   _mm_popcnt_u64(long long unsigned int)': target specific option mismatch   
       42 | _mm_popcnt_u64 (unsigned long long __X)   
          | ^~~~~~~~~~~~~~   
      
      
   You have to give complete, compilable code, or code that has only simple
   dependencies like stdio.h.
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca