From: bc@freeuk.com   
      
   On 15/11/2025 05:24, Bonita Montero wrote:   
   > A little bugfix and a perfect style:   
   >   
   > #include <bit>
   > #include <iostream>
   > #include <optional>
   > #include <string_view>
   >   
   > using namespace std;   
   >   
   > optional<size_t> utf8Width( u8string_view str )
   > {   
   > size_t w = 0;   
   > for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]   
   > if( size_t head = countl_zero( (unsigned char)~*it ); head <= 4   
   > && (size_t)(str.end() - it) >= head + 1 ) [[likely]]   
   > it += head + 1;   
   > else   
   > return nullopt;   
   > return w;   
   > }   
   >   
   > int main()   
   > {   
   > cout << *utf8Width( u8"Hello, 世界!" ) << endl;   
   > }   
   >   
      
   The trouble with this is that I haven't a clue how it works or what   
   those extras do, or how they impact on performance.   
      
   A version in C is given below. This is much more straightforward. It   
   doesn't verify anything, but then I don't know if yours does either.   
      
   As for performance: I duplicated that test string to form one 104 times   
   as long, then called that function one million times. Here are the timings:   
      
    C gcc-O2 1.06 seconds   
    C bcc 1.17 seconds   
    C tcc 2.81 seconds   
      
    C++ g++-O2 4.6 seconds   
    C++ g++-O0 19 seconds   
      
   --------------------------   
      
   size_t utf8width(char* s) {   
    size_t length;   
    int c, n;   
      
    length=0;   
    while (c=*s) {   
    if ((c & 0x80) == 0) n = 1;   
    else if ((c & 0xE0) == 0xC0) n = 2;   
    else if ((c & 0xF0) == 0xE0) n = 3;   
    else n = 4;   
    s += n;   
    ++length;   
    }   
    return length;   
   }   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   
|