From: Bonita.Montero@gmail.com   
      
   Am 15.11.2025 um 20:28 schrieb Michael Sanders:   
   > On Sat, 15 Nov 2025 06:24:39 +0100, Bonita Montero wrote:   
   >   
   >> A little bugfix and a perfect style:   
   >>   
   >> #include <iostream>
   >> #include <optional>
   >> #include <string_view>
   >> #include <bit>
   >>   
   >> using namespace std;   
   >>   
   >> optional<size_t> utf8Width( u8string_view str )
   >> {   
   >> size_t w = 0;   
   >> for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]   
   >> if( size_t head = countl_zero( (unsigned char)~*it ); head <= 4
   >> && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
   >> it += head + 1;   
   >> else   
   >> return nullopt;   
   >> return w;   
   >> }   
   >>   
   >> int main()   
   >> {   
   >> cout << *utf8Width( u8"Hello, 世界!" ) << endl;   
   >> }   
   > Very nice!   
   >   
   #include <bit>
   #include <concepts>
   #include <cstddef>
   #include <iostream>
   #include <string_view>
      
   using namespace std;   
      
   template   
    requires std::same_as || std::same_as   
   size_t utf8Width( View str )   
   {   
    size_t rem = str.end() - str.begin(), w = 0, chunk;   
    for( auto it = str.begin(); rem; rem -= chunk, ++w ) [[likely]]   
    {   
    chunk = countl_one( (unsigned char)*it ) + 1;   
    if constexpr( Validate )   
    if( (*it & 0xC0) == 0x80 || chunk > 5 || rem < chunk )   
   [[unlikely]]   
    return -1;   
    auto end = it + chunk;   
    if constexpr( !Validate )   
    it = end;   
    else   
    while( ++it != end )   
    if( (unsigned char)(*it & 0xC0) != 0x80 )   
    return -1;   
    }   
    return w;   
   }   
      
   int main()   
   {   
    char8_t strU8[] = u8"Hello, 世界!";   
    string_view sv( (char *)strU8 );   
    cout << utf8Width( sv ) << endl;   
    cout << utf8Width( sv ) << endl;   
    u8string_view svU8( strU8 );   
    cout << utf8Width( svU8 ) << endl;   
    cout << utf8Width( svU8 ) << endl;   
   }   
      
   Even cooler. Now the code accepts ordinary string_views as well as
   u8string_views.
   And if you supply a boolean template parameter of true before the
   ()-parameter, the data is verified to be a valid UTF-8 string. If you supply
   false or omit the parameter, the string isn't validated.
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   
|