home bbs files messages ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.lang.c      Meh, in C you gotta define EVERYTHING      243,242 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 242,004 of 243,242   
   Bonita Montero to All   
   Re: Unicode...   
   19 Nov 25 11:56:55   
   
   From: Bonita.Montero@gmail.com   
      
   Am 15.11.2025 um 20:28 schrieb Michael Sanders:   
   > On Sat, 15 Nov 2025 06:24:39 +0100, Bonita Montero wrote:   
   >   
   >> A little bugfix and a perfect style:   
   >>   
   >> #include    
   >> #include    
   >> #include    
   >> #include    
   >>   
   >> using namespace std;   
   >>   
   >> optional utf8Width( u8string_view str )   
   >> {   
   >>       size_t w = 0;   
   >>       for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]   
   >>           if( size_t head = countl_zero( (unsigned char)~*it ); head <=   
   4   
   >> && (size_t)(str.end() - it) >= head + 1 ) [[likely]]   
   >>               it += head + 1;   
   >>           else   
   >>               return nullopt;   
   >>       return w;   
   >> }   
   >>   
   >> int main()   
   >> {   
   >>       cout << *utf8Width( u8"Hello, 世界!" ) << endl;   
   >> }   
   > Very nice!   
   >   
   #include <bit>
   #include <concepts>
   #include <cstddef>
   #include <iostream>
   #include <string_view>
   #include <type_traits>
      
   using namespace std;   
      
   template   
        requires std::same_as || std::same_as   
   size_t utf8Width( View str )   
   {   
        size_t rem = str.end() - str.begin(), w = 0, chunk;   
        for( auto it = str.begin(); rem; rem -= chunk, ++w ) [[likely]]   
        {   
            chunk = countl_one( (unsigned char)*it ) + 1;   
            if constexpr( Validate )   
                if( (*it & 0xC0) == 0x80 || chunk > 5 || rem < chunk )   
   [[unlikely]]   
                    return -1;   
            auto end = it + chunk;   
            if constexpr( !Validate )   
                it = end;   
            else   
                while( ++it != end )   
                    if( (unsigned char)(*it & 0xC0) != 0x80 )   
                        return -1;   
        }   
        return w;   
   }   
      
   int main()   
   {   
        char8_t strU8[] = u8"Hello, 世界!";   
        string_view sv( (char *)strU8 );   
        cout << utf8Width( sv ) << endl;   
        cout << utf8Width( sv ) << endl;   
        u8string_view svU8( strU8 );   
        cout << utf8Width( svU8 ) << endl;   
        cout << utf8Width( svU8 ) << endl;   
   }   
      
   Even cooler. Now the code accepts ordinary string_views as well as
   u8string_views.
   And if you supply a boolean template parameter before the ()-parameter which
   is true, the data is verified to be a valid UTF-8 string. If you supply false
   or omit the parameter, the string isn't validated.
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca