home bbs files messages ]

Forums before death by AOL, social media and spammers... "We can't have nice things"

   comp.arch      Apparently more than just beeps & boops      131,241 messages   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]

   Message 130,702 of 131,241   
   Chris M. Thomasson to BGB   
   Re: Variable-length instructions   
   30 Dec 25 14:23:39   
   
   From: chris.m.thomasson.1@gmail.com   
      
   On 12/30/2025 12:59 PM, BGB wrote:   
   > On 12/30/2025 12:00 PM, Scott Lurndal wrote:   
   >> "Chris M. Thomasson"  writes:   
   >>> On 12/28/2025 4:41 PM, BGB wrote:   
   >>> [...]   
   >>>   
   >>> Also, if using something like LOCK CMPXCHG you MUST make sure to align   
   >>> and pad your relevant data structures to a l2 cache line.   
   >>   
   >> That may not be necessary if there is otherwise no false sharing in   
   >> the same cache line.   Yes, the operand should be naturally aligned,   
   >> (which ensures it is entirely contained within a single cache line),   
   >> but there's no reason that other data cannot be stored in the same   
   >> cache line, so long as it is unlikely to be accessed by a competing   
   >> thread.   
   >>   
   >   
   > Yes, or the "small brain" option of just making the mutex larger than   
   > the size of the cache line and putting the relevant part in the middle...   
   >   
   > struct PaddedMutex_s {   
   > u64 pad1, pad2, pad3;   
   > u64 real_part;   
   > u64 pad4, pad5, pad6;   
   > };   
   >   
   > Then say (assuming a 32 byte cache line), no non-pad values can be in   
   > the same cache line as real_part.   
   >   
   > Little bigger for a 64 byte cache line, but same general idea.   
      
   :^) Yeah. That can help. I was referring to the anchor of, say a   
   lock-free stack. That anchor better be aligned and padded. An anchor:   
      
   /* Anchor of a lock-free stack: a pointer/version pair intended to be
      updated together as one double-word unit by a DWCAS. */
   struct ct_anchor
   {
        struct node* next; /* pointer half of the pair */
        uintptr_t ver;     /* version half; the post ties it to avoiding ABA */
   };
      
   ct_anchor is (better be ;^) a double word ripe for a DWCAS, say, LOCK
   CMPXCHG8B on a 32-bit system.
      
   That ct_anchor needs to be properly aligned and padded up to an L2 cache
   line. LL/SC is a different story. The version is not needed because a
   proper LL/SC gets around ABA. But! That single word should be padded and
   aligned on a reservation granule.
      
   Now, the struct nodes. Heck, they can be L2 cache line aligned and
   padded regions of memory. Say, an L2 cache-block lock-free allocator.
      
   Fwiw, here is some of my old test code for a region allocator that can
   help align things. This was written before standard alignment support
   (say, _Alignof) was widely available:
      
      
   #if ! defined (RALLOC_H)   
   #  define RALLOC_H   
   #  if defined (__cplusplus)   
         extern "C" {   
   #  endif   
   /**************************************************************/   
      
      
      
      
   #include <stddef.h>
   #include <assert.h>
      
      
      
      
   #if defined (_MSC_VER)   
   /* warning C4116: unnamed type definition in parentheses */   
   #  pragma warning (disable : 4116)   
   #endif   
      
      
      
      
   #if ! defined (NDEBUG)   
   #  include    
   #  define RALLOC_DBG_PRINTF(mp_exp) printf mp_exp   
   #else   
   #  define RALLOC_DBG_PRINTF(mp_exp) ((void)0)   
   #endif   
      
      
      
      
   /* Integer type used to do arithmetic on pointer values.  Defaults to
      size_t; define RALLOC_UINTPTR_TYPE before this point to override. */
   #if ! defined (RALLOC_UINTPTR_TYPE)
   #  define RALLOC_UINTPTR_TYPE size_t
   #endif




   typedef RALLOC_UINTPTR_TYPE ralloc_uintptr_type;


   /* Compile-time check: the array size becomes -1 (a compile error) unless
      ralloc_uintptr_type has exactly the size of a void*. */
   typedef char ralloc_static_assert[
      sizeof(ralloc_uintptr_type) == sizeof(void*) ? 1 : -1
   ];
      
      
      
      
   enum ralloc_align_enum {   
      ALIGN_ENUM   
   };   
      
      
   struct ralloc_align_struct {   
      char pad;   
      double type;   
   };   
      
      
   union ralloc_align_max {   
      char char_;   
      short int short_;   
      int int_;   
      long int long_;   
      float float_;   
      double double_;   
      long double long_double_;   
      void* ptr_;   
      void* (*fptr_) (void*);   
      enum ralloc_align_enum enum_;   
      struct ralloc_align_struct struct_;   
      size_t size_t_;   
      ptrdiff_t ptrdiff_t;   
   };   
      
      
   #define RALLOC_ALIGN_OF(mp_type) \   
      offsetof( \   
        struct { \   
          char pad_RALLOC_ALIGN_OF; \   
          mp_type type_RALLOC_ALIGN_OF; \   
        }, \   
        type_RALLOC_ALIGN_OF \   
      )   
      
      
   /* Default alignment: the alignment of union ralloc_align_max. */
   #define RALLOC_ALIGN_MAX RALLOC_ALIGN_OF(union ralloc_align_max)
      
      
   #define RALLOC_ALIGN_UP(mp_ptr, mp_align) \   
      ((void*)( \   
        (((ralloc_uintptr_type)(mp_ptr)) + ((mp_align) - 1)) \   
        & ~(((mp_align) - 1)) \   
      ))   
      
      
   /* Nonzero when mp_ptr is unchanged by RALLOC_ALIGN_UP(mp_ptr, mp_align),
      i.e. it is already aligned.  Despite the name this only yields a truth
      value; callers wrap it in assert(). */
   #define RALLOC_ALIGN_ASSERT(mp_ptr, mp_align) \
      (((void*)(mp_ptr)) == RALLOC_ALIGN_UP(mp_ptr, mp_align))
      
      
      
      
   /* State of one region: a caller-supplied buffer carved up front to back. */
   struct region {
      unsigned char* buffer; /* start of the backing storage (set by rinit) */
      size_t size;           /* total bytes available in buffer */
      size_t offset;         /* bytes handed out so far; next block starts here */
   };
      
      
   static void   
   rinit(   
     struct region* const self,   
     void* buffer,   
     size_t size   
   ) {   
      self->buffer = buffer;   
      self->size = size;   
      self->offset = 0;   
      
      RALLOC_DBG_PRINTF((   
        "rinit(%p) {\n"   
        "  buffer          = %p\n"   
        "  size            = %lu\n"   
        "}\n\n\n",   
        (void*)self,   
        buffer,   
        (unsigned long int)size   
      ));   
   }   
      
      
   static void*   
   rallocex(   
     struct region* const self,   
     size_t size,   
     size_t align   
   ) {   
      unsigned char* align_buffer;   
      size_t offset = self->offset;   
      unsigned char* raw_buffer = self->buffer + offset;   
      
      if (! size) {   
        size = 1;   
      }   
      
      if (! align) {   
        align = RALLOC_ALIGN_MAX;   
      }   
      
      assert(align == 1 || RALLOC_ALIGN_ASSERT(align, 2));   
      
      align_buffer = RALLOC_ALIGN_UP(raw_buffer, align);   
      
      assert(RALLOC_ALIGN_ASSERT(align_buffer, align));   
      
      size += align_buffer - raw_buffer;   
      
      if (offset + size > self->size) {   
        return NULL;   
      }   
      
      self->offset = offset + size;   
      
      RALLOC_DBG_PRINTF((   
        "rallocex(%p) {\n"   
        "  size            = %lu\n"   
        "  alignment       = %lu\n"   
        "  origin offset   = %lu\n"   
        "  final offset    = %lu\n"   
        "  raw_buffer      = %p\n"   
        "  align_buffer    = %p\n"   
        "  size adjustment = %lu\n"   
        "  final size      = %lu\n"   
        "}\n\n\n",   
        (void*)self,   
        (unsigned long int)size - (align_buffer - raw_buffer),   
        (unsigned long int)align,   
        (unsigned long int)offset,   
        (unsigned long int)self->offset,   
        (void*)raw_buffer,   
        (void*)align_buffer,   
        (unsigned long int)(align_buffer - raw_buffer),   
        (unsigned long int)size   
      ));   
      
      return align_buffer;   
   }   
      
      
   /* ralloc: allocate mp_size bytes from region mp_self at the default
      alignment, RALLOC_ALIGN_MAX. */
   #define ralloc(mp_self, mp_size) \
      rallocex((mp_self), (mp_size), RALLOC_ALIGN_MAX)
      
   /* ralloct: allocate room for mp_count objects of mp_type from region
      mp_self, aligned for mp_type.  The byte count is a plain multiplication
      and is not checked for overflow. */
   #define ralloct(mp_self, mp_count, mp_type) \
      rallocex( \
        (mp_self), \
        sizeof(mp_type) * (mp_count),\
        RALLOC_ALIGN_OF(mp_type) \
      )
      
      
   static void   
   rflush(   
     struct region* const self   
   ) {   
      self->offset = 0;   
      
      RALLOC_DBG_PRINTF((   
        "rflush(%p) {}\n\n\n",   
        (void*)self   
      ));   
   }   
      
      
      
      
   #undef RALLOC_DBG_PRINTF   
   #undef RALLOC_UINTPTR_TYPE   
      
      
      
      
   /**************************************************************/   
   #  if defined (__cplusplus)   
         }   
   #  endif   
   #endif   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   

[   << oldest   |   < older   |   list   |   newer >   |   newest >>   ]


(c) 1994,  bbs@darkrealms.ca