XPost: linux.debian.bugs.dist   
   From: csmate@nop.hu   
      
   documenting....   
      
      
   -------- Forwarded Message --------   
   Subject: [PATCH net v2] xsk: avoid data corruption on cq descriptor number   
   Date: Fri, 24 Oct 2025 12:40:49 +0200   
   From: Fernando Fernandez Mancera    
   To: netdev@vger.kernel.org   
   CC: csmate@nop.hu, kerneljasonxing@gmail.com, maciej.fijalkowski@intel.com,   
   bjorn@kernel.org, sdf@fomichev.me, jonathan.lemon@gmail.com, bpf   
   vger.kernel.org, davem@davemloft.net,   
   edumazet@google.com, kuba@kernel.org, pabeni@redhat.com, horms@kernel.org,   
   Fernando Fernandez Mancera    
      
   Since commit 30f241fcf52a ("xsk: Fix immature cq descriptor   
   production"), the descriptor number is stored in skb control block and   
   xsk_cq_submit_addr_locked() relies on it to put the umem addrs onto   
   pool's completion queue.   
      
   skb control block shouldn't be used for this purpose as after transmit   
   xsk doesn't have control over it and other subsystems could use it. This   
   leads to the following kernel panic due to a NULL pointer dereference.   
      
    BUG: kernel NULL pointer dereference, address: 0000000000000000   
    #PF: supervisor read access in kernel mode   
    #PF: error_code(0x0000) - not-present page   
    PGD 0 P4D 0   
    Oops: Oops: 0000 [#1] SMP NOPTI   
    CPU: 2 UID: 1 PID: 927 Comm: p4xsk.bin Not tainted 6.16.12+deb14-cloud-amd64   
   #1 PREEMPT(lazy) Debian 6.16.12-1   
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.   
   7.0-debian-1.17.0-1 04/01/2014   
    RIP: 0010:xsk_destruct_skb+0xd0/0x180   
    [...]   
    Call Trace:   
       
    ? napi_complete_done+0x7a/0x1a0   
    ip_rcv_core+0x1bb/0x340   
    ip_rcv+0x30/0x1f0   
    __netif_receive_skb_one_core+0x85/0xa0   
    process_backlog+0x87/0x130   
    __napi_poll+0x28/0x180   
    net_rx_action+0x339/0x420   
    handle_softirqs+0xdc/0x320   
    ? handle_edge_irq+0x90/0x1e0   
    do_softirq.part.0+0x3b/0x60   
       
       
    __local_bh_enable_ip+0x60/0x70   
    __dev_direct_xmit+0x14e/0x1f0   
    __xsk_generic_xmit+0x482/0xb70   
    ? __remove_hrtimer+0x41/0xa0   
    ? __xsk_generic_xmit+0x51/0xb70   
    ? _raw_spin_unlock_irqrestore+0xe/0x40   
    xsk_sendmsg+0xda/0x1c0   
    __sys_sendto+0x1ee/0x200   
    __x64_sys_sendto+0x24/0x30   
    do_syscall_64+0x84/0x2f0   
    ? __pfx_pollwake+0x10/0x10   
    ? __rseq_handle_notify_resume+0xad/0x4c0   
    ? restore_fpregs_from_fpstate+0x3c/0x90   
    ? switch_fpu_return+0x5b/0xe0   
    ? do_syscall_64+0x204/0x2f0   
    ? do_syscall_64+0x204/0x2f0   
    ? do_syscall_64+0x204/0x2f0   
    entry_SYSCALL_64_after_hwframe+0x76/0x7e   
       
    [...]   
    Kernel panic - not syncing: Fatal exception in interrupt   
    Kernel Offset: 0x1c000000 from 0xffffffff81000000 (relocation range:   
   0xffffffff80000000-0xffffffffbfffffff)   
      
   The approach proposed stores the first address also in the xsk_addr_node   
   along with the number of descriptors. The head xsk_addr_node is   
   referenced in skb_shinfo(skb)->destructor_arg. The rest of the fragments   
   store the address on the list.   
      
   This is less efficient as 4 bytes are wasted when storing each address.   
      
   Fixes: 30f241fcf52a ("xsk: Fix immature cq descriptor production")   
   Closes: https://lore.kernel.org/netdev/0435b904-f44f-48f8-afb0-6   
   868474bf1c@nop.hu/   
   Signed-off-by: Fernando Fernandez Mancera    
   ---   
   v2: fix erroneous page handling and fix typos on commit message.   
   ---   
    net/xdp/xsk.c | 52 ++++++++++++++++++++++++++++-----------------------   
    1 file changed, 29 insertions(+), 23 deletions(-)   
      
   diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c   
   index 7b0c68a70888..965cf071b036 100644   
   --- a/net/xdp/xsk.c   
   +++ b/net/xdp/xsk.c   
   @@ -37,18 +37,14 @@   
    #define MAX_PER_SOCKET_BUDGET 32   
    struct xsk_addr_node {   
   + u32 num_descs;   
    u64 addr;   
    struct list_head addr_node;   
    };   
    -struct xsk_addr_head {   
   - u32 num_descs;   
   - struct list_head addrs_list;   
   -};   
   -   
    static struct kmem_cache *xsk_tx_generic_cache;   
    -#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb))   
   +#define XSK_TX_HEAD(skb) ((struct xsk_addr_node *)((skb_shinfo(   
   kb)->destructor_arg)))   
    void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)   
    {   
   @@ -569,12 +565,11 @@ static void xsk_cq_submit_addr_locked(struct   
   xsk_buff_pool *pool,   
    spin_lock_irqsave(&pool->cq_lock, flags);   
    idx = xskq_get_prod(pool->cq);   
    - xskq_prod_write_addr(pool->cq, idx,   
   - (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg);   
   + xskq_prod_write_addr(pool->cq, idx, XSK_TX_HEAD(skb)->addr);   
    descs_processed++;   
    - if (unlikely(XSKCB(skb)->num_descs > 1)) {   
   - list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {   
   + if (unlikely(XSK_TX_HEAD(skb)->num_descs > 1)) {   
   + list_for_each_entry_safe(pos, tmp, &XSK_TX_HEAD(skb)->addr_node, addr_node)   
   {   
    xskq_prod_write_addr(pool->cq, idx + descs_processed,   
    pos->addr);   
    descs_processed++;   
   @@ -582,6 +577,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool   
   *pool,   
    kmem_cache_free(xsk_tx_generic_cache, pos);   
    }   
    }   
   + kmem_cache_free(xsk_tx_generic_cache, XSK_TX_HEAD(skb));   
    xskq_prod_submit_n(pool->cq, descs_processed);   
    spin_unlock_irqrestore(&pool->cq_lock, flags);   
    }   
   @@ -597,12 +593,12 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool   
   *pool, u32 n)   
    static void xsk_inc_num_desc(struct sk_buff *skb)   
    {   
   - XSKCB(skb)->num_descs++;   
   + XSK_TX_HEAD(skb)->num_descs++;   
    }   
    static u32 xsk_get_num_desc(struct sk_buff *skb)   
    {   
   - return XSKCB(skb)->num_descs;   
   + return XSK_TX_HEAD(skb)->num_descs;   
    }   
    static void xsk_destruct_skb(struct sk_buff *skb)   
   @@ -619,16 +615,16 @@ static void xsk_destruct_skb(struct sk_buff *skb)   
    }   
    static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,   
   - u64 addr)   
   + struct xsk_addr_node *head, u64 addr)   
    {   
   - BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb));   
   - INIT_LIST_HEAD(&XSKCB(skb)->addrs_list);   
   + INIT_LIST_HEAD(&head->addr_node);   
   + head->addr = addr;   
      
   [continued in next message]   
      
   --- SoupGate-Win32 v1.05   
    * Origin: you cannot sedate... all the things you hate (1:229/2)   
|