From: Arjan van de Ven Subject: Re: [syzbot] [net?] WARNING: ODEBUG bug in lane_ioctl (3) This email is created by automation to help kernel developers deal with a large volume of bug reports by decoding oopses into more actionable information. Decoded Backtrace 1. __debug_object_init -- crash site (lib/debugobjects.c:632) The WARN fires inside debug_print_object (inlined into __debug_object_init). The object at 0xffff888077a60f78 (a timer_list) is in the ACTIVE state when debug_object_init is called on it a second time. 611 static void debug_print_object(struct debug_obj *obj, char *msg) 612 { 613 const struct debug_obj_descr *descr = obj->descr; 614 static int limit; 622 if (!debug_objects_enabled) 623 return; 625 if (limit < 5 && descr != descr_test) { 626 void *hint = descr->debug_hint ? 627 descr->debug_hint(obj->object) : NULL; 628 limit++; -> 629 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " 630 "object: %p object type: %s hint: %pS\n", 631 msg, obj_states[obj->state], obj->astate, 632 obj->object, descr->name, hint); 633 } 634 debug_objects_warnings++; 635 } 747 static void 748 __debug_object_init(void *addr, const struct debug_obj_descr *descr, 749 int onstack) 750 { 751 struct debug_obj *obj, o; 752 struct debug_bucket *db; 753 unsigned long flags; 755 debug_objects_fill_pool(); 757 db = get_bucket((unsigned long) addr); 759 raw_spin_lock_irqsave(&db->lock, flags); 761 obj = lookup_object_or_alloc(addr, db, descr, onstack, false); 762 if (unlikely(!obj)) { 763 raw_spin_unlock_irqrestore(&db->lock, flags); 764 debug_objects_oom(); 765 return; 766 } 768 switch (obj->state) { 769 case ODEBUG_STATE_NONE: 770 case ODEBUG_STATE_INIT: 771 case ODEBUG_STATE_INACTIVE: 772 obj->state = ODEBUG_STATE_INIT; 773 raw_spin_unlock_irqrestore(&db->lock, flags); 774 return; 775 default: 776 break; 777 } 779 o = *obj; 780 raw_spin_unlock_irqrestore(&db->lock, flags); -> 781 debug_print_object(&o, "init"); 783 if (o.state == ODEBUG_STATE_ACTIVE) 784
debug_object_fixup(descr->fixup_init, addr, o.state); 785 } 2. timer_init_key -- kernel/time/timer.c:880 786 static inline void debug_timer_init(struct timer_list *timer) 787 { -> 788 debug_object_init(timer, &timer_debug_descr); 789 } 834 static inline void debug_init(struct timer_list *timer) 835 { -> 836 debug_timer_init(timer); 837 trace_timer_init(timer); 838 } 876 void timer_init_key(struct timer_list *timer, 877 void (*func)(struct timer_list *), unsigned int flags, 878 const char *name, struct lock_class_key *key) 879 { -> 880 debug_init(timer); 881 do_init_timer(timer, func, flags, name, key); 882 } 3. lane_ioctl / lecd_attach / lec_arp_init (net/atm/lec.c:1037) 1264 static void lec_arp_init(struct lec_priv *priv) 1265 { 1266 unsigned short i; 1268 for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) 1269 INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); 1270 INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); 1271 INIT_HLIST_HEAD(&priv->lec_no_forward); 1272 INIT_HLIST_HEAD(&priv->mcast_fwds); 1273 spin_lock_init(&priv->lec_arp_lock); ->1274 INIT_DELAYED_WORK(&priv->lec_arp_work, lec_arp_check_expire); 1275 schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); 1276 } 748 static int lecd_attach(struct atm_vcc *vcc, int arg) 749 { 750 int i; 751 struct lec_priv *priv; 753 lockdep_assert_held(&lec_mutex); 754 if (arg < 0) 755 arg = 0; 756 if (arg >= MAX_LEC_ITF) 757 return -EINVAL; 758 i = array_index_nospec(arg, MAX_LEC_ITF); 759 if (!dev_lec[i]) { 763 dev_lec[i] = alloc_etherdev(size); 775 priv = netdev_priv(dev_lec[i]); 776 } else { 777 priv = netdev_priv(dev_lec[i]); 778 if (rcu_access_pointer(priv->lecd)) 779 return -EADDRINUSE; 780 } ->781 lec_arp_init(priv); // called unconditionally for both new and existing priv -- no work cancellation 1018 static int lane_ioctl(struct socket *sock, unsigned int cmd, 1019 unsigned long arg) 1020 { 1034 mutex_lock(&lec_mutex); 1035 switch (cmd) { 1036 case ATMLEC_CTRL: ->1037 err = lecd_attach(vcc, (int)arg); 1038 if (err >=
0) 1039 sock->state = SS_CONNECTED; 1040 break; 1049 mutex_unlock(&lec_mutex); 1050 return err; 1051 } 4. do_vcc_ioctl (net/atm/ioctl.c:159) 153 error = -ENOIOCTLCMD; 155 mutex_lock(&ioctl_mutex); 156 list_for_each(pos, &ioctl_list) { 157 struct atm_ioctl *ic = list_entry(pos, struct atm_ioctl, list); 158 if (try_module_get(ic->owner)) { ->159 error = ic->ioctl(sock, cmd, arg); // dispatches to lane_ioctl 160 module_put(ic->owner); 161 if (error != -ENOIOCTLCMD) 162 break; 163 } 164 } 165 mutex_unlock(&ioctl_mutex); Tentative Analysis The ODEBUG WARNING fires when INIT_DELAYED_WORK() is called on a delayed_work (lec_priv.lec_arp_work) whose embedded timer_list is already in the ACTIVE state. lec_arp_init() always calls INIT_DELAYED_WORK(&priv->lec_arp_work, ...) followed by schedule_delayed_work(). lecd_attach() calls lec_arp_init() unconditionally -- both for a brand-new device and for an existing one in the else-branch. The only guard for the existing-device path is that priv->lecd is NULL (no daemon currently attached). The race is opened by lec_atm_close(), the ATM VCC close handler:
  Thread A (lec_atm_close):                  Thread B (lecd_attach via lane_ioctl):
    rcu_assign_pointer(lecd, NULL)
    synchronize_rcu()                          mutex_lock(&lec_mutex)
    [window open]                              sees priv->lecd == NULL -- passes guard
                                               lec_arp_init(priv)
                                                 INIT_DELAYED_WORK on active timer
                                                 --> ODEBUG WARN
    lec_arp_destroy(priv)
    [too late: work already re-initialized]
lec_atm_close() clears priv->lecd to NULL *before* calling lec_arp_destroy() (which contains cancel_delayed_work_sync). Because lec_atm_close() does not hold lec_mutex, Thread B can observe priv->lecd == NULL while lec_arp_work is still active, pass the guard in lecd_attach(), and call lec_arp_init() on a live timer. The lec_mutex protecting dev_lec[] was introduced by commit d13a3824bfd2 ("net: atm: add lec_mutex"), which serialised lecd_attach() and friends but did not update lec_atm_close() to also acquire the mutex.
The unconditional lec_arp_init() call for existing devices predates that commit and has always been present. Potential Solution Add cancel_delayed_work_sync(&priv->lec_arp_work) in the else-branch of lecd_attach(), immediately before the call to lec_arp_init(). This ensures any in-flight work is drained before the timer is re-initialized, regardless of whether lec_atm_close() has already cancelled it. cancel_delayed_work_sync() is safe to call from a lec_mutex-held context because lec_arp_check_expire() only acquires priv->lec_arp_lock (a spinlock) and never tries to take lec_mutex.
        } else {
                priv = netdev_priv(dev_lec[i]);
                if (rcu_access_pointer(priv->lecd))
                        return -EADDRINUSE;
+               cancel_delayed_work_sync(&priv->lec_arp_work);
        }
        lec_arp_init(priv);
More information Oops-Analysis: http://oops.fenrus.org/reports/lkml/69f16c26.170a0220.34e5b8.0013.GAE@google.com/ Assisted-by: GitHub-Copilot linux-kernel-oops.