@@ -3163,6 +3163,143 @@ struct bpf_iter__udp {
 	int bucket __aligned(8);
 };
 
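+/* Iterator state: batch[cur_sk..end_sk) holds the sockets (one reference
+ * each) still to be shown; max_sk is the capacity of batch. offset counts
+ * the matching sockets in the current bucket that were already shown, so a
+ * stopped iterator can resume mid-bucket. st_bucket_done is set once a
+ * whole bucket has been batched.
+ */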
+struct bpf_udp_iter_state {
+	struct udp_iter_state state;
+	unsigned int cur_sk;
+	unsigned int end_sk;
+	unsigned int max_sk;
+	int offset;
+	struct sock **batch;
+	bool st_bucket_done;
+};
+
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+				      unsigned int new_batch_sz);
+static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
+{
+	struct bpf_udp_iter_state *iter = seq->private;
+	struct udp_iter_state *state = &iter->state;
+	struct net *net = seq_file_net(seq);
+	struct udp_table *udptable;
+	unsigned int batch_sks = 0;
+	bool resized = false;
+	struct sock *sk;
+
+	/* The current batch is done, so advance the bucket. */
+	if (iter->st_bucket_done) {
+		state->bucket++;
+		iter->offset = 0;
+	}
+
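+	/* Look up this netns's UDP hash table; it can differ from the
+	 * initial netns when a per-netns table is configured.
+	 */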
+	udptable = udp_get_table_seq(seq, net);
+
+again:
+	/* New batch for the next bucket.
+	 * Iterate over the hash table to find a bucket with sockets matching
+	 * the iterator attributes, and return the first matching socket from
+	 * the bucket. The remaining matched sockets from the bucket are batched
+	 * before releasing the bucket lock. This allows BPF programs that are
+	 * called in seq_show to acquire the bucket lock if needed.
+	 */
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+	iter->st_bucket_done = false;
+	batch_sks = 0;
+
+	for (; state->bucket <= udptable->mask; state->bucket++) {
+		struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
+
+		if (hlist_empty(&hslot2->head)) {
+			iter->offset = 0;
+			continue;
+		}
+
+		spin_lock_bh(&hslot2->lock);
+		udp_portaddr_for_each_entry(sk, &hslot2->head) {
+			if (seq_sk_match(seq, sk)) {
+				/* Resume from the last iterated socket at the
+				 * offset in the bucket before the iterator was
+				 * stopped.
+				 */
+				if (iter->offset) {
+					--iter->offset;
+					continue;
+				}
+				if (iter->end_sk < iter->max_sk) {
+					sock_hold(sk);
+					iter->batch[iter->end_sk++] = sk;
+				}
+				batch_sks++;
+			}
+		}
+		spin_unlock_bh(&hslot2->lock);
+
+		if (iter->end_sk)
+			break;
+
+		/* Reset the current bucket's offset before moving to the next bucket. */
+		iter->offset = 0;
+	}
+
+	/* All done: no batch made. */
+	if (!iter->end_sk)
+		return NULL;
+
+	if (iter->end_sk == batch_sks) {
+		/* Batching is done for the current bucket; return the first
+		 * socket to be iterated from the batch.
+		 */
+		iter->st_bucket_done = true;
+		goto done;
+	}
+	if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
+		resized = true;
+		/* After allocating a larger batch, retry one more time to grab
+		 * the whole bucket.
+		 */
+		state->bucket--;
+		goto again;
+	}
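+	/* The bucket still did not fit (or the resize failed): fall through
+	 * with a partial batch. st_bucket_done stays false, so the rest of
+	 * the bucket is picked up on the next call.
+	 */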
+done:
+	return iter->batch[0];
+}
+
+static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_udp_iter_state *iter = seq->private;
+	struct sock *sk;
+
+	/* Whenever seq_next() is called, the socket at iter->cur_sk is done
+	 * with seq_show(), so release its reference.
+	 */
+	if (iter->cur_sk < iter->end_sk) {
+		sock_put(iter->batch[iter->cur_sk++]);
+		++iter->offset;
+	}
+
+	/* After updating iter->cur_sk, check if there are more sockets
+	 * available in the current bucket batch.
+	 */
+	if (iter->cur_sk < iter->end_sk)
+		sk = iter->batch[iter->cur_sk];
+	else
+		/* Prepare a new batch. */
+		sk = bpf_iter_udp_batch(seq);
+
+	++*pos;
+	return sk;
+}
+
+static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	/* The bpf iterator does not support lseek, so it always
+	 * continues from where it was stop()-ped.
+	 */
+	if (*pos)
+		return bpf_iter_udp_batch(seq);
+
+	return SEQ_START_TOKEN;
+}
+
 static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
 			     struct udp_sock *udp_sk, uid_t uid, int bucket)
 {
@@ -3183,18 +3320,37 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
 	struct bpf_prog *prog;
 	struct sock *sk = v;
 	uid_t uid;
+	int ret;
 
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
+	lock_sock(sk);
+
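+	/* The socket may have been unhashed (e.g. closed) after it was
+	 * batched; skip it instead of running the BPF program on it.
+	 */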
+	if (unlikely(sk_unhashed(sk))) {
+		ret = SEQ_SKIP;
+		goto unlock;
+	}
+
 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
 	meta.seq = seq;
 	prog = bpf_iter_get_info(&meta, false);
-	return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+	ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+
+unlock:
+	release_sock(sk);
+	return ret;
+}
+
+static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
+{
+	while (iter->cur_sk < iter->end_sk)
+		sock_put(iter->batch[iter->cur_sk++]);
 }
 
 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
 {
+	struct bpf_udp_iter_state *iter = seq->private;
 	struct bpf_iter_meta meta;
 	struct bpf_prog *prog;
 
@@ -3205,12 +3361,15 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
 		(void)udp_prog_seq_show(prog, &meta, v, 0, 0);
 	}
 
-	udp_seq_stop(seq, v);
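+	/* Put the references still held on unshown sockets and clear
+	 * st_bucket_done so the next batch() call resumes this bucket.
+	 */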
+	if (iter->cur_sk < iter->end_sk) {
+		bpf_iter_udp_put_batch(iter);
+		iter->st_bucket_done = false;
+	}
 }
 
 static const struct seq_operations bpf_iter_udp_seq_ops = {
-	.start		= udp_seq_start,
-	.next		= udp_seq_next,
+	.start		= bpf_iter_udp_seq_start,
+	.next		= bpf_iter_udp_seq_next,
 	.stop		= bpf_iter_udp_seq_stop,
 	.show		= bpf_iter_udp_seq_show,
 };
@@ -3350,21 +3509,55 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
 DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
 		     struct udp_sock *udp_sk, uid_t uid, int bucket)
 
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+				      unsigned int new_batch_sz)
+{
+	struct sock **new_batch;
+
+	new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch),
+				   GFP_USER | __GFP_NOWARN);
+	if (!new_batch)
+		return -ENOMEM;
+
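+	/* Drop any references still held in the old batch before freeing it;
+	 * bpf_iter_udp_batch() rewinds the bucket and rescans it into the
+	 * larger array.
+	 */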
+	bpf_iter_udp_put_batch(iter);
+	kvfree(iter->batch);
+	iter->batch = new_batch;
+	iter->max_sk = new_batch_sz;
+
+	return 0;
+}
+
+#define INIT_BATCH_SZ 16
+
 static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
 {
-	return bpf_iter_init_seq_net(priv_data, aux);
+	struct bpf_udp_iter_state *iter = priv_data;
+	int ret;
+
+	ret = bpf_iter_init_seq_net(priv_data, aux);
+	if (ret)
+		return ret;
+
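+	/* Preallocate a small batch up front; bpf_iter_udp_batch() grows it
+	 * on demand for buckets with more than INIT_BATCH_SZ matching sockets.
+	 */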
+	ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ);
+	if (ret)
+		bpf_iter_fini_seq_net(priv_data);
+
+	return ret;
 }
 
 static void bpf_iter_fini_udp(void *priv_data)
 {
+	struct bpf_udp_iter_state *iter = priv_data;
+
 	bpf_iter_fini_seq_net(priv_data);
+	kvfree(iter->batch);
 }
 
 static const struct bpf_iter_seq_info udp_seq_info = {
 	.seq_ops		= &bpf_iter_udp_seq_ops,
 	.init_seq_private	= bpf_iter_init_udp,
 	.fini_seq_private	= bpf_iter_fini_udp,
-	.seq_priv_size		= sizeof(struct udp_iter_state),
+	.seq_priv_size		= sizeof(struct bpf_udp_iter_state),
 };
 
 static struct bpf_iter_reg udp_reg_info = {
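
For context, here is a minimal sketch of a BPF program that consumes this iterator, in the style of the bpf_iter selftests. Names and includes are illustrative: "bpf_iter.h" and BPF_SEQ_PRINTF come from the selftests/libbpf tooling, not from this patch.

	#include "bpf_iter.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>
	#include <bpf/bpf_endian.h>

	char _license[] SEC("license") = "GPL";

	SEC("iter/udp")
	int dump_udp(struct bpf_iter__udp *ctx)
	{
		struct seq_file *seq = ctx->meta->seq;
		struct udp_sock *udp_sk = ctx->udp_sk;

		/* The program is also invoked once with udp_sk == NULL when
		 * iteration stops (see the (void)udp_prog_seq_show() call in
		 * bpf_iter_udp_seq_stop() above); skip that case here.
		 */
		if (!udp_sk)
			return 0;

		/* ctx->bucket is state->bucket as passed by udp_prog_seq_show(). */
		BPF_SEQ_PRINTF(seq, "bucket=%d uid=%u lport=%u\n",
			       ctx->bucket, ctx->uid,
			       bpf_ntohs(udp_sk->inet.inet_sport));
		return 0;
	}

Because seq_show now runs with the socket lock held and without the bucket lock, a program like this (or a sockops-style helper it calls) can safely operate on the socket itself.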