@@ -23,6 +23,7 @@
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
 #include <linux/reciprocal_div.h>
+#include <linux/rbtree.h>
 
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -68,7 +69,8 @@
  */
 
 struct netem_sched_data {
-        /* internal t(ime)fifo qdisc uses sch->q and sch->limit */
+        /* internal t(ime)fifo qdisc uses t_root and sch->limit */
+        struct rb_root t_root;
 
         /* optional qdisc for classful handling (NULL at netem init) */
         struct Qdisc *qdisc;
@@ -128,10 +130,35 @@ struct netem_sched_data {
  */
 struct netem_skb_cb {
         psched_time_t time_to_send;
+        ktime_t tstamp_save;
 };
 
+/* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp
+ * to hold a rb_node structure.
+ *
+ * If struct sk_buff layout is changed, the following checks will complain.
+ */
+static struct rb_node *netem_rb_node(struct sk_buff *skb)
+{
+        BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0);
+        BUILD_BUG_ON(offsetof(struct sk_buff, prev) !=
+                     offsetof(struct sk_buff, next) + sizeof(skb->next));
+        BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) !=
+                     offsetof(struct sk_buff, prev) + sizeof(skb->prev));
+        BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) +
+                                              sizeof(skb->prev) +
+                                              sizeof(skb->tstamp));
+        return (struct rb_node *)&skb->next;
+}
+
+static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
+{
+        return (struct sk_buff *)rb;
+}
+
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
+        /* we assume we can use skb next/prev/tstamp as storage for rb_node */
         qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
         return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 }
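
The netem_rb_node() helper above stores an rb_node in place of skb->next, skb->prev and skb->tstamp while the packet sits in the tree; the BUILD_BUG_ON() checks pin down the exact layout this depends on. As a rough user-space sketch of the same overlay trick, with a hypothetical struct pkt and struct node standing in for sk_buff and rb_node, and _Static_assert standing in for BUILD_BUG_ON():

    #include <stdio.h>
    #include <stddef.h>

    /* struct node is three words, like the kernel's struct rb_node;
     * the first three fields of struct pkt mirror skb->next/prev/tstamp. */
    struct node {
        unsigned long parent_color;
        struct node *right, *left;
    };

    struct pkt {
        struct pkt *next;       /* reused as node storage while queued */
        struct pkt *prev;
        long long tstamp;
        int payload;            /* must not be overwritten */
    };

    static struct node *pkt_node(struct pkt *p)
    {
        /* compile-time layout checks, as the patch does with BUILD_BUG_ON() */
        _Static_assert(offsetof(struct pkt, next) == 0, "next must be first");
        _Static_assert(sizeof(struct node) <= offsetof(struct pkt, payload),
                       "node must fit in next/prev/tstamp");
        return (struct node *)&p->next;
    }

    int main(void)
    {
        struct pkt p = { .payload = 42 };
        struct node *n = pkt_node(&p);

        n->left = n->right = NULL;  /* clobbers the overlaid fields only */
        printf("payload survives: %d\n", p.payload);
        return 0;
    }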
@@ -333,20 +360,23 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
 
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
-        struct sk_buff_head *list = &sch->q;
+        struct netem_sched_data *q = qdisc_priv(sch);
         psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
-        struct sk_buff *skb = skb_peek_tail(list);
+        struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
 
-        /* Optimize for add at tail */
-        if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
-                return __skb_queue_tail(list, nskb);
+        while (*p) {
+                struct sk_buff *skb;
 
-        skb_queue_reverse_walk(list, skb) {
+                parent = *p;
+                skb = netem_rb_to_skb(parent);
                 if (tnext >= netem_skb_cb(skb)->time_to_send)
-                        break;
+                        p = &parent->rb_right;
+                else
+                        p = &parent->rb_left;
         }
-
-        __skb_queue_after(list, skb, nskb);
+        rb_link_node(netem_rb_node(nskb), parent, p);
+        rb_insert_color(netem_rb_node(nskb), &q->t_root);
+        sch->q.qlen++;
 }
 
 /*
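
The new tfifo_enqueue() descends the tree keyed on time_to_send, with ">=" sending ties to the right so packets that share a send time keep FIFO order, then links the new skb as a leaf and lets rb_insert_color() rebalance. A minimal user-space sketch of that descend-and-link walk, using a plain unbalanced binary search tree with a hypothetical struct tnode, since <linux/rbtree.h> is kernel-only:

    #include <stdio.h>

    struct tnode {                  /* hypothetical stand-in for an skb */
        long time_to_send;
        struct tnode *left, *right;
    };

    /* Same walk as tfifo_enqueue(); the kernel follows the link step with
     * rb_insert_color() to rebalance, which this plain BST skips. */
    static void tfifo_insert(struct tnode **root, struct tnode *n)
    {
        struct tnode **p = root;

        while (*p) {
            if (n->time_to_send >= (*p)->time_to_send)
                p = &(*p)->right;   /* ties go right: FIFO among equals */
            else
                p = &(*p)->left;
        }
        n->left = n->right = NULL;
        *p = n;                     /* the rb_link_node() step */
    }

    static void walk(const struct tnode *n) /* in-order = ascending send time */
    {
        if (!n)
            return;
        walk(n->left);
        printf("%ld ", n->time_to_send);
        walk(n->right);
    }

    int main(void)
    {
        struct tnode a = { 30 }, b = { 10 }, c = { 20 };
        struct tnode *root = NULL;

        tfifo_insert(&root, &a);
        tfifo_insert(&root, &b);
        tfifo_insert(&root, &c);
        walk(root);                 /* prints: 10 20 30 */
        printf("\n");
        return 0;
    }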
@@ -436,23 +466,28 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
                 now = psched_get_time();
 
                 if (q->rate) {
-                        struct sk_buff_head *list = &sch->q;
+                        struct sk_buff *last;
 
-                        if (!skb_queue_empty(list)) {
+                        if (!skb_queue_empty(&sch->q))
+                                last = skb_peek_tail(&sch->q);
+                        else
+                                last = netem_rb_to_skb(rb_last(&q->t_root));
+                        if (last) {
                                 /*
                                  * Last packet in queue is reference point (now),
                                  * calculate this time bonus and subtract
                                  * from delay.
                                  */
-                                delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now;
+                                delay -= netem_skb_cb(last)->time_to_send - now;
                                 delay = max_t(psched_tdiff_t, 0, delay);
-                                now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
+                                now = netem_skb_cb(last)->time_to_send;
                         }
 
                         delay += packet_len_2_sched_time(skb->len, q);
                 }
 
                 cb->time_to_send = now + delay;
+                cb->tstamp_save = skb->tstamp;
                 ++q->counter;
                 tfifo_enqueue(skb, sch);
         } else {
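
The q->rate branch above treats the departure time of the last queued packet as the new reference point: whatever delay the new packet already absorbs by waiting behind it is subtracted, clamped at zero, before the per-byte transmission time is added. A toy model of that arithmetic, with plain integers standing in for psched ticks and a made-up helper name (the real code uses psched_time_t and packet_len_2_sched_time()):

    #include <stdio.h>

    static long time_to_send(long now, long last_departure, long delay,
                             long transmit_time)
    {
        if (last_departure) {       /* simplification: 0 means queue empty */
            /* time spent waiting behind the last packet counts against
             * this packet's own configured delay */
            delay -= last_departure - now;
            if (delay < 0)
                delay = 0;
            now = last_departure;   /* new reference point */
        }
        return now + delay + transmit_time;
    }

    int main(void)
    {
        /* tail leaves at t=150, now=100, nominal delay 80, 20 ticks on the
         * wire: 80 - (150 - 100) = 30, so departure = 150 + 30 + 20 */
        printf("%ld\n", time_to_send(100, 150, 80, 20));    /* prints 200 */
        return 0;
    }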
@@ -476,6 +511,21 @@ static unsigned int netem_drop(struct Qdisc *sch)
         unsigned int len;
 
         len = qdisc_queue_drop(sch);
+
+        if (!len) {
+                struct rb_node *p = rb_first(&q->t_root);
+
+                if (p) {
+                        struct sk_buff *skb = netem_rb_to_skb(p);
+
+                        rb_erase(p, &q->t_root);
+                        sch->q.qlen--;
+                        skb->next = NULL;
+                        skb->prev = NULL;
+                        len = qdisc_pkt_len(skb);
+                        kfree_skb(skb);
+                }
+        }
         if (!len && q->qdisc && q->qdisc->ops->drop)
                 len = q->qdisc->ops->drop(q->qdisc);
         if (len)
@@ -488,19 +538,32 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
         struct netem_sched_data *q = qdisc_priv(sch);
         struct sk_buff *skb;
+        struct rb_node *p;
 
         if (qdisc_is_throttled(sch))
                 return NULL;
 
 tfifo_dequeue:
-        skb = qdisc_peek_head(sch);
+        skb = __skb_dequeue(&sch->q);
         if (skb) {
-                const struct netem_skb_cb *cb = netem_skb_cb(skb);
+deliver:
+                sch->qstats.backlog -= qdisc_pkt_len(skb);
+                qdisc_unthrottled(sch);
+                qdisc_bstats_update(sch, skb);
+                return skb;
+        }
+        p = rb_first(&q->t_root);
+        if (p) {
+                skb = netem_rb_to_skb(p);
 
                 /* if more time remaining? */
-                if (cb->time_to_send <= psched_get_time()) {
-                        __skb_unlink(skb, &sch->q);
-                        sch->qstats.backlog -= qdisc_pkt_len(skb);
+                if (netem_skb_cb(skb)->time_to_send <= psched_get_time()) {
+                        rb_erase(p, &q->t_root);
+
+                        sch->q.qlen--;
+                        skb->next = NULL;
+                        skb->prev = NULL;
+                        skb->tstamp = netem_skb_cb(skb)->tstamp_save;
 
 #ifdef CONFIG_NET_CLS_ACT
                         /*
@@ -522,18 +585,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
                                 }
                                 goto tfifo_dequeue;
                         }
-deliver:
-                        qdisc_unthrottled(sch);
-                        qdisc_bstats_update(sch, skb);
-                        return skb;
+                        goto deliver;
                 }
 
                 if (q->qdisc) {
                         skb = q->qdisc->ops->dequeue(q->qdisc);
                         if (skb)
                                 goto deliver;
                 }
-                qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
+                qdisc_watchdog_schedule(&q->watchdog,
+                                        netem_skb_cb(skb)->time_to_send);
         }
 
         if (q->qdisc) {
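
netem_dequeue() now takes the leftmost, i.e. earliest-deadline, packet: if its time_to_send has passed it is erased from the tree and delivered (after restoring skb->tstamp from tstamp_save, since that field doubled as rb_node storage), otherwise the watchdog is armed for that time. A self-contained toy version of the decision, reusing the unbalanced-BST convention of the earlier sketch with a hypothetical try_dequeue():

    #include <stdio.h>

    struct tnode {                  /* stand-in for an skb in the tree */
        long time_to_send;
        struct tnode *left, *right;
    };

    /* Deliver the earliest packet if its deadline has passed; otherwise
     * report when a timer should fire (the kernel calls
     * qdisc_watchdog_schedule() here). Unbalanced BST for illustration. */
    static struct tnode *try_dequeue(struct tnode **root, long now,
                                     long *wakeup)
    {
        struct tnode **p = root;

        while (*p && (*p)->left)    /* leftmost node = rb_first() analogue */
            p = &(*p)->left;
        if (!*p)
            return NULL;            /* tree empty */
        if ((*p)->time_to_send > now) {
            *wakeup = (*p)->time_to_send;   /* arm the watchdog instead */
            return NULL;
        }
        struct tnode *n = *p;
        *p = n->right;              /* unlink leftmost; no rebalancing */
        return n;
    }

    int main(void)
    {
        struct tnode a = { 20 }, b = { 10 };
        struct tnode *root = &a;
        long wakeup = 0;

        a.left = &b;
        if (!try_dequeue(&root, 5, &wakeup))        /* nothing due yet */
            printf("sleep until %ld\n", wakeup);    /* prints 10 */
        printf("sent %ld\n",
               try_dequeue(&root, 15, &wakeup)->time_to_send);  /* 10 */
        return 0;
    }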