
Commit 3015f3d

paolo-github authored and davem330 committed
pkt_sched: enable QFQ to support TSO/GSO
If the max packet size for some class (configured through tc) is violated by the actual size of the packets of that class, then QFQ would not schedule classes correctly, and the data structures implementing the bucket lists may get corrupted. This problem occurs with TSO/GSO even if the max packet size is set to the MTU, and is, e.g., the cause of the failure reported in [1]. Two patches have been proposed to solve this problem in [2]; one of them is a preliminary version of this patch.

This patch addresses the above issues by:
1) setting QFQ parameters to proper values for supporting TSO/GSO (in particular, setting the maximum possible packet size to 64KB),
2) automatically increasing the max packet size for a class, lmax, when a packet with a larger size than the current value of lmax arrives.

The drawback of the first point is that the maximum weight for a class is now limited to 4096, which is equal to 1/16 of the maximum weight sum.

Finally, this patch also forcibly caps the timestamps of a class if they are too high to be stored in the bucket list. This capping, taken from QFQ+ [3], handles the infrequent case described in the comment to the function qfq_slot_insert.

[1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2
[2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2
[3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2

Signed-off-by: Paolo Valente <[email protected]>
Tested-by: Cong Wang <[email protected]>
Acked-by: Stephen Hemminger <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
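As a quick sanity check on the arithmetic above, here is a small stand-alone C sketch (not part of the patch; it only copies the macro values introduced in the hunks below, everything else is illustrative) that re-derives the 4096 weight cap and the slot-index bound of 18 quoted in the new comment to qfq_slot_insert:

/* Stand-alone sketch: re-derive the constants' arithmetic (not kernel code). */
#include <stdio.h>

#define QFQ_MAX_INDEX	24
#define QFQ_MAX_WSHIFT	12
#define QFQ_MAX_WEIGHT	(1 << QFQ_MAX_WSHIFT)
#define QFQ_MAX_WSUM	(16 * QFQ_MAX_WEIGHT)
#define QFQ_MTU_SHIFT	16	/* max packet size is now 1<<16 = 64KB */
#define QFQ_MIN_LMAX	256

int main(void)
{
	/* max class weight is now 4096, i.e. 1/16 of the max weight sum */
	printf("max weight     = %d\n", QFQ_MAX_WEIGHT);	/* 4096  */
	printf("max weight sum = %d\n", QFQ_MAX_WSUM);		/* 65536 */

	/* bound on the bucket-list slot index quoted in the comment added
	 * to qfq_slot_insert(): 2 + (lmax_max/lmax_min) * (w_max/wsum_max) */
	int bound = 2 + ((1 << QFQ_MTU_SHIFT) / QFQ_MIN_LMAX) *
			QFQ_MAX_WEIGHT / QFQ_MAX_WSUM;
	printf("slot index bound = %d\n", bound);		/* 18 */
	return 0;
}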
Parent: cacb6ba


net/sched/sch_qfq.c

Lines changed: 79 additions & 30 deletions
@@ -84,18 +84,19 @@
  * grp->index is the index of the group; and grp->slot_shift
  * is the shift for the corresponding (scaled) sigma_i.
  */
-#define QFQ_MAX_INDEX		19
-#define QFQ_MAX_WSHIFT		16
+#define QFQ_MAX_INDEX		24
+#define QFQ_MAX_WSHIFT		12
 
 #define QFQ_MAX_WEIGHT		(1<<QFQ_MAX_WSHIFT)
-#define QFQ_MAX_WSUM		(2*QFQ_MAX_WEIGHT)
+#define QFQ_MAX_WSUM		(16*QFQ_MAX_WEIGHT)
 
 #define FRAC_BITS		30	/* fixed point arithmetic */
 #define ONE_FP			(1UL << FRAC_BITS)
 #define IWSUM			(ONE_FP/QFQ_MAX_WSUM)
 
-#define QFQ_MTU_SHIFT		11
+#define QFQ_MTU_SHIFT		16	/* to support TSO/GSO */
 #define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
+#define QFQ_MIN_LMAX		256	/* min possible lmax for a class */
 
 /*
  * Possible group states. These values are used as indexes for the bitmaps
@@ -231,14 +232,40 @@ static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
 	q->wsum += delta_w;
 }
 
+static void qfq_update_reactivate_class(struct qfq_sched *q,
+					struct qfq_class *cl,
+					u32 inv_w, u32 lmax, int delta_w)
+{
+	bool need_reactivation = false;
+	int i = qfq_calc_index(inv_w, lmax);
+
+	if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+		/*
+		 * shift cl->F back, to not charge the
+		 * class for the not-yet-served head
+		 * packet
+		 */
+		cl->F = cl->S;
+		/* remove class from its slot in the old group */
+		qfq_deactivate_class(q, cl);
+		need_reactivation = true;
+	}
+
+	qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+	if (need_reactivation) /* activate in new group */
+		qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+}
+
+
 static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			    struct nlattr **tca, unsigned long *arg)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl = (struct qfq_class *)*arg;
 	struct nlattr *tb[TCA_QFQ_MAX + 1];
 	u32 weight, lmax, inv_w;
-	int i, err;
+	int err;
 	int delta_w;
 
 	if (tca[TCA_OPTIONS] == NULL) {
@@ -270,16 +297,14 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tb[TCA_QFQ_LMAX]) {
 		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
-		if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
+		if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
 			pr_notice("qfq: invalid max length %u\n", lmax);
 			return -EINVAL;
 		}
 	} else
-		lmax = 1UL << QFQ_MTU_SHIFT;
+		lmax = psched_mtu(qdisc_dev(sch));
 
 	if (cl != NULL) {
-		bool need_reactivation = false;
-
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
 						    qdisc_root_sleeping_lock(sch),
@@ -291,24 +316,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		if (lmax == cl->lmax && inv_w == cl->inv_w)
 			return 0; /* nothing to update */
 
-		i = qfq_calc_index(inv_w, lmax);
 		sch_tree_lock(sch);
-		if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
-			/*
-			 * shift cl->F back, to not charge the
-			 * class for the not-yet-served head
-			 * packet
-			 */
-			cl->F = cl->S;
-			/* remove class from its slot in the old group */
-			qfq_deactivate_class(q, cl);
-			need_reactivation = true;
-		}
-
-		qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
-
-		if (need_reactivation) /* activate in new group */
-			qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+		qfq_update_reactivate_class(q, cl, inv_w, lmax, delta_w);
 		sch_tree_unlock(sch);
 
 		return 0;
@@ -663,15 +672,48 @@ static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
 
 
 /*
- * XXX we should make sure that slot becomes less than 32.
- * This is guaranteed by the input values.
- * roundedS is always cl->S rounded on grp->slot_shift bits.
+ * If the weight and lmax (max_pkt_size) of the classes do not change,
+ * then QFQ guarantees that the slot index is never higher than
+ * 2 + ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM).
+ *
+ * With the current values of the above constants, the index is
+ * then guaranteed to never be higher than 2 + 256 * (1 / 16) = 18.
+ *
+ * When the weight of a class is increased or the lmax of the class is
+ * decreased, a new class with smaller slot size may happen to be
+ * activated. The activation of this class should be properly delayed
+ * to when the service of the class has finished in the ideal system
+ * tracked by QFQ. If the activation of the class is not delayed to
+ * this reference time instant, then this class may be unjustly served
+ * before other classes waiting for service. This may cause
+ * (unfrequently) the above bound to the slot index to be violated for
+ * some of these unlucky classes.
+ *
+ * Instead of delaying the activation of the new class, which is quite
+ * complex, the following inaccurate but simple solution is used: if
+ * the slot index is higher than QFQ_MAX_SLOTS-2, then the timestamps
+ * of the class are shifted backward so as to let the slot index
+ * become equal to QFQ_MAX_SLOTS-2. This threshold is used because, if
+ * the slot index is above it, then the data structure implementing
+ * the bucket list either gets immediately corrupted or may get
+ * corrupted on a possible next packet arrival that causes the start
+ * time of the group to be shifted backward.
  */
 static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
 			    u64 roundedS)
 {
 	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
-	unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
+	unsigned int i; /* slot index in the bucket list */
+
+	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
+		u64 deltaS = roundedS - grp->S -
+			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
+		cl->S -= deltaS;
+		cl->F -= deltaS;
+		slot = QFQ_MAX_SLOTS - 2;
+	}
+
+	i = (grp->front + slot) % QFQ_MAX_SLOTS;
 
 	hlist_add_head(&cl->next, &grp->slots[i]);
 	__set_bit(slot, &grp->full_slots);
@@ -892,6 +934,13 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
 
+	if (unlikely(cl->lmax < qdisc_pkt_len(skb))) {
+		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
+			 cl->lmax, qdisc_pkt_len(skb), cl->common.classid);
+		qfq_update_reactivate_class(q, cl, cl->inv_w,
+					    qdisc_pkt_len(skb), 0);
+	}
+
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
