Skip to content

Commit 3289025

Browse files
RDS: add receive message trace used by application
Socket option to tap receive path latency at various stages, in nanoseconds. It can be enabled on selected sockets using the SO_RDS_MSG_RXPATH_LATENCY socket option. RDS will return the data to the application with RDS_CMSG_RXPATH_LATENCY in the defined format. Scope is left to add more trace points in the future without needing to change the interface. Reviewed-by: Sowmini Varadhan <[email protected]> Signed-off-by: Santosh Shilimkar <[email protected]>
1 parent f9fb69a commit 3289025

File tree

6 files changed

+109
-3
lines changed

6 files changed

+109
-3
lines changed

include/uapi/linux/rds.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@
5252
#define RDS_GET_MR_FOR_DEST 7
5353
#define SO_RDS_TRANSPORT 8
5454

55+
/* Socket option to tap receive path latency
56+
* SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
57+
* Format used: struct rds_rx_trace_so
58+
*/
59+
#define SO_RDS_MSG_RXPATH_LATENCY 10
60+
61+
5562
/* supported values for SO_RDS_TRANSPORT */
5663
#define RDS_TRANS_IB 0
5764
#define RDS_TRANS_IWARP 1
@@ -77,6 +84,12 @@
7784
* the same as for the GET_MR setsockopt.
7885
* RDS_CMSG_RDMA_STATUS (recvmsg)
7986
* Returns the status of a completed RDMA operation.
87+
* RDS_CMSG_RXPATH_LATENCY(recvmsg)
88+
* Returns rds message latencies in various stages of receive
89+
* path in ns. It is set per socket using SO_RDS_MSG_RXPATH_LATENCY
90+
* socket option. Legitimate points are defined in
91+
* enum rds_message_rxpath_latency. More points can be added in
92+
* future. CMSG format is struct rds_cmsg_rx_trace.
8093
*/
8194
#define RDS_CMSG_RDMA_ARGS 1
8295
#define RDS_CMSG_RDMA_DEST 2
@@ -87,6 +100,7 @@
87100
#define RDS_CMSG_ATOMIC_CSWP 7
88101
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
89102
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
103+
#define RDS_CMSG_RXPATH_LATENCY 11
90104

91105
#define RDS_INFO_FIRST 10000
92106
#define RDS_INFO_COUNTERS 10000
@@ -171,6 +185,25 @@ struct rds_info_rdma_connection {
171185
uint32_t rdma_mr_size;
172186
};
173187

188+
/*
 * RDS message receive path latency points.
 *
 * Each value names one stage of the receive path whose latency can be
 * requested through SO_RDS_MSG_RXPATH_LATENCY and is reported back in
 * RDS_CMSG_RXPATH_LATENCY. RDS_MSG_RX_DGRAM_TRACE_MAX is not a trace
 * point; it is the number of defined points and sizes the arrays below.
 */
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
	RDS_MSG_RX_DGRAM_REASSEMBLE,
	RDS_MSG_RX_DGRAM_DELIVERED,
	RDS_MSG_RX_DGRAM_TRACE_MAX
};

/*
 * Argument of the SO_RDS_MSG_RXPATH_LATENCY socket option.
 *
 * Fixed-width <stdint.h> types are used (rather than the kernel-only
 * u8/u64) so that this exported header also compiles in userspace,
 * matching the uint32_t fields used elsewhere in this file.
 */
struct rds_rx_trace_so {
	/* Number of valid entries in rx_trace_pos[]. */
	uint8_t rx_traces;
	/* Requested trace points (enum rds_message_rxpath_latency values). */
	uint8_t rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
 * Payload of the RDS_CMSG_RXPATH_LATENCY control message.
 */
struct rds_cmsg_rx_trace {
	/* Number of valid entries in rx_trace_pos[] and rx_trace[]. */
	uint8_t rx_traces;
	/* Trace point that the matching rx_trace[] entry refers to. */
	uint8_t rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
	/* Latency reported for rx_trace_pos[i], in nanoseconds. */
	uint64_t rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
};
206+
174207
/*
175208
* Congestion monitoring.
176209
* Congestion control in RDS happens at the host connection

net/rds/af_rds.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,30 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
298298
return 0;
299299
}
300300

301+
/*
 * Handle the SO_RDS_MSG_RXPATH_LATENCY socket option.
 *
 * Copies a struct rds_rx_trace_so from userspace and records the requested
 * receive path trace points on the socket.
 *
 * @rs:     socket whose trace configuration is updated
 * @optval: userspace pointer to a struct rds_rx_trace_so
 * @optlen: length of the option value; must equal
 *          sizeof(struct rds_rx_trace_so)
 *
 * Returns 0 on success. Returns -EFAULT if the length is wrong, the copy
 * from userspace fails, the trace count exceeds the number of trace
 * points, or a requested position is not a valid trace point. When an
 * invalid position is found, tracing is disabled on the socket
 * (rs_rx_traces is reset to 0).
 */
static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
				  int optlen)
{
	struct rds_rx_trace_so trace;
	int i;

	if (optlen != sizeof(struct rds_rx_trace_so))
		return -EFAULT;

	if (copy_from_user(&trace, optval, sizeof(trace)))
		return -EFAULT;

	/*
	 * rx_traces is user controlled; without this bound the loop below
	 * would index past the end of trace.rx_trace_pos[] and
	 * rs->rs_rx_trace[], both of which hold
	 * RDS_MSG_RX_DGRAM_TRACE_MAX entries.
	 */
	if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
		return -EFAULT;

	rs->rs_rx_traces = trace.rx_traces;
	for (i = 0; i < rs->rs_rx_traces; i++) {
		/*
		 * Valid positions are 0 .. RDS_MSG_RX_DGRAM_TRACE_MAX - 1.
		 * The receive side reads i_rx_lat_trace[pos + 1], so a
		 * position equal to RDS_MSG_RX_DGRAM_TRACE_MAX must be
		 * rejected as well.
		 */
		if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
			rs->rs_rx_traces = 0;
			return -EFAULT;
		}
		rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
	}

	return 0;
}
324+
301325
static int rds_setsockopt(struct socket *sock, int level, int optname,
302326
char __user *optval, unsigned int optlen)
303327
{
@@ -338,6 +362,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
338362
ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
339363
release_sock(sock->sk);
340364
break;
365+
case SO_RDS_MSG_RXPATH_LATENCY:
366+
ret = rds_recv_track_latency(rs, optval, optlen);
367+
break;
341368
default:
342369
ret = -ENOPROTOOPT;
343370
}
@@ -484,6 +511,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
484511
INIT_LIST_HEAD(&rs->rs_cong_list);
485512
spin_lock_init(&rs->rs_rdma_lock);
486513
rs->rs_rdma_keys = RB_ROOT;
514+
rs->rs_rx_traces = 0;
487515

488516
spin_lock_bh(&rds_sock_lock);
489517
list_add_tail(&rs->rs_item, &rds_sock_list);

net/rds/ib_recv.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,8 +911,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
911911
ic->i_ibinc = ibinc;
912912

913913
hdr = &ibinc->ii_inc.i_hdr;
914+
ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
915+
local_clock();
914916
memcpy(hdr, ihdr, sizeof(*hdr));
915917
ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
918+
ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
919+
local_clock();
916920

917921
rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
918922
ic->i_recv_data_rem, hdr->h_flags);

net/rds/rds.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ struct rds_ext_header_rdma_dest {
253253
#define RDS_EXTHDR_GEN_NUM 6
254254

255255
#define __RDS_EXTHDR_MAX 16 /* for now */
256+
#define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1)
257+
#define RDS_MSG_RX_HDR 0
258+
#define RDS_MSG_RX_START 1
259+
#define RDS_MSG_RX_END 2
260+
#define RDS_MSG_RX_CMSG 3
256261

257262
struct rds_incoming {
258263
atomic_t i_refcount;
@@ -265,6 +270,7 @@ struct rds_incoming {
265270

266271
rds_rdma_cookie_t i_rdma_cookie;
267272
struct timeval i_rx_tstamp;
273+
u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
268274
};
269275

270276
struct rds_mr {
@@ -575,6 +581,10 @@ struct rds_sock {
575581
unsigned char rs_recverr,
576582
rs_cong_monitor;
577583
u32 rs_hash_initval;
584+
585+
/* Socket receive path trace points*/
586+
u8 rs_rx_traces;
587+
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
578588
};
579589

580590
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)

net/rds/recv.c

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,18 @@
4343
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
4444
__be32 saddr)
4545
{
46+
int i;
47+
4648
atomic_set(&inc->i_refcount, 1);
4749
INIT_LIST_HEAD(&inc->i_item);
4850
inc->i_conn = conn;
4951
inc->i_saddr = saddr;
5052
inc->i_rdma_cookie = 0;
5153
inc->i_rx_tstamp.tv_sec = 0;
5254
inc->i_rx_tstamp.tv_usec = 0;
55+
56+
for (i = 0; i < RDS_RX_MAX_TRACES; i++)
57+
inc->i_rx_lat_trace[i] = 0;
5358
}
5459
EXPORT_SYMBOL_GPL(rds_inc_init);
5560

@@ -373,6 +378,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
373378
if (sock_flag(sk, SOCK_RCVTSTAMP))
374379
do_gettimeofday(&inc->i_rx_tstamp);
375380
rds_inc_addref(inc);
381+
inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
376382
list_add_tail(&inc->i_item, &rs->rs_recv_queue);
377383
__rds_wake_sk_sleep(sk);
378384
} else {
@@ -534,7 +540,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
534540
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
535541
sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
536542
if (ret)
537-
return ret;
543+
goto out;
538544
}
539545

540546
if ((inc->i_rx_tstamp.tv_sec != 0) &&
@@ -543,10 +549,30 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
543549
sizeof(struct timeval),
544550
&inc->i_rx_tstamp);
545551
if (ret)
546-
return ret;
552+
goto out;
547553
}
548554

549-
return 0;
555+
if (rs->rs_rx_traces) {
556+
struct rds_cmsg_rx_trace t;
557+
int i, j;
558+
559+
inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
560+
t.rx_traces = rs->rs_rx_traces;
561+
for (i = 0; i < rs->rs_rx_traces; i++) {
562+
j = rs->rs_rx_trace[i];
563+
t.rx_trace_pos[i] = j;
564+
t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] -
565+
inc->i_rx_lat_trace[j];
566+
}
567+
568+
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY,
569+
sizeof(t), &t);
570+
if (ret)
571+
goto out;
572+
}
573+
574+
out:
575+
return ret;
550576
}
551577

552578
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,

net/rds/tcp_recv.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
180180
rdsdebug("alloced tinc %p\n", tinc);
181181
rds_inc_path_init(&tinc->ti_inc, cp,
182182
cp->cp_conn->c_faddr);
183+
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
184+
local_clock();
185+
183186
/*
184187
* XXX * we might be able to use the __ variants when
185188
* we've already serialized at a higher level.
@@ -204,6 +207,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
204207
/* could be 0 for a 0 len message */
205208
tc->t_tinc_data_rem =
206209
be32_to_cpu(tinc->ti_inc.i_hdr.h_len);
210+
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
211+
local_clock();
207212
}
208213
}
209214

0 commit comments

Comments
 (0)