SUNRPC: provide a mechanism for collecting stats in the RPC client
Add a simple mechanism for collecting stats in the RPC client. Stats are
tabulated during xprt_release. Note that per_cpu shenanigans are not
required here because the RPC client already serializes on the transport
write lock.
Test plan:
Compile kernel with CONFIG_NFS enabled. Basic performance regression
testing with high-speed networking and high performance server.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f147e6b..0f36620 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -45,7 +45,8 @@
char * cl_server; /* server machine name */
char * cl_protname; /* protocol name */
struct rpc_auth * cl_auth; /* authenticator */
- struct rpc_stat * cl_stats; /* statistics */
+ struct rpc_stat * cl_stats; /* per-program statistics */
+ struct rpc_iostats * cl_metrics; /* per-client statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
cl_intr : 1,/* interruptible */
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
new file mode 100644
index 0000000..8f96e9d
--- /dev/null
+++ b/include/linux/sunrpc/metrics.h
@@ -0,0 +1,77 @@
+/*
+ * linux/include/linux/sunrpc/metrics.h
+ *
+ * Declarations for RPC client per-operation metrics
+ *
+ * Copyright (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ * RPC client per-operation statistics provide latency and retry
+ * information about each type of RPC procedure in a given RPC program.
+ * These statistics are not for detailed problem diagnosis, but simply
+ * to indicate whether the problem is local or remote.
+ *
+ * These counters are not meant to be human-readable, but are meant to be
+ * integrated into system monitoring tools such as "sar" and "iostat". As
+ * such, the counters are sampled by the tools over time, and are never
+ * zeroed after a file system is mounted. Moving averages can be computed
+ * by the tools by taking the difference between two instantaneous samples
+ * and dividing that by the time between the samples.
+ *
+ * The counters are maintained in a single array per RPC client, indexed
+ * by procedure number. There is no need to maintain separate counter
+ * arrays per-CPU because these counters are always modified behind locks.
+ */
+
+#ifndef _LINUX_SUNRPC_METRICS_H
+#define _LINUX_SUNRPC_METRICS_H
+
+#include <linux/seq_file.h>
+
+#define RPC_IOSTATS_VERS "1.0"
+
+struct rpc_iostats {
+ /*
+ * These counters give an idea about how many request
+ * transmissions are required, on average, to complete that
+ * particular procedure. Some procedures may require more
+ * than one transmission because the server is unresponsive,
+ * the client is retransmitting too aggressively, or the
+ * requests are large and the network is congested.
+ */
+ unsigned long om_ops, /* count of operations */
+ om_ntrans, /* count of RPC transmissions */
+ om_timeouts; /* count of major timeouts */
+
+ /*
+ * These count how many bytes are sent and received for a
+ * given RPC procedure type. This indicates how much load a
+ * particular procedure is putting on the network. These
+ * counts include the RPC and ULP headers, and the request
+ * payload.
+ */
+ unsigned long long om_bytes_sent, /* count of bytes out */
+ om_bytes_recv; /* count of bytes in */
+
+ /*
+ * The length of time an RPC request waits in queue before
+ * transmission, the network + server latency of the request,
+ * and the total time the request spent from init to release
+ * are measured.
+ */
+ unsigned long long om_queue, /* jiffies queued for xmit */
+ om_rtt, /* jiffies for RPC RTT */
+ om_execute; /* jiffies for RPC execution */
+} ____cacheline_aligned;
+
+struct rpc_task;
+struct rpc_clnt;
+
+/*
+ * EXPORTed functions for managing rpc_iostats structures
+ */
+struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *);
+void rpc_count_iostats(struct rpc_task *);
+void rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void rpc_free_iostats(struct rpc_iostats *);
+
+#endif /* _LINUX_SUNRPC_METRICS_H */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 84eb5b4..0bb23e8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -28,12 +28,11 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/utsname.h>
+#include <linux/workqueue.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-
-#include <linux/nfs.h>
+#include <linux/sunrpc/metrics.h>
#define RPC_SLACK_SPACE (1024) /* total overkill */
@@ -147,6 +146,7 @@
clnt->cl_vers = version->number;
clnt->cl_prot = xprt->prot;
clnt->cl_stats = program->stats;
+ clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
if (!clnt->cl_port)
@@ -245,6 +245,7 @@
if (new->cl_auth)
atomic_inc(&new->cl_auth->au_count);
new->cl_pmap = &new->cl_pmap_default;
+ new->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
return new;
out_no_clnt:
@@ -315,6 +316,8 @@
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
out_free:
+ rpc_free_iostats(clnt->cl_metrics);
+ clnt->cl_metrics = NULL;
if (clnt->cl_dentry)
dput(clnt->cl_dentry);
kfree(clnt);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 4979f22..24ac716 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/metrics.h>
#define RPCDBG_FACILITY RPCDBG_MISC
@@ -106,6 +107,110 @@
}
}
+/**
+ * rpc_alloc_iostats - allocate an rpc_iostats structure
+ * @clnt: RPC program, version, and xprt
+ *
+ */
+struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
+{
+ unsigned int ops = clnt->cl_maxproc;
+ size_t size = ops * sizeof(struct rpc_iostats);
+ struct rpc_iostats *new;
+
+ new = kmalloc(size, GFP_KERNEL);
+ if (new)
+ memset(new, 0 , size);
+ return new;
+}
+EXPORT_SYMBOL(rpc_alloc_iostats);
+
+/**
+ * rpc_free_iostats - release an rpc_iostats structure
+ * @stats: doomed rpc_iostats structure
+ *
+ */
+void rpc_free_iostats(struct rpc_iostats *stats)
+{
+ kfree(stats);
+}
+EXPORT_SYMBOL(rpc_free_iostats);
+
+/**
+ * rpc_count_iostats - tally up per-task stats
+ * @task: completed rpc_task
+ *
+ * Relies on the caller for serialization.
+ */
+void rpc_count_iostats(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_iostats *stats = task->tk_client->cl_metrics;
+ struct rpc_iostats *op_metrics;
+ long rtt, execute, queue;
+
+ if (!stats || !req)
+ return;
+ op_metrics = &stats[task->tk_msg.rpc_proc->p_proc];
+
+ op_metrics->om_ops++;
+ op_metrics->om_ntrans += req->rq_ntrans;
+ op_metrics->om_timeouts += task->tk_timeouts;
+
+ op_metrics->om_bytes_sent += task->tk_bytes_sent;
+ op_metrics->om_bytes_recv += req->rq_received;
+
+ queue = (long)req->rq_xtime - task->tk_start;
+ if (queue < 0)
+ queue = -queue;
+ op_metrics->om_queue += queue;
+
+ rtt = task->tk_rtt;
+ if (rtt < 0)
+ rtt = -rtt;
+ op_metrics->om_rtt += rtt;
+
+ execute = (long)jiffies - task->tk_start;
+ if (execute < 0)
+ execute = -execute;
+ op_metrics->om_execute += execute;
+}
+
+#define MILLISECS_PER_JIFFY (1000UL / HZ)
+
+void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+{
+ struct rpc_iostats *stats = clnt->cl_metrics;
+ struct rpc_xprt *xprt = clnt->cl_xprt;
+ unsigned int op, maxproc = clnt->cl_maxproc;
+
+ if (!stats)
+ return;
+
+ seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
+ seq_printf(seq, "p/v: %u/%u (%s)\n",
+ clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+
+ if (xprt)
+ xprt->ops->print_stats(xprt, seq);
+
+ seq_printf(seq, "\tper-op statistics\n");
+ for (op = 0; op < maxproc; op++) {
+ struct rpc_iostats *metrics = &stats[op];
+ seq_printf(seq, "%12u: ", op);
+ seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+ metrics->om_ops,
+ metrics->om_ntrans,
+ metrics->om_timeouts,
+ metrics->om_bytes_sent,
+ metrics->om_bytes_recv,
+ metrics->om_queue * MILLISECS_PER_JIFFY,
+ metrics->om_rtt * MILLISECS_PER_JIFFY,
+ metrics->om_execute * MILLISECS_PER_JIFFY);
+ }
+}
+EXPORT_SYMBOL(rpc_print_iostats);
+
/*
* Register/unregister RPC proc files
*/
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c624197..eb5a262 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -44,6 +44,7 @@
#include <linux/random.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
/*
* Local variables
@@ -859,6 +860,7 @@
if (!(req = task->tk_rqstp))
return;
+ rpc_count_iostats(task);
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)