net: cdc_ncm: simplify and optimize frame padding

We can avoid the costly division for the common case where
we pad the frame to tx_max size as long as we ensure that
tx_max is either the device specified dwNtbOutMaxSize or not
a multiplum of wMaxPacketSize.

Using the preconverted 'maxpacket' field avoids converting
wMaxPacketSize to CPU endianness for every transmitted frame

And since we only will hit the one byte padding rule for short
frames, we can drop testing the skb for tailroom.

The change means that tx_max now represents the real maximum
skb size, enabling us to allocate the correct size instead of
always making room for one extra byte.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 43afde8..38566bf 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -506,6 +506,15 @@
 	dev->status = ctx->status_ep;
 	dev->rx_urb_size = ctx->rx_max;
 
+	/* cdc_ncm_setup will override dwNtbOutMaxSize if it is
+	 * outside the sane range. Adding a pad byte here if necessary
+	 * simplifies the handling in cdc_ncm_fill_tx_frame, making
+	 * tx_max always represent the real skb max size.
+	 */
+	if (ctx->tx_max != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) &&
+	    ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
+		ctx->tx_max++;
+
 	ctx->tx_speed = ctx->rx_speed = 0;
 	return 0;
 
@@ -664,6 +673,7 @@
 struct sk_buff *
 cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 {
+	struct usbnet *dev = netdev_priv(ctx->netdev);
 	struct usb_cdc_ncm_nth16 *nth16;
 	struct usb_cdc_ncm_ndp16 *ndp16;
 	struct sk_buff *skb_out;
@@ -683,7 +693,7 @@
 
 	/* allocate a new OUT skb */
 	if (!skb_out) {
-		skb_out = alloc_skb((ctx->tx_max + 1), GFP_ATOMIC);
+		skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC);
 		if (skb_out == NULL) {
 			if (skb != NULL) {
 				dev_kfree_skb_any(skb);
@@ -788,19 +798,16 @@
 		/* variables will be reset at next call */
 	}
 
-	/*
-	 * If collected data size is less or equal CDC_NCM_MIN_TX_PKT bytes,
-	 * we send buffers as it is. If we get more data, it would be more
-	 * efficient for USB HS mobile device with DMA engine to receive a full
-	 * size NTB, than canceling DMA transfer and receiving a short packet.
+	/* If collected data size is less or equal CDC_NCM_MIN_TX_PKT
+	 * bytes, we send buffers as it is. If we get more data, it
+	 * would be more efficient for USB HS mobile device with DMA
+	 * engine to receive a full size NTB, than canceling DMA
+	 * transfer and receiving a short packet.
 	 */
 	if (skb_out->len > CDC_NCM_MIN_TX_PKT)
-		/* final zero padding */
-		memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, ctx->tx_max - skb_out->len);
-
-	/* do we need to prevent a ZLP? */
-	if (((skb_out->len % le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0) &&
-	    (skb_out->len < le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)) && skb_tailroom(skb_out))
+		memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0,
+		       ctx->tx_max - skb_out->len);
+	else if ((skb_out->len % dev->maxpacket) == 0)
 		*skb_put(skb_out, 1) = 0;	/* force short packet */
 
 	/* set final frame length */