cifs: vary timeout on writes past EOF based on offset (try #5)
This is the fourth version of this patch:
The first three generated a compiler warning asking for explicit curly
braces.
The first two didn't handle update the size correctly when writes that
didn't start at the eof were done.
The first patch also didn't update the size correctly when it explicitly
set via truncate().
This patch adds code to track the client's current understanding of the
size of the file on the server separate from the i_size, and then to use
this info to semi-intelligently set the timeout for writes past the EOF.
This helps prevent timeouts when trying to write large, sparse files on
windows servers.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 38491fd..34f5701 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -316,6 +316,7 @@
cifs_inode->clientCanCacheAll = false;
cifs_inode->delete_pending = false;
cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
+ cifs_inode->server_eof = 0;
/* Can not set i_flags here - they get immediately overwritten
to zero by the VFS */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9fbf4df..7ae1986 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -370,6 +370,7 @@
bool clientCanCacheAll:1; /* read and writebehind oplock */
bool oplockPending:1;
bool delete_pending:1; /* DELETE_ON_CLOSE is set */
+ u64 server_eof; /* current file size on server */
struct inode vfs_inode;
};
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3f36b1e..a0845dc 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1626,6 +1626,8 @@
int smb_hdr_len;
int resp_buf_type = 0;
+ *nbytes = 0;
+
cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count));
if (tcon->ses->capabilities & CAP_LARGE_FILES) {
@@ -1682,11 +1684,9 @@
cifs_stats_inc(&tcon->num_writes);
if (rc) {
cFYI(1, ("Send error Write2 = %d", rc));
- *nbytes = 0;
} else if (resp_buf_type == 0) {
/* presumably this can not happen, but best to be safe */
rc = -EIO;
- *nbytes = 0;
} else {
WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base;
*nbytes = le16_to_cpu(pSMBr->CountHigh);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 81747ac..dfd3e6c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -971,6 +971,40 @@
return rc;
}
+/*
+ * Set the timeout on write requests past EOF. For some servers (Windows)
+ * these calls can be very long.
+ *
+ * If we're writing >10M past the EOF we give a 180s timeout. Anything less
+ * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
+ * The 10M cutoff is totally arbitrary. A better scheme for this would be
+ * welcome if someone wants to suggest one.
+ *
+ * We may be able to do a better job with this if there were some way to
+ * declare that a file should be sparse.
+ */
+static int
+cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
+{
+ if (offset <= cifsi->server_eof)
+ return CIFS_STD_OP;
+ else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
+ return CIFS_VLONG_OP;
+ else
+ return CIFS_LONG_OP;
+}
+
+/* update the file size (if needed) after a write */
+static void
+cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
+ unsigned int bytes_written)
+{
+ loff_t end_of_write = offset + bytes_written;
+
+ if (end_of_write > cifsi->server_eof)
+ cifsi->server_eof = end_of_write;
+}
+
ssize_t cifs_user_write(struct file *file, const char __user *write_data,
size_t write_size, loff_t *poffset)
{
@@ -981,6 +1015,7 @@
struct cifsTconInfo *pTcon;
int xid, long_op;
struct cifsFileInfo *open_file;
+ struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1000,11 +1035,7 @@
xid = GetXid();
- if (*poffset > file->f_path.dentry->d_inode->i_size)
- long_op = CIFS_VLONG_OP; /* writes past EOF take long time */
- else
- long_op = CIFS_LONG_OP;
-
+ long_op = cifs_write_timeout(cifsi, *poffset);
for (total_written = 0; write_size > total_written;
total_written += bytes_written) {
rc = -EAGAIN;
@@ -1048,8 +1079,10 @@
FreeXid(xid);
return rc;
}
- } else
+ } else {
+ cifs_update_eof(cifsi, *poffset, bytes_written);
*poffset += bytes_written;
+ }
long_op = CIFS_STD_OP; /* subsequent writes fast -
15 seconds is plenty */
}
@@ -1085,6 +1118,7 @@
struct cifsTconInfo *pTcon;
int xid, long_op;
struct cifsFileInfo *open_file;
+ struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1099,11 +1133,7 @@
xid = GetXid();
- if (*poffset > file->f_path.dentry->d_inode->i_size)
- long_op = CIFS_VLONG_OP; /* writes past EOF can be slow */
- else
- long_op = CIFS_LONG_OP;
-
+ long_op = cifs_write_timeout(cifsi, *poffset);
for (total_written = 0; write_size > total_written;
total_written += bytes_written) {
rc = -EAGAIN;
@@ -1166,8 +1196,10 @@
FreeXid(xid);
return rc;
}
- } else
+ } else {
+ cifs_update_eof(cifsi, *poffset, bytes_written);
*poffset += bytes_written;
+ }
long_op = CIFS_STD_OP; /* subsequent writes fast -
15 seconds is plenty */
}
@@ -1380,11 +1412,12 @@
int nr_pages;
__u64 offset = 0;
struct cifsFileInfo *open_file;
+ struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
struct page *page;
struct pagevec pvec;
int rc = 0;
int scanned = 0;
- int xid;
+ int xid, long_op;
cifs_sb = CIFS_SB(mapping->host->i_sb);
@@ -1528,12 +1561,15 @@
cERROR(1, ("No writable handles for inode"));
rc = -EBADF;
} else {
+ long_op = cifs_write_timeout(cifsi, offset);
rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
open_file->netfid,
bytes_to_write, offset,
&bytes_written, iov, n_iov,
- CIFS_LONG_OP);
+ long_op);
atomic_dec(&open_file->wrtPending);
+ cifs_update_eof(cifsi, offset, bytes_written);
+
if (rc || bytes_written < bytes_to_write) {
cERROR(1, ("Write2 ret %d, wrote %d",
rc, bytes_written));
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fceebee..09082ac 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -143,6 +143,7 @@
inode->i_nlink = le64_to_cpu(info->Nlinks);
+ cifsInfo->server_eof = end_of_file;
spin_lock(&inode->i_lock);
if (is_size_safe_to_change(cifsInfo, end_of_file)) {
/*
@@ -606,12 +607,12 @@
inode->i_mode |= S_IFREG;
}
+ cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile);
spin_lock(&inode->i_lock);
- if (is_size_safe_to_change(cifsInfo,
- le64_to_cpu(pfindData->EndOfFile))) {
+ if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) {
/* can not safely shrink the file size here if the
client is writing to it due to potential races */
- i_size_write(inode, le64_to_cpu(pfindData->EndOfFile));
+ i_size_write(inode, cifsInfo->server_eof);
/* 512 bytes (2**9) is the fake blocksize that must be
used for this calculation */
@@ -1755,6 +1756,7 @@
}
if (rc == 0) {
+ cifsInode->server_eof = attrs->ia_size;
rc = cifs_vmtruncate(inode, attrs->ia_size);
cifs_truncate_page(inode->i_mapping, inode->i_size);
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index c3c3e62..1a8be62 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -239,6 +239,7 @@
if (atomic_read(&cifsInfo->inUse) == 0)
atomic_set(&cifsInfo->inUse, 1);
+ cifsInfo->server_eof = end_of_file;
spin_lock(&tmp_inode->i_lock);
if (is_size_safe_to_change(cifsInfo, end_of_file)) {
/* can not safely change the file size here if the
@@ -375,6 +376,7 @@
tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
+ cifsInfo->server_eof = end_of_file;
spin_lock(&tmp_inode->i_lock);
if (is_size_safe_to_change(cifsInfo, end_of_file)) {
/* can not safely change the file size here if the