Do not account for hugetlbfs quota at mmap() time if mapping [SHM|MAP]_NORESERVE

Commit 5a6fe125950676015f5108fb71b2a67441755003 brought hugetlbfs more
in line with the core VM by obeying VM_NORESERVE and not reserving
hugepages for both shared and private mappings when [SHM|MAP]_NORESERVE
are specified.  However, it is still taking filesystem quota
unconditionally.

At fault time, if there are no reserves, an attempt is made to allocate
the page and account for filesystem quota.  If either fails, the fault
fails.  The impact is that quota is getting accounted for twice.  This
patch partially reverts 5a6fe125950676015f5108fb71b2a67441755003.  To
help prevent this mistake happening again, it improves the documentation
of hugetlb_reserve_pages().

Reported-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2074642..107da3d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2272,10 +2272,18 @@
 					struct vm_area_struct *vma,
 					int acctflag)
 {
-	long ret = 0, chg;
+	long ret, chg;
 	struct hstate *h = hstate_inode(inode);
 
 	/*
+	 * Only apply hugepage reservation if asked. At fault time, an
+	 * attempt will be made for VM_NORESERVE to allocate a page
+	 * and filesystem quota without using reserves
+	 */
+	if (acctflag & VM_NORESERVE)
+		return 0;
+
+	/*
 	 * Shared mappings base their reservation on the number of pages that
 	 * are already allocated on behalf of the file. Private mappings need
 	 * to reserve the full area even if read-only as mprotect() may be
@@ -2283,42 +2291,47 @@
 	 */
 	if (!vma || vma->vm_flags & VM_SHARED)
 		chg = region_chg(&inode->i_mapping->private_list, from, to);
-	else
-		chg = to - from;
-
-	if (chg < 0)
-		return chg;
-
-	if (hugetlb_get_quota(inode->i_mapping, chg))
-		return -ENOSPC;
-
-	/*
-	 * Only apply hugepage reservation if asked. We still have to
-	 * take the filesystem quota because it is an upper limit
-	 * defined for the mount and not necessarily memory as a whole
-	 */
-	if (acctflag & VM_NORESERVE) {
-		reset_vma_resv_huge_pages(vma);
-		return 0;
-	}
-
-	ret = hugetlb_acct_memory(h, chg);
-	if (ret < 0) {
-		hugetlb_put_quota(inode->i_mapping, chg);
-		return ret;
-	}
-	if (!vma || vma->vm_flags & VM_SHARED)
-		region_add(&inode->i_mapping->private_list, from, to);
 	else {
 		struct resv_map *resv_map = resv_map_alloc();
-
 		if (!resv_map)
 			return -ENOMEM;
 
+		chg = to - from;
+
 		set_vma_resv_map(vma, resv_map);
 		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 	}
 
+	if (chg < 0)
+		return chg;
+
+	/* There must be enough filesystem quota for the mapping */
+	if (hugetlb_get_quota(inode->i_mapping, chg))
+		return -ENOSPC;
+
+	/*
+	 * Check enough hugepages are available for the reservation.
+	 * Hand back the quota if there are not
+	 */
+	ret = hugetlb_acct_memory(h, chg);
+	if (ret < 0) {
+		hugetlb_put_quota(inode->i_mapping, chg);
+		return ret;
+	}
+
+	/*
+	 * Account for the reservations made. Shared mappings record regions
+	 * that have reservations as they are shared by multiple VMAs.
+	 * When the last VMA disappears, the region map says how much
+	 * the reservation was and the page cache tells how much of
+	 * the reservation was consumed. Private mappings are per-VMA and
+	 * only the consumed reservations are tracked. When the VMA
+	 * disappears, the original reservation is the VMA size and the
+	 * consumed reservations are stored in the map. Hence, nothing
+	 * else has to be done for private mappings here
+	 */
+	if (!vma || vma->vm_flags & VM_SHARED)
+		region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
 }