drbd: Allow online change of al-stripes and al-stripe-size

Allow to change the AL layout with an resize operation. For that
the reisze command gets two new fields: al_stripes and al_stripe_size.

In order to make the operation crash save:
1) Lock out all IO and MD-IO
2) Write the super block with MDF_PRIMARY_IND clear
3) write the bitmap to the new location (all zeros, since
   we allow only while connected)
4) Initialize the new AL-area
5) Write the super block with the restored MDF_PRIMARY_IND.
6) Unfreeze all IO

Since the AL-layout has no influence on the protocol, this operation
needs to be beforemed on both sides of a resource (if intended).

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 45d1275..8cc1e64 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -827,12 +827,17 @@
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size_sect, u_size;
+	struct drbd_md *md = &mdev->ldev->md;
+	u32 prev_al_stripe_size_4k;
+	u32 prev_al_stripes;
 	sector_t size;
 	char ppb[10];
+	void *buffer;
 
 	int md_moved, la_size_changed;
 	enum determine_dev_size rv = DS_UNCHANGED;
@@ -847,6 +852,11 @@
 	 * still lock the act_log to not trigger ASSERTs there.
 	 */
 	drbd_suspend_io(mdev);
+	buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
+	if (!buffer) {
+		drbd_resume_io(mdev);
+		return DS_ERROR;
+	}
 
 	/* no wait necessary anymore, actually we could assert that */
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@@ -855,7 +865,17 @@
 	prev_size = mdev->ldev->md.md_size_sect;
 	la_size_sect = mdev->ldev->md.la_size_sect;
 
-	/* TODO: should only be some assert here, not (re)init... */
+	if (rs) {
+		/* rs is non NULL if we should change the AL layout only */
+
+		prev_al_stripes = md->al_stripes;
+		prev_al_stripe_size_4k = md->al_stripe_size_4k;
+
+		md->al_stripes = rs->al_stripes;
+		md->al_stripe_size_4k = rs->al_stripe_size / 4;
+		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
+	}
+
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
 	rcu_read_lock();
@@ -863,6 +883,21 @@
 	rcu_read_unlock();
 	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
+	if (size < la_size_sect) {
+		if (rs && u_size == 0) {
+			/* Remove "rs &&" later. This check should always be active, but
+			   right now the receiver expects the permissive behavior */
+			dev_warn(DEV, "Implicit shrink not allowed. "
+				 "Use --size=%llus for explicit shrink.\n",
+				 (unsigned long long)size);
+			rv = DS_ERROR_SHRINK;
+		}
+		if (u_size > size)
+			rv = DS_ERROR_SPACE_MD;
+		if (rv != DS_UNCHANGED)
+			goto err_out;
+	}
+
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
 		int err;
@@ -886,38 +921,57 @@
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		     (unsigned long long)size>>1);
 	}
-	if (rv == DS_ERROR)
-		goto out;
+	if (rv <= DS_ERROR)
+		goto err_out;
 
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
 	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 
-	if (la_size_changed || md_moved) {
-		int err;
+	if (la_size_changed || md_moved || rs) {
+		u32 prev_flags;
 
 		drbd_al_shrink(mdev); /* All extents inactive. */
+
+		prev_flags = md->flags;
+		md->flags &= ~MDF_PRIMARY_IND;
+		drbd_md_write(mdev, buffer);
+
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
-				     "size changed", BM_LOCKED_MASK);
-		if (err) {
-			rv = DS_ERROR;
-			goto out;
-		}
-		drbd_md_mark_dirty(mdev);
+		drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+			       "size changed", BM_LOCKED_MASK);
+		drbd_initialize_al(mdev, buffer);
+
+		md->flags = prev_flags;
+		drbd_md_write(mdev, buffer);
+
+		if (rs)
+			dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
+				 md->al_stripes, md->al_stripe_size_4k * 4);
 	}
 
 	if (size > la_size_sect)
 		rv = DS_GREW;
 	if (size < la_size_sect)
 		rv = DS_SHRUNK;
-out:
+
+	if (0) {
+	err_out:
+		if (rs) {
+			md->al_stripes = prev_al_stripes;
+			md->al_stripe_size_4k = prev_al_stripe_size_4k;
+			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
+
+			drbd_md_set_sector_offsets(mdev, mdev->ldev);
+		}
+	}
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
+	drbd_md_put_buffer(mdev);
 	drbd_resume_io(mdev);
 
 	return rv;
@@ -1618,8 +1672,8 @@
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determine_dev_size(mdev, 0);
-	if (dd == DS_ERROR) {
+	dd = drbd_determine_dev_size(mdev, 0, NULL);
+	if (dd <= DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
 	} else if (dd == DS_GREW)
@@ -2316,6 +2370,7 @@
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
+	bool change_al_layout = false;
 	enum dds_flags ddsf;
 	sector_t u_size;
 	int err;
@@ -2326,31 +2381,33 @@
 	if (retcode != NO_ERROR)
 		goto fail;
 
+	mdev = adm_ctx.mdev;
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto fail;
+	}
+
 	memset(&rs, 0, sizeof(struct resize_parms));
+	rs.al_stripes = mdev->ldev->md.al_stripes;
+	rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
-			goto fail;
+			goto fail_ldev;
 		}
 	}
 
-	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (mdev->state.role == R_SECONDARY &&
 	    mdev->state.peer == R_SECONDARY) {
 		retcode = ERR_NO_PRIMARY;
-		goto fail;
-	}
-
-	if (!get_ldev(mdev)) {
-		retcode = ERR_NO_DISK;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
@@ -2369,6 +2426,28 @@
 		}
 	}
 
+	if (mdev->ldev->md.al_stripes != rs.al_stripes ||
+	    mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
+		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
+
+		if (al_size_k > (16 * 1024 * 1024)) {
+			retcode = ERR_MD_LAYOUT_TOO_BIG;
+			goto fail_ldev;
+		}
+
+		if (al_size_k < MD_32kB_SECT/2) {
+			retcode = ERR_MD_LAYOUT_TOO_SMALL;
+			goto fail_ldev;
+		}
+
+		if (mdev->state.conn != C_CONNECTED) {
+			retcode = ERR_MD_LAYOUT_CONNECTED;
+			goto fail_ldev;
+		}
+
+		change_al_layout = true;
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
@@ -2384,12 +2463,18 @@
 	}
 
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determine_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto fail;
+	} else if (dd == DS_ERROR_SPACE_MD) {
+		retcode = ERR_MD_LAYOUT_NO_FIT;
+		goto fail;
+	} else if (dd == DS_ERROR_SHRINK) {
+		retcode = ERR_IMPLICIT_SHRINK;
+		goto fail;
 	}
 
 	if (mdev->state.conn == C_CONNECTED) {