Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 235
1 file changed, 139 insertions, 96 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aeceedfc530b..4c74424c78b0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 static struct workqueue_struct *md_wq;
 static struct workqueue_struct *md_misc_wq;
 
+static int remove_and_add_spares(struct mddev *mddev,
+				 struct md_rdev *this);
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 /*
@@ -1564,8 +1567,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 					     sector, count, 1) == 0)
 				return -EINVAL;
 		}
-	} else if (sb->bblog_offset == 0)
-		rdev->badblocks.shift = -1;
+	} else if (sb->bblog_offset != 0)
+		rdev->badblocks.shift = 0;
 
 	if (!refdev) {
 		ret = 1;
@@ -2411,6 +2414,11 @@ static void md_update_sb(struct mddev * mddev, int force_change)
 	int nospares = 0;
 	int any_badblocks_changed = 0;
 
+	if (mddev->ro) {
+		if (force_change)
+			set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		return;
+	}
 repeat:
 	/* First make sure individual recovery_offsets are correct */
 	rdev_for_each(rdev, mddev) {
@@ -2800,12 +2808,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 		/* personality does all needed checks */
 		if (rdev->mddev->pers->hot_remove_disk == NULL)
 			return -EINVAL;
-		err = rdev->mddev->pers->
-			hot_remove_disk(rdev->mddev, rdev);
-		if (err)
-			return err;
-		sysfs_unlink_rdev(rdev->mddev, rdev);
-		rdev->raid_disk = -1;
+		clear_bit(Blocked, &rdev->flags);
+		remove_and_add_spares(rdev->mddev, rdev);
+		if (rdev->raid_disk >= 0)
+			return -EBUSY;
 		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 		md_wakeup_thread(rdev->mddev->thread);
 	} else if (rdev->mddev->pers) {
@@ -3221,7 +3227,7 @@ int md_rdev_init(struct md_rdev *rdev)
 	 * be used - I wonder if that matters
 	 */
 	rdev->badblocks.count = 0;
-	rdev->badblocks.shift = 0;
+	rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
 	rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
 	seqlock_init(&rdev->badblocks.lock);
 	if (rdev->badblocks.page == NULL)
@@ -3293,9 +3299,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
 			goto abort_free;
 		}
 	}
-	if (super_format == -1)
-		/* hot-add for 0.90, or non-persistent: so no badblocks */
-		rdev->badblocks.shift = -1;
 
 	return rdev;
 
@@ -4225,8 +4228,6 @@ action_show(struct mddev *mddev, char *page)
 	return sprintf(page, "%s\n", type);
 }
 
-static void reap_sync_thread(struct mddev *mddev);
-
 static ssize_t
 action_store(struct mddev *mddev, const char *page, size_t len)
 {
@@ -4241,7 +4242,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			reap_sync_thread(mddev);
+			md_reap_sync_thread(mddev);
 		}
 	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 		   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -5279,7 +5280,7 @@ static void __md_stop_writes(struct mddev *mddev)
 	if (mddev->sync_thread) {
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-		reap_sync_thread(mddev);
+		md_reap_sync_thread(mddev);
 	}
 
 	del_timer_sync(&mddev->safemode_timer);
@@ -5287,7 +5288,8 @@ static void __md_stop_writes(struct mddev *mddev)
 	bitmap_flush(mddev);
 	md_super_wait(mddev);
 
-	if (!mddev->in_sync || mddev->flags) {
+	if (mddev->ro == 0 &&
+	    (!mddev->in_sync || mddev->flags)) {
 		/* mark array as shutdown cleanly */
 		mddev->in_sync = 1;
 		md_update_sb(mddev, 1);
@@ -5810,7 +5812,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
 		else
 			sysfs_notify_dirent_safe(rdev->sysfs_state);
 
-		md_update_sb(mddev, 1);
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		if (mddev->degraded)
 			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5877,6 +5879,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev)
 	if (!rdev)
 		return -ENXIO;
 
+	clear_bit(Blocked, &rdev->flags);
+	remove_and_add_spares(mddev, rdev);
+
 	if (rdev->raid_disk >= 0)
 		goto busy;
 
@@ -6490,6 +6495,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		err = md_set_readonly(mddev, bdev);
 		goto done_unlock;
 
+	case HOT_REMOVE_DISK:
+		err = hot_remove_disk(mddev, new_decode_dev(arg));
+		goto done_unlock;
+
+	case ADD_NEW_DISK:
+		/* We can support ADD_NEW_DISK on read-only arrays
+		 * only if we are re-adding a preexisting device.
+		 * So require mddev->pers and MD_DISK_SYNC.
+		 */
+		if (mddev->pers) {
+			mdu_disk_info_t info;
+			if (copy_from_user(&info, argp, sizeof(info)))
+				err = -EFAULT;
+			else if (!(info.state & (1<<MD_DISK_SYNC)))
+				/* Need to clear read-only for this */
+				break;
+			else
+				err = add_new_disk(mddev, &info);
+			goto done_unlock;
+		}
+		break;
+
 	case BLKROSET:
 		if (get_user(ro, (int __user *)(arg))) {
 			err = -EFAULT;
@@ -6560,10 +6587,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		goto done_unlock;
 	}
 
-	case HOT_REMOVE_DISK:
-		err = hot_remove_disk(mddev, new_decode_dev(arg));
-		goto done_unlock;
-
 	case HOT_ADD_DISK:
 		err = hot_add_disk(mddev, new_decode_dev(arg));
 		goto done_unlock;
@@ -7644,14 +7667,16 @@ void md_do_sync(struct md_thread *thread)
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
-static int remove_and_add_spares(struct mddev *mddev)
+static int remove_and_add_spares(struct mddev *mddev,
+				 struct md_rdev *this)
 {
 	struct md_rdev *rdev;
 	int spares = 0;
 	int removed = 0;
 
 	rdev_for_each(rdev, mddev)
-		if (rdev->raid_disk >= 0 &&
+		if ((this == NULL || rdev == this) &&
+		    rdev->raid_disk >= 0 &&
 		    !test_bit(Blocked, &rdev->flags) &&
 		    (test_bit(Faulty, &rdev->flags) ||
 		     ! test_bit(In_sync, &rdev->flags)) &&
@@ -7666,74 +7691,52 @@ static int remove_and_add_spares(struct mddev *mddev)
 	if (removed && mddev->kobj.sd)
 		sysfs_notify(&mddev->kobj, NULL, "degraded");
 
+	if (this)
+		goto no_add;
+
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(In_sync, &rdev->flags) &&
 		    !test_bit(Faulty, &rdev->flags))
 			spares++;
-		if (rdev->raid_disk < 0
-		    && !test_bit(Faulty, &rdev->flags)) {
-			rdev->recovery_offset = 0;
-			if (mddev->pers->
-			    hot_add_disk(mddev, rdev) == 0) {
-				if (sysfs_link_rdev(mddev, rdev))
-					/* failure here is OK */;
-				spares++;
-				md_new_event(mddev);
-				set_bit(MD_CHANGE_DEVS, &mddev->flags);
-			}
+		if (rdev->raid_disk >= 0)
+			continue;
+		if (test_bit(Faulty, &rdev->flags))
+			continue;
+		if (mddev->ro &&
+		    rdev->saved_raid_disk < 0)
+			continue;
+
+		rdev->recovery_offset = 0;
+		if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
+			spin_lock_irq(&mddev->write_lock);
+			if (mddev->in_sync)
+				/* OK, this device, which is in_sync,
+				 * will definitely be noticed before
+				 * the next write, so recovery isn't
+				 * needed.
+				 */
+				rdev->recovery_offset = mddev->recovery_cp;
+			spin_unlock_irq(&mddev->write_lock);
+		}
+		if (mddev->ro && rdev->recovery_offset != MaxSector)
+			/* not safe to add this disk now */
+			continue;
+		if (mddev->pers->
+		    hot_add_disk(mddev, rdev) == 0) {
+			if (sysfs_link_rdev(mddev, rdev))
+				/* failure here is OK */;
+			spares++;
+			md_new_event(mddev);
+			set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		}
 	}
+no_add:
 	if (removed)
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	return spares;
 }
 
-static void reap_sync_thread(struct mddev *mddev)
-{
-	struct md_rdev *rdev;
-
-	/* resync has finished, collect result */
-	md_unregister_thread(&mddev->sync_thread);
-	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-		/* success...*/
-		/* activate any spares */
-		if (mddev->pers->spare_active(mddev)) {
-			sysfs_notify(&mddev->kobj, NULL,
-				     "degraded");
-			set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		}
-	}
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    mddev->pers->finish_reshape)
-		mddev->pers->finish_reshape(mddev);
-
-	/* If array is no-longer degraded, then any saved_raid_disk
-	 * information must be scrapped.  Also if any device is now
-	 * In_sync we must scrape the saved_raid_disk for that device
-	 * do the superblock for an incrementally recovered device
-	 * written out.
-	 */
-	rdev_for_each(rdev, mddev)
-		if (!mddev->degraded ||
-		    test_bit(In_sync, &rdev->flags))
-			rdev->saved_raid_disk = -1;
-
-	md_update_sb(mddev, 1);
-	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
-	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
-	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
-	/* flag recovery needed just to double check */
-	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	sysfs_notify_dirent_safe(mddev->sysfs_action);
-	md_new_event(mddev);
-	if (mddev->event_work.func)
-		queue_work(md_misc_wq, &mddev->event_work);
-}
-
 /*
  * This routine is regularly called by all per-raid-array threads to
  * deal with generic issues like resync and super-block update.
@@ -7789,22 +7792,16 @@ void md_check_recovery(struct mddev *mddev)
 		int spares = 0;
 
 		if (mddev->ro) {
-			/* Only thing we do on a ro array is remove
-			 * failed devices.
+			/* On a read-only array we can:
+			 * - remove failed devices
+			 * - add already-in_sync devices if the array itself
+			 *   is in-sync.
+			 * As we only add devices that are already in-sync,
+			 * we can activate the spares immediately.
 			 */
-			struct md_rdev *rdev;
-			rdev_for_each(rdev, mddev)
-				if (rdev->raid_disk >= 0 &&
-				    !test_bit(Blocked, &rdev->flags) &&
-				    test_bit(Faulty, &rdev->flags) &&
-				    atomic_read(&rdev->nr_pending)==0) {
-					if (mddev->pers->hot_remove_disk(
-						    mddev, rdev) == 0) {
-						sysfs_unlink_rdev(mddev, rdev);
-						rdev->raid_disk = -1;
-					}
-				}
 			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+			remove_and_add_spares(mddev, NULL);
+			mddev->pers->spare_active(mddev);
 			goto unlock;
 		}
 
@@ -7836,7 +7833,7 @@ void md_check_recovery(struct mddev *mddev)
 			goto unlock;
 		}
 		if (mddev->sync_thread) {
-			reap_sync_thread(mddev);
+			md_reap_sync_thread(mddev);
 			goto unlock;
 		}
 		/* Set RUNNING before clearing NEEDED to avoid
@@ -7867,7 +7864,7 @@ void md_check_recovery(struct mddev *mddev)
 				goto unlock;
 			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-		} else if ((spares = remove_and_add_spares(mddev))) {
+		} else if ((spares = remove_and_add_spares(mddev, NULL))) {
 			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 			clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@ -7917,6 +7914,51 @@ void md_check_recovery(struct mddev *mddev)
 	}
 }
 
+void md_reap_sync_thread(struct mddev *mddev)
+{
+	struct md_rdev *rdev;
+
+	/* resync has finished, collect result */
+	md_unregister_thread(&mddev->sync_thread);
+	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+		/* success...*/
+		/* activate any spares */
+		if (mddev->pers->spare_active(mddev)) {
+			sysfs_notify(&mddev->kobj, NULL,
+				     "degraded");
+			set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		}
+	}
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    mddev->pers->finish_reshape)
+		mddev->pers->finish_reshape(mddev);
+
+	/* If array is no-longer degraded, then any saved_raid_disk
+	 * information must be scrapped.  Also if any device is now
+	 * In_sync we must scrape the saved_raid_disk for that device
+	 * do the superblock for an incrementally recovered device
+	 * written out.
+	 */
+	rdev_for_each(rdev, mddev)
+		if (!mddev->degraded ||
+		    test_bit(In_sync, &rdev->flags))
+			rdev->saved_raid_disk = -1;
+
+	md_update_sb(mddev, 1);
+	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+	/* flag recovery needed just to double check */
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	sysfs_notify_dirent_safe(mddev->sysfs_action);
+	md_new_event(mddev);
+	if (mddev->event_work.func)
+		queue_work(md_misc_wq, &mddev->event_work);
+}
+
 void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
 {
 	sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -8642,6 +8684,7 @@ EXPORT_SYMBOL(md_register_thread);
 EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_wakeup_thread);
 EXPORT_SYMBOL(md_check_recovery);
+EXPORT_SYMBOL(md_reap_sync_thread);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("MD RAID framework");
 MODULE_ALIAS("md");
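
The net user-visible effect of the md_ioctl() hunks above is that a read-only array now accepts HOT_REMOVE_DISK, and accepts ADD_NEW_DISK when the caller marks the incoming device MD_DISK_SYNC, i.e. when a preexisting in-sync member is being re-added. Below is a minimal userspace sketch of that re-add path; it is illustrative rather than taken from mdadm, and the /dev/md0 path and the 8:16 major:minor pair are made-up example values. It assumes only the uapi headers <linux/raid/md_u.h> and <linux/raid/md_p.h>.

/* Hypothetical re-add of an in-sync member to a read-only array. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/sysmacros.h>	/* makedev() */
#include <linux/major.h>	/* MD_MAJOR, used by the md ioctl numbers */
#include <linux/raid/md_u.h>	/* ADD_NEW_DISK, HOT_REMOVE_DISK, mdu_disk_info_t */
#include <linux/raid/md_p.h>	/* MD_DISK_SYNC */

int main(void)
{
	int fd = open("/dev/md0", O_RDONLY);
	if (fd < 0) {
		perror("open /dev/md0");
		return 1;
	}

	/* Drop the failed member first; the argument is its dev_t,
	 * which the kernel decodes with new_decode_dev(). */
	if (ioctl(fd, HOT_REMOVE_DISK, (unsigned long)makedev(8, 16)) < 0)
		perror("HOT_REMOVE_DISK");

	/* Re-add it.  MD_DISK_SYNC must be set in info.state: without
	 * it, md_ioctl() breaks out of the early switch ("Need to
	 * clear read-only for this") and the add is refused until the
	 * array is made writable again. */
	mdu_disk_info_t info = {
		.major = 8,
		.minor = 16,
		.state = (1 << MD_DISK_SYNC),
	};
	if (ioctl(fd, ADD_NEW_DISK, &info) < 0)
		perror("ADD_NEW_DISK");

	close(fd);
	return 0;
}

This restriction is what lets md_check_recovery() call pers->spare_active() immediately on a read-only array: remove_and_add_spares() only hot-adds a device there when it was already in-sync (saved_raid_disk >= 0 and recovery_offset reaching MaxSector), so no resync ever needs to start while the array is read-only.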
