ore/exofs: Change ore_check_io API

The current ore_check_io API receives a residual pointer to report partial IO. But it is actually not used, because in a multiple-device IO there is never any linearity in the IO failure. On the other hand, if every failing device is reported through a received callback, measures can be taken to handle only the failed devices, one at a time. This will also be needed by the objects-layout-driver for its error reporting facility. Exofs is not currently using the new information and keeps the old behaviour of failing the complete IO in case of an error (no partial completion). TODO: Use an ore_check_io callback to set_page_error only the failing pages, and re-dirty write pages. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
author: Boaz Harrosh <bharrosh@panasas.com> 2011-09-28 13:25:50 +0300
committer: Boaz Harrosh <bharrosh@panasas.com> 2011-10-14 18:54:42 +0200
commit: 4b46c9f5cf69505f0bc708995b88b0cc60317ffd (patch)
tree: e5369fe948509c230470f922a0cd89cda60f2692 /fs/exofs
parent: 5a51c0c7e9a913649aa65d8233470682bcbb7694 (diff)
2 files changed, 20 insertions, 23 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5a62420cbdb..86c0ac87b8e 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -194,19 +194,16 @@ static void update_write_page(struct page *page, int ret)
 static int __readpages_done(struct page_collect *pcol)
 {
 	int i;
-	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
-	int ret = ore_check_io(pcol->ios, &resid);
+	int ret = ore_check_io(pcol->ios, NULL);
 
 	if (likely(!ret)) {
 		good_bytes = pcol->length;
 		ret = PAGE_WAS_NOT_IN_IO;
 	} else {
-		good_bytes = pcol->length - resid;
+		good_bytes = 0;
 	}
-	if (good_bytes > pcol->ios->length)
-		good_bytes = pcol->ios->length;
 
 	EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
 		     " length=0x%lx nr_pages=%u\n",
@@ -519,10 +516,9 @@ static void writepages_done(struct ore_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 	int i;
-	u64 resid;
 	u64  good_bytes;
 	u64  length = 0;
-	int ret = ore_check_io(ios, &resid);
+	int ret = ore_check_io(ios, NULL);
 
 	atomic_dec(&pcol->sbi->s_curr_pending);
 
@@ -530,10 +526,8 @@ static void writepages_done(struct ore_io_state *ios, void *p)
 		good_bytes = pcol->length;
 		ret = PAGE_WAS_NOT_IN_IO;
 	} else {
-		good_bytes = pcol->length - resid;
+		good_bytes = 0;
 	}
-	if (good_bytes > pcol->ios->length)
-		good_bytes = pcol->ios->length;
 
 	EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
 		     " length=0x%lx nr_pages=%u\n",
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 4ca59d49279..3b1cc3a132d 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -317,7 +317,7 @@ static void _clear_bio(struct bio *bio)
 	}
 }
 
-int ore_check_io(struct ore_io_state *ios, u64 *resid)
+int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
 {
 	enum osd_err_priority acumulated_osd_err = 0;
 	int acumulated_lin_err = 0;
@@ -325,7 +325,8 @@ int ore_check_io(struct ore_io_state *ios, u64 *resid)
 
 	for (i = 0; i < ios->numdevs; i++) {
 		struct osd_sense_info osi;
-		struct osd_request *or = ios->per_dev[i].or;
+		struct ore_per_dev_state *per_dev = &ios->per_dev[i];
+		struct osd_request *or = per_dev->or;
 		int ret;
 
 		if (unlikely(!or))
@@ -337,29 +338,31 @@ int ore_check_io(struct ore_io_state *ios, u64 *resid)
 
 		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
 			/* start read offset passed endof file */
-			_clear_bio(ios->per_dev[i].bio);
+			_clear_bio(per_dev->bio);
 			ORE_DBGMSG("start read offset passed end of file "
 				"offset=0x%llx, length=0x%llx\n",
-				_LLU(ios->per_dev[i].offset),
-				_LLU(ios->per_dev[i].length));
+				_LLU(per_dev->offset),
+				_LLU(per_dev->length));
 
 			continue; /* we recovered */
 		}
 
+		if (on_dev_error) {
+			u64 residual = ios->reading ?
+					or->in.residual : or->out.residual;
+			u64 offset = (ios->offset + ios->length) - residual;
+			struct ore_dev *od = ios->oc->ods[
+					per_dev->dev - ios->oc->first_dev];
+
+			on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri,
+				     offset, residual);
+		}
 		if (osi.osd_err_pri >= acumulated_osd_err) {
 			acumulated_osd_err = osi.osd_err_pri;
 			acumulated_lin_err = ret;
 		}
 	}
 
-	/* TODO: raid specific residual calculations */
-	if (resid) {
-		if (likely(!acumulated_lin_err))
-			*resid = 0;
-		else
-			*resid = ios->length;
-	}
-
 	return acumulated_lin_err;
 }
 EXPORT_SYMBOL(ore_check_io);
author	Boaz Harrosh <bharrosh@panasas.com>	2011-09-28 13:25:50 +0300
committer	Boaz Harrosh <bharrosh@panasas.com>	2011-10-14 18:54:42 +0200
commit	4b46c9f5cf69505f0bc708995b88b0cc60317ffd (patch)
tree	e5369fe948509c230470f922a0cd89cda60f2692 /fs/exofs
parent	5a51c0c7e9a913649aa65d8233470682bcbb7694 (diff)