ocfs2/cluster: Make fence method configurable - v2
author: Sunil Mushran <sunil.mushran@oracle.com>
Wed, 18 Nov 2009 00:29:19 +0000 (16:29 -0800)
committer: Joel Becker <joel.becker@oracle.com>
Thu, 3 Dec 2009 00:49:26 +0000 (16:49 -0800)
By default, o2cb fences the box by calling emergency_restart(). While this
scheme works well in production, it gets in the way during testing because it
does not let the tester take stack/core dumps for analysis.

This patch allows the user to dynamically change the fence method to panic() by running:
# echo "panic" > /sys/kernel/config/cluster/<clustername>/fence_method

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
fs/ocfs2/cluster/nodemanager.c
fs/ocfs2/cluster/nodemanager.h
fs/ocfs2/cluster/quorum.c

index 7ee6188bc79a76ebc939b738bcfc3914c99b3ef6..c81142e3ef844def09ef96725f2ebb68247aaecc 100644 (file)
  * cluster references throughout where nodes are looked up */
 struct o2nm_cluster *o2nm_single_cluster = NULL;
 
+const char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
+               "reset",        /* O2NM_FENCE_RESET (default) */
+               "panic",        /* O2NM_FENCE_PANIC */
+};     /* read-only names, indexed by enum o2nm_fence_method */
 
 struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
 {
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
        return o2nm_cluster_attr_write(page, count,
                                       &cluster->cl_reconnect_delay_ms);
 }
+
+static ssize_t o2nm_cluster_attr_fence_method_read(
+       struct o2nm_cluster *cluster, char *page)
+{
+       const char *method;     /* printable name of the current method */

+       if (!cluster)
+               return 0;       /* no cluster: report nothing */
+       method = o2nm_fence_method_desc[cluster->cl_fence_method];
+       return sprintf(page, "%s\n", method);
+}
+
+static ssize_t o2nm_cluster_attr_fence_method_write(
+       struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+       unsigned int i;         /* candidate fence method being matched */

+       /* An empty write has no page[count - 1]; require a trailing '\n'. */
+       if (!count || page[count - 1] != '\n')
+               goto bail;
+       for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
+               if (count != strlen(o2nm_fence_method_desc[i]) + 1)
+                       continue;       /* length (name + newline) differs */
+               if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
+                       continue;       /* case-insensitive name mismatch */
+               if (cluster->cl_fence_method != i) {
+                       printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
+                              o2nm_fence_method_desc[i]);
+                       cluster->cl_fence_method = i;
+               }
+               return count;           /* whole write consumed */
+       }

+bail:
+       return -EINVAL;                 /* malformed or unknown method name */
+}
+
 static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
        .attr   = { .ca_owner = THIS_MODULE,
                    .ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
        .store  = o2nm_cluster_attr_reconnect_delay_ms_write,
 };
 
+static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
+       .attr   = { .ca_owner = THIS_MODULE,
+                   .ca_name = "fence_method",
+                   .ca_mode = S_IRUGO | S_IWUSR },     /* read: all, write: owner */
+       .show   = o2nm_cluster_attr_fence_method_read,  /* prints current method name */
+       .store  = o2nm_cluster_attr_fence_method_write, /* parses method name + '\n' */
+};
+
 static struct configfs_attribute *o2nm_cluster_attrs[] = {
        &o2nm_cluster_attr_idle_timeout_ms.attr,
        &o2nm_cluster_attr_keepalive_delay_ms.attr,
        &o2nm_cluster_attr_reconnect_delay_ms.attr,
+       &o2nm_cluster_attr_fence_method.attr,   /* "fence_method" */
        NULL,
 };
 static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
        cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
        cluster->cl_idle_timeout_ms    = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
        cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
+       cluster->cl_fence_method       = O2NM_FENCE_RESET;
 
        ret = &cluster->cl_group;
        o2nm_single_cluster = cluster;
index c992ea0da4add8295528b2e8b02c907ab9337df0..09ea2d388bbb564e176155057b64b989314ef2a0 100644 (file)
 #include <linux/configfs.h>
 #include <linux/rbtree.h>
 
+enum o2nm_fence_method {
+       O2NM_FENCE_RESET        = 0,    /* fence via emergency_restart() */
+       O2NM_FENCE_PANIC,               /* fence via panic() */
+       O2NM_FENCE_METHODS,     /* Number of fence methods */
+};
+
 struct o2nm_node {
        spinlock_t              nd_lock;
        struct config_item      nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
        unsigned int            cl_idle_timeout_ms;
        unsigned int            cl_keepalive_delay_ms;
        unsigned int            cl_reconnect_delay_ms;
+       enum o2nm_fence_method  cl_fence_method;
 
        /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
        unsigned long   cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
index bbacf7da48a4ea9d3fb3df86c86e6b6ccc843a44..639024033fceeea6cc5978bcdcf3c3a65f216986 100644 (file)
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
         * threads can still schedule, etc, etc */
        o2hb_stop_all_regions();
 
-       printk("ocfs2 is very sorry to be fencing this system by restarting\n");
-       emergency_restart();
+       switch (o2nm_single_cluster->cl_fence_method) {
+       case O2NM_FENCE_PANIC:
+               panic("*** ocfs2 is very sorry to be fencing this system by "
+                     "panicing ***\n");
+               break;
+       default:
+               WARN_ON(o2nm_single_cluster->cl_fence_method >=
+                       O2NM_FENCE_METHODS);
+       case O2NM_FENCE_RESET:
+               printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
+                      "system by restarting ***\n");
+               emergency_restart();
+               break;
+       };
 }
 
 /* Indicate that a timeout occured on a hearbeat region write. The