1515
1616#define LVB_SIZE 64
1717#define NEW_DEV_TIMEOUT 5000
/*
 * Upper bound, in jiffies (30 * HZ, i.e. 30 seconds), that dlm_lock_sync()
 * passes to wait_event_timeout() while waiting for a DLM lock request to
 * complete.
 */
18+ #define WAIT_DLM_LOCK_TIMEOUT (30 * HZ)
1819
1920struct dlm_lock_resource {
2021 dlm_lockspace_t * ls ;
@@ -56,6 +57,7 @@ struct resync_info {
5657#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
5758#define MD_CLUSTER_PENDING_RECV_EVENT 7
5859#define MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD 8
/*
 * Bit in md_cluster_info state. Set at the end of process_add_new_disk();
 * cleared when a RESYNCING_START message is received and in the branch of
 * process_suspend_info() that also clears MD_RESYNCING_REMOTE; read back
 * through resync_status_get().
 */
60+ #define MD_CLUSTER_WAITING_FOR_SYNC 9
5961
6062struct md_cluster_info {
6163 struct mddev * mddev ; /* the md device which md_cluster_info belongs to */
@@ -91,6 +93,7 @@ struct md_cluster_info {
9193 sector_t sync_hi ;
9294};
9395
96+ /* For compatibility, add the new msg_type at the end. */
9497enum msg_type {
9598 METADATA_UPDATED = 0 ,
9699 RESYNCING ,
@@ -100,6 +103,7 @@ enum msg_type {
100103 BITMAP_NEEDS_SYNC ,
101104 CHANGE_CAPACITY ,
102105 BITMAP_RESIZE ,
106+ RESYNCING_START ,
103107};
104108
105109struct cluster_msg {
@@ -130,8 +134,13 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
130134 0 , sync_ast , res , res -> bast );
131135 if (ret )
132136 return ret ;
133- wait_event (res -> sync_locking , res -> sync_locking_done );
137+ ret = wait_event_timeout (res -> sync_locking , res -> sync_locking_done ,
138+ WAIT_DLM_LOCK_TIMEOUT );
134139 res -> sync_locking_done = false;
140+ if (!ret ) {
141+ pr_err ("locking DLM '%s' timeout!\n" , res -> name );
142+ return - EBUSY ;
143+ }
135144 if (res -> lksb .sb_status == 0 )
136145 res -> mode = mode ;
137146 return res -> lksb .sb_status ;
@@ -455,6 +464,7 @@ static void process_suspend_info(struct mddev *mddev,
455464 clear_bit (MD_RESYNCING_REMOTE , & mddev -> recovery );
456465 remove_suspend_info (mddev , slot );
457466 set_bit (MD_RECOVERY_NEEDED , & mddev -> recovery );
467+ clear_bit (MD_CLUSTER_WAITING_FOR_SYNC , & cinfo -> state );
458468 md_wakeup_thread (mddev -> thread );
459469 return ;
460470 }
@@ -525,6 +535,7 @@ static int process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
525535 res = -1 ;
526536 }
527537 clear_bit (MD_CLUSTER_WAITING_FOR_NEWDISK , & cinfo -> state );
538+ set_bit (MD_CLUSTER_WAITING_FOR_SYNC , & cinfo -> state );
528539 return res ;
529540}
530541
@@ -593,6 +604,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
593604 case CHANGE_CAPACITY :
594605 set_capacity_and_notify (mddev -> gendisk , mddev -> array_sectors );
595606 break ;
607+ case RESYNCING_START :
608+ clear_bit (MD_CLUSTER_WAITING_FOR_SYNC , & mddev -> cluster_info -> state );
609+ break ;
596610 case RESYNCING :
597611 set_bit (MD_RESYNCING_REMOTE , & mddev -> recovery );
598612 process_suspend_info (mddev , le32_to_cpu (msg -> slot ),
@@ -743,15 +757,15 @@ static void unlock_comm(struct md_cluster_info *cinfo)
743757 */
744758static int __sendmsg (struct md_cluster_info * cinfo , struct cluster_msg * cmsg )
745759{
746- int error ;
760+ int error , unlock_error ;
747761 int slot = cinfo -> slot_number - 1 ;
748762
749763 cmsg -> slot = cpu_to_le32 (slot );
750764 /*get EX on Message*/
751765 error = dlm_lock_sync (cinfo -> message_lockres , DLM_LOCK_EX );
752766 if (error ) {
753767 pr_err ("md-cluster: failed to get EX on MESSAGE (%d)\n" , error );
754- goto failed_message ;
768+ return error ;
755769 }
756770
757771 memcpy (cinfo -> message_lockres -> lksb .sb_lvbptr , (void * )cmsg ,
@@ -781,14 +795,10 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
781795 }
782796
783797failed_ack :
784- error = dlm_unlock_sync (cinfo -> message_lockres );
785- if (unlikely (error != 0 )) {
798+ while ((unlock_error = dlm_unlock_sync (cinfo -> message_lockres )))
786799 pr_err ("md-cluster: failed convert to NL on MESSAGE(%d)\n" ,
787- error );
788- /* in case the message can't be released due to some reason */
789- goto failed_ack ;
790- }
791- failed_message :
800+ unlock_error );
801+
792802 return error ;
793803}
794804
@@ -1343,6 +1353,23 @@ static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
13431353 spin_unlock_irq (& cinfo -> suspend_lock );
13441354}
13451355
/*
 * resync_status_get() - report whether the WAITING_FOR_SYNC flag is set.
 * @mddev: md device whose cluster_info state is inspected.
 *
 * Returns the result of test_bit() on MD_CLUSTER_WAITING_FOR_SYNC in the
 * cluster state word: non-zero while the bit is set, 0 once it is cleared.
 * The function only reads the bit; it never modifies it.
 */
1356+ static int resync_status_get (struct mddev * mddev )
1357+ {
1358+ struct md_cluster_info * cinfo = mddev -> cluster_info ;
1359+
1360+ return test_bit (MD_CLUSTER_WAITING_FOR_SYNC , & cinfo -> state );
1361+ }
1362+
/*
 * resync_start_notify() - send a RESYNCING_START cluster message.
 * @mddev: md device whose cluster_info is used to send the message.
 *
 * Builds a cluster_msg whose only explicitly set field is the message type
 * and hands it to sendmsg(). On the receiving side, process_recvd_msg()
 * handles RESYNCING_START by clearing MD_CLUSTER_WAITING_FOR_SYNC.
 *
 * Returns whatever sendmsg() returns.
 */
1363+ static int resync_start_notify (struct mddev * mddev )
1364+ {
1365+ struct md_cluster_info * cinfo = mddev -> cluster_info ;
/* All other message fields start out zeroed. */
1366+ struct cluster_msg cmsg = {0 };
1367+
/* The type is stored little-endian, matching le32_to_cpu() use on receive. */
1368+ cmsg .type = cpu_to_le32 (RESYNCING_START );
1369+
/* NOTE(review): meaning of the third argument (0) is defined by sendmsg(), outside this view. */
1370+ return sendmsg (cinfo , & cmsg , 0 );
1371+ }
1372+
13461373static int resync_info_update (struct mddev * mddev , sector_t lo , sector_t hi )
13471374{
13481375 struct md_cluster_info * cinfo = mddev -> cluster_info ;
@@ -1570,13 +1597,15 @@ static int gather_bitmaps(struct md_rdev *rdev)
15701597 return err ;
15711598}
15721599
1573- static struct md_cluster_operations cluster_ops = {
1600+ static const struct md_cluster_operations cluster_ops = {
15741601 .join = join ,
15751602 .leave = leave ,
15761603 .slot_number = slot_number ,
15771604 .resync_start = resync_start ,
15781605 .resync_finish = resync_finish ,
15791606 .resync_info_update = resync_info_update ,
1607+ .resync_start_notify = resync_start_notify ,
1608+ .resync_status_get = resync_status_get ,
15801609 .resync_info_get = resync_info_get ,
15811610 .metadata_update_start = metadata_update_start ,
15821611 .metadata_update_finish = metadata_update_finish ,
0 commit comments