@@ -1516,256 +1516,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
 	raid10_write_request(mddev, bio, r10_bio);
 }
 
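-/*
- * Split the bio at 'sectors' into two halves and send one half down the
- * normal request path while returning the other to the caller: with
- * want_first the caller keeps the leading split and the tail is
- * resubmitted, otherwise the leading split is resubmitted and the caller
- * keeps the tail. The barrier is released around submit_bio_noacct() so
- * the resubmitted half cannot deadlock against our own barrier window.
- */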
-static struct bio *raid10_split_bio(struct r10conf *conf,
-		struct bio *bio, sector_t sectors, bool want_first)
-{
-	struct bio *split;
-
-	split = bio_split(bio, sectors, GFP_NOIO, &conf->bio_split);
-	bio_chain(split, bio);
-	allow_barrier(conf);
-	if (want_first) {
-		submit_bio_noacct(bio);
-		bio = split;
-	} else
-		submit_bio_noacct(split);
-	wait_barrier(conf);
-
-	return bio;
-}
-
-static void raid10_end_discard_request(struct bio *bio)
-{
-	struct r10bio *r10_bio = bio->bi_private;
-	struct r10conf *conf = r10_bio->mddev->private;
-	struct md_rdev *rdev = NULL;
-	int dev;
-	int slot, repl;
-
-	/*
-	 * We don't care about the return value of the discard bio.
-	 */
-	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-		set_bit(R10BIO_Uptodate, &r10_bio->state);
-
-	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
-	if (repl)
-		rdev = conf->mirrors[dev].replacement;
-	if (!rdev) {
-		/* raid10_remove_disk uses smp_mb to make sure rdev is set to
-		 * replacement before setting replacement to NULL. It can read
-		 * rdev first without barrier protection even if replacement is NULL.
-		 */
-		smp_rmb();
-		rdev = conf->mirrors[dev].rdev;
-	}
-
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		md_write_end(r10_bio->mddev);
-		raid_end_bio_io(r10_bio);
-	}
-
-	rdev_dec_pending(rdev, conf->mddev);
-}
-
-/* There are some limitations to handling a discard bio:
- * 1st, the discard size must be bigger than stripe_size*2.
- * 2nd, if the discard bio spans reshape progress, we use the old way to
- * handle the discard bio.
- */
-static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
-{
-	struct r10conf *conf = mddev->private;
-	struct geom *geo = &conf->geo;
-	struct r10bio *r10_bio;
-
-	int disk;
-	sector_t chunk;
-	unsigned int stripe_size;
-	sector_t split_size;
-
-	sector_t bio_start, bio_end;
-	sector_t first_stripe_index, last_stripe_index;
-	sector_t start_disk_offset;
-	unsigned int start_disk_index;
-	sector_t end_disk_offset;
-	unsigned int end_disk_index;
-	unsigned int remainder;
-
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-		return -EAGAIN;
-
-	wait_barrier(conf);
-
-	/* Check reshape again to avoid a reshape happening after checking
-	 * MD_RECOVERY_RESHAPE and before wait_barrier
-	 */
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-		goto out;
-
-	stripe_size = geo->raid_disks << geo->chunk_shift;
-	bio_start = bio->bi_iter.bi_sector;
-	bio_end = bio_end_sector(bio);
-
-	/* A discard bio may be smaller than the stripe size, or it may cross
-	 * a stripe boundary while its discard region is larger than one
-	 * stripe. For the far offset layout, if the discard region is not
-	 * aligned with the stripe size, there are holes when we submit the
-	 * discard bios to the member disks. For simplicity, we only handle
-	 * discard bios whose discard region is bigger than stripe_size*2.
-	 */
-	if (bio_sectors(bio) < stripe_size * 2)
-		goto out;
-
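-	/*
-	 * E.g., with a hypothetical geometry of raid_disks=4 and
-	 * chunk_shift=10 (512KiB chunks), stripe_size = 4 << 10 = 4096
-	 * sectors, so only discards of at least 8192 sectors (4MiB) are
-	 * handled here; smaller ones take the regular write path.
-	 */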
-	/* For the far offset layout, if the bio is not aligned with the
-	 * stripe size, split off the part that is not aligned with the
-	 * stripe size.
-	 */
-	div_u64_rem(bio_start, stripe_size, &remainder);
-	if (geo->far_offset && remainder) {
-		split_size = stripe_size - remainder;
-		bio = raid10_split_bio(conf, bio, split_size, false);
-	}
-	div_u64_rem(bio_end, stripe_size, &remainder);
-	if (geo->far_offset && remainder) {
-		split_size = bio_sectors(bio) - remainder;
-		bio = raid10_split_bio(conf, bio, split_size, true);
-	}
-
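-	/*
-	 * E.g. (hypothetical numbers): with stripe_size=4096 and
-	 * bio_start=1000, remainder=1000, so the leading 3096 sectors are
-	 * split off and sent down the regular path, leaving this bio
-	 * aligned at sector 4096; the second split trims the tail the
-	 * same way at bio_end.
-	 */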
-	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
-	r10_bio->mddev = mddev;
-	r10_bio->state = 0;
-	r10_bio->sectors = 0;
-	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
-
-	wait_blocked_dev(mddev, r10_bio);
-
-	r10_bio->master_bio = bio;
-
-	bio_start = bio->bi_iter.bi_sector;
-	bio_end = bio_end_sector(bio);
-
-	/* raid10 uses chunks as the unit to store data. It's similar to
-	 * raid0. One stripe contains the chunks from all member disks (one
-	 * chunk from one disk at the same HBA address). For layout details,
-	 * see 'man md 4'.
-	 */
-	chunk = bio_start >> geo->chunk_shift;
-	chunk *= geo->near_copies;
-	first_stripe_index = chunk;
-	start_disk_index = sector_div(first_stripe_index, geo->raid_disks);
-	if (geo->far_offset)
-		first_stripe_index *= geo->far_copies;
-	start_disk_offset = (bio_start & geo->chunk_mask) +
-			    (first_stripe_index << geo->chunk_shift);
-
-	chunk = bio_end >> geo->chunk_shift;
-	chunk *= geo->near_copies;
-	last_stripe_index = chunk;
-	end_disk_index = sector_div(last_stripe_index, geo->raid_disks);
-	if (geo->far_offset)
-		last_stripe_index *= geo->far_copies;
-	end_disk_offset = (bio_end & geo->chunk_mask) +
-			  (last_stripe_index << geo->chunk_shift);
-
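-	/*
-	 * Worked example (hypothetical, !far_offset): near_copies=2,
-	 * raid_disks=4, chunk_shift=10 and bio_start=6144 give
-	 * chunk = (6144 >> 10) * 2 = 12, so first_stripe_index = 12 / 4 = 3,
-	 * start_disk_index = 12 % 4 = 0 and start_disk_offset =
-	 * (6144 & 1023) + (3 << 10) = 3072 on disk 0.
-	 */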
-	rcu_read_lock();
-	for (disk = 0; disk < geo->raid_disks; disk++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		struct md_rdev *rrdev = rcu_dereference(
-			conf->mirrors[disk].replacement);
-
-		r10_bio->devs[disk].bio = NULL;
-		r10_bio->devs[disk].repl_bio = NULL;
-
-		if (rdev && (test_bit(Faulty, &rdev->flags)))
-			rdev = NULL;
-		if (rrdev && (test_bit(Faulty, &rrdev->flags)))
-			rrdev = NULL;
-		if (!rdev && !rrdev)
-			continue;
-
-		if (rdev) {
-			r10_bio->devs[disk].bio = bio;
-			atomic_inc(&rdev->nr_pending);
-		}
-		if (rrdev) {
-			r10_bio->devs[disk].repl_bio = bio;
-			atomic_inc(&rrdev->nr_pending);
-		}
-	}
-	rcu_read_unlock();
-
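-	/*
-	 * 'remaining' starts biased at 1 so the r10_bio cannot complete
-	 * while the per-disk clones are still being issued below; the
-	 * final dec_and_test at the end of this function drops the bias.
-	 */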
-	atomic_set(&r10_bio->remaining, 1);
-	for (disk = 0; disk < geo->raid_disks; disk++) {
-		sector_t dev_start, dev_end;
-		struct bio *mbio, *rbio = NULL;
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		struct md_rdev *rrdev = rcu_dereference(
-			conf->mirrors[disk].replacement);
-
-		/*
-		 * Now start to calculate the start and end addresses for each
-		 * disk. The space between dev_start and dev_end is the
-		 * discard region.
-		 *
-		 * For dev_start, it needs to consider three conditions:
-		 * 1st, the disk is before start_disk_index: you can imagine
-		 * the disk as being in the next stripe, so dev_start is the
-		 * start address of the next stripe.
-		 * 2nd, the disk is after start_disk_index: it means the disk
-		 * is in the same stripe as the first disk.
-		 * 3rd, the first disk itself: we can use start_disk_offset
-		 * directly.
-		 */
-		if (disk < start_disk_index)
-			dev_start = (first_stripe_index + 1) * mddev->chunk_sectors;
-		else if (disk > start_disk_index)
-			dev_start = first_stripe_index * mddev->chunk_sectors;
-		else
-			dev_start = start_disk_offset;
-
-		if (disk < end_disk_index)
-			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
-		else if (disk > end_disk_index)
-			dev_end = last_stripe_index * mddev->chunk_sectors;
-		else
-			dev_end = end_disk_offset;
-
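-		/*
-		 * E.g. (hypothetical): with start_disk_index=2,
-		 * first_stripe_index=3 and chunk_sectors=1024, disks 0-1
-		 * get dev_start = 4 * 1024, disk 3 gets 3 * 1024, and
-		 * disk 2 uses start_disk_offset itself; dev_end mirrors
-		 * this with the end_* values.
-		 */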
-		/* This only handles discard bios whose size is >= the stripe
-		 * size, so dev_end > dev_start always holds.
-		 */
-		if (r10_bio->devs[disk].bio) {
-			mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-			mbio->bi_end_io = raid10_end_discard_request;
-			mbio->bi_private = r10_bio;
-			r10_bio->devs[disk].bio = mbio;
-			r10_bio->devs[disk].devnum = disk;
-			atomic_inc(&r10_bio->remaining);
-			md_submit_discard_bio(mddev, rdev, mbio,
-					dev_start + choose_data_offset(r10_bio, rdev),
-					dev_end - dev_start);
-			bio_endio(mbio);
-		}
-		if (r10_bio->devs[disk].repl_bio) {
-			rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-			rbio->bi_end_io = raid10_end_discard_request;
-			rbio->bi_private = r10_bio;
-			r10_bio->devs[disk].repl_bio = rbio;
-			r10_bio->devs[disk].devnum = disk;
-			atomic_inc(&r10_bio->remaining);
-			md_submit_discard_bio(mddev, rrdev, rbio,
-					dev_start + choose_data_offset(r10_bio, rrdev),
-					dev_end - dev_start);
-			bio_endio(rbio);
-		}
-	}
-
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		md_write_end(r10_bio->mddev);
-		raid_end_bio_io(r10_bio);
-	}
-
-	return 0;
-out:
-	allow_barrier(conf);
-	return -EAGAIN;
-}
-
 static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
@@ -1780,10 +1530,6 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	if (!md_write_start(mddev, bio))
 		return false;
 
-	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
-		if (!raid10_handle_discard(mddev, bio))
-			return true;
-
 	/*
 	 * If this request crosses a chunk boundary, we need to split
 	 * it.
@@ -4023,7 +3769,7 @@ static int raid10_run(struct mddev *mddev)
 
 	if (mddev->queue) {
 		blk_queue_max_discard_sectors(mddev->queue,
-					      UINT_MAX);
+					      mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, 0);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);