Skip to content

Commit 7a0c62f

Browse files
committed
s390/mm: Introduce region-third and segment table entry present bits
JIRA: https://issues.redhat.com/browse/RHEL-74362
Tested: by me

commit 03e6db1
Author: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Date: Thu Nov 21 18:45:21 2024 +0100

s390/mm: Introduce region-third and segment table entry present bits

Introduce region-third and segment table entry present SW bits, and adjust pmd/pud_present() accordingly.

Also add pmd/pud_present() checks to pmd/pud_leaf(), to return false for future swap entries. Same logic applies to pmd_trans_huge(), make that return pmd_leaf() instead of duplicating the same check.

huge_pte_offset() also needs to be adjusted, current code would return NULL for !pud_present(). Use the same logic as in the generic version, which allows for !pud_present() swap entries.

Similar to PTE, bit 63 can be used for the new SW present bit in region and segment table entries. For segment-table entries (PMD) the architecture says that "Bits 62-63 are available for programming", so they are safe to use. The same is true for large leaf region-third-table entries (PUD). However, for non-leaf region-third-table entries, bits 62-63 indicate the TABLE LENGTH and both must be set to 1. But such entries would always be considered as present, so it is safe to use bit 63 as PRESENT bit for PUD. They also should not conflict with bit 62 potentially later used for preserving SOFT_DIRTY in swap entries, because they are not swap entries.

Valid PMDs / PUDs should always have the present bit set, so add it to the various pgprot defines, and also _SEGMENT_ENTRY which is OR'ed e.g. in pmd_populate(). _REGION3_ENTRY wouldn't need any change, as the present bit is already included in the TABLE LENGTH, but also explicitly add it there, for completeness, and just in case the bit would ever be changed.

gmap code needs some adjustment, to also OR the _SEGMENT_ENTRY, like it is already done in gmap_shadow_pgt() when creating new PMDs, but not in __gmap_link(). Otherwise, the gmap PMDs would not be considered present, e.g. when using pmd_leaf() checks in gmap code. The various WARN_ON checks in gmap code also need adjustment, to tolerate the new present bit.

This is a prerequisite for hugetlbfs PTE_MARKER support on s390, which is needed to fix a regression introduced with commit 8a13897 ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs"). That commit depends on the availability of swap entries for hugetlbfs, which were not available for s390 so far.

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com>
1 parent 7706611 commit 7a0c62f

File tree

3 files changed

+47
-24
lines changed

3 files changed

+47
-24
lines changed

arch/s390/include/asm/pgtable.h

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ static inline int is_module_addr(void *addr)
259259
#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
260260
#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
261261
#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
262-
#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
262+
#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \
263+
_REGION3_ENTRY_PRESENT)
263264
#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
264265

265266
#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */
@@ -277,6 +278,14 @@ static inline int is_module_addr(void *addr)
277278

278279
#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
279280

281+
/*
282+
* SW region present bit. For non-leaf region-third-table entries, bits 62-63
283+
* indicate the TABLE LENGTH and both must be set to 1. But such entries
284+
* would always be considered as present, so it is safe to use bit 63 as
285+
* PRESENT bit for PUD.
286+
*/
287+
#define _REGION3_ENTRY_PRESENT 0x0001
288+
280289
/* Bits in the segment table entry */
281290
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
282291
#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL
@@ -288,7 +297,7 @@ static inline int is_module_addr(void *addr)
288297
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
289298
#define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */
290299

291-
#define _SEGMENT_ENTRY (0)
300+
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT)
292301
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
293302

294303
#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
@@ -304,6 +313,8 @@ static inline int is_module_addr(void *addr)
304313
#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
305314
#endif
306315

316+
#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */
317+
307318
#define _CRST_ENTRIES 2048 /* number of region/segment table entries */
308319
#define _PAGE_ENTRIES 256 /* number of page table entries */
309320

@@ -435,17 +446,22 @@ static inline int is_module_addr(void *addr)
435446
/*
436447
* Segment entry (large page) protection definitions.
437448
*/
438-
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
449+
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \
450+
_SEGMENT_ENTRY_INVALID | \
439451
_SEGMENT_ENTRY_PROTECT)
440-
#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \
452+
#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \
453+
_SEGMENT_ENTRY_PROTECT | \
441454
_SEGMENT_ENTRY_READ | \
442455
_SEGMENT_ENTRY_NOEXEC)
443-
#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \
456+
#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \
457+
_SEGMENT_ENTRY_PROTECT | \
444458
_SEGMENT_ENTRY_READ)
445-
#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \
459+
#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \
460+
_SEGMENT_ENTRY_READ | \
446461
_SEGMENT_ENTRY_WRITE | \
447462
_SEGMENT_ENTRY_NOEXEC)
448-
#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \
463+
#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \
464+
_SEGMENT_ENTRY_READ | \
449465
_SEGMENT_ENTRY_WRITE)
450466
#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \
451467
_SEGMENT_ENTRY_LARGE | \
@@ -472,19 +488,22 @@ static inline int is_module_addr(void *addr)
472488
*/
473489

474490
#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \
491+
_REGION3_ENTRY_PRESENT | \
475492
_REGION3_ENTRY_LARGE | \
476493
_REGION3_ENTRY_READ | \
477494
_REGION3_ENTRY_WRITE | \
478495
_REGION3_ENTRY_YOUNG | \
479496
_REGION3_ENTRY_DIRTY | \
480497
_REGION_ENTRY_NOEXEC)
481498
#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
499+
_REGION3_ENTRY_PRESENT | \
482500
_REGION3_ENTRY_LARGE | \
483501
_REGION3_ENTRY_READ | \
484502
_REGION3_ENTRY_YOUNG | \
485503
_REGION_ENTRY_PROTECT | \
486504
_REGION_ENTRY_NOEXEC)
487505
#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
506+
_REGION3_ENTRY_PRESENT | \
488507
_REGION3_ENTRY_LARGE | \
489508
_REGION3_ENTRY_READ | \
490509
_REGION3_ENTRY_WRITE | \
@@ -706,7 +725,7 @@ static inline int pud_present(pud_t pud)
706725
{
707726
if (pud_folded(pud))
708727
return 1;
709-
return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
728+
return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0;
710729
}
711730

712731
static inline int pud_none(pud_t pud)
@@ -721,13 +740,18 @@ static inline bool pud_leaf(pud_t pud)
721740
{
722741
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
723742
return 0;
724-
return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
743+
return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0);
744+
}
745+
746+
static inline int pmd_present(pmd_t pmd)
747+
{
748+
return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0;
725749
}
726750

727751
#define pmd_leaf pmd_leaf
728752
static inline bool pmd_leaf(pmd_t pmd)
729753
{
730-
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
754+
return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0);
731755
}
732756

733757
static inline int pmd_bad(pmd_t pmd)
@@ -759,11 +783,6 @@ static inline int p4d_bad(p4d_t p4d)
759783
return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
760784
}
761785

762-
static inline int pmd_present(pmd_t pmd)
763-
{
764-
return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
765-
}
766-
767786
static inline int pmd_none(pmd_t pmd)
768787
{
769788
return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
@@ -1808,7 +1827,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
18081827

18091828
static inline int pmd_trans_huge(pmd_t pmd)
18101829
{
1811-
return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
1830+
return pmd_leaf(pmd);
18121831
}
18131832

18141833
#define has_transparent_hugepage has_transparent_hugepage

arch/s390/mm/gmap.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
616616
if (pmd_leaf(*pmd)) {
617617
*table = (pmd_val(*pmd) &
618618
_SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
619-
| _SEGMENT_ENTRY_GMAP_UC;
619+
| _SEGMENT_ENTRY_GMAP_UC
620+
| _SEGMENT_ENTRY;
620621
} else
621622
*table = pmd_val(*pmd) &
622623
_SEGMENT_ENTRY_HARDWARE_BITS;
@@ -2345,7 +2346,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
23452346
gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
23462347
pmdp_notify_gmap(gmap, pmdp, gaddr);
23472348
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2348-
_SEGMENT_ENTRY_GMAP_UC));
2349+
_SEGMENT_ENTRY_GMAP_UC |
2350+
_SEGMENT_ENTRY));
23492351
if (purge)
23502352
__pmdp_csp(pmdp);
23512353
set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
@@ -2399,7 +2401,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
23992401
gaddr = __gmap_segment_gaddr(entry);
24002402
pmdp_notify_gmap(gmap, pmdp, gaddr);
24012403
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2402-
_SEGMENT_ENTRY_GMAP_UC));
2404+
_SEGMENT_ENTRY_GMAP_UC |
2405+
_SEGMENT_ENTRY));
24032406
if (MACHINE_HAS_TLB_GUEST)
24042407
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
24052408
gmap->asce, IDTE_LOCAL);
@@ -2434,7 +2437,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
24342437
gaddr = __gmap_segment_gaddr(entry);
24352438
pmdp_notify_gmap(gmap, pmdp, gaddr);
24362439
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2437-
_SEGMENT_ENTRY_GMAP_UC));
2440+
_SEGMENT_ENTRY_GMAP_UC |
2441+
_SEGMENT_ENTRY));
24382442
if (MACHINE_HAS_TLB_GUEST)
24392443
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
24402444
gmap->asce, IDTE_GLOBAL);

arch/s390/mm/hugetlbpage.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ static inline unsigned long __pte_to_rste(pte_t pte)
4848
*/
4949
if (pte_present(pte)) {
5050
rste = pte_val(pte) & PAGE_MASK;
51+
rste |= _SEGMENT_ENTRY_PRESENT;
5152
rste |= move_set_bit(pte_val(pte), _PAGE_READ,
5253
_SEGMENT_ENTRY_READ);
5354
rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
@@ -223,11 +224,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
223224
p4dp = p4d_offset(pgdp, addr);
224225
if (p4d_present(*p4dp)) {
225226
pudp = pud_offset(p4dp, addr);
226-
if (pud_present(*pudp)) {
227-
if (pud_leaf(*pudp))
228-
return (pte_t *) pudp;
227+
if (sz == PUD_SIZE)
228+
return (pte_t *)pudp;
229+
if (pud_present(*pudp))
229230
pmdp = pmd_offset(pudp, addr);
230-
}
231231
}
232232
}
233233
return (pte_t *) pmdp;

0 commit comments

Comments
 (0)