@@ -103,9 +103,9 @@ struct vfio_dma {
 struct vfio_batch {
	struct page		**pages;	/* for pin_user_pages_remote */
	struct page		*fallback_page;	/* if pages alloc fails */
-	int			capacity;	/* length of pages array */
-	int			size;		/* of batch currently */
-	int			offset;		/* of next entry in pages */
+	unsigned int		capacity;	/* length of pages array */
+	unsigned int		size;		/* of batch currently */
+	unsigned int		offset;		/* of next entry in pages */
 };
 
 struct vfio_iommu_group {
@@ -471,12 +471,12 @@ static int put_pfn(unsigned long pfn, int prot)
 
 #define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
 
-static void vfio_batch_init(struct vfio_batch *batch)
+static void __vfio_batch_init(struct vfio_batch *batch, bool single)
 {
	batch->size = 0;
	batch->offset = 0;
 
-	if (unlikely(disable_hugepages))
+	if (single || unlikely(disable_hugepages))
		goto fallback;
 
	batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
@@ -491,6 +491,16 @@ static void vfio_batch_init(struct vfio_batch *batch)
	batch->capacity = 1;
 }
 
+static void vfio_batch_init(struct vfio_batch *batch)
+{
+	__vfio_batch_init(batch, false);
+}
+
+static void vfio_batch_init_single(struct vfio_batch *batch)
+{
+	__vfio_batch_init(batch, true);
+}
+
 static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
 {
	while (batch->size) {
@@ -510,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch)
 
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
			    unsigned long vaddr, unsigned long *pfn,
-			    bool write_fault)
+			    unsigned long *addr_mask, bool write_fault)
 {
	struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
	int ret;
@@ -534,36 +544,46 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
		return ret;
	}
 
-	if (write_fault && !args.writable)
+	if (write_fault && !args.writable) {
		ret = -EFAULT;
-	else
+	} else {
		*pfn = args.pfn;
+		*addr_mask = args.addr_mask;
+	}
 
	follow_pfnmap_end(&args);
	return ret;
 }
 
 /*
  * Returns the positive number of pfns successfully obtained or a negative
- * error code.
+ * error code.  The initial pfn is stored in the pfn arg.  For page-backed
+ * pfns, the provided batch is also updated to indicate the filled pages and
+ * initial offset.  For VM_PFNMAP pfns, only the returned number of pfns and
+ * returned initial pfn are provided; subsequent pfns are contiguous.
  */
-static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
-			  long npages, int prot, unsigned long *pfn,
-			  struct page **pages)
+static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+			   unsigned long npages, int prot, unsigned long *pfn,
+			   struct vfio_batch *batch)
 {
+	unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity);
	struct vm_area_struct *vma;
	unsigned int flags = 0;
-	int ret;
+	long ret;
 
	if (prot & IOMMU_WRITE)
		flags |= FOLL_WRITE;
 
	mmap_read_lock(mm);
-	ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
-				    pages, NULL);
+	ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM,
+				    batch->pages, NULL);
	if (ret > 0) {
-		*pfn = page_to_pfn(pages[0]);
+		*pfn = page_to_pfn(batch->pages[0]);
+		batch->size = ret;
+		batch->offset = 0;
		goto done;
+	} else if (!ret) {
+		ret = -EFAULT;
	}
 
	vaddr = untagged_addr_remote(mm, vaddr);
@@ -572,15 +592,22 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
	vma = vma_lookup(mm, vaddr);
 
	if (vma && vma->vm_flags & VM_PFNMAP) {
-		ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
+		unsigned long addr_mask;
+
+		ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
+				       prot & IOMMU_WRITE);
		if (ret == -EAGAIN)
			goto retry;
 
		if (!ret) {
-			if (is_invalid_reserved_pfn(*pfn))
-				ret = 1;
-			else
+			if (is_invalid_reserved_pfn(*pfn)) {
+				unsigned long epfn;
+
+				epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
+				ret = min_t(long, npages, epfn - *pfn);
+			} else {
				ret = -EFAULT;
+			}
		}
	}
 done:
@@ -594,7 +621,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
  * first page and all consecutive pages with the same locking.
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
-				  long npage, unsigned long *pfn_base,
+				  unsigned long npage, unsigned long *pfn_base,
				  unsigned long limit, struct vfio_batch *batch)
 {
	unsigned long pfn;
@@ -616,32 +643,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
		*pfn_base = 0;
	}
 
+	if (unlikely(disable_hugepages))
+		npage = 1;
+
	while (npage) {
		if (!batch->size) {
			/* Empty batch, so refill it. */
-			long req_pages = min_t(long, npage, batch->capacity);
-
-			ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
-					     &pfn, batch->pages);
+			ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot,
+					     &pfn, batch);
			if (ret < 0)
				goto unpin_out;
 
-			batch->size = ret;
-			batch->offset = 0;
-
			if (!*pfn_base) {
				*pfn_base = pfn;
				rsvd = is_invalid_reserved_pfn(*pfn_base);
			}
+
+			/* Handle pfnmap */
+			if (!batch->size) {
+				if (pfn != *pfn_base + pinned || !rsvd)
+					goto out;
+
+				pinned += ret;
+				npage -= ret;
+				vaddr += (PAGE_SIZE * ret);
+				iova += (PAGE_SIZE * ret);
+				continue;
+			}
		}
 
		/*
-		 * pfn is preset for the first iteration of this inner loop and
-		 * updated at the end to handle a VM_PFNMAP pfn.  In that case,
-		 * batch->pages isn't valid (there's no struct page), so allow
-		 * batch->pages to be touched only when there's more than one
-		 * pfn to check, which guarantees the pfns are from a
-		 * !VM_PFNMAP vma.
+		 * pfn is preset for the first iteration of this inner loop
+		 * due to the fact that vaddr_get_pfns() needs to provide the
+		 * initial pfn for pfnmaps.  Therefore to reduce redundancy,
+		 * the next pfn is fetched at the end of the loop.
+		 * A PageReserved() page could still qualify as page backed
+		 * and rsvd here, and therefore continues to use the batch.
		 */
		while (true) {
			if (pfn != *pfn_base + pinned ||
@@ -676,21 +713,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 
			pfn = page_to_pfn(batch->pages[batch->offset]);
		}
-
-		if (unlikely(disable_hugepages))
-			break;
	}
 
 out:
	ret = vfio_lock_acct(dma, lock_acct, false);
 
 unpin_out:
-	if (batch->size == 1 && !batch->offset) {
-		/* May be a VM_PFNMAP pfn, which the batch can't remember. */
-		put_pfn(pfn, dma->prot);
-		batch->size = 0;
-	}
-
	if (ret < 0) {
		if (pinned && !rsvd) {
			for (pfn = *pfn_base; pinned; pfn++, pinned--)
@@ -705,7 +733,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 }
 
 static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
-				    unsigned long pfn, long npage,
+				    unsigned long pfn, unsigned long npage,
				    bool do_accounting)
 {
	long unlocked = 0, locked = 0;
@@ -728,15 +756,17 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
				  unsigned long *pfn_base, bool do_accounting)
 {
-	struct page *pages[1];
+	struct vfio_batch batch;
	struct mm_struct *mm;
	int ret;
 
	mm = dma->mm;
	if (!mmget_not_zero(mm))
		return -ENODEV;
 
-	ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+	vfio_batch_init_single(&batch);
+
+	ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch);
	if (ret != 1)
		goto out;
@@ -755,6 +785,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
	}
 
 out:
+	vfio_batch_fini(&batch);
	mmput(mm);
	return ret;
 }