From 34c605fe53d49886d2741223b12950a33bdf2acf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 26 Aug 2025 16:56:06 +0200 Subject: [PATCH 1/9] xen: rework xen_pv_domain() Rework xen_pv_domain() to no longer use the xen_domain_type variable, but the artificial X86_FEATURE_XENPV cpu feature. On non-x86 architectures xen_pv_domain() can be defined as "0". This has the advantage that a kernel not built with CONFIG_XEN_PV will be smaller due to dead code elimination. Set the X86_FEATURE_XENPV feature very early, as xen_pv_domain() is used rather early, too. Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250826145608.10352-2-jgross@suse.com> --- arch/x86/xen/enlighten_pv.c | 2 +- include/xen/xen.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 26bbaf4b7330..4806cc28d7ca 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -382,7 +382,6 @@ static bool __init xen_check_xsave(void) static void __init xen_init_capabilities(void) { - setup_force_cpu_cap(X86_FEATURE_XENPV); setup_clear_cpu_cap(X86_FEATURE_DCA); setup_clear_cpu_cap(X86_FEATURE_APERFMPERF); setup_clear_cpu_cap(X86_FEATURE_MTRR); @@ -1402,6 +1401,7 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) JMP32_INSN_SIZE); xen_domain_type = XEN_PV_DOMAIN; + setup_force_cpu_cap(X86_FEATURE_XENPV); xen_start_flags = xen_start_info->flags; /* Interrupts are guaranteed to be off initially. 
*/ early_boot_irqs_disabled = true; diff --git a/include/xen/xen.h b/include/xen/xen.h index a1e5b3f18d69..61854e3f2837 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -22,8 +22,15 @@ extern bool xen_pvh; #define xen_pvh 0 #endif +#ifdef CONFIG_X86 +#include + +#define xen_pv_domain() (cpu_feature_enabled(X86_FEATURE_XENPV)) +#else +#define xen_pv_domain() 0 +#endif + #define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) #define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) #define xen_pvh_domain() (xen_pvh) From 0f4283123fe1e6016296048d0fdcfce615047a13 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 26 Aug 2025 16:56:07 +0200 Subject: [PATCH 2/9] xen: replace XENFEAT_auto_translated_physmap with xen_pv_domain() Instead of testing the XENFEAT_auto_translated_physmap feature, just use !xen_pv_domain() which is equivalent. This has the advantage that a kernel not built with CONFIG_XEN_PV will be smaller due to dead code elimination. 
Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250826145608.10352-3-jgross@suse.com> --- arch/x86/include/asm/xen/page.h | 14 +++++++------- arch/x86/xen/mmu.c | 2 +- arch/x86/xen/p2m.c | 4 ++-- drivers/xen/balloon.c | 4 ++-- drivers/xen/gntdev.c | 2 +- drivers/xen/grant-table.c | 6 +++--- drivers/xen/privcmd.c | 14 ++++++-------- drivers/xen/unpopulated-alloc.c | 4 ++-- drivers/xen/xenbus/xenbus_client.c | 2 +- include/xen/grant_table.h | 4 ++-- include/xen/mem-reservation.h | 4 ++-- include/xen/xen-ops.h | 7 ++++--- 12 files changed, 33 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 85e63d58c074..59f642a94b9d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -12,9 +12,9 @@ #include #include +#include #include #include -#include /* Xen machine address */ typedef struct xmaddr { @@ -162,7 +162,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) * pfn_to_mfn. This will have to be removed when we figured * out which call. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return pfn; mfn = __pfn_to_mfn(pfn); @@ -175,7 +175,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline int phys_to_machine_mapping_valid(unsigned long pfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return 1; return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; @@ -210,7 +210,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) * gfn_to_pfn. This will have to be removed when we figure * out which call. 
*/ - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return mfn; pfn = mfn_to_pfn_no_overrides(mfn); @@ -242,7 +242,7 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) /* Pseudo-physical <-> Guest conversion */ static inline unsigned long pfn_to_gfn(unsigned long pfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return pfn; else return pfn_to_mfn(pfn); @@ -250,7 +250,7 @@ static inline unsigned long pfn_to_gfn(unsigned long pfn) static inline unsigned long gfn_to_pfn(unsigned long gfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return gfn; else return mfn_to_pfn(gfn); @@ -284,7 +284,7 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn) { unsigned long pfn; - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return mfn; pfn = mfn_to_pfn(mfn); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c4c479373249..3be45bf4bc79 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return xen_xlate_unmap_gfn_range(vma, nr, pages); if (!pages) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 56914e21e303..2dd12b61a230 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -686,7 +686,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, int i, ret = 0; pte_t *pte; - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return 0; if (kmap_ops) { @@ -769,7 +769,7 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, { int i, ret = 0; - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return 0; for (i = 0; i < count; i++) { diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 
2de37dcd7556..49c3f9926394 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -302,7 +302,7 @@ static enum bp_state reserve_additional_memory(void) * are not restored since this region is now known not to * conflict with any devices. */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) { + if (xen_pv_domain()) { unsigned long pfn, i; pfn = PFN_DOWN(resource->start); @@ -626,7 +626,7 @@ int xen_alloc_ballooned_pages(unsigned int nr_pages, struct page **pages) */ BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); - if (!xen_feature(XENFEAT_auto_translated_physmap)) { + if (xen_pv_domain()) { ret = xen_alloc_p2m_entry(page_to_pfn(page)); if (ret < 0) goto out_undo; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1f2160765618..74491967f2ae 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1183,7 +1183,7 @@ static int __init gntdev_init(void) if (!xen_domain()) return -ENODEV; - use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap); + use_ptemod = xen_pv_domain(); err = misc_register(&gntdev_miscdev); if (err != 0) { diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 04a6b470b15d..478d2ad725ac 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -1449,7 +1449,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) unsigned int nr_gframes = end_idx + 1; int rc; - if (xen_feature(XENFEAT_auto_translated_physmap)) { + if (!xen_pv_domain()) { struct xen_add_to_physmap xatp; unsigned int i = end_idx; rc = 0; @@ -1570,7 +1570,7 @@ static int gnttab_setup(void) if (max_nr_gframes < nr_grant_frames) return -ENOSYS; - if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) { + if (!xen_pv_domain() && gnttab_shared.addr == NULL) { gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr; if (gnttab_shared.addr == NULL) { pr_warn("gnttab share frames is not mapped!\n"); @@ -1588,7 +1588,7 @@ int gnttab_resume(void) int gnttab_suspend(void) { - if 
(!xen_feature(XENFEAT_auto_translated_physmap)) + if (xen_pv_domain()) gnttab_interface->unmap_frames(); return 0; } diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 13a10f3294a8..f52a457b302d 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -271,7 +271,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata) struct mmap_gfn_state state; /* We only support privcmd_ioctl_mmap_batch for non-auto-translated. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return -ENOSYS; if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) @@ -353,7 +353,7 @@ static int mmap_batch_fn(void *data, int nr, void *state) struct page **cur_pages = NULL; int ret; - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) cur_pages = &pages[st->index]; BUG_ON(nr < 0); @@ -535,7 +535,7 @@ static long privcmd_ioctl_mmap_batch( ret = -EINVAL; goto out_unlock; } - if (xen_feature(XENFEAT_auto_translated_physmap)) { + if (!xen_pv_domain()) { ret = alloc_empty_pages(vma, nr_pages); if (ret < 0) goto out_unlock; @@ -779,8 +779,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, goto out; } - if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && - xen_feature(XENFEAT_auto_translated_physmap)) { + if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && !xen_pv_domain()) { unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE); struct page **pages; unsigned int i; @@ -811,8 +810,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, if (rc) goto out; - if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && - xen_feature(XENFEAT_auto_translated_physmap)) { + if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && !xen_pv_domain()) { rc = xen_remap_vma_range(vma, kdata.addr, kdata.num << PAGE_SHIFT); } else { unsigned int domid = @@ -1591,7 +1589,7 @@ static void privcmd_close(struct vm_area_struct *vma) int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; int rc; - if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs 
|| !pages) + if (xen_pv_domain() || !numpgs || !pages) return; rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index a39f2d36dd9c..d6fc2aefe264 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -105,7 +105,7 @@ static int fill_list(unsigned int nr_pages) * are not restored since this region is now known not to * conflict with any devices. */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) { + if (xen_pv_domain()) { xen_pfn_t pfn = PFN_DOWN(res->start); for (i = 0; i < alloc_pages; i++) { @@ -184,7 +184,7 @@ int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages) pages[i] = pg; #ifdef CONFIG_XEN_HAVE_PVMMU - if (!xen_feature(XENFEAT_auto_translated_physmap)) { + if (xen_pv_domain()) { ret = xen_alloc_p2m_entry(page_to_pfn(pg)); if (ret < 0) { unsigned int j; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index e73ec225d4a6..2dc874fb5506 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -955,7 +955,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = { void __init xenbus_ring_ops_init(void) { #ifdef CONFIG_XEN_PV - if (!xen_feature(XENFEAT_auto_translated_physmap)) + if (xen_pv_domain()) ring_ops = &ring_ops_pv; else #endif diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index e279be353e3f..69ac6d80a006 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -164,7 +164,7 @@ gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, { if (flags & GNTMAP_contains_pte) map->host_addr = addr; - else if (xen_feature(XENFEAT_auto_translated_physmap)) + else if (!xen_pv_domain()) map->host_addr = __pa(addr); else map->host_addr = addr; @@ -181,7 +181,7 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr, { if (flags & GNTMAP_contains_pte) unmap->host_addr = addr; - else if 
(xen_feature(XENFEAT_auto_translated_physmap)) + else if (!xen_pv_domain()) unmap->host_addr = __pa(addr); else unmap->host_addr = addr; diff --git a/include/xen/mem-reservation.h b/include/xen/mem-reservation.h index a2ab516fcd2c..3cbe3df0dfd4 100644 --- a/include/xen/mem-reservation.h +++ b/include/xen/mem-reservation.h @@ -39,7 +39,7 @@ static inline void xenmem_reservation_va_mapping_update(unsigned long count, xen_pfn_t *frames) { #ifdef CONFIG_XEN_HAVE_PVMMU - if (!xen_feature(XENFEAT_auto_translated_physmap)) + if (xen_pv_domain()) __xenmem_reservation_va_mapping_update(count, pages, frames); #endif } @@ -48,7 +48,7 @@ static inline void xenmem_reservation_va_mapping_reset(unsigned long count, struct page **pages) { #ifdef CONFIG_XEN_HAVE_PVMMU - if (!xen_feature(XENFEAT_auto_translated_physmap)) + if (xen_pv_domain()) __xenmem_reservation_va_mapping_reset(count, pages); #endif } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 9e2a769b0d96..496e6013c689 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned int domid, struct page **pages) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr, prot, domid, pages); @@ -150,7 +151,7 @@ static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma, int nr, int *err_ptr, pgprot_t prot, unsigned int domid) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return -EOPNOTSUPP; return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, @@ -175,7 +176,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma, pgprot_t prot, unsigned int domid, struct page **pages) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return -EOPNOTSUPP; return 
xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false); From 2ea7a5bcc4cfca817b3502b38f97885767730ed8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 26 Aug 2025 16:56:08 +0200 Subject: [PATCH 3/9] drivers/xen/gntdev: use xen_pv_domain() instead of cached value Eliminate the use_ptemod variable by replacing its use cases with xen_pv_domain(). Instead of passing the xen_pv_domain() return value to gntdev_ioctl_dmabuf_exp_from_refs(), use xen_pv_domain() in that function. Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250826145608.10352-4-jgross@suse.com> --- drivers/xen/gntdev-dmabuf.c | 7 +++---- drivers/xen/gntdev-dmabuf.h | 2 +- drivers/xen/gntdev.c | 33 ++++++++++++++------------------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 82855105ab85..550980dd3b0b 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -720,16 +720,15 @@ static void dmabuf_imp_release_all(struct gntdev_dmabuf_priv *priv) /* DMA buffer IOCTL support. 
*/ -long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, int use_ptemod, +long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, struct ioctl_gntdev_dmabuf_exp_from_refs __user *u) { struct ioctl_gntdev_dmabuf_exp_from_refs op; u32 *refs; long ret; - if (use_ptemod) { - pr_debug("Cannot provide dma-buf: use_ptemode %d\n", - use_ptemod); + if (xen_pv_domain()) { + pr_debug("Cannot provide dma-buf in a PV domain\n"); return -EINVAL; } diff --git a/drivers/xen/gntdev-dmabuf.h b/drivers/xen/gntdev-dmabuf.h index 3d9b9cf9d5a1..9adf96ac74d3 100644 --- a/drivers/xen/gntdev-dmabuf.h +++ b/drivers/xen/gntdev-dmabuf.h @@ -18,7 +18,7 @@ struct gntdev_dmabuf_priv *gntdev_dmabuf_init(struct file *filp); void gntdev_dmabuf_fini(struct gntdev_dmabuf_priv *priv); -long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, int use_ptemod, +long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, struct ioctl_gntdev_dmabuf_exp_from_refs __user *u); long gntdev_ioctl_dmabuf_exp_wait_released(struct gntdev_priv *priv, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 74491967f2ae..91ba5078c9d9 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -73,9 +73,6 @@ module_param(limit, uint, 0644); MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by one mapping request"); -/* True in PV mode, false otherwise */ -static int use_ptemod; - static void unmap_grant_pages(struct gntdev_grant_map *map, int offset, int pages); @@ -163,7 +160,7 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, NULL == add->pages || NULL == add->being_removed) goto err; - if (use_ptemod) { + if (xen_pv_domain()) { add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); add->kunmap_ops = kvmalloc_array(count, sizeof(add->kunmap_ops[0]), @@ -211,7 +208,7 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, add->grants[i].ref = INVALID_GRANT_REF; 
add->map_ops[i].handle = INVALID_GRANT_HANDLE; add->unmap_ops[i].handle = INVALID_GRANT_HANDLE; - if (use_ptemod) { + if (xen_pv_domain()) { add->kmap_ops[i].handle = INVALID_GRANT_HANDLE; add->kunmap_ops[i].handle = INVALID_GRANT_HANDLE; } @@ -268,7 +265,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; - if (map->pages && !use_ptemod) { + if (map->pages && !xen_pv_domain()) { /* * Increment the reference count. This ensures that the * subsequent call to unmap_grant_pages() will not wind up @@ -298,7 +295,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) */ } - if (use_ptemod && map->notifier_init) + if (xen_pv_domain() && map->notifier_init) mmu_interval_notifier_remove(&map->notifier); if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { @@ -334,7 +331,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) size_t alloced = 0; int i, err = 0; - if (!use_ptemod) { + if (!xen_pv_domain()) { /* Note: it could already be mapped */ if (map->map_ops[0].handle != INVALID_GRANT_HANDLE) return 0; @@ -389,7 +386,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) if (map->flags & GNTMAP_device_map) map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; - if (use_ptemod) { + if (xen_pv_domain()) { if (map->kmap_ops[i].status == GNTST_okay) { alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; @@ -421,7 +418,7 @@ static void __unmap_grant_pages_done(int result, map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; - if (use_ptemod) { + if (xen_pv_domain()) { if (map->kunmap_ops[offset + i].status == GNTST_okay && map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE) successful_unmaps++; @@ -464,7 +461,7 @@ static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset, } map->unmap_data.unmap_ops = map->unmap_ops + offset; - 
map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; + map->unmap_data.kunmap_ops = xen_pv_domain() ? map->kunmap_ops + offset : NULL; map->unmap_data.pages = map->pages + offset; map->unmap_data.count = pages; map->unmap_data.done = __unmap_grant_pages_done; @@ -1039,7 +1036,7 @@ static long gntdev_ioctl(struct file *flip, #ifdef CONFIG_XEN_GNTDEV_DMABUF case IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS: - return gntdev_ioctl_dmabuf_exp_from_refs(priv, use_ptemod, ptr); + return gntdev_ioctl_dmabuf_exp_from_refs(priv, ptr); case IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED: return gntdev_ioctl_dmabuf_exp_wait_released(priv, ptr); @@ -1086,7 +1083,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP); - if (use_ptemod) + if (xen_pv_domain()) vm_flags_set(vma, VM_DONTCOPY); vma->vm_private_data = map; @@ -1102,7 +1099,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->pages_vm_start = vma->vm_start; - if (use_ptemod) { + if (xen_pv_domain()) { err = mmu_interval_notifier_insert_locked( &map->notifier, vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, &gntdev_mmu_ops); @@ -1113,7 +1110,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) } mutex_unlock(&priv->lock); - if (use_ptemod) { + if (xen_pv_domain()) { /* * gntdev takes the address of the PTE in find_grant_ptes() and * passes it to the hypervisor in gntdev_map_grant_pages(). 
The @@ -1139,7 +1136,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) if (err) goto out_put_map; - if (!use_ptemod) { + if (!xen_pv_domain()) { err = vm_map_pages_zero(vma, map->pages, map->count); if (err) goto out_put_map; @@ -1154,7 +1151,7 @@ unlock_out: out_unlock_put: mutex_unlock(&priv->lock); out_put_map: - if (use_ptemod) + if (xen_pv_domain()) unmap_grant_pages(map, 0, map->count); gntdev_put_map(priv, map); return err; @@ -1183,8 +1180,6 @@ static int __init gntdev_init(void) if (!xen_domain()) return -ENODEV; - use_ptemod = xen_pv_domain(); - err = misc_register(&gntdev_miscdev); if (err != 0) { pr_err("Could not register gntdev device\n"); From 29c2f18d4f3fa444e3b8972b45d548cf521f8f15 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 29 Aug 2025 09:04:02 +0200 Subject: [PATCH 4/9] x86/xen: select HIBERNATE_CALLBACKS more directly The config XEN_SAVE_RESTORE's only purpose is to select HIBERNATE_CALLBACKS, when config XEN is set. The XEN config definition can simply select HIBERNATE_CALLBACKS, though, and the definition of XEN_SAVE_RESTORE can be dropped. So, remove this indirection through XEN_SAVE_RESTORE and select HIBERNATE_CALLBACKS directly. Also, drop the XEN_SAVE_RESTORE from the x86 xen config fragment. No functional change intended with this clean-up. 
Signed-off-by: Lukas Bulwahn Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250829070402.159390-1-lukas.bulwahn@redhat.com> --- arch/x86/configs/xen.config | 1 - arch/x86/xen/Kconfig | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config index d5d091e03bd3..98b6952ba9d2 100644 --- a/arch/x86/configs/xen.config +++ b/arch/x86/configs/xen.config @@ -12,7 +12,6 @@ CONFIG_CPU_FREQ=y # x86 xen specific config options CONFIG_XEN_PVH=y -CONFIG_XEN_SAVE_RESTORE=y # CONFIG_XEN_DEBUG_FS is not set CONFIG_XEN_MCE_LOG=y CONFIG_XEN_ACPI_PROCESSOR=m diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 98d8a50d2aed..aa4040fd9215 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,6 +8,7 @@ config XEN depends on PARAVIRT select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR + select HIBERNATE_CALLBACKS depends on X86_64 || (X86_32 && X86_PAE) depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MATOM) depends on X86_LOCAL_APIC && X86_TSC @@ -64,12 +65,6 @@ config XEN_PVHVM_GUEST help Support running as a Xen PVHVM guest. -config XEN_SAVE_RESTORE - bool - depends on XEN - select HIBERNATE_CALLBACKS - default y - config XEN_DEBUG_FS bool "Enable Xen debug and tuning parameters in debugfs" depends on XEN && DEBUG_FS From 08df2d7dd4ab2db8a172d824cda7872d5eca460a Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Wed, 27 Aug 2025 20:36:01 -0400 Subject: [PATCH 5/9] xen/events: Cleanup find_virq() return codes rc is overwritten by the evtchn_status hypercall in each iteration, so the return value will be whatever the last iteration is. This could incorrectly return success even if the event channel was not found. Change to an explicit -ENOENT for an un-found virq and return 0 on a successful match. 
Fixes: 62cc5fc7b2e0 ("xen/pv-on-hvm kexec: rebind virqs to existing eventchannel ports") Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250828003604.8949-2-jason.andryuk@amd.com> --- drivers/xen/events/events_base.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 41309d38f78c..374231d84e4f 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1318,10 +1318,11 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) { struct evtchn_status status; evtchn_port_t port; - int rc = -ENOENT; memset(&status, 0, sizeof(status)); for (port = 0; port < xen_evtchn_max_channels(); port++) { + int rc; + status.dom = DOMID_SELF; status.port = port; rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); @@ -1331,10 +1332,10 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) continue; if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) { *evtchn = port; - break; + return 0; } } - return rc; + return -ENOENT; } /** From 07ce121d93a5e5fb2440a24da3dbf408fcee978e Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Wed, 27 Aug 2025 20:36:02 -0400 Subject: [PATCH 6/9] xen/events: Return -EEXIST for bound VIRQs Change find_virq() to return -EEXIST when a VIRQ is bound to a different CPU than the one passed in. With that, remove the BUG_ON() from bind_virq_to_irq() to propagate the error upwards. Some VIRQs are per-cpu, but others are per-domain or global. Those must be bound to CPU0 and can then migrate elsewhere. The lookup for per-domain and global will probably fail when migrated off CPU 0, especially when the current CPU is tracked. This now returns -EEXIST instead of BUG_ON(). 
A second call to bind a per-domain or global VIRQ is not expected, but make it non-fatal to avoid trying to look up the irq, since we don't know which per_cpu(virq_to_irq) it will be in. Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250828003604.8949-3-jason.andryuk@amd.com> --- drivers/xen/events/events_base.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 374231d84e4f..b060b5a95f45 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev, } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); -static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) +static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn, + bool percpu) { struct evtchn_status status; evtchn_port_t port; + bool exists = false; memset(&status, 0, sizeof(status)); for (port = 0; port < xen_evtchn_max_channels(); port++) { @@ -1330,12 +1332,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) continue; if (status.status != EVTCHNSTAT_virq) continue; - if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) { + if (status.u.virq != virq) + continue; + if (status.vcpu == xen_vcpu_nr(cpu)) { *evtchn = port; return 0; + } else if (!percpu) { + exists = true; } } - return -ENOENT; + return exists ? 
-EEXIST : -ENOENT; } /** @@ -1382,8 +1388,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) evtchn = bind_virq.port; else { if (ret == -EEXIST) - ret = find_virq(virq, cpu, &evtchn); - BUG_ON(ret < 0); + ret = find_virq(virq, cpu, &evtchn, percpu); + if (ret) { + __unbind_from_irq(info, info->irq); + goto out; + } } ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq); From 3fcc8e146935415d69ffabb5df40ecf50e106131 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Wed, 27 Aug 2025 20:36:03 -0400 Subject: [PATCH 7/9] xen/events: Update virq_to_irq on migration VIRQs come in 3 flavors, per-vCPU, per-domain, and global, and the VIRQs are tracked in per-cpu virq_to_irq arrays. Per-domain and global VIRQs must be bound on CPU 0, and bind_virq_to_irq() sets the per_cpu virq_to_irq at registration time. Later, the interrupt can migrate, and info->cpu is updated. When calling __unbind_from_irq(), the per-cpu virq_to_irq is cleared for a different cpu. If bind_virq_to_irq() is called again with CPU 0, the stale irq is returned. There won't be any irq_info for the irq, so things break. Make xen_rebind_evtchn_to_cpu() update the per_cpu virq_to_irq mappings to keep them up to date with the current cpu. This ensures the correct virq_to_irq is cleared in __unbind_from_irq(). Fixes: e46cdb66c8fc ("xen: event channels") Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250828003604.8949-4-jason.andryuk@amd.com> --- drivers/xen/events/events_base.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b060b5a95f45..9478fae014e5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1797,9 +1797,20 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) * virq or IPI channel, which don't actually need to be rebound. 
Ignore * it, but don't do the xenlinux-level rebind in that case. */ - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) { + int old_cpu = info->cpu; + bind_evtchn_to_cpu(info, tcpu, false); + if (info->type == IRQT_VIRQ) { + int virq = info->u.virq; + int irq = per_cpu(virq_to_irq, old_cpu)[virq]; + + per_cpu(virq_to_irq, old_cpu)[virq] = -1; + per_cpu(virq_to_irq, tcpu)[virq] = irq; + } + } + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 0; From f770c3d858687252f1270265ba152d5c622e793f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 4 Sep 2025 15:11:09 +0200 Subject: [PATCH 8/9] xen/manage: Fix suspend error path The device power management API has the following asymmetry: * dpm_suspend_start() does not clean up on failure (it requires a call to dpm_resume_end()) * dpm_suspend_end() does clean up on failure (it does not require a call to dpm_resume_start()) The asymmetry was introduced by commit d8f3de0d2412 ("Suspend-related patches for 2.6.27") in June 2008: It removed a call to device_resume() from device_suspend() (which was later renamed to dpm_suspend_start()). When Xen began using the device power management API in May 2008 with commit 0e91398f2a5d ("xen: implement save/restore"), the asymmetry did not yet exist. But since it was introduced, a call to dpm_resume_end() is missing in the error path of dpm_suspend_start(). Fix it. Fixes: d8f3de0d2412 ("Suspend-related patches for 2.6.27") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v2.6.27 Reviewed-by: "Rafael J. 
Wysocki (Intel)" Signed-off-by: Juergen Gross Message-ID: <22453676d1ddcebbe81641bb68ddf587fee7e21e.1756990799.git.lukas@wunner.de> --- drivers/xen/manage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 841afa4933c7..1f5a7a42fc32 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -110,7 +110,7 @@ static void do_suspend(void) err = dpm_suspend_start(PMSG_FREEZE); if (err) { pr_err("%s: dpm_suspend_start %d\n", __func__, err); - goto out_thaw; + goto out_resume_end; } printk(KERN_DEBUG "suspending xenstore...\n"); @@ -150,6 +150,7 @@ out_resume: else xs_suspend_cancel(); +out_resume_end: dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); out_thaw: From 9d52b0b41be5b932a0a929c10038f1bb04af4ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sun, 21 Sep 2025 18:28:47 +0200 Subject: [PATCH 9/9] xen: take system_transition_mutex on suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xen's do_suspend() calls dpm_suspend_start() without taking required system_transition_mutex. Since 12ffc3b1513eb moved the pm_restrict_gfp_mask() call, not taking that mutex results in a WARN. Take the mutex in do_suspend(), and use mutex_trylock() to follow how enter_state() does this. 
Suggested-by: Jürgen Groß Fixes: 12ffc3b1513eb ("PM: Restrict swap use to later in the suspend sequence") Link: https://lore.kernel.org/xen-devel/aKiBJeqsYx_4Top5@mail-itl/ Signed-off-by: Marek Marczykowski-Górecki Cc: stable@vger.kernel.org # v6.16+ Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250921162853.223116-1-marmarek@invisiblethingslab.com> --- drivers/xen/manage.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 1f5a7a42fc32..e20c40a62e64 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -95,10 +96,16 @@ static void do_suspend(void) shutting_down = SHUTDOWN_SUSPEND; + if (!mutex_trylock(&system_transition_mutex)) + { + pr_err("%s: failed to take system_transition_mutex\n", __func__); + goto out; + } + err = freeze_processes(); if (err) { pr_err("%s: freeze processes failed %d\n", __func__, err); - goto out; + goto out_unlock; } err = freeze_kernel_threads(); @@ -155,6 +162,8 @@ out_resume_end: out_thaw: thaw_processes(); +out_unlock: + mutex_unlock(&system_transition_mutex); out: shutting_down = SHUTDOWN_INVALID; }