Merge tag 'kvm-riscv-6.19-1' of https://github.com/kvm-riscv/linux into HEAD

KVM/riscv changes for 6.19

- SBI MPXY support for KVM guest
- New KVM_EXIT_FAIL_ENTRY_NO_VSFILE for the case where in-kernel
  AIA virtualization fails to allocate an IMSIC VS-file
- Support enabling dirty log gradually in small chunks
- Fix guest page fault within HLV* instructions
- Flush VS-stage TLB after VCPU migration for Andes cores
Paolo Bonzini, 2025-12-02 18:35:25 +01:00
21 changed files with 138 additions and 94 deletions


@@ -8028,7 +8028,7 @@ will be initialized to 1 when created. This also improves performance because
 dirty logging can be enabled gradually in small chunks on the first call
 to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
 KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
-x86 and arm64 for now).
+x86, arm64 and riscv for now).
 
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
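
For context, a minimal userspace sketch of turning this on; the capability and flag names come from the documentation above, while the vm_fd descriptor and helper name are illustrative assumptions:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: enable gradual dirty logging on a KVM VM file descriptor. */
static int enable_dirty_log_initially_set(int vm_fd)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
                .args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
                           KVM_DIRTY_LOG_INITIALLY_SET,
        };

        /*
         * Dirty bitmaps of KVM_MEM_LOG_DIRTY_PAGES slots created after
         * this start out all-ones; pages are then write-protected
         * gradually by KVM_CLEAR_DIRTY_LOG instead of all at once.
         */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}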


@@ -59,6 +59,9 @@
                  BIT(IRQ_VS_TIMER) | \
                  BIT(IRQ_VS_EXT))
 
+#define KVM_DIRTY_LOG_MANUAL_CAPS       (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+                                         KVM_DIRTY_LOG_INITIALLY_SET)
+
 struct kvm_vm_stat {
         struct kvm_vm_stat_generic generic;
 };
@@ -327,4 +330,7 @@ bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
 
+/* Flags representing implementation specific details */
+DECLARE_STATIC_KEY_FALSE(kvm_riscv_vsstage_tlb_no_gpa);
+
 #endif /* __RISCV_KVM_HOST_H__ */


@@ -49,6 +49,7 @@ void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
                                      unsigned long gva, unsigned long gvsz,
                                      unsigned long order);
 void kvm_riscv_local_hfence_vvma_all(unsigned long vmid);
+void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu);
 
 void kvm_riscv_tlb_flush_process(struct kvm_vcpu *vcpu);


@@ -69,7 +69,9 @@ struct kvm_vcpu_sbi_extension {
                            unsigned long reg_size, const void *reg_val);
 };
 
-void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_riscv_vcpu_sbi_forward_handler(struct kvm_vcpu *vcpu,
+                                       struct kvm_run *run,
+                                       struct kvm_vcpu_sbi_return *retdata);
 void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run,
                                      u32 type, u64 flags);
@@ -105,6 +107,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_fwft;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_mpxy;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;


@@ -22,6 +22,5 @@ unsigned long kvm_riscv_gstage_vmid_bits(void);
 int kvm_riscv_gstage_vmid_init(struct kvm *kvm);
 bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid);
 void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
-void kvm_riscv_gstage_vmid_sanitize(struct kvm_vcpu *vcpu);
 
 #endif


@@ -23,6 +23,8 @@
 #define KVM_INTERRUPT_SET       -1U
 #define KVM_INTERRUPT_UNSET     -2U
 
+#define KVM_EXIT_FAIL_ENTRY_NO_VSFILE   (1ULL << 0)
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 };
@@ -211,6 +213,7 @@ enum KVM_RISCV_SBI_EXT_ID {
         KVM_RISCV_SBI_EXT_STA,
         KVM_RISCV_SBI_EXT_SUSP,
         KVM_RISCV_SBI_EXT_FWFT,
+        KVM_RISCV_SBI_EXT_MPXY,
         KVM_RISCV_SBI_EXT_MAX,
 };
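
As with the other IDs in this enum, userspace toggles the new extension per vCPU through the ONE_REG interface. A short sketch, assuming a vCPU file descriptor vcpu_fd; the register encoding follows the existing KVM_REG_RISCV_SBI_SINGLE convention, the helper name is an assumption:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: enable the default-disabled SBI MPXY extension for one vCPU. */
static int enable_sbi_mpxy(int vcpu_fd)
{
        unsigned long enable = 1;
        struct kvm_one_reg reg = {
                .id = KVM_REG_RISCV | KVM_REG_SIZE_ULONG |
                      KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE |
                      KVM_RISCV_SBI_EXT_MPXY,
                .addr = (unsigned long)&enable,
        };

        /* Once enabled, guest MPXY calls are forwarded to userspace. */
        return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}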


@@ -27,6 +27,7 @@ kvm-y += vcpu_onereg.o
 kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o
 kvm-y += vcpu_sbi.o
 kvm-y += vcpu_sbi_base.o
+kvm-y += vcpu_sbi_forward.o
 kvm-y += vcpu_sbi_fwft.o
 kvm-y += vcpu_sbi_hsm.o
 kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o


@@ -814,7 +814,7 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
         /* For HW acceleration mode, we can't continue */
         if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_HWACCEL) {
                 run->fail_entry.hardware_entry_failure_reason =
-                                                        CSR_HSTATUS;
+                                        KVM_EXIT_FAIL_ENTRY_NO_VSFILE;
                 run->fail_entry.cpu = vcpu->cpu;
                 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                 return 0;
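
A short sketch of how a VMM might recognize the new failure reason after KVM_RUN; the check_fail_entry helper is an assumption, the field names come from struct kvm_run:

#include <stdio.h>
#include <linux/kvm.h>

/* Sketch: after KVM_RUN returns, detect the IMSIC VS-file allocation failure. */
static void check_fail_entry(const struct kvm_run *run)
{
        if (run->exit_reason != KVM_EXIT_FAIL_ENTRY)
                return;

        if (run->fail_entry.hardware_entry_failure_reason &
            KVM_EXIT_FAIL_ENTRY_NO_VSFILE) {
                /* a VMM could pin the vCPU to another host CPU and retry */
                fprintf(stderr, "no IMSIC VS-file on host cpu %u\n",
                        run->fail_entry.cpu);
        }
}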


@@ -15,6 +15,18 @@
 #include <asm/kvm_nacl.h>
 #include <asm/sbi.h>
 
+DEFINE_STATIC_KEY_FALSE(kvm_riscv_vsstage_tlb_no_gpa);
+
+static void kvm_riscv_setup_vendor_features(void)
+{
+        /* Andes AX66: split two-stage TLBs */
+        if (riscv_cached_mvendorid(0) == ANDES_VENDOR_ID &&
+            (riscv_cached_marchid(0) & 0xFFFF) == 0x8A66) {
+                static_branch_enable(&kvm_riscv_vsstage_tlb_no_gpa);
+                kvm_info("VS-stage TLB does not cache guest physical address and VMID\n");
+        }
+}
+
 long kvm_arch_dev_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
@@ -160,6 +172,8 @@ static int __init riscv_kvm_init(void)
                 kvm_info("AIA available with %d guest external interrupts\n",
                          kvm_riscv_aia_nr_hgei);
 
+        kvm_riscv_setup_vendor_features();
+
         kvm_register_perf_callbacks(NULL);
 
         rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);


@@ -161,8 +161,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
          * allocated dirty_bitmap[], dirty pages will be tracked while
          * the memory slot is write protected.
          */
-        if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
+        if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+                if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+                        return;
                 mmu_wp_memory_region(kvm, new->id);
+        }
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,


@@ -158,6 +158,36 @@ void kvm_riscv_local_hfence_vvma_all(unsigned long vmid)
         csr_write(CSR_HGATP, hgatp);
 }
 
+void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
+{
+        unsigned long vmid;
+
+        if (!kvm_riscv_gstage_vmid_bits() ||
+            vcpu->arch.last_exit_cpu == vcpu->cpu)
+                return;
+
+        /*
+         * On RISC-V platforms with hardware VMID support, we share same
+         * VMID for all VCPUs of a particular Guest/VM. This means we might
+         * have stale G-stage TLB entries on the current Host CPU due to
+         * some other VCPU of the same Guest which ran previously on the
+         * current Host CPU.
+         *
+         * To cleanup stale TLB entries, we simply flush all G-stage TLB
+         * entries by VMID whenever underlying Host CPU changes for a VCPU.
+         */
+        vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
+        kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+
+        /*
+         * Flush VS-stage TLB entries for implementations where the
+         * VS-stage TLB does not cache guest physical address and VMID.
+         */
+        if (static_branch_unlikely(&kvm_riscv_vsstage_tlb_no_gpa))
+                kvm_riscv_local_hfence_vvma_all(vmid);
+}
+
 void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
 {
         kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_RCVD);


@@ -968,7 +968,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                  * Note: This should be done after G-stage VMID has been
                  * updated using kvm_riscv_gstage_vmid_ver_changed()
                  */
-                kvm_riscv_gstage_vmid_sanitize(vcpu);
+                kvm_riscv_local_tlb_sanitize(vcpu);
 
                 trace_kvm_entry(vcpu);


@@ -298,6 +298,22 @@ static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
         return (rc <= 0) ? rc : 1;
 }
 
+static bool is_load_guest_page_fault(unsigned long scause)
+{
+        /*
+         * If a G-stage page fault occurs here, the direct approach
+         * would be to let the G-stage page fault handler handle it
+         * naturally; however, calling the G-stage page fault
+         * handler from this point would be rather strange.
+         * Since this is a corner case, we can simply return to the
+         * guest and re-execute the same PC. This will trigger a
+         * G-stage page fault again, and the regular G-stage page
+         * fault handler will then populate the G-stage page
+         * table.
+         */
+        return (scause == EXC_LOAD_GUEST_PAGE_FAULT);
+}
+
 /**
  * kvm_riscv_vcpu_virtual_insn -- Handle virtual instruction trap
  *
@@ -323,6 +339,8 @@ int kvm_riscv_vcpu_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
                                                   ct->sepc,
                                                   &utrap);
                 if (utrap.scause) {
+                        if (is_load_guest_page_fault(utrap.scause))
+                                return 1;
                         utrap.sepc = ct->sepc;
                         kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
                         return 1;
@@ -378,6 +396,8 @@ int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
                                                   &utrap);
                 if (utrap.scause) {
+                        if (is_load_guest_page_fault(utrap.scause))
+                                return 1;
                         /* Redirect trap if we failed to read instruction */
                         utrap.sepc = ct->sepc;
                         kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
@@ -504,6 +524,8 @@ int kvm_riscv_vcpu_mmio_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
                                                   &utrap);
                 if (utrap.scause) {
+                        if (is_load_guest_page_fault(utrap.scause))
+                                return 1;
                         /* Redirect trap if we failed to read instruction */
                         utrap.sepc = ct->sepc;
                         kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);


@@ -82,6 +82,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
                 .ext_idx = KVM_RISCV_SBI_EXT_FWFT,
                 .ext_ptr = &vcpu_sbi_ext_fwft,
         },
+        {
+                .ext_idx = KVM_RISCV_SBI_EXT_MPXY,
+                .ext_ptr = &vcpu_sbi_ext_mpxy,
+        },
         {
                 .ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL,
                 .ext_ptr = &vcpu_sbi_ext_experimental,
@@ -120,7 +124,9 @@ static bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx)
         return sext && scontext->ext_status[sext->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
 }
 
-void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_riscv_vcpu_sbi_forward_handler(struct kvm_vcpu *vcpu,
+                                       struct kvm_run *run,
+                                       struct kvm_vcpu_sbi_return *retdata)
 {
         struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -137,6 +143,8 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
         run->riscv_sbi.args[5] = cp->a5;
         run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED;
         run->riscv_sbi.ret[1] = 0;
+        retdata->uexit = true;
+        return 0;
 }
 
 void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,


@@ -41,8 +41,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
                  * For experimental/vendor extensions
                  * forward it to the userspace
                  */
-                kvm_riscv_vcpu_sbi_forward(vcpu, run);
-                retdata->uexit = true;
+                return kvm_riscv_vcpu_sbi_forward_handler(vcpu, run, retdata);
         } else {
                 sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a0);
                 *out_val = sbi_ext && sbi_ext->probe ?
@@ -71,28 +70,3 @@ const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = {
         .extid_end = SBI_EXT_BASE,
         .handler = kvm_sbi_ext_base_handler,
 };
-
-static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu,
-                                       struct kvm_run *run,
-                                       struct kvm_vcpu_sbi_return *retdata)
-{
-        /*
-         * Both SBI experimental and vendor extensions are
-         * unconditionally forwarded to userspace.
-         */
-        kvm_riscv_vcpu_sbi_forward(vcpu, run);
-        retdata->uexit = true;
-        return 0;
-}
-
-const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = {
-        .extid_start = SBI_EXT_EXPERIMENTAL_START,
-        .extid_end = SBI_EXT_EXPERIMENTAL_END,
-        .handler = kvm_sbi_ext_forward_handler,
-};
-
-const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = {
-        .extid_start = SBI_EXT_VENDOR_START,
-        .extid_end = SBI_EXT_VENDOR_END,
-        .handler = kvm_sbi_ext_forward_handler,
-};


@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Ventana Micro Systems Inc.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_vcpu_sbi.h>
+#include <asm/sbi.h>
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = {
+        .extid_start = SBI_EXT_EXPERIMENTAL_START,
+        .extid_end = SBI_EXT_EXPERIMENTAL_END,
+        .handler = kvm_riscv_vcpu_sbi_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = {
+        .extid_start = SBI_EXT_VENDOR_START,
+        .extid_end = SBI_EXT_VENDOR_END,
+        .handler = kvm_riscv_vcpu_sbi_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = {
+        .extid_start = SBI_EXT_DBCN,
+        .extid_end = SBI_EXT_DBCN,
+        .default_disabled = true,
+        .handler = kvm_riscv_vcpu_sbi_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_mpxy = {
+        .extid_start = SBI_EXT_MPXY,
+        .extid_end = SBI_EXT_MPXY,
+        .default_disabled = true,
+        .handler = kvm_riscv_vcpu_sbi_forward_handler,
+};


@@ -185,35 +185,3 @@ const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst = {
         .extid_end = SBI_EXT_SRST,
         .handler = kvm_sbi_ext_srst_handler,
 };
-
-static int kvm_sbi_ext_dbcn_handler(struct kvm_vcpu *vcpu,
-                                    struct kvm_run *run,
-                                    struct kvm_vcpu_sbi_return *retdata)
-{
-        struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
-        unsigned long funcid = cp->a6;
-
-        switch (funcid) {
-        case SBI_EXT_DBCN_CONSOLE_WRITE:
-        case SBI_EXT_DBCN_CONSOLE_READ:
-        case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE:
-                /*
-                 * The SBI debug console functions are unconditionally
-                 * forwarded to the userspace.
-                 */
-                kvm_riscv_vcpu_sbi_forward(vcpu, run);
-                retdata->uexit = true;
-                break;
-        default:
-                retdata->err_val = SBI_ERR_NOT_SUPPORTED;
-        }
-
-        return 0;
-}
-
-const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = {
-        .extid_start = SBI_EXT_DBCN,
-        .extid_end = SBI_EXT_DBCN,
-        .default_disabled = true,
-        .handler = kvm_sbi_ext_dbcn_handler,
-};


@@ -47,9 +47,7 @@ static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 kvm_riscv_vcpu_sbi_request_reset(vcpu, cp->a1, cp->a2);
 
                 /* userspace provides the suspend implementation */
-                kvm_riscv_vcpu_sbi_forward(vcpu, run);
-                retdata->uexit = true;
-                break;
+                return kvm_riscv_vcpu_sbi_forward_handler(vcpu, run, retdata);
         default:
                 retdata->err_val = SBI_ERR_NOT_SUPPORTED;
                 break;


@@ -32,8 +32,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
                  * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
                  * handled in kernel so we forward these to user-space
                  */
-                kvm_riscv_vcpu_sbi_forward(vcpu, run);
-                retdata->uexit = true;
+                ret = kvm_riscv_vcpu_sbi_forward_handler(vcpu, run, retdata);
                 break;
         case SBI_EXT_0_1_SET_TIMER:
 #if __riscv_xlen == 32


@@ -122,26 +122,3 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
         kvm_for_each_vcpu(i, v, vcpu->kvm)
                 kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
 }
-
-void kvm_riscv_gstage_vmid_sanitize(struct kvm_vcpu *vcpu)
-{
-        unsigned long vmid;
-
-        if (!kvm_riscv_gstage_vmid_bits() ||
-            vcpu->arch.last_exit_cpu == vcpu->cpu)
-                return;
-
-        /*
-         * On RISC-V platforms with hardware VMID support, we share same
-         * VMID for all VCPUs of a particular Guest/VM. This means we might
-         * have stale G-stage TLB entries on the current Host CPU due to
-         * some other VCPU of the same Guest which ran previously on the
-         * current Host CPU.
-         *
-         * To cleanup stale TLB entries, we simply flush all G-stage TLB
-         * entries by VMID whenever underlying Host CPU changes for a VCPU.
-         */
-        vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
-        kvm_riscv_local_hfence_gvma_vmid_all(vmid);
-}


@@ -133,6 +133,7 @@ bool filter_reg(__u64 reg)
         case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
         case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
         case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT:
+        case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY:
         case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
         case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
                 return true;
@@ -639,6 +640,7 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
                 KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
                 KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
                 KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT),
+                KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY),
                 KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
                 KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
         };
@@ -1142,6 +1144,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
 KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
 KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
 KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
+KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
 KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
 
 KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
@@ -1222,6 +1225,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
         &config_sbi_pmu,
         &config_sbi_dbcn,
         &config_sbi_susp,
+        &config_sbi_mpxy,
         &config_sbi_fwft,
         &config_aia,
         &config_fp_f,