X-Git-Url: http://git.maemo.org/git/?a=blobdiff_plain;f=kvm-all.c;h=73e814a31d81194ddff0f38636e87f504515e7dd;hb=d3f243676addaef6c8d818934565292c698f91cc;hp=9fb295ce6358868a967ebc28ab0179e712eda63a;hpb=a9c11522bb4daabc43b3fe12d08c8db6deb2ac79;p=qemu diff --git a/kvm-all.c b/kvm-all.c index 9fb295c..73e814a 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -22,6 +22,7 @@ #include "qemu-common.h" #include "sysemu.h" +#include "gdbstub.h" #include "kvm.h" /* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */ @@ -56,6 +57,9 @@ struct KVMState int fd; int vmfd; int coalesced_mmio; +#ifdef KVM_CAP_SET_GUEST_DEBUG + struct kvm_sw_breakpoint_head kvm_sw_breakpoints; +#endif }; static KVMState *kvm_state; @@ -72,24 +76,55 @@ static KVMSlot *kvm_alloc_slot(KVMState *s) return &s->slots[i]; } - return NULL; + fprintf(stderr, "%s: no free slot available\n", __func__); + abort(); } -static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr) +static KVMSlot *kvm_lookup_matching_slot(KVMState *s, + target_phys_addr_t start_addr, + target_phys_addr_t end_addr) { int i; for (i = 0; i < ARRAY_SIZE(s->slots); i++) { KVMSlot *mem = &s->slots[i]; - if (start_addr >= mem->start_addr && - start_addr < (mem->start_addr + mem->memory_size)) + if (start_addr == mem->start_addr && + end_addr == mem->start_addr + mem->memory_size) { return mem; + } } return NULL; } +/* + * Find overlapping slot with lowest start address + */ +static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s, + target_phys_addr_t start_addr, + target_phys_addr_t end_addr) +{ + KVMSlot *found = NULL; + int i; + + for (i = 0; i < ARRAY_SIZE(s->slots); i++) { + KVMSlot *mem = &s->slots[i]; + + if (mem->memory_size == 0 || + (found && found->start_addr < mem->start_addr)) { + continue; + } + + if (end_addr > mem->start_addr && + start_addr < mem->start_addr + mem->memory_size) { + found = mem; + } + } + + return found; +} + static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) { struct 
kvm_userspace_memory_region mem; @@ -97,7 +132,7 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) mem.slot = slot->slot; mem.guest_phys_addr = slot->start_addr; mem.memory_size = slot->memory_size; - mem.userspace_addr = (unsigned long)phys_ram_base + slot->phys_offset; + mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset); mem.flags = slot->flags; return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); @@ -159,14 +194,16 @@ int kvm_sync_vcpus(void) /* * dirty pages logging control */ -static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, target_phys_addr_t end_addr, - unsigned flags, +static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, + ram_addr_t size, unsigned flags, unsigned mask) { KVMState *s = kvm_state; - KVMSlot *mem = kvm_lookup_slot(s, phys_addr); + KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size); if (mem == NULL) { - dprintf("invalid parameters %llx-%llx\n", phys_addr, end_addr); + fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-" + TARGET_FMT_plx "\n", __func__, phys_addr, + phys_addr + size - 1); return -EINVAL; } @@ -180,16 +217,16 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, target_phys_ return kvm_set_user_memory_region(s, mem); } -int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t end_addr) +int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, end_addr, + return kvm_dirty_pages_log_change(phys_addr, size, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_LOG_DIRTY_PAGES); } -int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr) +int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, end_addr, + return kvm_dirty_pages_log_change(phys_addr, size, 0, KVM_MEM_LOG_DIRTY_PAGES); } @@ -199,32 +236,30 @@ int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr) * 
This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty(). * This means all bits are set to dirty. * - * @start_add: start of logged region. This is what we use to search the memslot + * @start_add: start of logged region. * @end_addr: end of logged region. */ -void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr) +void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, + target_phys_addr_t end_addr) { KVMState *s = kvm_state; KVMDirtyLog d; - KVMSlot *mem = kvm_lookup_slot(s, start_addr); + KVMSlot *mem = kvm_lookup_matching_slot(s, start_addr, end_addr); unsigned long alloc_size; ram_addr_t addr; target_phys_addr_t phys_addr = start_addr; - dprintf("sync addr: %llx into %lx\n", start_addr, mem->phys_offset); + dprintf("sync addr: " TARGET_FMT_lx " into %lx\n", start_addr, + mem->phys_offset); if (mem == NULL) { - fprintf(stderr, "BUG: %s: invalid parameters\n", __func__); + fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-" + TARGET_FMT_plx "\n", __func__, phys_addr, end_addr - 1); return; } alloc_size = mem->memory_size >> TARGET_PAGE_BITS / sizeof(d.dirty_bitmap); d.dirty_bitmap = qemu_mallocz(alloc_size); - if (d.dirty_bitmap == NULL) { - dprintf("Could not allocate dirty bitmap\n"); - return; - } - d.slot = mem->slot; dprintf("slot %d, phys_addr %llx, uaddr: %llx\n", d.slot, mem->start_addr, mem->phys_offset); @@ -285,19 +320,34 @@ int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) return ret; } +int kvm_check_extension(KVMState *s, unsigned int extension) +{ + int ret; + + ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension); + if (ret < 0) { + ret = 0; + } + + return ret; +} + int kvm_init(int smp_cpus) { KVMState *s; int ret; int i; - if (smp_cpus > 1) + if (smp_cpus > 1) { + fprintf(stderr, "No SMP KVM support, use '-smp 1'\n"); return -EINVAL; + } s = qemu_mallocz(sizeof(KVMState)); - if (s == NULL) - return -ENOMEM; +#ifdef 
KVM_CAP_SET_GUEST_DEBUG + TAILQ_INIT(&s->kvm_sw_breakpoints); +#endif for (i = 0; i < ARRAY_SIZE(s->slots); i++) s->slots[i].slot = i; @@ -329,13 +379,11 @@ int kvm_init(int smp_cpus) /* initially, KVM allocated its own memory and we had to jump through * hooks to make phys_ram_base point to this. Modern versions of KVM - * just use a user allocated buffer so we can use phys_ram_base + * just use a user allocated buffer so we can use regular pages * unmodified. Make sure we have a sufficiently modern version of KVM. */ - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY); - if (ret <= 0) { - if (ret == 0) - ret = -EINVAL; + if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) { + ret = -EINVAL; fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n"); goto err; } @@ -343,11 +391,8 @@ int kvm_init(int smp_cpus) /* There was a nasty bug in < kvm-80 that prevents memory slots from being * destroyed properly. Since we rely on this capability, refuse to work * with any kernel without this capability. 
*/ - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, - KVM_CAP_DESTROY_MEMORY_REGION_WORKS); - if (ret <= 0) { - if (ret == 0) - ret = -EINVAL; + if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) { + ret = -EINVAL; fprintf(stderr, "KVM kernel module broken (DESTROY_MEMORY_REGION)\n" @@ -355,11 +400,10 @@ int kvm_init(int smp_cpus) goto err; } - s->coalesced_mmio = 0; #ifdef KVM_CAP_COALESCED_MMIO - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); - if (ret > 0) - s->coalesced_mmio = ret; + s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO); +#else + s->coalesced_mmio = 0; #endif ret = kvm_arch_init(s, smp_cpus); @@ -452,7 +496,7 @@ int kvm_cpu_exec(CPUState *env) do { kvm_arch_pre_run(env, run); - if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { + if (env->exit_request) { dprintf("interrupt exit requested\n"); ret = 0; break; @@ -511,6 +555,16 @@ int kvm_cpu_exec(CPUState *env) break; case KVM_EXIT_DEBUG: dprintf("kvm_exit_debug\n"); +#ifdef KVM_CAP_SET_GUEST_DEBUG + if (kvm_arch_debug(&run->debug.arch)) { + gdb_set_stop_cpu(env); + vm_stop(EXCP_DEBUG); + env->exception_index = EXCP_DEBUG; + return 0; + } + /* re-enter, this exception was guest-internal */ + ret = 1; +#endif /* KVM_CAP_SET_GUEST_DEBUG */ break; default: dprintf("kvm_arch_handle_exit\n"); @@ -519,8 +573,8 @@ int kvm_cpu_exec(CPUState *env) } } while (ret > 0); - if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { - env->interrupt_request &= ~CPU_INTERRUPT_EXIT; + if (env->exit_request) { + env->exit_request = 0; env->exception_index = EXCP_INTERRUPT; } @@ -533,60 +587,119 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr, { KVMState *s = kvm_state; ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; - KVMSlot *mem; + KVMSlot *mem, old; + int err; + + if (start_addr & ~TARGET_PAGE_MASK) { + if (flags >= IO_MEM_UNASSIGNED) { + if (!kvm_lookup_overlapping_slot(s, start_addr, + start_addr + size)) { + return; + } + fprintf(stderr, "Unaligned split of a 
KVM memory slot\n"); + } else { + fprintf(stderr, "Only page-aligned memory slots supported\n"); + } + abort(); + } /* KVM does not support read-only slots */ phys_offset &= ~IO_MEM_ROM; - mem = kvm_lookup_slot(s, start_addr); - if (mem) { - if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) { - mem->memory_size = 0; - mem->start_addr = start_addr; - mem->phys_offset = 0; + while (1) { + mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size); + if (!mem) { + break; + } + + if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr && + (start_addr + size <= mem->start_addr + mem->memory_size) && + (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) { + /* The new slot fits into the existing one and comes with + * identical parameters - nothing to be done. */ + return; + } + + old = *mem; + + /* unregister the overlapping slot */ + mem->memory_size = 0; + err = kvm_set_user_memory_region(s, mem); + if (err) { + fprintf(stderr, "%s: error unregistering overlapping slot: %s\n", + __func__, strerror(-err)); + abort(); + } + + /* Workaround for older KVM versions: we can't join slots, even not by + * unregistering the previous ones and then registering the larger + * slot. We have to maintain the existing fragmentation. Sigh. + * + * This workaround assumes that the new slot starts at the same + * address as the first existing one. If not or if some overlapping + * slot comes around later, we will fail (not seen in practice so far) + * - and actually require a recent KVM version. 
*/ + if (old.start_addr == start_addr && old.memory_size < size && + flags < IO_MEM_UNASSIGNED) { + mem = kvm_alloc_slot(s); + mem->memory_size = old.memory_size; + mem->start_addr = old.start_addr; + mem->phys_offset = old.phys_offset; mem->flags = 0; - kvm_set_user_memory_region(s, mem); - } else if (start_addr >= mem->start_addr && - (start_addr + size) <= (mem->start_addr + - mem->memory_size)) { - KVMSlot slot; - target_phys_addr_t mem_start; - ram_addr_t mem_size, mem_offset; - - /* Not splitting */ - if ((phys_offset - (start_addr - mem->start_addr)) == - mem->phys_offset) - return; + err = kvm_set_user_memory_region(s, mem); + if (err) { + fprintf(stderr, "%s: error updating slot: %s\n", __func__, + strerror(-err)); + abort(); + } - /* unregister whole slot */ - memcpy(&slot, mem, sizeof(slot)); - mem->memory_size = 0; - kvm_set_user_memory_region(s, mem); + start_addr += old.memory_size; + phys_offset += old.memory_size; + size -= old.memory_size; + continue; + } - /* register prefix slot */ - mem_start = slot.start_addr; - mem_size = start_addr - slot.start_addr; - mem_offset = slot.phys_offset; - if (mem_size) - kvm_set_phys_mem(mem_start, mem_size, mem_offset); + /* register prefix slot */ + if (old.start_addr < start_addr) { + mem = kvm_alloc_slot(s); + mem->memory_size = start_addr - old.start_addr; + mem->start_addr = old.start_addr; + mem->phys_offset = old.phys_offset; + mem->flags = 0; - /* register new slot */ - kvm_set_phys_mem(start_addr, size, phys_offset); + err = kvm_set_user_memory_region(s, mem); + if (err) { + fprintf(stderr, "%s: error registering prefix slot: %s\n", + __func__, strerror(-err)); + abort(); + } + } - /* register suffix slot */ - mem_start = start_addr + size; - mem_offset += mem_size + size; - mem_size = slot.memory_size - mem_size - size; - if (mem_size) - kvm_set_phys_mem(mem_start, mem_size, mem_offset); + /* register suffix slot */ + if (old.start_addr + old.memory_size > start_addr + size) { + ram_addr_t size_delta; 
- return; - } else { - printf("Registering overlapping slot\n"); - abort(); + mem = kvm_alloc_slot(s); + mem->start_addr = start_addr + size; + size_delta = mem->start_addr - old.start_addr; + mem->memory_size = old.memory_size - size_delta; + mem->phys_offset = old.phys_offset + size_delta; + mem->flags = 0; + + err = kvm_set_user_memory_region(s, mem); + if (err) { + fprintf(stderr, "%s: error registering suffix slot: %s\n", + __func__, strerror(-err)); + abort(); + } } } + + /* in case the KVM bug workaround already "consumed" the new slot */ + if (!size) + return; + /* KVM does not need to know about this memory */ if (flags >= IO_MEM_UNASSIGNED) return; @@ -597,8 +710,12 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr, mem->phys_offset = phys_offset; mem->flags = 0; - kvm_set_user_memory_region(s, mem); - /* FIXME deal with errors */ + err = kvm_set_user_memory_region(s, mem); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, + strerror(-err)); + abort(); + } } int kvm_ioctl(KVMState *s, int type, ...) 
@@ -657,9 +774,182 @@ int kvm_has_sync_mmu(void) #ifdef KVM_CAP_SYNC_MMU KVMState *s = kvm_state; - if (kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0) - return 1; + return kvm_check_extension(s, KVM_CAP_SYNC_MMU); +#else + return 0; +#endif +} + +void kvm_setup_guest_memory(void *start, size_t size) +{ + if (!kvm_has_sync_mmu()) { +#ifdef MADV_DONTFORK + int ret = madvise(start, size, MADV_DONTFORK); + + if (ret) { + perror("madvice"); + exit(1); + } +#else + fprintf(stderr, + "Need MADV_DONTFORK in absence of synchronous KVM MMU\n"); + exit(1); #endif + } +} + +#ifdef KVM_CAP_SET_GUEST_DEBUG +struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, + target_ulong pc) +{ + struct kvm_sw_breakpoint *bp; + + TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) { + if (bp->pc == pc) + return bp; + } + return NULL; +} + +int kvm_sw_breakpoints_active(CPUState *env) +{ + return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints); +} + +int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap) +{ + struct kvm_guest_debug dbg; + + dbg.control = 0; + if (env->singlestep_enabled) + dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + + kvm_arch_update_guest_debug(env, &dbg); + dbg.control |= reinject_trap; + + return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg); +} + +int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, + target_ulong len, int type) +{ + struct kvm_sw_breakpoint *bp; + CPUState *env; + int err; + + if (type == GDB_BREAKPOINT_SW) { + bp = kvm_find_sw_breakpoint(current_env, addr); + if (bp) { + bp->use_count++; + return 0; + } + + bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint)); + if (!bp) + return -ENOMEM; + + bp->pc = addr; + bp->use_count = 1; + err = kvm_arch_insert_sw_breakpoint(current_env, bp); + if (err) { + free(bp); + return err; + } + + TAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints, + bp, entry); + } else { + err = kvm_arch_insert_hw_breakpoint(addr, len, type); + if
(err) + return err; + } + + for (env = first_cpu; env != NULL; env = env->next_cpu) { + err = kvm_update_guest_debug(env, 0); + if (err) + return err; + } + return 0; +} + +int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, + target_ulong len, int type) +{ + struct kvm_sw_breakpoint *bp; + CPUState *env; + int err; + + if (type == GDB_BREAKPOINT_SW) { + bp = kvm_find_sw_breakpoint(current_env, addr); + if (!bp) + return -ENOENT; + if (bp->use_count > 1) { + bp->use_count--; + return 0; + } + + err = kvm_arch_remove_sw_breakpoint(current_env, bp); + if (err) + return err; + + TAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry); + qemu_free(bp); + } else { + err = kvm_arch_remove_hw_breakpoint(addr, len, type); + if (err) + return err; + } + + for (env = first_cpu; env != NULL; env = env->next_cpu) { + err = kvm_update_guest_debug(env, 0); + if (err) + return err; + } return 0; } + +void kvm_remove_all_breakpoints(CPUState *current_env) +{ + struct kvm_sw_breakpoint *bp, *next; + KVMState *s = current_env->kvm_state; + CPUState *env; + + TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) { + if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) { + /* Try harder to find a CPU that currently sees the breakpoint.
*/ + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) + break; + } + } + } + kvm_arch_remove_all_hw_breakpoints(); + + for (env = first_cpu; env != NULL; env = env->next_cpu) + kvm_update_guest_debug(env, 0); +} + +#else /* !KVM_CAP_SET_GUEST_DEBUG */ + +int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap) +{ + return -EINVAL; +} + +int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, + target_ulong len, int type) +{ + return -EINVAL; +} + +int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, + target_ulong len, int type) +{ + return -EINVAL; +} + +void kvm_remove_all_breakpoints(CPUState *current_env) +{ +} +#endif /* !KVM_CAP_SET_GUEST_DEBUG */