From 9366f4186025e1d8fc3bebd41fb714521c170b6f Mon Sep 17 00:00:00 2001 From: aliguori Date: Mon, 6 Oct 2008 14:53:52 +0000 Subject: [PATCH] Introduce v3 of savevm protocol The current savevm/loadvm protocol has some drawbacks. It does not support the ability to do progressive saving which means it cannot be used for live checkpointing or migration. The section sizes are 32-bit integers which means that it will not function when using more than 4GB of memory for a guest. It attempts to seek within the output file which means it cannot be streamed. The current protocol is also pretty lax about how it supports forward compatibility. If a saved section version is greater than what the restore code supports, the restore code generally treats the saved data as being in whatever version it supports. This means that restoring a saved VM on an older version of QEMU will likely result in silent guest failure. This patch introduces a new version of the savevm protocol. It has the following features: * Support for progressive save of sections (for live checkpoint/migration) * An asynchronous API for doing save * Support for interleaving multiple progressive save sections (for future support of memory hot-add/storage migration) * Fully streaming format * Strong section version checking Signed-off-by: Anthony Liguori git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5434 c046a42c-6fe2-441c-8c8c-71466251a162 --- hw/hw.h | 9 ++ sysemu.h | 6 ++ vl.c | 278 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 248 insertions(+), 45 deletions(-) diff --git a/hw/hw.h b/hw/hw.h index c9390c1..e130355 100644 --- a/hw/hw.h +++ b/hw/hw.h @@ -217,6 +217,7 @@ int64_t qemu_ftell(QEMUFile *f); int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence); typedef void SaveStateHandler(QEMUFile *f, void *opaque); +typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque); typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); int 
register_savevm(const char *idstr, @@ -226,6 +227,14 @@ int register_savevm(const char *idstr, LoadStateHandler *load_state, void *opaque); +int register_savevm_live(const char *idstr, + int instance_id, + int version_id, + SaveLiveStateHandler *save_live_state, + SaveStateHandler *save_state, + LoadStateHandler *load_state, + void *opaque); + typedef void QEMUResetHandler(void *opaque); void qemu_register_reset(QEMUResetHandler *func, void *opaque); diff --git a/sysemu.h b/sysemu.h index 0547d93..0cd0e1b 100644 --- a/sysemu.h +++ b/sysemu.h @@ -50,6 +50,12 @@ void do_info_snapshots(void); void main_loop_wait(int timeout); +int qemu_savevm_state_begin(QEMUFile *f); +int qemu_savevm_state_iterate(QEMUFile *f); +int qemu_savevm_state_complete(QEMUFile *f); +int qemu_savevm_state(QEMUFile *f); +int qemu_loadvm_state(QEMUFile *f); + /* Polling handling */ /* return TRUE if no sleep should be done afterwards */ diff --git a/vl.c b/vl.c index c94fdc0..5ec93c9 100644 --- a/vl.c +++ b/vl.c @@ -6579,6 +6579,8 @@ typedef struct SaveStateEntry { char idstr[256]; int instance_id; int version_id; + int section_id; + SaveLiveStateHandler *save_live_state; SaveStateHandler *save_state; LoadStateHandler *load_state; void *opaque; @@ -6591,14 +6593,16 @@ static SaveStateEntry *first_se; of the system, so instance_id should be removed/replaced. Meanwhile pass -1 as instance_id if you do not already have a clearly distinguishing id for all instances of your device class. 
*/ -int register_savevm(const char *idstr, - int instance_id, - int version_id, - SaveStateHandler *save_state, - LoadStateHandler *load_state, - void *opaque) +int register_savevm_live(const char *idstr, + int instance_id, + int version_id, + SaveLiveStateHandler *save_live_state, + SaveStateHandler *save_state, + LoadStateHandler *load_state, + void *opaque) { SaveStateEntry *se, **pse; + static int global_section_id; se = qemu_malloc(sizeof(SaveStateEntry)); if (!se) @@ -6606,6 +6610,8 @@ int register_savevm(const char *idstr, pstrcpy(se->idstr, sizeof(se->idstr), idstr); se->instance_id = (instance_id == -1) ? 0 : instance_id; se->version_id = version_id; + se->section_id = global_section_id++; + se->save_live_state = save_live_state; se->save_state = save_state; se->load_state = load_state; se->opaque = opaque; @@ -6624,25 +6630,105 @@ int register_savevm(const char *idstr, return 0; } -#define QEMU_VM_FILE_MAGIC 0x5145564d -#define QEMU_VM_FILE_VERSION 0x00000002 +int register_savevm(const char *idstr, + int instance_id, + int version_id, + SaveStateHandler *save_state, + LoadStateHandler *load_state, + void *opaque) +{ + return register_savevm_live(idstr, instance_id, version_id, + NULL, save_state, load_state, opaque); +} + +#define QEMU_VM_FILE_MAGIC 0x5145564d +#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002 +#define QEMU_VM_FILE_VERSION 0x00000003 -static int qemu_savevm_state(QEMUFile *f) +#define QEMU_VM_EOF 0x00 +#define QEMU_VM_SECTION_START 0x01 +#define QEMU_VM_SECTION_PART 0x02 +#define QEMU_VM_SECTION_END 0x03 +#define QEMU_VM_SECTION_FULL 0x04 + +int qemu_savevm_state_begin(QEMUFile *f) { SaveStateEntry *se; - int len, ret; - int64_t cur_pos, len_pos, total_len_pos; qemu_put_be32(f, QEMU_VM_FILE_MAGIC); qemu_put_be32(f, QEMU_VM_FILE_VERSION); - total_len_pos = qemu_ftell(f); - qemu_put_be64(f, 0); /* total size */ + + for (se = first_se; se != NULL; se = se->next) { + int len; + + if (se->save_live_state == NULL) + continue; + + /* Section type 
*/ + qemu_put_byte(f, QEMU_VM_SECTION_START); + qemu_put_be32(f, se->section_id); + + /* ID string */ + len = strlen(se->idstr); + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)se->idstr, len); + + qemu_put_be32(f, se->instance_id); + qemu_put_be32(f, se->version_id); + + se->save_live_state(f, QEMU_VM_SECTION_START, se->opaque); + } + + return 0; +} + +int qemu_savevm_state_iterate(QEMUFile *f) +{ + SaveStateEntry *se; + int ret = 0; + + for (se = first_se; se != NULL; se = se->next) { + if (se->save_live_state == NULL) + continue; + + /* Section type */ + qemu_put_byte(f, QEMU_VM_SECTION_PART); + qemu_put_be32(f, se->section_id); + + ret |= se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque); + } + + if (ret) + return 1; + + return 0; +} + +int qemu_savevm_state_complete(QEMUFile *f) +{ + SaveStateEntry *se; + + for (se = first_se; se != NULL; se = se->next) { + if (se->save_live_state == NULL) + continue; + + /* Section type */ + qemu_put_byte(f, QEMU_VM_SECTION_END); + qemu_put_be32(f, se->section_id); + + se->save_live_state(f, QEMU_VM_SECTION_END, se->opaque); + } for(se = first_se; se != NULL; se = se->next) { + int len; + if (se->save_state == NULL) - /* this one has a loader only, for backwards compatibility */ continue; + /* Section type */ + qemu_put_byte(f, QEMU_VM_SECTION_FULL); + qemu_put_be32(f, se->section_id); + /* ID string */ len = strlen(se->idstr); qemu_put_byte(f, len); @@ -6651,24 +6737,37 @@ static int qemu_savevm_state(QEMUFile *f) qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); - /* record size: filled later */ - len_pos = qemu_ftell(f); - qemu_put_be32(f, 0); se->save_state(f, se->opaque); - - /* fill record size */ - cur_pos = qemu_ftell(f); - len = cur_pos - len_pos - 4; - qemu_fseek(f, len_pos, SEEK_SET); - qemu_put_be32(f, len); - qemu_fseek(f, cur_pos, SEEK_SET); } - cur_pos = qemu_ftell(f); - qemu_fseek(f, total_len_pos, SEEK_SET); - qemu_put_be64(f, cur_pos - total_len_pos - 8); - qemu_fseek(f, 
cur_pos, SEEK_SET); - ret = 0; + qemu_put_byte(f, QEMU_VM_EOF); + + return 0; +} + +int qemu_savevm_state(QEMUFile *f) +{ + int saved_vm_running; + int ret; + + saved_vm_running = vm_running; + vm_stop(0); + + ret = qemu_savevm_state_begin(f); + if (ret < 0) + goto out; + + do { + ret = qemu_savevm_state_iterate(f); + if (ret < 0) + goto out; + } while (ret == 1); + + ret = qemu_savevm_state_complete(f); + +out: + if (saved_vm_running) + vm_start(); return ret; } @@ -6684,23 +6783,20 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id) return NULL; } -static int qemu_loadvm_state(QEMUFile *f) +typedef struct LoadStateEntry { + SaveStateEntry *se; + int section_id; + int version_id; + struct LoadStateEntry *next; +} LoadStateEntry; + +static int qemu_loadvm_state_v2(QEMUFile *f) { SaveStateEntry *se; int len, ret, instance_id, record_len, version_id; int64_t total_len, end_pos, cur_pos; - unsigned int v; char idstr[256]; - v = qemu_get_be32(f); - if (v != QEMU_VM_FILE_MAGIC) - goto fail; - v = qemu_get_be32(f); - if (v != QEMU_VM_FILE_VERSION) { - fail: - ret = -1; - goto the_end; - } total_len = qemu_get_be64(f); end_pos = total_len + qemu_ftell(f); for(;;) { @@ -6712,10 +6808,6 @@ static int qemu_loadvm_state(QEMUFile *f) instance_id = qemu_get_be32(f); version_id = qemu_get_be32(f); record_len = qemu_get_be32(f); -#if 0 - printf("idstr=%s instance=0x%x version=%d len=%d\n", - idstr, instance_id, version_id, record_len); -#endif cur_pos = qemu_ftell(f); se = find_se(idstr, instance_id); if (!se) { @@ -6731,8 +6823,104 @@ static int qemu_loadvm_state(QEMUFile *f) /* always seek to exact end of record */ qemu_fseek(f, cur_pos + record_len, SEEK_SET); } + return 0; +} + +int qemu_loadvm_state(QEMUFile *f) +{ + LoadStateEntry *first_le = NULL; + uint8_t section_type; + unsigned int v; + int ret; + + v = qemu_get_be32(f); + if (v != QEMU_VM_FILE_MAGIC) + return -EINVAL; + + v = qemu_get_be32(f); + if (v == QEMU_VM_FILE_VERSION_COMPAT) + return 
qemu_loadvm_state_v2(f); + if (v != QEMU_VM_FILE_VERSION) + return -ENOTSUP; + + while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { + uint32_t instance_id, version_id, section_id; + LoadStateEntry *le; + SaveStateEntry *se; + char idstr[257]; + int len; + + switch (section_type) { + case QEMU_VM_SECTION_START: + case QEMU_VM_SECTION_FULL: + /* Read section start */ + section_id = qemu_get_be32(f); + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)idstr, len); + idstr[len] = 0; + instance_id = qemu_get_be32(f); + version_id = qemu_get_be32(f); + + /* Find savevm section */ + se = find_se(idstr, instance_id); + if (se == NULL) { + fprintf(stderr, "Unknown savevm section or instance '%s' %d\n", idstr, instance_id); + ret = -EINVAL; + goto out; + } + + /* Validate version */ + if (version_id > se->version_id) { + fprintf(stderr, "savevm: unsupported version %d for '%s' v%d\n", + version_id, idstr, se->version_id); + ret = -EINVAL; + goto out; + } + + /* Add entry */ + le = qemu_mallocz(sizeof(*le)); + if (le == NULL) { + ret = -ENOMEM; + goto out; + } + + le->se = se; + le->section_id = section_id; + le->version_id = version_id; + le->next = first_le; + first_le = le; + + le->se->load_state(f, le->se->opaque, le->version_id); + break; + case QEMU_VM_SECTION_PART: + case QEMU_VM_SECTION_END: + section_id = qemu_get_be32(f); + + for (le = first_le; le && le->section_id != section_id; le = le->next); + if (le == NULL) { + fprintf(stderr, "Unknown savevm section %d\n", section_id); + ret = -EINVAL; + goto out; + } + + le->se->load_state(f, le->se->opaque, le->version_id); + break; + default: + fprintf(stderr, "Unknown savevm section type %d\n", section_type); + ret = -EINVAL; + goto out; + } + } + ret = 0; - the_end: + +out: + while (first_le) { + LoadStateEntry *le = first_le; + first_le = first_le->next; + qemu_free(le); + } + return ret; } -- 1.7.9.5