vga: change tabs to spaces
[qemu] / hw / apic.c
index 276dd45..2c414c1 100644 (file)
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -1,6 +1,6 @@
 /*
  *  APIC support
- * 
+ *
  *  Copyright (c) 2004-2005 Fabrice Bellard
  *
  * This library is free software; you can redistribute it and/or
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
  */
-#include "vl.h"
+#include "hw.h"
+#include "pc.h"
+#include "pci.h"
+#include "msix.h"
+#include "qemu-timer.h"
+#include "host-utils.h"
+#include "kvm.h"
 
 //#define DEBUG_APIC
-//#define DEBUG_IOAPIC
 
 /* APIC Local Vector Table */
 #define APIC_LVT_TIMER   0
 #define        APIC_INPUT_POLARITY             (1<<13)
 #define        APIC_SEND_PENDING               (1<<12)
 
-#define IOAPIC_NUM_PINS                        0x18
-
 #define ESR_ILLEGAL_ADDRESS (1 << 7)
 
 #define APIC_SV_ENABLE (1 << 8)
 
+#define MAX_APICS 255
+#define MAX_APIC_WORDS 8
+
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT          0
+#define MSI_DATA_VECTOR_MASK           0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT   8
+#define MSI_DATA_TRIGGER_SHIFT         15
+#define MSI_DATA_LEVEL_SHIFT           14
+#define MSI_ADDR_DEST_MODE_SHIFT       2
+#define MSI_ADDR_DEST_ID_SHIFT         12
+#define        MSI_ADDR_DEST_ID_MASK           0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -80,28 +98,131 @@ typedef struct APICState {
     int count_shift;
     uint32_t initial_count;
     int64_t initial_count_load_time, next_time;
+    uint32_t idx;
     QEMUTimer *timer;
-
-    struct APICState *next_apic;
+    int sipi_vector;
+    int wait_for_sipi;
 } APICState;
 
-struct IOAPICState {
-    uint8_t id;
-    uint8_t ioregsel;
-
-    uint32_t irr;
-    uint64_t ioredtbl[IOAPIC_NUM_PINS];
-};
-
 static int apic_io_memory;
-static APICState *first_local_apic = NULL;
-static int last_apic_id = 0;
+static APICState *local_apics[MAX_APICS + 1];
+static int last_apic_idx = 0;
+static int apic_irq_delivered;
+
 
-static void apic_init_ipi(APICState *s);
 static void apic_set_irq(APICState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICState *s);
+static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
+                                      uint8_t dest, uint8_t dest_mode);
+
+/* Find first bit starting from msb */
+static int fls_bit(uint32_t value)
+{
+    return 31 - clz32(value);
+}
+
+/* Find first bit starting from lsb */
+static int ffs_bit(uint32_t value)
+{
+    return ctz32(value);
+}
+
+static inline void set_bit(uint32_t *tab, int index)
+{
+    int i, mask;
+    i = index >> 5;
+    mask = 1 << (index & 0x1f);
+    tab[i] |= mask;
+}
+
+static inline void reset_bit(uint32_t *tab, int index)
+{
+    int i, mask;
+    i = index >> 5;
+    mask = 1 << (index & 0x1f);
+    tab[i] &= ~mask;
+}
 
-static void apic_bus_deliver(uint32_t deliver_bitmask, uint8_t delivery_mode,
+static inline int get_bit(uint32_t *tab, int index)
+{
+    int i, mask;
+    i = index >> 5;
+    mask = 1 << (index & 0x1f);
+    return !!(tab[i] & mask);
+}
+
+static void apic_local_deliver(CPUState *env, int vector)
+{
+    APICState *s = env->apic_state;
+    uint32_t lvt = s->lvt[vector];
+    int trigger_mode;
+
+    if (lvt & APIC_LVT_MASKED)
+        return;
+
+    switch ((lvt >> 8) & 7) {
+    case APIC_DM_SMI:
+        cpu_interrupt(env, CPU_INTERRUPT_SMI);
+        break;
+
+    case APIC_DM_NMI:
+        cpu_interrupt(env, CPU_INTERRUPT_NMI);
+        break;
+
+    case APIC_DM_EXTINT:
+        cpu_interrupt(env, CPU_INTERRUPT_HARD);
+        break;
+
+    case APIC_DM_FIXED:
+        trigger_mode = APIC_TRIGGER_EDGE;
+        if ((vector == APIC_LVT_LINT0 || vector == APIC_LVT_LINT1) &&
+            (lvt & APIC_LVT_LEVEL_TRIGGER))
+            trigger_mode = APIC_TRIGGER_LEVEL;
+        apic_set_irq(s, lvt & 0xff, trigger_mode);
+    }
+}
+
+void apic_deliver_pic_intr(CPUState *env, int level)
+{
+    if (level)
+        apic_local_deliver(env, APIC_LVT_LINT0);
+    else {
+        APICState *s = env->apic_state;
+        uint32_t lvt = s->lvt[APIC_LVT_LINT0];
+
+        switch ((lvt >> 8) & 7) {
+        case APIC_DM_FIXED:
+            if (!(lvt & APIC_LVT_LEVEL_TRIGGER))
+                break;
+            reset_bit(s->irr, lvt & 0xff);
+            /* fall through */
+        case APIC_DM_EXTINT:
+            cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+            break;
+        }
+    }
+}
+
+#define foreach_apic(apic, deliver_bitmask, code) \
+{\
+    int __i, __j, __mask;\
+    for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\
+        __mask = deliver_bitmask[__i];\
+        if (__mask) {\
+            for(__j = 0; __j < 32; __j++) {\
+                if (__mask & (1 << __j)) {\
+                    apic = local_apics[__i * 32 + __j];\
+                    if (apic) {\
+                        code;\
+                    }\
+                }\
+            }\
+        }\
+    }\
+}
+
+static void apic_bus_deliver(const uint32_t *deliver_bitmask,
+                             uint8_t delivery_mode,
                              uint8_t vector_num, uint8_t polarity,
                              uint8_t trigger_mode)
 {
@@ -109,23 +230,44 @@ static void apic_bus_deliver(uint32_t deliver_bitmask, uint8_t delivery_mode,
 
     switch (delivery_mode) {
         case APIC_DM_LOWPRI:
+            /* XXX: search for focus processor, arbitration */
+            {
+                int i, d;
+                d = -1;
+                for(i = 0; i < MAX_APIC_WORDS; i++) {
+                    if (deliver_bitmask[i]) {
+                        d = i * 32 + ffs_bit(deliver_bitmask[i]);
+                        break;
+                    }
+                }
+                if (d >= 0) {
+                    apic_iter = local_apics[d];
+                    if (apic_iter) {
+                        apic_set_irq(apic_iter, vector_num, trigger_mode);
+                    }
+                }
+            }
+            return;
+
         case APIC_DM_FIXED:
-            /* XXX: arbitration */
             break;
 
         case APIC_DM_SMI:
+            foreach_apic(apic_iter, deliver_bitmask,
+                cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_SMI) );
+            return;
+
         case APIC_DM_NMI:
-            break;
+            foreach_apic(apic_iter, deliver_bitmask,
+                cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_NMI) );
+            return;
 
         case APIC_DM_INIT:
             /* normal INIT IPI sent to processors */
-            for (apic_iter = first_local_apic; apic_iter != NULL;
-                 apic_iter = apic_iter->next_apic) {
-                if (deliver_bitmask & (1 << apic_iter->id))
-                    apic_init_ipi(apic_iter);
-            }
+            foreach_apic(apic_iter, deliver_bitmask,
+                         cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_INIT) );
             return;
-    
+
         case APIC_DM_EXTINT:
             /* handled in I/O APIC code */
             break;
@@ -134,20 +276,30 @@ static void apic_bus_deliver(uint32_t deliver_bitmask, uint8_t delivery_mode,
             return;
     }
 
-    for (apic_iter = first_local_apic; apic_iter != NULL;
-         apic_iter = apic_iter->next_apic) {
-        if (deliver_bitmask & (1 << apic_iter->id))
-            apic_set_irq(apic_iter, vector_num, trigger_mode);
-    }
+    foreach_apic(apic_iter, deliver_bitmask,
+                 apic_set_irq(apic_iter, vector_num, trigger_mode) );
+}
+
+void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
+                      uint8_t delivery_mode, uint8_t vector_num,
+                      uint8_t polarity, uint8_t trigger_mode)
+{
+    uint32_t deliver_bitmask[MAX_APIC_WORDS];
+
+    apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);
+    apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, polarity,
+                     trigger_mode);
 }
 
 void cpu_set_apic_base(CPUState *env, uint64_t val)
 {
     APICState *s = env->apic_state;
 #ifdef DEBUG_APIC
-    printf("cpu_set_apic_base: %016llx\n", val);
+    printf("cpu_set_apic_base: %016" PRIx64 "\n", val);
 #endif
-    s->apicbase = (val & 0xfffff000) | 
+    if (!s)
+        return;
+    s->apicbase = (val & 0xfffff000) |
         (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
     /* if disabled, cannot be enabled again */
     if (!(val & MSR_IA32_APICBASE_ENABLE)) {
@@ -161,14 +313,17 @@ uint64_t cpu_get_apic_base(CPUState *env)
 {
     APICState *s = env->apic_state;
 #ifdef DEBUG_APIC
-    printf("cpu_get_apic_base: %016llx\n", (uint64_t)s->apicbase);
+    printf("cpu_get_apic_base: %016" PRIx64 "\n",
+           s ? (uint64_t)s->apicbase: 0);
 #endif
-    return s->apicbase;
+    return s ? s->apicbase : 0;
 }
 
 void cpu_set_apic_tpr(CPUX86State *env, uint8_t val)
 {
     APICState *s = env->apic_state;
+    if (!s)
+        return;
     s->tpr = (val & 0x0f) << 4;
     apic_update_irq(s);
 }
@@ -176,43 +331,7 @@ void cpu_set_apic_tpr(CPUX86State *env, uint8_t val)
 uint8_t cpu_get_apic_tpr(CPUX86State *env)
 {
     APICState *s = env->apic_state;
-    return s->tpr >> 4;
-}
-
-static int fls_bit(int value)
-{
-    unsigned int ret = 0;
-
-#ifdef HOST_I386
-    __asm__ __volatile__ ("bsr %1, %0\n" : "+r" (ret) : "rm" (value));
-    return ret;
-#else
-    if (value > 0xffff)
-        value >>= 16, ret = 16;
-    if (value > 0xff)
-        value >>= 8, ret += 8;
-    if (value > 0xf)
-        value >>= 4, ret += 4;
-    if (value > 0x3)
-        value >>= 2, ret += 2;
-    return ret + (value >> 1);
-#endif
-}
-
-static inline void set_bit(uint32_t *tab, int index)
-{
-    int i, mask;
-    i = index >> 5;
-    mask = 1 << (index & 0x1f);
-    tab[i] |= mask;
-}
-
-static inline void reset_bit(uint32_t *tab, int index)
-{
-    int i, mask;
-    i = index >> 5;
-    mask = 1 << (index & 0x1f);
-    tab[i] &= ~mask;
+    return s ? s->tpr >> 4 : 0;
 }
 
 /* return -1 if no bit is set */
@@ -264,8 +383,20 @@ static void apic_update_irq(APICState *s)
     cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
 }
 
+void apic_reset_irq_delivered(void)
+{
+    apic_irq_delivered = 0;
+}
+
+int apic_get_irq_delivered(void)
+{
+    return apic_irq_delivered;
+}
+
 static void apic_set_irq(APICState *s, int vector_num, int trigger_mode)
 {
+    apic_irq_delivered += !get_bit(s->irr, vector_num);
+
     set_bit(s->irr, vector_num);
     if (trigger_mode)
         set_bit(s->tmr, vector_num);
@@ -286,35 +417,67 @@ static void apic_eoi(APICState *s)
     apic_update_irq(s);
 }
 
-static uint32_t apic_get_delivery_bitmask(uint8_t dest, uint8_t dest_mode)
+static int apic_find_dest(uint8_t dest)
+{
+    APICState *apic = local_apics[dest];
+    int i;
+
+    if (apic && apic->id == dest)
+        return dest;  /* shortcut in case apic->id == apic->idx */
+
+    for (i = 0; i < MAX_APICS; i++) {
+        apic = local_apics[i];
+       if (apic && apic->id == dest)
+            return i;
+    }
+
+    return -1;
+}
+
+static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
+                                      uint8_t dest, uint8_t dest_mode)
 {
-    uint32_t mask = 0;
     APICState *apic_iter;
+    int i;
 
     if (dest_mode == 0) {
-        if (dest == 0xff)
-            mask = 0xff;
-        else
-            mask = 1 << dest;
+        if (dest == 0xff) {
+            memset(deliver_bitmask, 0xff, MAX_APIC_WORDS * sizeof(uint32_t));
+        } else {
+            int idx = apic_find_dest(dest);
+            memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t));
+            if (idx >= 0)
+                set_bit(deliver_bitmask, idx);
+        }
     } else {
         /* XXX: cluster mode */
-        for (apic_iter = first_local_apic; apic_iter != NULL;
-             apic_iter = apic_iter->next_apic) {
-            if (dest & apic_iter->log_dest)
-                mask |= (1 << apic_iter->id);
+        memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t));
+        for(i = 0; i < MAX_APICS; i++) {
+            apic_iter = local_apics[i];
+            if (apic_iter) {
+                if (apic_iter->dest_mode == 0xf) {
+                    if (dest & apic_iter->log_dest)
+                        set_bit(deliver_bitmask, i);
+                } else if (apic_iter->dest_mode == 0x0) {
+                    if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) &&
+                        (dest & apic_iter->log_dest & 0x0f)) {
+                        set_bit(deliver_bitmask, i);
+                    }
+                }
+            }
         }
     }
-
-    return mask;
 }
 
 
-static void apic_init_ipi(APICState *s)
+void apic_init_reset(CPUState *env)
 {
+    APICState *s = env->apic_state;
     int i;
 
-    for(i = 0; i < APIC_LVT_NB; i++)
-        s->lvt[i] = 1 << 16; /* mask LVT */
+    if (!s)
+        return;
+
     s->tpr = 0;
     s->spurious_vec = 0xff;
     s->log_dest = 0;
@@ -322,7 +485,8 @@ static void apic_init_ipi(APICState *s)
     memset(s->isr, 0, sizeof(s->isr));
     memset(s->tmr, 0, sizeof(s->tmr));
     memset(s->irr, 0, sizeof(s->irr));
-    memset(s->lvt, 0, sizeof(s->lvt));
+    for(i = 0; i < APIC_LVT_NB; i++)
+        s->lvt[i] = 1 << 16; /* mask LVT */
     s->esr = 0;
     memset(s->icr, 0, sizeof(s->icr));
     s->divide_conf = 0;
@@ -330,72 +494,74 @@ static void apic_init_ipi(APICState *s)
     s->initial_count = 0;
     s->initial_count_load_time = 0;
     s->next_time = 0;
+    s->wait_for_sipi = 1;
+
+    env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
 }
 
-/* send a SIPI message to the CPU to start it */
 static void apic_startup(APICState *s, int vector_num)
 {
-    CPUState *env = s->cpu_env;
-    if (!env->cpu_halted)
+    s->sipi_vector = vector_num;
+    cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
+}
+
+void apic_sipi(CPUState *env)
+{
+    APICState *s = env->apic_state;
+
+    cpu_reset_interrupt(env, CPU_INTERRUPT_SIPI);
+
+    if (!s->wait_for_sipi)
         return;
+
     env->eip = 0;
-    cpu_x86_load_seg_cache(env, R_CS, vector_num << 8, vector_num << 12, 
+    cpu_x86_load_seg_cache(env, R_CS, s->sipi_vector << 8, s->sipi_vector << 12,
                            0xffff, 0);
-    env->cpu_halted = 0;
+    env->halted = 0;
+    s->wait_for_sipi = 0;
 }
 
 static void apic_deliver(APICState *s, uint8_t dest, uint8_t dest_mode,
                          uint8_t delivery_mode, uint8_t vector_num,
                          uint8_t polarity, uint8_t trigger_mode)
 {
-    uint32_t deliver_bitmask = 0;
+    uint32_t deliver_bitmask[MAX_APIC_WORDS];
     int dest_shorthand = (s->icr[0] >> 18) & 3;
     APICState *apic_iter;
 
     switch (dest_shorthand) {
-        case 0:
-            deliver_bitmask = apic_get_delivery_bitmask(dest, dest_mode);
-            break;
-        case 1:
-            deliver_bitmask = (1 << s->id);
-            break;
-        case 2:
-            deliver_bitmask = 0xffffffff;
-            break;
-        case 3:
-            deliver_bitmask = 0xffffffff & ~(1 << s->id);
-            break;
+    case 0:
+        apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);
+        break;
+    case 1:
+        memset(deliver_bitmask, 0x00, sizeof(deliver_bitmask));
+        set_bit(deliver_bitmask, s->idx);
+        break;
+    case 2:
+        memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask));
+        break;
+    case 3:
+        memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask));
+        reset_bit(deliver_bitmask, s->idx);
+        break;
     }
 
     switch (delivery_mode) {
-        case APIC_DM_LOWPRI:
-            /* XXX: search for focus processor, arbitration */
-            dest = s->id;
-            break;
-
         case APIC_DM_INIT:
             {
                 int trig_mode = (s->icr[0] >> 15) & 1;
                 int level = (s->icr[0] >> 14) & 1;
                 if (level == 0 && trig_mode == 1) {
-                    for (apic_iter = first_local_apic; apic_iter != NULL;
-                         apic_iter = apic_iter->next_apic) {
-                        if (deliver_bitmask & (1 << apic_iter->id)) {
-                            apic_iter->arb_id = apic_iter->id;
-                        }
-                    }
+                    foreach_apic(apic_iter, deliver_bitmask,
+                                 apic_iter->arb_id = apic_iter->id );
                     return;
                 }
             }
             break;
 
         case APIC_DM_SIPI:
-            for (apic_iter = first_local_apic; apic_iter != NULL;
-                 apic_iter = apic_iter->next_apic) {
-                if (deliver_bitmask & (1 << apic_iter->id)) {
-                    apic_startup(apic_iter, vector_num);
-                }
-            }
+            foreach_apic(apic_iter, deliver_bitmask,
+                         apic_startup(apic_iter, vector_num) );
             return;
     }
 
@@ -414,24 +580,41 @@ int apic_get_interrupt(CPUState *env)
         return -1;
     if (!(s->spurious_vec & APIC_SV_ENABLE))
         return -1;
-    
+
     /* XXX: spurious IRQ handling */
     intno = get_highest_priority_int(s->irr);
     if (intno < 0)
         return -1;
-    reset_bit(s->irr, intno);
     if (s->tpr && intno <= s->tpr)
         return s->spurious_vec & 0xff;
+    reset_bit(s->irr, intno);
     set_bit(s->isr, intno);
     apic_update_irq(s);
     return intno;
 }
 
+int apic_accept_pic_intr(CPUState *env)
+{
+    APICState *s = env->apic_state;
+    uint32_t lvt0;
+
+    if (!s)
+        return -1;
+
+    lvt0 = s->lvt[APIC_LVT_LINT0];
+
+    if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) == 0 ||
+        (lvt0 & APIC_LVT_MASKED) == 0)
+        return 1;
+
+    return 0;
+}
+
 static uint32_t apic_get_current_count(APICState *s)
 {
     int64_t d;
     uint32_t val;
-    d = (qemu_get_clock(vm_clock) - s->initial_count_load_time) >> 
+    d = (qemu_get_clock(vm_clock) - s->initial_count_load_time) >>
         s->count_shift;
     if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) {
         /* periodic */
@@ -448,11 +631,13 @@ static uint32_t apic_get_current_count(APICState *s)
 static void apic_timer_update(APICState *s, int64_t current_time)
 {
     int64_t next_time, d;
-    
+
     if (!(s->lvt[APIC_LVT_TIMER] & APIC_LVT_MASKED)) {
-        d = (current_time - s->initial_count_load_time) >> 
+        d = (current_time - s->initial_count_load_time) >>
             s->count_shift;
         if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) {
+            if (!s->initial_count)
+                goto no_timer;
             d = ((d / ((uint64_t)s->initial_count + 1)) + 1) * ((uint64_t)s->initial_count + 1);
         } else {
             if (d >= s->initial_count)
@@ -472,9 +657,7 @@ static void apic_timer(void *opaque)
 {
     APICState *s = opaque;
 
-    if (!(s->lvt[APIC_LVT_TIMER] & APIC_LVT_MASKED)) {
-        apic_set_irq(s, s->lvt[APIC_LVT_TIMER] & 0xff, APIC_TRIGGER_EDGE);
-    }
+    apic_local_deliver(s->cpu_env, APIC_LVT_TIMER);
     apic_timer_update(s, s->next_time);
 }
 
@@ -526,6 +709,9 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
         /* ppr */
         val = apic_get_ppr(s);
         break;
+    case 0x0b:
+        val = 0;
+        break;
     case 0x0d:
         val = s->log_dest << 24;
         break;
@@ -574,11 +760,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32 data)
+{
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on PCI bus
+         * APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -589,7 +795,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -659,42 +864,13 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     }
 }
 
-static void apic_save(QEMUFile *f, void *opaque)
+/* This function is only used for old state version 1 and 2 */
+static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
 {
     APICState *s = opaque;
     int i;
 
-    qemu_put_be32s(f, &s->apicbase);
-    qemu_put_8s(f, &s->id);
-    qemu_put_8s(f, &s->arb_id);
-    qemu_put_8s(f, &s->tpr);
-    qemu_put_be32s(f, &s->spurious_vec);
-    qemu_put_8s(f, &s->log_dest);
-    qemu_put_8s(f, &s->dest_mode);
-    for (i = 0; i < 8; i++) {
-        qemu_put_be32s(f, &s->isr[i]);
-        qemu_put_be32s(f, &s->tmr[i]);
-        qemu_put_be32s(f, &s->irr[i]);
-    }
-    for (i = 0; i < APIC_LVT_NB; i++) {
-        qemu_put_be32s(f, &s->lvt[i]);
-    }
-    qemu_put_be32s(f, &s->esr);
-    qemu_put_be32s(f, &s->icr[0]);
-    qemu_put_be32s(f, &s->icr[1]);
-    qemu_put_be32s(f, &s->divide_conf);
-    qemu_put_be32s(f, &s->count_shift);
-    qemu_put_be32s(f, &s->initial_count);
-    qemu_put_be64s(f, &s->initial_count_load_time);
-    qemu_put_be64s(f, &s->next_time);
-}
-
-static int apic_load(QEMUFile *f, void *opaque, int version_id)
-{
-    APICState *s = opaque;
-    int i;
-
-    if (version_id != 1)
+    if (version_id > 2)
         return -EINVAL;
 
     /* XXX: what if the base changes? (registered memory regions) */
@@ -717,26 +893,77 @@ static int apic_load(QEMUFile *f, void *opaque, int version_id)
     qemu_get_be32s(f, &s->icr[0]);
     qemu_get_be32s(f, &s->icr[1]);
     qemu_get_be32s(f, &s->divide_conf);
-    qemu_get_be32s(f, &s->count_shift);
+    s->count_shift=qemu_get_be32(f);
     qemu_get_be32s(f, &s->initial_count);
-    qemu_get_be64s(f, &s->initial_count_load_time);
-    qemu_get_be64s(f, &s->next_time);
+    s->initial_count_load_time=qemu_get_be64(f);
+    s->next_time=qemu_get_be64(f);
+
+    if (version_id >= 2)
+        qemu_get_timer(f, s->timer);
     return 0;
 }
 
+static const VMStateDescription vmstate_apic = {
+    .name = "apic",
+    .version_id = 3,
+    .minimum_version_id = 3,
+    .minimum_version_id_old = 1,
+    .load_state_old = apic_load_old,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT32(apicbase, APICState),
+        VMSTATE_UINT8(id, APICState),
+        VMSTATE_UINT8(arb_id, APICState),
+        VMSTATE_UINT8(tpr, APICState),
+        VMSTATE_UINT32(spurious_vec, APICState),
+        VMSTATE_UINT8(log_dest, APICState),
+        VMSTATE_UINT8(dest_mode, APICState),
+        VMSTATE_UINT32_ARRAY(isr, APICState, 8),
+        VMSTATE_UINT32_ARRAY(tmr, APICState, 8),
+        VMSTATE_UINT32_ARRAY(irr, APICState, 8),
+        VMSTATE_UINT32_ARRAY(lvt, APICState, APIC_LVT_NB),
+        VMSTATE_UINT32(esr, APICState),
+        VMSTATE_UINT32_ARRAY(icr, APICState, 2),
+        VMSTATE_UINT32(divide_conf, APICState),
+        VMSTATE_INT32(count_shift, APICState),
+        VMSTATE_UINT32(initial_count, APICState),
+        VMSTATE_INT64(initial_count_load_time, APICState),
+        VMSTATE_INT64(next_time, APICState),
+        VMSTATE_TIMER(timer, APICState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static void apic_reset(void *opaque)
 {
     APICState *s = opaque;
-    apic_init_ipi(s);
+    int bsp;
+
+    cpu_synchronize_state(s->cpu_env);
+
+    bsp = cpu_is_bsp(s->cpu_env);
+    s->apicbase = 0xfee00000 |
+        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
+
+    cpu_reset(s->cpu_env);
+    apic_init_reset(s->cpu_env);
+
+    if (bsp) {
+        /*
+         * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization
+         * time typically by BIOS, so PIC interrupt can be delivered to the
+         * processor when local APIC is enabled.
+         */
+        s->lvt[APIC_LVT_LINT0] = 0x700;
+    }
 }
 
-static CPUReadMemoryFunc *apic_mem_read[3] = {
+static CPUReadMemoryFunc * const apic_mem_read[3] = {
     apic_mem_readb,
     apic_mem_readw,
     apic_mem_readl,
 };
 
-static CPUWriteMemoryFunc *apic_mem_write[3] = {
+static CPUWriteMemoryFunc * const apic_mem_write[3] = {
     apic_mem_writeb,
     apic_mem_writew,
     apic_mem_writel,
@@ -746,236 +973,32 @@ int apic_init(CPUState *env)
 {
     APICState *s;
 
-    s = qemu_mallocz(sizeof(APICState));
-    if (!s)
+    if (last_apic_idx >= MAX_APICS)
         return -1;
+    s = qemu_mallocz(sizeof(APICState));
     env->apic_state = s;
-    apic_init_ipi(s);
-    s->id = last_apic_id++;
+    s->idx = last_apic_idx++;
+    s->id = env->cpuid_apic_id;
     s->cpu_env = env;
-    s->apicbase = 0xfee00000 | 
-        (s->id ? 0 : MSR_IA32_APICBASE_BSP) | MSR_IA32_APICBASE_ENABLE;
+
+    apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
         /* NOTE: the APIC is directly connected to the CPU - it is not
            on the global memory bus. */
-        apic_io_memory = cpu_register_io_memory(0, apic_mem_read, 
+        apic_io_memory = cpu_register_io_memory(apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
 
-    register_savevm("apic", 0, 1, apic_save, apic_load, s);
+    vmstate_register(s->idx, &vmstate_apic, s);
     qemu_register_reset(apic_reset, s);
 
-    s->next_apic = first_local_apic;
-    first_local_apic = s;
-    
+    local_apics[s->idx] = s;
     return 0;
 }
-
-static void ioapic_service(IOAPICState *s)
-{
-    uint8_t i;
-    uint8_t trig_mode;
-    uint8_t vector;
-    uint8_t delivery_mode;
-    uint32_t mask;
-    uint64_t entry;
-    uint8_t dest;
-    uint8_t dest_mode;
-    uint8_t polarity;
-
-    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-        mask = 1 << i;
-        if (s->irr & mask) {
-            entry = s->ioredtbl[i];
-            if (!(entry & APIC_LVT_MASKED)) {
-                trig_mode = ((entry >> 15) & 1);
-                dest = entry >> 56;
-                dest_mode = (entry >> 11) & 1;
-                delivery_mode = (entry >> 8) & 7;
-                polarity = (entry >> 13) & 1;
-                if (trig_mode == APIC_TRIGGER_EDGE)
-                    s->irr &= ~mask;
-                if (delivery_mode == APIC_DM_EXTINT)
-                    vector = pic_read_irq(isa_pic);
-                else
-                    vector = entry & 0xff;
-                apic_bus_deliver(apic_get_delivery_bitmask(dest, dest_mode),
-                                 delivery_mode, vector, polarity, trig_mode);
-            }
-        }
-    }
-}
-
-void ioapic_set_irq(void *opaque, int vector, int level)
-{
-    IOAPICState *s = opaque;
-
-    if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
-        uint32_t mask = 1 << vector;
-        uint64_t entry = s->ioredtbl[vector];
-
-        if ((entry >> 15) & 1) {
-            /* level triggered */
-            if (level) {
-                s->irr |= mask;
-                ioapic_service(s);
-            } else {
-                s->irr &= ~mask;
-            }
-        } else {
-            /* edge triggered */
-            if (level) {
-                s->irr |= mask;
-                ioapic_service(s);
-            }
-        }
-    }
-}
-
-static uint32_t ioapic_mem_readl(void *opaque, target_phys_addr_t addr)
-{
-    IOAPICState *s = opaque;
-    int index;
-    uint32_t val = 0;
-
-    addr &= 0xff;
-    if (addr == 0x00) {
-        val = s->ioregsel;
-    } else if (addr == 0x10) {
-        switch (s->ioregsel) {
-            case 0x00:
-                val = s->id << 24;
-                break;
-            case 0x01:
-                val = 0x11 | ((IOAPIC_NUM_PINS - 1) << 16); /* version 0x11 */
-                break;
-            case 0x02:
-                val = 0;
-                break;
-            default:
-                index = (s->ioregsel - 0x10) >> 1;
-                if (index >= 0 && index < IOAPIC_NUM_PINS) {
-                    if (s->ioregsel & 1)
-                        val = s->ioredtbl[index] >> 32;
-                    else
-                        val = s->ioredtbl[index] & 0xffffffff;
-                }
-        }
-#ifdef DEBUG_IOAPIC
-        printf("I/O APIC read: %08x = %08x\n", s->ioregsel, val);
-#endif
-    }
-    return val;
-}
-
-static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
-{
-    IOAPICState *s = opaque;
-    int index;
-
-    addr &= 0xff;
-    if (addr == 0x00)  {
-        s->ioregsel = val;
-        return;
-    } else if (addr == 0x10) {
-#ifdef DEBUG_IOAPIC
-        printf("I/O APIC write: %08x = %08x\n", s->ioregsel, val);
-#endif
-        switch (s->ioregsel) {
-            case 0x00:
-                s->id = (val >> 24) & 0xff;
-                return;
-            case 0x01:
-            case 0x02:
-                return;
-            default:
-                index = (s->ioregsel - 0x10) >> 1;
-                if (index >= 0 && index < IOAPIC_NUM_PINS) {
-                    if (s->ioregsel & 1) {
-                        s->ioredtbl[index] &= 0xffffffff;
-                        s->ioredtbl[index] |= (uint64_t)val << 32;
-                    } else {
-                        s->ioredtbl[index] &= ~0xffffffffULL;
-                        s->ioredtbl[index] |= val;
-                    }
-                    ioapic_service(s);
-                }
-        }
-    }
-}
-
-static void ioapic_save(QEMUFile *f, void *opaque)
-{
-    IOAPICState *s = opaque;
-    int i;
-
-    qemu_put_8s(f, &s->id);
-    qemu_put_8s(f, &s->ioregsel);
-    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-        qemu_put_be64s(f, &s->ioredtbl[i]);
-    }
-}
-
-static int ioapic_load(QEMUFile *f, void *opaque, int version_id)
-{
-    IOAPICState *s = opaque;
-    int i;
-
-    if (version_id != 1)
-        return -EINVAL;
-
-    qemu_get_8s(f, &s->id);
-    qemu_get_8s(f, &s->ioregsel);
-    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-        qemu_get_be64s(f, &s->ioredtbl[i]);
-    }
-    return 0;
-}
-
-static void ioapic_reset(void *opaque)
-{
-    IOAPICState *s = opaque;
-    int i;
-
-    memset(s, 0, sizeof(*s));
-    for(i = 0; i < IOAPIC_NUM_PINS; i++)
-        s->ioredtbl[i] = 1 << 16; /* mask LVT */
-}
-
-static CPUReadMemoryFunc *ioapic_mem_read[3] = {
-    ioapic_mem_readl,
-    ioapic_mem_readl,
-    ioapic_mem_readl,
-};
-
-static CPUWriteMemoryFunc *ioapic_mem_write[3] = {
-    ioapic_mem_writel,
-    ioapic_mem_writel,
-    ioapic_mem_writel,
-};
-
-IOAPICState *ioapic_init(void)
-{
-    IOAPICState *s;
-    int io_memory;
-
-    s = qemu_mallocz(sizeof(IOAPICState));
-    if (!s)
-        return NULL;
-    ioapic_reset(s);
-    s->id = last_apic_id++;
-
-    io_memory = cpu_register_io_memory(0, ioapic_mem_read, 
-                                       ioapic_mem_write, s);
-    cpu_register_physical_memory(0xfec00000, 0x1000, io_memory);
-
-    register_savevm("ioapic", 0, 1, ioapic_save, ioapic_load, s);
-    qemu_register_reset(ioapic_reset, s);
-    
-    return s;
-}