<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://blog.gmane.org/gmane.linux.kernel.virtualization.lguest">
    <title>gmane.linux.kernel.virtualization.lguest</title>
    <link>http://blog.gmane.org/gmane.linux.kernel.virtualization.lguest</link>
    <description/>
    <syn:updatePeriod>hourly</syn:updatePeriod>
    <syn:updateFrequency>1</syn:updateFrequency>
    <syn:updateBase>1901-01-01T00:00+00:00</syn:updateBase>
    <items>
      <rdf:Seq>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/646"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/585"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/584"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/583"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/579"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/577"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/574"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/573"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/558"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/548"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/546"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/538"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/535"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/532"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/531"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/530"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/529"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/528"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/527"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/526"/>
      </rdf:Seq>
    </items>
    <image rdf:resource="http://gmane.org/img/gmane-25t.png"/>
    <textinput rdf:resource=""/>
  </channel>
  <image rdf:about="http://gmane.org/img/gmane-25t.png">
    <title>Gmane</title>
    <url>http://gmane.org/img/gmane-25t.png</url>
    <link>http://gmane.org</link>
  </image>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/646">
    <title>work</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/646</link>
    <description>Hello!

I'd like to ask if I can go on with my lguest to-do list or if PAE is
causing any problems (if this is the case, please let me know and I'll
 try to fix it).

regards
Matias
</description>
    <dc:creator>Matias Zabaljauregui</dc:creator>
    <dc:date>2008-11-11T14:13:03</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/585">
    <title>problems with DMA buffer and network card</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/585</link>
    <description>_______________________________________________
Lguest mailing list
Lguest-mnsaURCQ41sdnm+yROfE0A&lt; at &gt;public.gmane.org
https://ozlabs.org/mailman/listinfo/lguest
</description>
    <dc:creator>octane indice</dc:creator>
    <dc:date>2008-10-31T15:24:00</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/584">
    <title>[PULL] lguest boot fixes, example launcher fixes.</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/584</link>
    <description>The following changes since commit 65fc716fa673cf98fb5887180fd3c52ca0371198:
  Linus Torvalds (1):
        Merge git://git.kernel.org/.../sam/kbuild-fixes

are available in the git repository at:

  ssh://master.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus.git master

Rusty Russell (3):
      lguest: fix example launcher compile after moved asm-x86 dir.
      lguest: fix early_ioremap.
      lguest: fix irq vectors.

 Documentation/lguest/Makefile |    2 +-
 Documentation/lguest/lguest.c |    2 +-
 arch/x86/lguest/boot.c        |   32 ++++++++++++++++++++------------
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index bac037e..725eef8 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
&lt; at &gt;&lt; at &gt; -1,5 +1,5 &lt; at &gt;&lt; at &gt;
 # This creates the demonstration utility "lguest" which runs a Linux guest.
-CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include
+CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include
 LDLIBS:=-lz
 
 all: lguest
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index da86fd5..8045206 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
&lt; at &gt;&lt; at &gt; -44,7 +44,7 &lt; at &gt;&lt; at &gt;
 #include "linux/virtio_console.h"
 #include "linux/virtio_rng.h"
 #include "linux/virtio_ring.h"
-#include "asm-x86/bootparam.h"
+#include "asm/bootparam.h"
 /*L:110 We can ignore the 39 include files we need for this program, but I do
  * want to draw attention to the use of kernel-style types.
  *
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 48ee4f9..a5d8e1a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
&lt; at &gt;&lt; at &gt; -367,10 +367,9 &lt; at &gt;&lt; at &gt; static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
  * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
  * name like "FPUTRAP bit" be a little less cryptic?
  *
- * We store cr0 (and cr3) locally, because the Host never changes it.  The
- * Guest sometimes wants to read it and we'd prefer not to bother the Host
- * unnecessarily. */
-static unsigned long current_cr0, current_cr3;
+ * We store cr0 locally because the Host never changes it.  The Guest sometimes
+ * wants to read it and we'd prefer not to bother the Host unnecessarily. */
+static unsigned long current_cr0;
 static void lguest_write_cr0(unsigned long val)
 {
 lazy_hcall(LHCALL_TS, val &amp; X86_CR0_TS, 0, 0);
&lt; at &gt;&lt; at &gt; -399,17 +398,23 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr2(void)
 return lguest_data.cr2;
 }
 
+/* See lguest_set_pte() below. */
+static bool cr3_changed = false;
+
 /* cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0.  Keep a local copy, and tell the Host when it changes. */
+ * cr0.  Keep a local copy, and tell the Host when it changes.  The only
+ * difference is that our local copy is in lguest_data because the Host needs
+ * to set it upon our initial hypercall. */
 static void lguest_write_cr3(unsigned long cr3)
 {
+lguest_data.pgdir = cr3;
 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
-current_cr3 = cr3;
+cr3_changed = true;
 }
 
 static unsigned long lguest_read_cr3(void)
 {
-return current_cr3;
+return lguest_data.pgdir;
 }
 
 /* cr4 is used to enable and disable PGE, but we don't care. */
&lt; at &gt;&lt; at &gt; -498,13 +503,13 &lt; at &gt;&lt; at &gt; static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * to forget all of them.  Fortunately, this is very rare.
  *
  * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow.  So we don't even tell the Host
- * anything changed until we've done the first page table switch. */
+ * which makes booting astonishingly slow: 1.83 seconds!  So we don't even tell
+ * the Host anything changed until we've done the first page table switch,
+ * which brings boot back to 0.25 seconds. */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
 *ptep = pteval;
-/* Don't bother with hypercall before initial setup. */
-if (current_cr3)
+if (cr3_changed)
 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
 }
 
&lt; at &gt;&lt; at &gt; -521,7 +526,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 static void lguest_flush_tlb_single(unsigned long addr)
 {
 /* Simply set it to zero: if it was not, it will fault back in. */
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
 }
 
 /* This is what happens after the Guest has removed a large number of entries.
&lt; at &gt;&lt; at &gt; -581,6 +586,9 &lt; at &gt;&lt; at &gt; static void __init lguest_init_IRQ(void)
 
 for (i = 0; i &lt; LGUEST_IRQS; i++) {
 int vector = FIRST_EXTERNAL_VECTOR + i;
+/* Some systems map "vectors" to interrupts weirdly.  Lguest has
+ * a straightforward 1 to 1 mapping, so force that here. */
+__get_cpu_var(vector_irq)[vector] = i;
 if (vector != SYSCALL_VECTOR) {
 set_intr_gate(vector, interrupt[vector]);
 set_irq_chip_and_handler_name(i, &amp;lguest_irq_controller,
</description>
    <dc:creator>Rusty Russell</dc:creator>
    <dc:date>2008-10-31T00:53:14</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/583">
    <title>[PULL] lguest and virtio_net fixes</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/583</link>
    <description>The following changes since commit 49fdf6785fd660e18a1eb4588928f47e9fa29a9a:
  Linus Torvalds (1):
        Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

are available in the git repository at:

  ssh://master.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus.git master

Mark McLoughlin (2):
      virtio_net: Recycle some more rx buffer pages
      virtio_net: hook up the set-tso ethtool op

Rusty Russell (3):
      lguest: fix example launcher compile after moved asm-x86 dir.
      lguest: fix early_ioremap.
      lguest: fix irq vectors.

 Documentation/lguest/Makefile |    2 +-
 Documentation/lguest/lguest.c |    2 +-
 arch/x86/lguest/boot.c        |   32 ++++++++++++++++++++------------
 drivers/net/virtio_net.c      |   22 ++++++++++++++--------
 4 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index bac037e..725eef8 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
&lt; at &gt;&lt; at &gt; -1,5 +1,5 &lt; at &gt;&lt; at &gt;
 # This creates the demonstration utility "lguest" which runs a Linux guest.
-CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include
+CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include
 LDLIBS:=-lz
 
 all: lguest
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 7228369..6ac5230 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
&lt; at &gt;&lt; at &gt; -44,7 +44,7 &lt; at &gt;&lt; at &gt;
 #include "linux/virtio_console.h"
 #include "linux/virtio_rng.h"
 #include "linux/virtio_ring.h"
-#include "asm-x86/bootparam.h"
+#include "asm/bootparam.h"
 /*L:110 We can ignore the 39 include files we need for this program, but I do
  * want to draw attention to the use of kernel-style types.
  *
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 48ee4f9..a5d8e1a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
&lt; at &gt;&lt; at &gt; -367,10 +367,9 &lt; at &gt;&lt; at &gt; static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
  * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
  * name like "FPUTRAP bit" be a little less cryptic?
  *
- * We store cr0 (and cr3) locally, because the Host never changes it.  The
- * Guest sometimes wants to read it and we'd prefer not to bother the Host
- * unnecessarily. */
-static unsigned long current_cr0, current_cr3;
+ * We store cr0 locally because the Host never changes it.  The Guest sometimes
+ * wants to read it and we'd prefer not to bother the Host unnecessarily. */
+static unsigned long current_cr0;
 static void lguest_write_cr0(unsigned long val)
 {
 lazy_hcall(LHCALL_TS, val &amp; X86_CR0_TS, 0, 0);
&lt; at &gt;&lt; at &gt; -399,17 +398,23 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr2(void)
 return lguest_data.cr2;
 }
 
+/* See lguest_set_pte() below. */
+static bool cr3_changed = false;
+
 /* cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0.  Keep a local copy, and tell the Host when it changes. */
+ * cr0.  Keep a local copy, and tell the Host when it changes.  The only
+ * difference is that our local copy is in lguest_data because the Host needs
+ * to set it upon our initial hypercall. */
 static void lguest_write_cr3(unsigned long cr3)
 {
+lguest_data.pgdir = cr3;
 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
-current_cr3 = cr3;
+cr3_changed = true;
 }
 
 static unsigned long lguest_read_cr3(void)
 {
-return current_cr3;
+return lguest_data.pgdir;
 }
 
 /* cr4 is used to enable and disable PGE, but we don't care. */
&lt; at &gt;&lt; at &gt; -498,13 +503,13 &lt; at &gt;&lt; at &gt; static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * to forget all of them.  Fortunately, this is very rare.
  *
  * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow.  So we don't even tell the Host
- * anything changed until we've done the first page table switch. */
+ * which makes booting astonishingly slow: 1.83 seconds!  So we don't even tell
+ * the Host anything changed until we've done the first page table switch,
+ * which brings boot back to 0.25 seconds. */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
 *ptep = pteval;
-/* Don't bother with hypercall before initial setup. */
-if (current_cr3)
+if (cr3_changed)
 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
 }
 
&lt; at &gt;&lt; at &gt; -521,7 +526,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 static void lguest_flush_tlb_single(unsigned long addr)
 {
 /* Simply set it to zero: if it was not, it will fault back in. */
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
 }
 
 /* This is what happens after the Guest has removed a large number of entries.
&lt; at &gt;&lt; at &gt; -581,6 +586,9 &lt; at &gt;&lt; at &gt; static void __init lguest_init_IRQ(void)
 
 for (i = 0; i &lt; LGUEST_IRQS; i++) {
 int vector = FIRST_EXTERNAL_VECTOR + i;
+/* Some systems map "vectors" to interrupts weirdly.  Lguest has
+ * a straightforward 1 to 1 mapping, so force that here. */
+__get_cpu_var(vector_irq)[vector] = i;
 if (vector != SYSCALL_VECTOR) {
 set_intr_gate(vector, interrupt[vector]);
 set_irq_chip_and_handler_name(i, &amp;lguest_irq_controller,
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0196a0d..3c726f1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
&lt; at &gt;&lt; at &gt; -82,6 +82,16 &lt; at &gt;&lt; at &gt; static void give_a_page(struct virtnet_info *vi, struct page *page)
 vi-&gt;pages = page;
 }
 
+static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb)
+{
+unsigned int i;
+
+for (i = 0; i &lt; skb_shinfo(skb)-&gt;nr_frags; i++)
+give_a_page(vi, skb_shinfo(skb)-&gt;frags[i].page);
+skb_shinfo(skb)-&gt;nr_frags = 0;
+skb-&gt;data_len = 0;
+}
+
 static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
 {
 struct page *p = vi-&gt;pages;
&lt; at &gt;&lt; at &gt; -121,14 +131,8 &lt; at &gt;&lt; at &gt; static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 }
 len -= sizeof(struct virtio_net_hdr);
 
-if (len &lt;= MAX_PACKET_LEN) {
-unsigned int i;
-
-for (i = 0; i &lt; skb_shinfo(skb)-&gt;nr_frags; i++)
-give_a_page(dev-&gt;priv, skb_shinfo(skb)-&gt;frags[i].page);
-skb-&gt;data_len = 0;
-skb_shinfo(skb)-&gt;nr_frags = 0;
-}
+if (len &lt;= MAX_PACKET_LEN)
+trim_pages(dev-&gt;priv, skb);
 
 err = pskb_trim(skb, len);
 if (err) {
&lt; at &gt;&lt; at &gt; -232,6 +236,7 &lt; at &gt;&lt; at &gt; static void try_fill_recv(struct virtnet_info *vi)
 err = vi-&gt;rvq-&gt;vq_ops-&gt;add_buf(vi-&gt;rvq, sg, 0, num, skb);
 if (err) {
 skb_unlink(skb, &amp;vi-&gt;recv);
+trim_pages(vi, skb);
 kfree_skb(skb);
 break;
 }
&lt; at &gt;&lt; at &gt; -478,6 +483,7 &lt; at &gt;&lt; at &gt; static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
 static struct ethtool_ops virtnet_ethtool_ops = {
 .set_tx_csum = virtnet_set_tx_csum,
 .set_sg = ethtool_op_set_sg,
+.set_tso = ethtool_op_set_tso,
 };
 
 static int virtnet_probe(struct virtio_device *vdev)
</description>
    <dc:creator>Rusty Russell</dc:creator>
    <dc:date>2008-10-28T12:09:40</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/579">
    <title>[PATCH resend v2] lguest: Change over to using KVMhypercalls mechanism</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/579</link>
    <description>This patch allows us to use KVM hypercalls.

Signed-off-by: Matias Zabaljauregui &lt;zabaljauregui-Re5JQEeQqe8AvxtiuMwx3w&lt; at &gt;public.gmane.org&gt;
---
 arch/x86/lguest/boot.c                |   87 ++++++++++++++++++++-------------
 arch/x86/lguest/i386_head.S           |    4 +-
 drivers/lguest/interrupts_and_traps.c |    7 ++-
 drivers/lguest/lguest_device.c        |    4 +-
 drivers/lguest/x86/core.c             |   54 ++++++++++++++++++++-
 include/asm-x86/lguest_hcall.h        |   24 ++--------
 6 files changed, 118 insertions(+), 62 deletions(-)

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index c6a6b75..f6ae1cb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
&lt; at &gt;&lt; at &gt; -106,7 +106,7 &lt; at &gt;&lt; at &gt; static void async_hcall(unsigned long call, unsigned long arg1,
 local_irq_save(flags);
 if (lguest_data.hcall_status[next_call] != 0xFF) {
 /* Table full, so do normal hcall which will flush table. */
-hcall(call, arg1, arg2, arg3);
+kvm_hypercall3(call, arg1, arg2, arg3);
 } else {
 lguest_data.hcalls[next_call].arg0 = call;
 lguest_data.hcalls[next_call].arg1 = arg1;
&lt; at &gt;&lt; at &gt; -133,13 +133,31 &lt; at &gt;&lt; at &gt; static void async_hcall(unsigned long call, unsigned long arg1,
  *
  * So, when we're in lazy mode, we call async_hcall() to store the call for
  * future processing: */
-static void lazy_hcall(unsigned long call,
+static void lazy_hcall1(unsigned long call,
+       unsigned long arg1)
+{
+if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+kvm_hypercall1(call, arg1);
+else
+async_hcall(call, arg1, 0, 0);
+}
+static void lazy_hcall2(unsigned long call,
+       unsigned long arg1,
+       unsigned long arg2)
+{
+if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+kvm_hypercall2(call, arg1, arg2);
+else
+async_hcall(call, arg1, arg2, 0);
+}
+
+static void lazy_hcall3(unsigned long call,
        unsigned long arg1,
        unsigned long arg2,
        unsigned long arg3)
 {
 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-hcall(call, arg1, arg2, arg3);
+kvm_hypercall3(call, arg1, arg2, arg3);
 else
 async_hcall(call, arg1, arg2, arg3);
 }
&lt; at &gt;&lt; at &gt; -149,7 +167,7 &lt; at &gt;&lt; at &gt; static void lazy_hcall(unsigned long call,
 static void lguest_leave_lazy_mode(void)
 {
 paravirt_leave_lazy(paravirt_get_lazy_mode());
-hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+kvm_hypercall0(LHCALL_FLUSH_ASYNC);
 }
 
 /*G:033
&lt; at &gt;&lt; at &gt; -223,7 +241,7 &lt; at &gt;&lt; at &gt; static void lguest_write_idt_entry(gate_desc *dt,
 /* Keep the local copy up to date. */
 native_write_idt_entry(dt, entrynum, g);
 /* Tell Host about this new entry. */
-hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
+kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
 }
 
 /* Changing to a different IDT is very rare: we keep the IDT up-to-date every
&lt; at &gt;&lt; at &gt; -235,7 +253,7 &lt; at &gt;&lt; at &gt; static void lguest_load_idt(const struct desc_ptr *desc)
 struct desc_struct *idt = (void *)desc-&gt;address;
 
 for (i = 0; i &lt; (desc-&gt;size+1)/8; i++)
-hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
+kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -256,7 +274,7 &lt; at &gt;&lt; at &gt; static void lguest_load_idt(const struct desc_ptr *desc)
 static void lguest_load_gdt(const struct desc_ptr *desc)
 {
 BUG_ON((desc-&gt;size+1)/8 != GDT_ENTRIES);
-hcall(LHCALL_LOAD_GDT, __pa(desc-&gt;address), GDT_ENTRIES, 0);
+kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc-&gt;address), GDT_ENTRIES);
 }
 
 /* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
&lt; at &gt;&lt; at &gt; -266,7 +284,7 &lt; at &gt;&lt; at &gt; static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
    const void *desc, int type)
 {
 native_write_gdt_entry(dt, entrynum, desc, type);
-hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
+kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES);
 }
 
 /* OK, I lied.  There are three "thread local storage" GDT entries which change
&lt; at &gt;&lt; at &gt; -278,7 +296,7 &lt; at &gt;&lt; at &gt; static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
  * can't handle us removing entries we're currently using.  So we clear
  * the GS register here: if it's needed it'll be reloaded anyway. */
 loadsegment(gs, 0);
-lazy_hcall(LHCALL_LOAD_TLS, __pa(&amp;t-&gt;tls_array), cpu, 0);
+lazy_hcall2(LHCALL_LOAD_TLS, __pa(&amp;t-&gt;tls_array), cpu);
 }
 
 /*G:038 That's enough excitement for now, back to ploughing through each of
&lt; at &gt;&lt; at &gt; -376,7 +394,7 &lt; at &gt;&lt; at &gt; static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 static unsigned long current_cr0, current_cr3;
 static void lguest_write_cr0(unsigned long val)
 {
-lazy_hcall(LHCALL_TS, val &amp; X86_CR0_TS, 0, 0);
+lazy_hcall1(LHCALL_TS, val &amp; X86_CR0_TS);
 current_cr0 = val;
 }
 
&lt; at &gt;&lt; at &gt; -390,7 +408,7 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr0(void)
  * the vowels have been optimized out. */
 static void lguest_clts(void)
 {
-lazy_hcall(LHCALL_TS, 0, 0, 0);
+lazy_hcall1(LHCALL_TS, 0);
 current_cr0 &amp;= ~X86_CR0_TS;
 }
 
&lt; at &gt;&lt; at &gt; -406,7 +424,7 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr2(void)
  * cr0.  Keep a local copy, and tell the Host when it changes. */
 static void lguest_write_cr3(unsigned long cr3)
 {
-lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
+lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
 current_cr3 = cr3;
 }
 
&lt; at &gt;&lt; at &gt; -482,7 +500,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
       pte_t *ptep, pte_t pteval)
 {
 *ptep = pteval;
-lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pteval.pte_low);
+lazy_hcall3(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pteval.pte_low);
 }
 
 #ifdef CONFIG_X86_PAE
&lt; at &gt;&lt; at &gt; -493,24 +511,24 &lt; at &gt;&lt; at &gt; static void lguest_set_pud(pud_t *pudp, pud_t pudval)
 {
 *pudp = pudval;
 /* 32 bytes aligned pdpt address. */
-lazy_hcall(LHCALL_SET_PUD, __pa(pudp) &amp; 0xFFFFFFE0,
-   (__pa(pudp) &amp; 0x1F) / 8, 0);
+lazy_hcall2(LHCALL_SET_PUD, __pa(pudp) &amp; 0xFFFFFFE0,
+   (__pa(pudp) &amp; 0x1F) / 8);
 }
 
 /* The Guest calls this to set a PMD entry, when PAE is active */
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 *pmdp = pmdval;
-lazy_hcall(LHCALL_SET_PMD, __pa(pmdp) &amp; PAGE_MASK,
-   (__pa(pmdp) &amp; (PAGE_SIZE - 1)) / 8, 0);
+lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) &amp; PAGE_MASK,
+   (__pa(pmdp) &amp; (PAGE_SIZE - 1)) / 8);
 }
 
 #else
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 *pmdp = pmdval;
-lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&amp;PAGE_MASK,
-   (__pa(pmdp)&amp;(PAGE_SIZE-1))/4, 0);
+lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp)&amp;PAGE_MASK,
+   (__pa(pmdp)&amp;(PAGE_SIZE-1))/4);
 }
 #endif
 
&lt; at &gt;&lt; at &gt; -534,7 +552,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 
 /* Don't bother with hypercall before initial setup. */
 if (current_cr3)
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 #ifdef CONFIG_X86_PAE
&lt; at &gt;&lt; at &gt; -544,7 +562,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
 
 /* Don't bother with hypercall before initial setup. */
 if (current_cr3)
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 static inline void lguest_set_pte_present(struct mm_struct *mm,
&lt; at &gt;&lt; at &gt; -557,7 +575,7 &lt; at &gt;&lt; at &gt; static inline void lguest_set_pte_present(struct mm_struct *mm,
 smp_wmb();
 ptep-&gt;pte_low = pte.pte_low;
 
-lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pte.pte_low);
+lazy_hcall3(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pte.pte_low);
 }
 
 void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
&lt; at &gt;&lt; at &gt; -566,7 +584,7 &lt; at &gt;&lt; at &gt; void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 smp_wmb();
 ptep-&gt;pte_high = 0;
 
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
 }
 
 void lguest_pmd_clear(pmd_t *pmdp)
&lt; at &gt;&lt; at &gt; -588,7 +606,7 &lt; at &gt;&lt; at &gt; void lguest_pmd_clear(pmd_t *pmdp)
 static void lguest_flush_tlb_single(unsigned long addr)
 {
 /* Simply set it to zero: if it was not, it will fault back in. */
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
 }
 
 /* This is what happens after the Guest has removed a large number of entries.
&lt; at &gt;&lt; at &gt; -596,7 +614,7 &lt; at &gt;&lt; at &gt; static void lguest_flush_tlb_single(unsigned long addr)
  * have changed, ie. virtual addresses below PAGE_OFFSET. */
 static void lguest_flush_tlb_user(void)
 {
-lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 0);
 }
 
 /* This is called when the kernel page tables have changed.  That's not very
&lt; at &gt;&lt; at &gt; -604,7 +622,7 &lt; at &gt;&lt; at &gt; static void lguest_flush_tlb_user(void)
  * slow), so it's worth separating this from the user flushing above. */
 static void lguest_flush_tlb_kernel(void)
 {
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -735,7 +753,7 &lt; at &gt;&lt; at &gt; static int lguest_clockevent_set_next_event(unsigned long delta,
 }
 
 /* Please wake us this far in the future. */
-hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0);
+kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta);
 return 0;
 }
 
&lt; at &gt;&lt; at &gt; -746,7 +764,7 &lt; at &gt;&lt; at &gt; static void lguest_clockevent_set_mode(enum clock_event_mode mode,
 case CLOCK_EVT_MODE_UNUSED:
 case CLOCK_EVT_MODE_SHUTDOWN:
 /* A 0 argument shuts the clock down. */
-hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0);
+kvm_hypercall0(LHCALL_SET_CLOCKEVENT);
 break;
 case CLOCK_EVT_MODE_ONESHOT:
 /* This is what we expect. */
&lt; at &gt;&lt; at &gt; -821,7 +839,7 &lt; at &gt;&lt; at &gt; static void lguest_time_init(void)
 static void lguest_load_sp0(struct tss_struct *tss,
     struct thread_struct *thread)
 {
-lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread-&gt;sp0,
+lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread-&gt;sp0,
    THREAD_SIZE/PAGE_SIZE);
 }
 
&lt; at &gt;&lt; at &gt; -864,7 +882,7 &lt; at &gt;&lt; at &gt; static u32 lguest_apic_read(unsigned long reg)
 /* STOP!  Until an interrupt comes in. */
 static void lguest_safe_halt(void)
 {
-hcall(LHCALL_HALT, 0, 0, 0);
+kvm_hypercall0(LHCALL_HALT);
 }
 
 /* The SHUTDOWN hypercall takes a string to describe what's happening, and
&lt; at &gt;&lt; at &gt; -874,7 +892,8 &lt; at &gt;&lt; at &gt; static void lguest_safe_halt(void)
  * rather than virtual addresses, so we use __pa() here. */
 static void lguest_power_off(void)
 {
-hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"),
+LGUEST_SHUTDOWN_POWEROFF);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -884,7 +903,7 &lt; at &gt;&lt; at &gt; static void lguest_power_off(void)
  */
 static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
 {
-hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF);
 /* The hcall won't return, but to keep gcc happy, we're "done". */
 return NOTIFY_DONE;
 }
&lt; at &gt;&lt; at &gt; -925,7 +944,7 &lt; at &gt;&lt; at &gt; static __init int early_put_chars(u32 vtermno, const char *buf, int count)
 len = sizeof(scratch) - 1;
 scratch[len] = '\0';
 memcpy(scratch, buf, len);
-hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch));
 
 /* This routine returns the number of bytes actually written. */
 return len;
&lt; at &gt;&lt; at &gt; -935,7 +954,7 &lt; at &gt;&lt; at &gt; static __init int early_put_chars(u32 vtermno, const char *buf, int count)
  * Launcher to reboot us. */
 static void lguest_restart(char *reason)
 {
-hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART);
 }
 
 /*G:050
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 10b9bd3..f795419 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
&lt; at &gt;&lt; at &gt; -27,8 +27,8 &lt; at &gt;&lt; at &gt; ENTRY(lguest_entry)
 /* We make the "initialization" hypercall now to tell the Host about
  * us, and also find out where it put our page tables. */
 movl $LHCALL_LGUEST_INIT, %eax
-movl $lguest_data - __PAGE_OFFSET, %edx
-int $LGUEST_TRAP_ENTRY
+movl $lguest_data - __PAGE_OFFSET, %ebx
+.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
 
 /* Set up the initial stack so we can run C code. */
 movl $(init_thread_union+THREAD_SIZE),%esp
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a103906..12898d4 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
&lt; at &gt;&lt; at &gt; -283,9 +283,10 &lt; at &gt;&lt; at &gt; static int direct_trap(unsigned int num)
 
 /* The Host needs to see page faults (for shadow paging and to save the
  * fault address), general protection faults (in/out emulation) and
- * device not available (TS handling), and of course, the hypercall
- * trap. */
-return num != 14 &amp;&amp; num != 13 &amp;&amp; num != 7 &amp;&amp; num != LGUEST_TRAP_ENTRY;
+ * device not available (TS handling), invalid opcode fault (kvm hcall)
+ * and of course, the hypercall trap. */
+return num != 14 &amp;&amp; num != 13 &amp;&amp; num != 7 &amp;&amp;
+num != LGUEST_TRAP_ENTRY &amp;&amp; num != 6;
 }
 /*:*/
 
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index a661bbd..99f63b1 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
&lt; at &gt;&lt; at &gt; -161,7 +161,7 &lt; at &gt;&lt; at &gt; static void set_status(struct virtio_device *vdev, u8 status)
 
 /* We set the status. */
 to_lgdev(vdev)-&gt;desc-&gt;status = status;
-hcall(LHCALL_NOTIFY, (max_pfn&lt;&lt;PAGE_SHIFT) + offset, 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, (max_pfn&lt;&lt;PAGE_SHIFT) + offset);
 }
 
 static void lg_set_status(struct virtio_device *vdev, u8 status)
&lt; at &gt;&lt; at &gt; -209,7 +209,7 &lt; at &gt;&lt; at &gt; static void lg_notify(struct virtqueue *vq)
  * virtqueue structure. */
 struct lguest_vq_info *lvq = vq-&gt;priv;
 
-hcall(LHCALL_NOTIFY, lvq-&gt;config.pfn &lt;&lt; PAGE_SHIFT, 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, lvq-&gt;config.pfn &lt;&lt; PAGE_SHIFT);
 }
 
 /* This routine finds the first virtqueue described in the configuration of
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index bf79423..0968a1a 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
&lt; at &gt;&lt; at &gt; -290,6 +290,49 &lt; at &gt;&lt; at &gt; static int emulate_insn(struct lg_cpu *cpu)
 return 1;
 }
 
+/* Our hypercalls mechanism used to be based on direct software interrupts.
+ * After Anthony's "Refactor hypercall infrastructure" kvm patch,
+ * we decided to change over to using kvm hypercalls.
+ * KVM_HYPERCALL generates an invalid opcode fault (fault 6) on non-VT cpus, so
+ * the easiest solution seemed to be an *emulation approach*: if the fault was
+ * really produced by an hypercall (is_hypercall() does exactly this check), we
+ * just call the corresponding hypercall host implementation function.
+ * But invalid opcode faults are notably slower than software interrupts.
+ * So we implemented the *patching (or rewriting) approach*: every time we find
+ * the KVM_HYPERCALL opcode in guest code, we patch it to int 0x1f opcode, so
+ * next time the guest calls this hypercall, it will use the faster
+ * int mechanism. We made some tests to convince you. The benchmark shows the
+ * average cycle cost of a hypercall. For each alternative solution
+ * mentioned above we've made 5 runs of the benchmark:
+ * 1) direct software interrupt:  2915, 2789, 2764, 2721, 2898
+ * 2) emulation technique: 3410, 3681, 3466, 3392, 3780
+ * 3) patching (rewrite) technique: 2977, 2975, 2891, 2637, 2884 */
+static void rewrite_hypercall(struct lg_cpu *cpu)
+{
+/* These are the opcodes we use to patch the guest.
+ * The opcode for "int $0x1f"  is  0xcd 0x1f
+ * but vmcall instruction is 3 bytes long, so we complete
+ * the sequence with a NOP (0x90). */
+u8 insn[3] = {0xcd, 0x1f, 0x90};
+
+__lgwrite(cpu, guest_pa(cpu, cpu-&gt;regs-&gt;eip), insn, sizeof(insn));
+}
+
+static bool is_hypercall(struct lg_cpu *cpu)
+{
+u8 insn[3];
+
+/* This must be the Guest kernel trying to do something.
+ * The bottom two bits of the CS segment register are the privilege
+ * level. */
+if ((cpu-&gt;regs-&gt;cs &amp; 3) != GUEST_PL)
+return 0;
+
+/* Is it a vmcall? */
+__lgread(cpu, insn, guest_pa(cpu, cpu-&gt;regs-&gt;eip), sizeof(insn));
+return insn[0] == 0x0f &amp;&amp; insn[1] == 0x01 &amp;&amp; insn[2] == 0xc1;
+}
+
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
 void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
&lt; at &gt;&lt; at &gt; -337,7 +380,7 &lt; at &gt;&lt; at &gt; void lguest_arch_handle_trap(struct lg_cpu *cpu)
 break;
 case 32 ... 255:
 /* These values mean a real interrupt occurred, in which case
- * the Host handler has already been run.  We just do a
+ * the Host handler has already been run. We just do a
  * friendly check if another process should now be run, then
  * return to run the Guest again */
 cond_resched();
&lt; at &gt;&lt; at &gt; -347,6 +390,15 &lt; at &gt;&lt; at &gt; void lguest_arch_handle_trap(struct lg_cpu *cpu)
  * up the pointer now to indicate a hypercall is pending. */
 cpu-&gt;hcall = (struct hcall_args *)cpu-&gt;regs;
 return;
+case 6:
+/* kvm hypercalls trigger an invalid opcode fault (6).
+ * We need to check if ring == GUEST_PL and
+ * faulting instruction == vmcall. */
+if (is_hypercall(cpu)) {
+rewrite_hypercall(cpu);
+return;
+}
+break;
 }
 
 /* We didn't handle the trap, so it needs to go to the Guest. */
diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h
index c0860dc..0cac03f 100644
--- a/include/asm-x86/lguest_hcall.h
+++ b/include/asm-x86/lguest_hcall.h
&lt; at &gt;&lt; at &gt; -27,36 +27,20 &lt; at &gt;&lt; at &gt;
 
 #ifndef __ASSEMBLY__
 #include &lt;asm/hw_irq.h&gt;
+#include &lt;asm/kvm_para.h&gt;
 
 /*G:031 But first, how does our Guest contact the Host to ask for privileged
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * Our hypercall mechanism uses the highest unused trap code (traps 32 and
- * above are used by real hardware interrupts).  Eighteen hypercalls are
+ * We use the KVM hypercall mechanism. Eighteen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %edx, %ebx and %ecx.  If a return
+ * arguments (when required) are placed in %ebx, %ecx and %edx.  If a return
  * value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
  * definition of a gentleman: "someone who is only rude intentionally". */
-static inline unsigned long
-hcall(unsigned long call,
-      unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-/* "int" is the Intel instruction to trigger a trap. */
-asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
-     /* The call in %eax (aka "a") might be overwritten */
-     : "=a"(call)
-       /* The arguments are in %eax, %edx, %ebx &amp; %ecx */
-     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
-       /* "memory" means this might write somewhere in memory.
-* This isn't true for all calls, but it's safe to tell
-* gcc that it might happen so it doesn't get clever. */
-     : "memory");
-return call;
-}
 /*:*/
 
 /* Can't use our min() macro here: needs to be a constant */
&lt; at &gt;&lt; at &gt; -65,7 +49,7 &lt; at &gt;&lt; at &gt; hcall(unsigned long call,
 #define LHCALL_RING_SIZE 64
 struct hcall_args {
 /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
-unsigned long arg0, arg2, arg3, arg1;
+unsigned long arg0, arg1, arg2, arg3;
 };
 
 #endif /* !__ASSEMBLY__ */
</description>
    <dc:creator>Matias Zabaljauregui</dc:creator>
    <dc:date>2008-10-23T21:17:13</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/577">
    <title>[PATCH v2] lguest: Change over to using KVM hypercalls mechanism</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/577</link>
    <description>This patch allows us to use KVM hypercalls.

Signed-off-by: Matias Zabaljauregui &lt;zabaljauregui-Re5JQEeQqe8AvxtiuMwx3w&lt; at &gt;public.gmane.org&gt;
---
 arch/x86/lguest/boot.c                |   87 ++++++++++++++++++++-------------
 arch/x86/lguest/i386_head.S           |    4 +-
 drivers/lguest/interrupts_and_traps.c |    7 ++-
 drivers/lguest/lguest_device.c        |    4 +-
 drivers/lguest/x86/core.c             |   49 ++++++++++++++++++
 include/asm-x86/lguest_hcall.h        |   24 ++--------
 6 files changed, 114 insertions(+), 61 deletions(-)

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index c6a6b75..f6ae1cb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
&lt; at &gt;&lt; at &gt; -106,7 +106,7 &lt; at &gt;&lt; at &gt; static void async_hcall(unsigned long call, unsigned long arg1,
 local_irq_save(flags);
 if (lguest_data.hcall_status[next_call] != 0xFF) {
 /* Table full, so do normal hcall which will flush table. */
-hcall(call, arg1, arg2, arg3);
+kvm_hypercall3(call, arg1, arg2, arg3);
 } else {
 lguest_data.hcalls[next_call].arg0 = call;
 lguest_data.hcalls[next_call].arg1 = arg1;
&lt; at &gt;&lt; at &gt; -133,13 +133,31 &lt; at &gt;&lt; at &gt; static void async_hcall(unsigned long call, unsigned long arg1,
  *
  * So, when we're in lazy mode, we call async_hcall() to store the call for
  * future processing: */
-static void lazy_hcall(unsigned long call,
+static void lazy_hcall1(unsigned long call,
+       unsigned long arg1)
+{
+if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+kvm_hypercall1(call, arg1);
+else
+async_hcall(call, arg1, 0, 0);
+}
+static void lazy_hcall2(unsigned long call,
+       unsigned long arg1,
+       unsigned long arg2)
+{
+if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+kvm_hypercall2(call, arg1, arg2);
+else
+async_hcall(call, arg1, arg2, 0);
+}
+
+static void lazy_hcall3(unsigned long call,
        unsigned long arg1,
        unsigned long arg2,
        unsigned long arg3)
 {
 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-hcall(call, arg1, arg2, arg3);
+kvm_hypercall3(call, arg1, arg2, arg3);
 else
 async_hcall(call, arg1, arg2, arg3);
 }
&lt; at &gt;&lt; at &gt; -149,7 +167,7 &lt; at &gt;&lt; at &gt; static void lazy_hcall(unsigned long call,
 static void lguest_leave_lazy_mode(void)
 {
 paravirt_leave_lazy(paravirt_get_lazy_mode());
-hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+kvm_hypercall0(LHCALL_FLUSH_ASYNC);
 }
 
 /*G:033
&lt; at &gt;&lt; at &gt; -223,7 +241,7 &lt; at &gt;&lt; at &gt; static void lguest_write_idt_entry(gate_desc *dt,
 /* Keep the local copy up to date. */
 native_write_idt_entry(dt, entrynum, g);
 /* Tell Host about this new entry. */
-hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
+kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
 }
 
 /* Changing to a different IDT is very rare: we keep the IDT up-to-date every
&lt; at &gt;&lt; at &gt; -235,7 +253,7 &lt; at &gt;&lt; at &gt; static void lguest_load_idt(const struct desc_ptr *desc)
 struct desc_struct *idt = (void *)desc-&gt;address;
 
 for (i = 0; i &lt; (desc-&gt;size+1)/8; i++)
-hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
+kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -256,7 +274,7 &lt; at &gt;&lt; at &gt; static void lguest_load_idt(const struct desc_ptr *desc)
 static void lguest_load_gdt(const struct desc_ptr *desc)
 {
 BUG_ON((desc-&gt;size+1)/8 != GDT_ENTRIES);
-hcall(LHCALL_LOAD_GDT, __pa(desc-&gt;address), GDT_ENTRIES, 0);
+kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc-&gt;address), GDT_ENTRIES);
 }
 
 /* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
&lt; at &gt;&lt; at &gt; -266,7 +284,7 &lt; at &gt;&lt; at &gt; static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
    const void *desc, int type)
 {
 native_write_gdt_entry(dt, entrynum, desc, type);
-hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
+kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES);
 }
 
 /* OK, I lied.  There are three "thread local storage" GDT entries which change
&lt; at &gt;&lt; at &gt; -278,7 +296,7 &lt; at &gt;&lt; at &gt; static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
  * can't handle us removing entries we're currently using.  So we clear
  * the GS register here: if it's needed it'll be reloaded anyway. */
 loadsegment(gs, 0);
-lazy_hcall(LHCALL_LOAD_TLS, __pa(&amp;t-&gt;tls_array), cpu, 0);
+lazy_hcall2(LHCALL_LOAD_TLS, __pa(&amp;t-&gt;tls_array), cpu);
 }
 
 /*G:038 That's enough excitement for now, back to ploughing through each of
&lt; at &gt;&lt; at &gt; -376,7 +394,7 &lt; at &gt;&lt; at &gt; static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 static unsigned long current_cr0, current_cr3;
 static void lguest_write_cr0(unsigned long val)
 {
-lazy_hcall(LHCALL_TS, val &amp; X86_CR0_TS, 0, 0);
+lazy_hcall1(LHCALL_TS, val &amp; X86_CR0_TS);
 current_cr0 = val;
 }
 
&lt; at &gt;&lt; at &gt; -390,7 +408,7 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr0(void)
  * the vowels have been optimized out. */
 static void lguest_clts(void)
 {
-lazy_hcall(LHCALL_TS, 0, 0, 0);
+lazy_hcall1(LHCALL_TS, 0);
 current_cr0 &amp;= ~X86_CR0_TS;
 }
 
&lt; at &gt;&lt; at &gt; -406,7 +424,7 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr2(void)
  * cr0.  Keep a local copy, and tell the Host when it changes. */
 static void lguest_write_cr3(unsigned long cr3)
 {
-lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
+lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
 current_cr3 = cr3;
 }
 
&lt; at &gt;&lt; at &gt; -482,7 +500,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
       pte_t *ptep, pte_t pteval)
 {
 *ptep = pteval;
-lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pteval.pte_low);
+lazy_hcall3(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pteval.pte_low);
 }
 
 #ifdef CONFIG_X86_PAE
&lt; at &gt;&lt; at &gt; -493,24 +511,24 &lt; at &gt;&lt; at &gt; static void lguest_set_pud(pud_t *pudp, pud_t pudval)
 {
 *pudp = pudval;
 /* 32 bytes aligned pdpt address. */
-lazy_hcall(LHCALL_SET_PUD, __pa(pudp) &amp; 0xFFFFFFE0,
-   (__pa(pudp) &amp; 0x1F) / 8, 0);
+lazy_hcall2(LHCALL_SET_PUD, __pa(pudp) &amp; 0xFFFFFFE0,
+   (__pa(pudp) &amp; 0x1F) / 8);
 }
 
 /* The Guest calls this to set a PMD entry, when PAE is active */
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 *pmdp = pmdval;
-lazy_hcall(LHCALL_SET_PMD, __pa(pmdp) &amp; PAGE_MASK,
-   (__pa(pmdp) &amp; (PAGE_SIZE - 1)) / 8, 0);
+lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) &amp; PAGE_MASK,
+   (__pa(pmdp) &amp; (PAGE_SIZE - 1)) / 8);
 }
 
 #else
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 *pmdp = pmdval;
-lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&amp;PAGE_MASK,
-   (__pa(pmdp)&amp;(PAGE_SIZE-1))/4, 0);
+lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp)&amp;PAGE_MASK,
+   (__pa(pmdp)&amp;(PAGE_SIZE-1))/4);
 }
 #endif
 
&lt; at &gt;&lt; at &gt; -534,7 +552,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 
 /* Don't bother with hypercall before initial setup. */
 if (current_cr3)
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 #ifdef CONFIG_X86_PAE
&lt; at &gt;&lt; at &gt; -544,7 +562,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
 
 /* Don't bother with hypercall before initial setup. */
 if (current_cr3)
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 static inline void lguest_set_pte_present(struct mm_struct *mm,
&lt; at &gt;&lt; at &gt; -557,7 +575,7 &lt; at &gt;&lt; at &gt; static inline void lguest_set_pte_present(struct mm_struct *mm,
 smp_wmb();
 ptep-&gt;pte_low = pte.pte_low;
 
-lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pte.pte_low);
+lazy_hcall3(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pte.pte_low);
 }
 
 void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
&lt; at &gt;&lt; at &gt; -566,7 +584,7 &lt; at &gt;&lt; at &gt; void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 smp_wmb();
 ptep-&gt;pte_high = 0;
 
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
 }
 
 void lguest_pmd_clear(pmd_t *pmdp)
&lt; at &gt;&lt; at &gt; -588,7 +606,7 &lt; at &gt;&lt; at &gt; void lguest_pmd_clear(pmd_t *pmdp)
 static void lguest_flush_tlb_single(unsigned long addr)
 {
 /* Simply set it to zero: if it was not, it will fault back in. */
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
 }
 
 /* This is what happens after the Guest has removed a large number of entries.
&lt; at &gt;&lt; at &gt; -596,7 +614,7 &lt; at &gt;&lt; at &gt; static void lguest_flush_tlb_single(unsigned long addr)
  * have changed, ie. virtual addresses below PAGE_OFFSET. */
 static void lguest_flush_tlb_user(void)
 {
-lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 0);
 }
 
 /* This is called when the kernel page tables have changed.  That's not very
&lt; at &gt;&lt; at &gt; -604,7 +622,7 &lt; at &gt;&lt; at &gt; static void lguest_flush_tlb_user(void)
  * slow), so it's worth separating this from the user flushing above. */
 static void lguest_flush_tlb_kernel(void)
 {
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -735,7 +753,7 &lt; at &gt;&lt; at &gt; static int lguest_clockevent_set_next_event(unsigned long delta,
 }
 
 /* Please wake us this far in the future. */
-hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0);
+kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta);
 return 0;
 }
 
&lt; at &gt;&lt; at &gt; -746,7 +764,7 &lt; at &gt;&lt; at &gt; static void lguest_clockevent_set_mode(enum clock_event_mode mode,
 case CLOCK_EVT_MODE_UNUSED:
 case CLOCK_EVT_MODE_SHUTDOWN:
 /* A 0 argument shuts the clock down. */
-hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0);
+kvm_hypercall0(LHCALL_SET_CLOCKEVENT);
 break;
 case CLOCK_EVT_MODE_ONESHOT:
 /* This is what we expect. */
&lt; at &gt;&lt; at &gt; -821,7 +839,7 &lt; at &gt;&lt; at &gt; static void lguest_time_init(void)
 static void lguest_load_sp0(struct tss_struct *tss,
     struct thread_struct *thread)
 {
-lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread-&gt;sp0,
+lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread-&gt;sp0,
    THREAD_SIZE/PAGE_SIZE);
 }
 
&lt; at &gt;&lt; at &gt; -864,7 +882,7 &lt; at &gt;&lt; at &gt; static u32 lguest_apic_read(unsigned long reg)
 /* STOP!  Until an interrupt comes in. */
 static void lguest_safe_halt(void)
 {
-hcall(LHCALL_HALT, 0, 0, 0);
+kvm_hypercall0(LHCALL_HALT);
 }
 
 /* The SHUTDOWN hypercall takes a string to describe what's happening, and
&lt; at &gt;&lt; at &gt; -874,7 +892,8 &lt; at &gt;&lt; at &gt; static void lguest_safe_halt(void)
  * rather than virtual addresses, so we use __pa() here. */
 static void lguest_power_off(void)
 {
-hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"),
+LGUEST_SHUTDOWN_POWEROFF);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -884,7 +903,7 &lt; at &gt;&lt; at &gt; static void lguest_power_off(void)
  */
 static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
 {
-hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF);
 /* The hcall won't return, but to keep gcc happy, we're "done". */
 return NOTIFY_DONE;
 }
&lt; at &gt;&lt; at &gt; -925,7 +944,7 &lt; at &gt;&lt; at &gt; static __init int early_put_chars(u32 vtermno, const char *buf, int count)
 len = sizeof(scratch) - 1;
 scratch[len] = '\0';
 memcpy(scratch, buf, len);
-hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch));
 
 /* This routine returns the number of bytes actually written. */
 return len;
&lt; at &gt;&lt; at &gt; -935,7 +954,7 &lt; at &gt;&lt; at &gt; static __init int early_put_chars(u32 vtermno, const char *buf, int count)
  * Launcher to reboot us. */
 static void lguest_restart(char *reason)
 {
-hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART);
 }
 
 /*G:050
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 10b9bd3..f795419 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
&lt; at &gt;&lt; at &gt; -27,8 +27,8 &lt; at &gt;&lt; at &gt; ENTRY(lguest_entry)
 /* We make the "initialization" hypercall now to tell the Host about
  * us, and also find out where it put our page tables. */
 movl $LHCALL_LGUEST_INIT, %eax
-movl $lguest_data - __PAGE_OFFSET, %edx
-int $LGUEST_TRAP_ENTRY
+movl $lguest_data - __PAGE_OFFSET, %ebx
+.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
 
 /* Set up the initial stack so we can run C code. */
 movl $(init_thread_union+THREAD_SIZE),%esp
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a103906..12898d4 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
&lt; at &gt;&lt; at &gt; -283,9 +283,10 &lt; at &gt;&lt; at &gt; static int direct_trap(unsigned int num)
 
 /* The Host needs to see page faults (for shadow paging and to save the
  * fault address), general protection faults (in/out emulation) and
- * device not available (TS handling), and of course, the hypercall
- * trap. */
-return num != 14 &amp;&amp; num != 13 &amp;&amp; num != 7 &amp;&amp; num != LGUEST_TRAP_ENTRY;
+ * device not available (TS handling), invalid opcode fault (kvm hcall)
+ * and of course, the hypercall trap. */
+return num != 14 &amp;&amp; num != 13 &amp;&amp; num != 7 &amp;&amp;
+num != LGUEST_TRAP_ENTRY &amp;&amp; num != 6;
 }
 /*:*/
 
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index a661bbd..99f63b1 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
&lt; at &gt;&lt; at &gt; -161,7 +161,7 &lt; at &gt;&lt; at &gt; static void set_status(struct virtio_device *vdev, u8 status)
 
 /* We set the status. */
 to_lgdev(vdev)-&gt;desc-&gt;status = status;
-hcall(LHCALL_NOTIFY, (max_pfn&lt;&lt;PAGE_SHIFT) + offset, 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, (max_pfn&lt;&lt;PAGE_SHIFT) + offset);
 }
 
 static void lg_set_status(struct virtio_device *vdev, u8 status)
&lt; at &gt;&lt; at &gt; -209,7 +209,7 &lt; at &gt;&lt; at &gt; static void lg_notify(struct virtqueue *vq)
  * virtqueue structure. */
 struct lguest_vq_info *lvq = vq-&gt;priv;
 
-hcall(LHCALL_NOTIFY, lvq-&gt;config.pfn &lt;&lt; PAGE_SHIFT, 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, lvq-&gt;config.pfn &lt;&lt; PAGE_SHIFT);
 }
 
 /* This routine finds the first virtqueue described in the configuration of
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index bf79423..b20503f 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
&lt; at &gt;&lt; at &gt; -290,6 +290,46 &lt; at &gt;&lt; at &gt; static int emulate_insn(struct lg_cpu *cpu)
 return 1;
 }
 
+static void rewrite_hypercall(struct lg_cpu *cpu)
+{
+unsigned long physaddr = guest_pa(cpu, cpu-&gt;regs-&gt;eip);
+
+/* These are the opcodes we use to patch the guest.
+ * The opcode for "int $0x1f"  is  0xcd 0x1f
+ * but vmcall instruction is 3 bytes long, so we complete
+ * the sequence with a NOP (0x90). */
+u8 insn[3] = {0xcd, 0x1f, 0x90};
+
+lgwrite(cpu, physaddr, u8, insn[0]);
+lgwrite(cpu, physaddr + 1, u8, insn[1]);
+lgwrite(cpu, physaddr + 2, u8, insn[2]);
+}
+
+static int is_hypercall(struct lg_cpu *cpu)
+{
+u8 insn[3];
+
+/* The eip contains the *virtual* address of the Guest's instruction:
+ * guest_pa just subtracts the Guest's page_offset. */
+unsigned long physaddr = guest_pa(cpu, cpu-&gt;regs-&gt;eip);
+
+/* This must be the Guest kernel trying to do something.
+ * The bottom two bits of the CS segment register are the privilege
+ * level. */
+if ((cpu-&gt;regs-&gt;cs &amp; 3) != GUEST_PL)
+return 0;
+
+/* Is it a vmcall? */
+insn[0] = lgread(cpu, physaddr, u8);
+insn[1] = lgread(cpu, physaddr + 1, u8);
+insn[2] = lgread(cpu, physaddr + 2, u8);
+
+if (insn[0] != 0x0f || insn[1] != 0x01 || insn[2] != 0xc1)
+return 0;
+
+return 1;
+}
+
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
 void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
&lt; at &gt;&lt; at &gt; -347,6 +387,15 &lt; at &gt;&lt; at &gt; void lguest_arch_handle_trap(struct lg_cpu *cpu)
  * up the pointer now to indicate a hypercall is pending. */
 cpu-&gt;hcall = (struct hcall_args *)cpu-&gt;regs;
 return;
+case 6:
+/* kvm hypercalls trigger an invalid opcode fault (6).
+ * We need to check if ring == GUEST_PL and
+ * faulting instruction == vmcall. */
+if (is_hypercall(cpu)) {
+rewrite_hypercall(cpu);
+return;
+}
+break;
 }
 
 /* We didn't handle the trap, so it needs to go to the Guest. */
diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h
index c0860dc..0cac03f 100644
--- a/include/asm-x86/lguest_hcall.h
+++ b/include/asm-x86/lguest_hcall.h
&lt; at &gt;&lt; at &gt; -27,36 +27,20 &lt; at &gt;&lt; at &gt;
 
 #ifndef __ASSEMBLY__
 #include &lt;asm/hw_irq.h&gt;
+#include &lt;asm/kvm_para.h&gt;
 
 /*G:031 But first, how does our Guest contact the Host to ask for privileged
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * Our hypercall mechanism uses the highest unused trap code (traps 32 and
- * above are used by real hardware interrupts).  Eighteen hypercalls are
+ * We use the KVM hypercall mechanism. Eighteen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %edx, %ebx and %ecx.  If a return
+ * arguments (when required) are placed in %ebx, %ecx and %edx.  If a return
  * value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
  * definition of a gentleman: "someone who is only rude intentionally". */
-static inline unsigned long
-hcall(unsigned long call,
-      unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-/* "int" is the Intel instruction to trigger a trap. */
-asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
-     /* The call in %eax (aka "a") might be overwritten */
-     : "=a"(call)
-       /* The arguments are in %eax, %edx, %ebx &amp; %ecx */
-     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
-       /* "memory" means this might write somewhere in memory.
-* This isn't true for all calls, but it's safe to tell
-* gcc that it might happen so it doesn't get clever. */
-     : "memory");
-return call;
-}
 /*:*/
 
 /* Can't use our min() macro here: needs to be a constant */
&lt; at &gt;&lt; at &gt; -65,7 +49,7 &lt; at &gt;&lt; at &gt; hcall(unsigned long call,
 #define LHCALL_RING_SIZE 64
 struct hcall_args {
 /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
-unsigned long arg0, arg2, arg3, arg1;
+unsigned long arg0, arg1, arg2, arg3;
 };
 
 #endif /* !__ASSEMBLY__ */
</description>
    <dc:creator>Matias Zabaljauregui</dc:creator>
    <dc:date>2008-10-22T22:32:06</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/574">
    <title>kernel BUG at arch/x86/kernel/irq_32.c</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/574</link>
    <description/>
    <dc:creator>Tiago Maluta</dc:creator>
    <dc:date>2008-10-22T13:15:00</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/573">
    <title>lguest behaviour</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/573</link>
    <description>In the last few weeks I tried some versions of Linux on lguest and noted
a particular behavior on my machine, so I'd like to ask a few questions
about lguest.

Basically I'm using the following configuration for lguest in my .config
file:

CONFIG_PARAVIRT=y
CONFIG_LGUEST_GUEST=y
CONFIG_HIGHMEM64G=n
CONFIG_PHYSICAL_ALIGN=0x100000
CONFIG_EXPERIMENTAL=y
CONFIG_VIRTIO_BLK=m
CONFIG_VIRTIO_NET=m
CONFIG_TUN=m
CONFIG_LGUEST=m

I tried some variations too, for example:

CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_NET=y

The following example from lguest.txt (Documentation) sometimes works
and sometimes doesn't work...

~#Documentation/lguest/lguest --tunnet=192.168.19.1
--block=/root/initrd-1.1-i386.img 64 vmlinux root=/dev/vda
(...)
IPv6 over IPv4 tunneling driver
NET: Registered protocol family 17
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
Using IPI No-Shortcut mode
VFS: Cannot open root device "vda" or unknown-block(254,0)
Please append a correct "root=" boot option; here are the available
partitions:
fe00       3560 vda driver: virtio_blk
Kernel panic - not syncing: VFS: Unable to mount root fs on
unknown-block(254,0)
lguest: CRASH: VFS: Unable to mount root fs on unknown-block(254,0)


As in the other thread, when I asked about lguest + nfs, I got the same results...

I'd like to know:

1) Is the virtio_blk module responsible for creating /dev/vda?
2) If yes, must I compile it as built-in?
3) Did I miss some .config option? I sometimes think that I missed some
important option about memory alignment, etc...

All these tests were performed on kernel #2.6.26.3.

Best Regards,
Tiago Maluta





_______________________________________________
Lguest mailing list
Lguest-mnsaURCQ41sdnm+yROfE0A&lt; at &gt;public.gmane.org
https://ozlabs.org/mailman/listinfo/lguest
</description>
    <dc:creator>Tiago Maluta</dc:creator>
    <dc:date>2008-10-21T18:51:29</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/558">
    <title>lguest under 2.6.26</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/558</link>
    <description>_______________________________________________
Lguest mailing list
Lguest-mnsaURCQ41sdnm+yROfE0A&lt; at &gt;public.gmane.org
https://ozlabs.org/mailman/listinfo/lguest
</description>
    <dc:creator>octane indice</dc:creator>
    <dc:date>2008-09-04T13:27:50</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/548">
    <title>lguest: unhandled trap</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/548</link>
    <description>Hi,

I'm using 2.6.27-05323-g26e9a39 and when I try to use lguest:

~#Documentation/lguest/lguest 128 vmlinux
lguest: unhandled trap 14 at 0xc0594f6a (0xff900000)


</description>
    <dc:creator>Tiago Maluta</dc:creator>
    <dc:date>2008-10-18T23:36:51</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/546">
    <title>[PATCH] lguest: Change over to using KVM hypercalls mechanism</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/546</link>
    <description>This patch allows us to use KVM hypercalls

Signed-off-by: Matias Zabaljauregui &lt;zabaljauregui-Re5JQEeQqe8AvxtiuMwx3w&lt; at &gt;public.gmane.org&gt;
---
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index c6a6b75..d5ca7e6 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
&lt; at &gt;&lt; at &gt; -106,7 +106,7 &lt; at &gt;&lt; at &gt; static void async_hcall(unsigned long call, unsigned long arg1,
 local_irq_save(flags);
 if (lguest_data.hcall_status[next_call] != 0xFF) {
 /* Table full, so do normal hcall which will flush table. */
-hcall(call, arg1, arg2, arg3);
+kvm_hypercall3(call, arg1, arg2, arg3);
 } else {
 lguest_data.hcalls[next_call].arg0 = call;
 lguest_data.hcalls[next_call].arg1 = arg1;
&lt; at &gt;&lt; at &gt; -133,13 +133,38 &lt; at &gt;&lt; at &gt; static void async_hcall(unsigned long call, unsigned long arg1,
  *
  * So, when we're in lazy mode, we call async_hcall() to store the call for
  * future processing: */
-static void lazy_hcall(unsigned long call,
+static void lazy_hcall0(unsigned long call)
+{
+if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+kvm_hypercall0(call);
+else
+async_hcall(call, 0, 0, 0);
+}
+static void lazy_hcall1(unsigned long call,
+       unsigned long arg1)
+{
+if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+kvm_hypercall1(call, arg1);
+else
+async_hcall(call, arg1, 0, 0);
+}
+static void lazy_hcall2(unsigned long call,
+       unsigned long arg1,
+       unsigned long arg2)
+{
+if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+kvm_hypercall2(call, arg1, arg2);
+else
+async_hcall(call, arg1, arg2, 0);
+}
+
+static void lazy_hcall3(unsigned long call,
        unsigned long arg1,
        unsigned long arg2,
        unsigned long arg3)
 {
 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
-hcall(call, arg1, arg2, arg3);
+kvm_hypercall3(call, arg1, arg2, arg3);
 else
 async_hcall(call, arg1, arg2, arg3);
 }
&lt; at &gt;&lt; at &gt; -149,7 +174,7 &lt; at &gt;&lt; at &gt; static void lazy_hcall(unsigned long call,
 static void lguest_leave_lazy_mode(void)
 {
 paravirt_leave_lazy(paravirt_get_lazy_mode());
-hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+kvm_hypercall0(LHCALL_FLUSH_ASYNC);
 }
 
 /*G:033
&lt; at &gt;&lt; at &gt; -223,7 +248,7 &lt; at &gt;&lt; at &gt; static void lguest_write_idt_entry(gate_desc *dt,
 /* Keep the local copy up to date. */
 native_write_idt_entry(dt, entrynum, g);
 /* Tell Host about this new entry. */
-hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
+kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
 }
 
 /* Changing to a different IDT is very rare: we keep the IDT up-to-date every
&lt; at &gt;&lt; at &gt; -235,7 +260,7 &lt; at &gt;&lt; at &gt; static void lguest_load_idt(const struct desc_ptr *desc)
 struct desc_struct *idt = (void *)desc-&gt;address;
 
 for (i = 0; i &lt; (desc-&gt;size+1)/8; i++)
-hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
+kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -256,7 +281,7 &lt; at &gt;&lt; at &gt; static void lguest_load_idt(const struct desc_ptr *desc)
 static void lguest_load_gdt(const struct desc_ptr *desc)
 {
 BUG_ON((desc-&gt;size+1)/8 != GDT_ENTRIES);
-hcall(LHCALL_LOAD_GDT, __pa(desc-&gt;address), GDT_ENTRIES, 0);
+kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc-&gt;address), GDT_ENTRIES);
 }
 
 /* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
&lt; at &gt;&lt; at &gt; -266,7 +291,7 &lt; at &gt;&lt; at &gt; static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
    const void *desc, int type)
 {
 native_write_gdt_entry(dt, entrynum, desc, type);
-hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
+kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES);
 }
 
 /* OK, I lied.  There are three "thread local storage" GDT entries which change
&lt; at &gt;&lt; at &gt; -278,7 +303,7 &lt; at &gt;&lt; at &gt; static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
  * can't handle us removing entries we're currently using.  So we clear
  * the GS register here: if it's needed it'll be reloaded anyway. */
 loadsegment(gs, 0);
-lazy_hcall(LHCALL_LOAD_TLS, __pa(&amp;t-&gt;tls_array), cpu, 0);
+lazy_hcall2(LHCALL_LOAD_TLS, __pa(&amp;t-&gt;tls_array), cpu);
 }
 
 /*G:038 That's enough excitement for now, back to ploughing through each of
&lt; at &gt;&lt; at &gt; -376,7 +401,7 &lt; at &gt;&lt; at &gt; static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 static unsigned long current_cr0, current_cr3;
 static void lguest_write_cr0(unsigned long val)
 {
-lazy_hcall(LHCALL_TS, val &amp; X86_CR0_TS, 0, 0);
+lazy_hcall1(LHCALL_TS, val &amp; X86_CR0_TS);
 current_cr0 = val;
 }
 
&lt; at &gt;&lt; at &gt; -390,7 +415,7 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr0(void)
  * the vowels have been optimized out. */
 static void lguest_clts(void)
 {
-lazy_hcall(LHCALL_TS, 0, 0, 0);
+lazy_hcall1(LHCALL_TS, 0);
 current_cr0 &amp;= ~X86_CR0_TS;
 }
 
&lt; at &gt;&lt; at &gt; -406,7 +431,7 &lt; at &gt;&lt; at &gt; static unsigned long lguest_read_cr2(void)
  * cr0.  Keep a local copy, and tell the Host when it changes. */
 static void lguest_write_cr3(unsigned long cr3)
 {
-lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
+lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
 current_cr3 = cr3;
 }
 
&lt; at &gt;&lt; at &gt; -482,7 +507,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
       pte_t *ptep, pte_t pteval)
 {
 *ptep = pteval;
-lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pteval.pte_low);
+lazy_hcall3(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pteval.pte_low);
 }
 
 #ifdef CONFIG_X86_PAE
&lt; at &gt;&lt; at &gt; -493,24 +518,24 &lt; at &gt;&lt; at &gt; static void lguest_set_pud(pud_t *pudp, pud_t pudval)
 {
 *pudp = pudval;
 /* 32 bytes aligned pdpt address. */
-lazy_hcall(LHCALL_SET_PUD, __pa(pudp) &amp; 0xFFFFFFE0,
-   (__pa(pudp) &amp; 0x1F) / 8, 0);
+lazy_hcall2(LHCALL_SET_PUD, __pa(pudp) &amp; 0xFFFFFFE0,
+   (__pa(pudp) &amp; 0x1F) / 8);
 }
 
 /* The Guest calls this to set a PMD entry, when PAE is active */
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 *pmdp = pmdval;
-lazy_hcall(LHCALL_SET_PMD, __pa(pmdp) &amp; PAGE_MASK,
-   (__pa(pmdp) &amp; (PAGE_SIZE - 1)) / 8, 0);
+lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) &amp; PAGE_MASK,
+   (__pa(pmdp) &amp; (PAGE_SIZE - 1)) / 8);
 }
 
 #else
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 *pmdp = pmdval;
-lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&amp;PAGE_MASK,
-   (__pa(pmdp)&amp;(PAGE_SIZE-1))/4, 0);
+lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp)&amp;PAGE_MASK,
+   (__pa(pmdp)&amp;(PAGE_SIZE-1))/4);
 }
 #endif
 
&lt; at &gt;&lt; at &gt; -534,7 +559,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 
 /* Don't bother with hypercall before initial setup. */
 if (current_cr3)
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 #ifdef CONFIG_X86_PAE
&lt; at &gt;&lt; at &gt; -544,7 +569,7 &lt; at &gt;&lt; at &gt; static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
 
 /* Don't bother with hypercall before initial setup. */
 if (current_cr3)
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 static inline void lguest_set_pte_present(struct mm_struct *mm,
&lt; at &gt;&lt; at &gt; -557,7 +582,7 &lt; at &gt;&lt; at &gt; static inline void lguest_set_pte_present(struct mm_struct *mm,
 smp_wmb();
 ptep-&gt;pte_low = pte.pte_low;
 
-lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pte.pte_low);
+lazy_hcall3(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pte.pte_low);
 }
 
 void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
&lt; at &gt;&lt; at &gt; -566,7 +591,7 &lt; at &gt;&lt; at &gt; void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 smp_wmb();
 ptep-&gt;pte_high = 0;
 
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
 }
 
 void lguest_pmd_clear(pmd_t *pmdp)
&lt; at &gt;&lt; at &gt; -588,7 +613,7 &lt; at &gt;&lt; at &gt; void lguest_pmd_clear(pmd_t *pmdp)
 static void lguest_flush_tlb_single(unsigned long addr)
 {
 /* Simply set it to zero: if it was not, it will fault back in. */
-lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
 }
 
 /* This is what happens after the Guest has removed a large number of entries.
&lt; at &gt;&lt; at &gt; -596,7 +621,7 &lt; at &gt;&lt; at &gt; static void lguest_flush_tlb_single(unsigned long addr)
  * have changed, ie. virtual addresses below PAGE_OFFSET. */
 static void lguest_flush_tlb_user(void)
 {
-lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 0);
 }
 
 /* This is called when the kernel page tables have changed.  That's not very
&lt; at &gt;&lt; at &gt; -604,7 +629,7 &lt; at &gt;&lt; at &gt; static void lguest_flush_tlb_user(void)
  * slow), so it's worth separating this from the user flushing above. */
 static void lguest_flush_tlb_kernel(void)
 {
-lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -735,7 +760,7 &lt; at &gt;&lt; at &gt; static int lguest_clockevent_set_next_event(unsigned long delta,
 }
 
 /* Please wake us this far in the future. */
-hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0);
+kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta);
 return 0;
 }
 
&lt; at &gt;&lt; at &gt; -746,7 +771,7 &lt; at &gt;&lt; at &gt; static void lguest_clockevent_set_mode(enum clock_event_mode mode,
 case CLOCK_EVT_MODE_UNUSED:
 case CLOCK_EVT_MODE_SHUTDOWN:
 /* A 0 argument shuts the clock down. */
-hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0);
+kvm_hypercall0(LHCALL_SET_CLOCKEVENT);
 break;
 case CLOCK_EVT_MODE_ONESHOT:
 /* This is what we expect. */
&lt; at &gt;&lt; at &gt; -821,7 +846,7 &lt; at &gt;&lt; at &gt; static void lguest_time_init(void)
 static void lguest_load_sp0(struct tss_struct *tss,
     struct thread_struct *thread)
 {
-lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread-&gt;sp0,
+lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread-&gt;sp0,
    THREAD_SIZE/PAGE_SIZE);
 }
 
&lt; at &gt;&lt; at &gt; -864,7 +889,7 &lt; at &gt;&lt; at &gt; static u32 lguest_apic_read(unsigned long reg)
 /* STOP!  Until an interrupt comes in. */
 static void lguest_safe_halt(void)
 {
-hcall(LHCALL_HALT, 0, 0, 0);
+kvm_hypercall0(LHCALL_HALT);
 }
 
 /* The SHUTDOWN hypercall takes a string to describe what's happening, and
&lt; at &gt;&lt; at &gt; -874,7 +899,8 &lt; at &gt;&lt; at &gt; static void lguest_safe_halt(void)
  * rather than virtual addresses, so we use __pa() here. */
 static void lguest_power_off(void)
 {
-hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"),
+LGUEST_SHUTDOWN_POWEROFF);
 }
 
 /*
&lt; at &gt;&lt; at &gt; -884,7 +910,7 &lt; at &gt;&lt; at &gt; static void lguest_power_off(void)
  */
 static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
 {
-hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF);
 /* The hcall won't return, but to keep gcc happy, we're "done". */
 return NOTIFY_DONE;
 }
&lt; at &gt;&lt; at &gt; -925,7 +951,7 &lt; at &gt;&lt; at &gt; static __init int early_put_chars(u32 vtermno, const char *buf, int count)
 len = sizeof(scratch) - 1;
 scratch[len] = '\0';
 memcpy(scratch, buf, len);
-hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch));
 
 /* This routine returns the number of bytes actually written. */
 return len;
&lt; at &gt;&lt; at &gt; -935,7 +961,7 &lt; at &gt;&lt; at &gt; static __init int early_put_chars(u32 vtermno, const char *buf, int count)
  * Launcher to reboot us. */
 static void lguest_restart(char *reason)
 {
-hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
+kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART);
 }
 
 /*G:050
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 10b9bd3..f795419 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
&lt; at &gt;&lt; at &gt; -27,8 +27,8 &lt; at &gt;&lt; at &gt; ENTRY(lguest_entry)
 /* We make the "initialization" hypercall now to tell the Host about
  * us, and also find out where it put our page tables. */
 movl $LHCALL_LGUEST_INIT, %eax
-movl $lguest_data - __PAGE_OFFSET, %edx
-int $LGUEST_TRAP_ENTRY
+movl $lguest_data - __PAGE_OFFSET, %ebx
+.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
 
 /* Set up the initial stack so we can run C code. */
 movl $(init_thread_union+THREAD_SIZE),%esp
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a103906..96c296e 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
&lt; at &gt;&lt; at &gt; -285,7 +285,7 &lt; at &gt;&lt; at &gt; static int direct_trap(unsigned int num)
  * fault address), general protection faults (in/out emulation) and
  * device not available (TS handling), and of course, the hypercall
  * trap. */
-return num != 14 &amp;&amp; num != 13 &amp;&amp; num != 7 &amp;&amp; num != LGUEST_TRAP_ENTRY;
+return num != 14 &amp;&amp; num != 13 &amp;&amp; num != 7 &amp;&amp; num != 6;
 }
 /*:*/
 
&lt; at &gt;&lt; at &gt; -387,7 +387,7 &lt; at &gt;&lt; at &gt; void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
 {
 /* Guest never handles: NMI, doublefault, spurious interrupt or
  * hypercall.  We ignore when it tries to set them. */
-if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY)
+if (num == 2 || num == 8 || num == 15)
 return;
 
 /* Mark the IDT as changed: next time the Guest runs we'll know we have
&lt; at &gt;&lt; at &gt; -412,11 +412,7 &lt; at &gt;&lt; at &gt; static void default_idt_entry(struct desc_struct *idt,
 /* A present interrupt gate. */
 u32 flags = 0x8e00;
 
-/* Set the privilege level on the entry for the hypercall: this allows
- * the Guest to use the "int" instruction to trigger it. */
-if (trap == LGUEST_TRAP_ENTRY)
-flags |= (GUEST_PL &lt;&lt; 13);
-else if (base)
+if (base)
 /* Copy priv. level from what Guest asked for.  This allows
  * debug (int 3) traps from Guest userspace, for example. */
 flags |= (base-&gt;b &amp; 0x6000);
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index a661bbd..99f63b1 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
&lt; at &gt;&lt; at &gt; -161,7 +161,7 &lt; at &gt;&lt; at &gt; static void set_status(struct virtio_device *vdev, u8 status)
 
 /* We set the status. */
 to_lgdev(vdev)-&gt;desc-&gt;status = status;
-hcall(LHCALL_NOTIFY, (max_pfn&lt;&lt;PAGE_SHIFT) + offset, 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, (max_pfn&lt;&lt;PAGE_SHIFT) + offset);
 }
 
 static void lg_set_status(struct virtio_device *vdev, u8 status)
&lt; at &gt;&lt; at &gt; -209,7 +209,7 &lt; at &gt;&lt; at &gt; static void lg_notify(struct virtqueue *vq)
  * virtqueue structure. */
 struct lguest_vq_info *lvq = vq-&gt;priv;
 
-hcall(LHCALL_NOTIFY, lvq-&gt;config.pfn &lt;&lt; PAGE_SHIFT, 0, 0);
+kvm_hypercall1(LHCALL_NOTIFY, lvq-&gt;config.pfn &lt;&lt; PAGE_SHIFT);
 }
 
 /* This routine finds the first virtqueue described in the configuration of
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index bf79423..9634998 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
&lt; at &gt;&lt; at &gt; -290,6 +290,34 &lt; at &gt;&lt; at &gt; static int emulate_insn(struct lg_cpu *cpu)
 return 1;
 }
 
+static int is_hypercall(struct lg_cpu *cpu)
+{
+u8 insn[3];
+
+/* The eip contains the *virtual* address of the Guest's instruction:
+ * guest_pa just subtracts the Guest's page_offset. */
+unsigned long physaddr = guest_pa(cpu, cpu-&gt;regs-&gt;eip);
+
+/* This must be the Guest kernel trying to do something.
+ * The bottom two bits of the CS segment register are the privilege
+ * level. */
+if ((cpu-&gt;regs-&gt;cs &amp; 3) != GUEST_PL)
+return 0;
+
+/* Is it a vmcall? */
+insn[0] = lgread(cpu, physaddr, u8);
+insn[1] = lgread(cpu, physaddr + 1, u8);
+insn[2] = lgread(cpu, physaddr + 2, u8);
+
+if (insn[0] != 0x0f || insn[1] != 0x01 || insn[2] != 0xc1)
+return 0;
+
+/* Finally, we've "done" the instruction, so move past it. */
+cpu-&gt;regs-&gt;eip += 3;
+/* Success! */
+return 1;
+}
+
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
 void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
&lt; at &gt;&lt; at &gt; -342,11 +370,18 &lt; at &gt;&lt; at &gt; void lguest_arch_handle_trap(struct lg_cpu *cpu)
  * return to run the Guest again */
 cond_resched();
 return;
-case LGUEST_TRAP_ENTRY:
-/* Our 'struct hcall_args' maps directly over our regs: we set
- * up the pointer now to indicate a hypercall is pending. */
-cpu-&gt;hcall = (struct hcall_args *)cpu-&gt;regs;
-return;
+case 6:
+/* kvm hypercalls trigger an invalid opcode fault (6).
+ * we need to check if ring == GUEST_PL and
+ * faulting instruction == vmcall */
+if (is_hypercall(cpu)) {
+/* Our 'struct hcall_args' maps directly over
+ * our regs: we set up the pointer now to indicate
+ * a hypercall is pending. */
+cpu-&gt;hcall = (struct hcall_args *)cpu-&gt;regs;
+return;
+}
+break;
 }
 
 /* We didn't handle the trap, so it needs to go to the Guest. */
diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h
index c0860dc..0cac03f 100644
--- a/include/asm-x86/lguest_hcall.h
+++ b/include/asm-x86/lguest_hcall.h
&lt; at &gt;&lt; at &gt; -27,36 +27,20 &lt; at &gt;&lt; at &gt;
 
 #ifndef __ASSEMBLY__
 #include &lt;asm/hw_irq.h&gt;
+#include &lt;asm/kvm_para.h&gt;
 
 /*G:031 But first, how does our Guest contact the Host to ask for privileged
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * Our hypercall mechanism uses the highest unused trap code (traps 32 and
- * above are used by real hardware interrupts).  Eighteen hypercalls are
+ * We use the KVM hypercall mechanism. Eighteen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %edx, %ebx and %ecx.  If a return
+ * arguments (when required) are placed in %ebx, %ecx and %edx.  If a return
  * value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
  * definition of a gentleman: "someone who is only rude intentionally". */
-static inline unsigned long
-hcall(unsigned long call,
-      unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-/* "int" is the Intel instruction to trigger a trap. */
-asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
-     /* The call in %eax (aka "a") might be overwritten */
-     : "=a"(call)
-       /* The arguments are in %eax, %edx, %ebx &amp; %ecx */
-     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
-       /* "memory" means this might write somewhere in memory.
-* This isn't true for all calls, but it's safe to tell
-* gcc that it might happen so it doesn't get clever. */
-     : "memory");
-return call;
-}
 /*:*/
 
 /* Can't use our min() macro here: needs to be a constant */
&lt; at &gt;&lt; at &gt; -65,7 +49,7 &lt; at &gt;&lt; at &gt; hcall(unsigned long call,
 #define LHCALL_RING_SIZE 64
 struct hcall_args {
 /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
-unsigned long arg0, arg2, arg3, arg1;
+unsigned long arg0, arg1, arg2, arg3;
 };
 
 #endif /* !__ASSEMBLY__ */
</description>
    <dc:creator>Matias Zabaljauregui</dc:creator>
    <dc:date>2008-10-17T23:04:24</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/538">
    <title>booting a kernel with initramfs</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/538</link>
    <description>Hi,

I'd like to make a test using lguest booting a kernel with initramfs. From Documentation/filesystems/ramfs-rootfs-initramfs.txt I tried the approach creating a file (initramfs_list) and compiled. Basically my file contains:

nod /dev/console 644 0 0 c 5 1
nod /dev/loop0 644 0 0 b 7 0
nod /dev/tty0 600 0 0 c 4 0
nod /dev/tty1 600 0 0 c 4 1
nod /dev/ttyS0 644 0 0 c 4 64
dir /bin 755 1000 1000
file /bin/busybox initramfs/busybox 755 0 0
slink /bin/sh /bin/busybox 777 0 0
slink /init initramfs/init.sh 755 0 0
dir /proc 755 0 0
dir /sys 755 0 0
dir /mnt 755 0 0

Where init.sh contains only echo "Hello" and busybox is statically linked. 

#Documentation/lguest/lguest 64 vmlinux 
(...)
Using IPI No-Shortcut mode
Root-NFS: No NFS server available, giving up.
VFS: Unable to mount root fs via NFS, trying floppy.
VFS: Insert root floppy and press ENTER
VFS: Cannot open root device "&lt;NULL&gt;" or unknown-block(2,0)
Please append a correct "root=" boot option; here are the available partitions:
Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(2,0)
lguest: CRASH: VFS: Unable to mount root fs on unknown-block(2,0)

I expected to see the echo output from init.sh before getting the kernel panic; I think that I forgot something in /dev.

Obs.: I've tried passing the console= parameter to the kernel without success.

Best regards,
Tiago Maluta
</description>
    <dc:creator>Tiago Maluta</dc:creator>
    <dc:date>2008-10-16T03:16:06</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/535">
    <title>[PATCH 1/2] lguest: Split add_used() into two logical steps</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/535</link>
    <description>Split add_used() into two logical steps - adding a buffer
to the used ring and notifying the other side of used buffers.

Signed-off-by: Mark McLoughlin &lt;markmc-H+wXaHxf7aLQT0dZR+AlfA&lt; at &gt;public.gmane.org&gt;
---
 Documentation/lguest/lguest.c |   30 ++++++++++++++++++++++--------
 1 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 7228369..2ddf0c6 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
&lt; at &gt;&lt; at &gt; -750,21 +750,35 &lt; at &gt;&lt; at &gt; static unsigned get_vq_desc(struct virtqueue *vq,
 return head;
 }
 
-/* After we've used one of their buffers, we tell them about it.  We'll then
- * want to send them an interrupt, using trigger_irq(). */
-static void add_used(struct virtqueue *vq, unsigned int head, int len)
+/* After we've used one of their buffers, we add it to the used list */
+static void add_used(struct virtqueue *vq, unsigned int head, int len, int idx)
 {
 struct vring_used_elem *used;
 
+idx += vq-&gt;vring.used-&gt;idx;
+
 /* The virtqueue contains a ring of used buffers.  Get a pointer to the
  * next entry in that used ring. */
-used = &amp;vq-&gt;vring.used-&gt;ring[vq-&gt;vring.used-&gt;idx % vq-&gt;vring.num];
+used = &amp;vq-&gt;vring.used-&gt;ring[idx % vq-&gt;vring.num];
 used-&gt;id = head;
 used-&gt;len = len;
+}
+
+/* Next we need to tell them about the buffers we've used.  We'll then want to
+ * send them an interrupt, using trigger_irq(). */
+static void flush_used(struct virtqueue *vq, unsigned int count)
+{
 /* Make sure buffer is written before we update index. */
 wmb();
-vq-&gt;vring.used-&gt;idx++;
-vq-&gt;inflight--;
+vq-&gt;vring.used-&gt;idx += count;
+vq-&gt;inflight -= count;
+}
+
+/* Usually, you only use a single buffer at a time */
+static void add_used_and_flush(struct virtqueue *vq, unsigned int head, int len)
+{
+add_used(vq, head, len, 0);
+flush_used(vq, 1);
 }
 
 /* This actually sends the interrupt for this virtqueue */
&lt; at &gt;&lt; at &gt; -786,7 +800,7 &lt; at &gt;&lt; at &gt; static void trigger_irq(int fd, struct virtqueue *vq)
 static void add_used_and_trigger(int fd, struct virtqueue *vq,
  unsigned int head, int len)
 {
-add_used(vq, head, len);
+add_used_and_flush(vq, head, len);
 trigger_irq(fd, vq);
 }
 
&lt; at &gt;&lt; at &gt; -1677,7 +1691,7 &lt; at &gt;&lt; at &gt; static bool service_io(struct device *dev)
 
 /* We can't trigger an IRQ, because we're not the Launcher.  It does
  * that when we tell it we're done. */
-add_used(dev-&gt;vq, head, wlen);
+add_used_and_flush(dev-&gt;vq, head, wlen);
 return true;
 }
 
</description>
    <dc:creator>Mark McLoughlin</dc:creator>
    <dc:date>2008-10-08T19:35:07</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/532">
    <title>lguest + nfs</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/532</link>
    <description>Hi,

I'm trying to mount rootfs from lguest over NFS. 

(1) I can use lguest with an initrd image perfectly.
(3) My NFS is working too.
(2) I'm using:

    # lguest 64 vmlinux root=/dev/nfs \
        nfsroot=192.168.254.1:/ \ 
        ip=192.168.254.40:192.168.254.1:192.168.254.1:255.255.255.0:\
        lguest:eth0:off

After booting I got:
(...)
IP-Config: Device `eth0' not found.
Looking up port of RPC 100003/2 on 192.168.254.1

(3) I'm using Linux 2.6.26.3 on Gentoo

I've tried many variations without success. I'd like to know if it's possible to use lguest over NFS.

Best regards,
Tiago Maluta


      Novos endereços, o Yahoo! que você conhece. Crie um email novo com a sua cara &lt; at &gt;ymail.com ou &lt; at &gt;rocketmail.com.
http://br.new.mail.yahoo.com/addresses
</description>
    <dc:creator>Tiago Maluta</dc:creator>
    <dc:date>2008-10-08T00:07:04</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/531">
    <title>[PATCH v2 2/3] lguest: Physical Address Extension support</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/531</link>
    <description>This patch adds Physical Address Extension support to lguest.

Signed-off-by: Matias Zabaljauregui &lt;zabaljauregui-Re5JQEeQqe8AvxtiuMwx3w&lt; at &gt;public.gmane.org&gt;

diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index c70e12b..a108c09 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
&lt; at &gt;&lt; at &gt; -2,7 +2,6 &lt; at &gt;&lt; at &gt; config LGUEST_GUEST
 bool "Lguest guest support"
 select PARAVIRT
 depends on X86_32
-depends on !X86_PAE
 depends on !X86_VOYAGER
 select VIRTIO
 select VIRTIO_RING
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d9249a8..fc2331e 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
&lt; at &gt;&lt; at &gt; -334,8 +334,12 &lt; at &gt;&lt; at &gt; static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 case 1:/* Basic feature request. */
 /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 *cx &amp;= 0x00002201;
-/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
+/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
+#ifdef CONFIG_X86_PAE
+*dx &amp;= 0x07808151;
+#else
 *dx &amp;= 0x07808111;
+#endif
 /* The Host can do a nice optimization if it knows that the
  * kernel mappings (addresses above 0xC0000000 or whatever
  * PAGE_OFFSET is set to) haven't changed.  But Linux calls
&lt; at &gt;&lt; at &gt; -481,15 +485,34 &lt; at &gt;&lt; at &gt; static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
 lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pteval.pte_low);
 }

+#ifdef CONFIG_X86_PAE
 /* The Guest calls this to set a top-level entry.  Again, we set the entry then
  * tell the Host which top-level page we changed, and the index of the entry we
  * changed. */
+static void lguest_set_pud(pud_t *pudp, pud_t pudval)
+{
+*pudp = pudval;
+/* 32 bytes aligned pdpt address. */
+lazy_hcall(LHCALL_SET_PUD, __pa(pudp) &amp; 0xFFFFFFE0,
+   (__pa(pudp) &amp; 0x1F) / 8, 0);
+}
+
+/* The Guest calls this to set a PMD entry, when PAE is active */
+static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+*pmdp = pmdval;
+lazy_hcall(LHCALL_SET_PMD, __pa(pmdp) &amp; PAGE_MASK,
+   (__pa(pmdp) &amp; (PAGE_SIZE - 1)) / 8, 0);
+}
+
+#else
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 *pmdp = pmdval;
 lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&amp;PAGE_MASK,
    (__pa(pmdp)&amp;(PAGE_SIZE-1))/4, 0);
 }
+#endif

 /* There are a couple of legacy places where the kernel sets a PTE, but we
  * don't know the top level any more.  This is useless for us, since we don't
&lt; at &gt;&lt; at &gt; -501,12 +524,57 &lt; at &gt;&lt; at &gt; static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * anything changed until we've done the first page table switch. */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
+#ifdef CONFIG_X86_PAE
+ptep-&gt;pte_high = pteval.pte_high;
+smp_wmb();
+ptep-&gt;pte_low = pteval.pte_low;
+#else
 *ptep = pteval;
+#endif
+
+/* Don't bother with hypercall before initial setup. */
+if (current_cr3)
+lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
+}
+
+#ifdef CONFIG_X86_PAE
+static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+set_64bit((u64 *)ptep, pte.pte);
+
 /* Don't bother with hypercall before initial setup. */
 if (current_cr3)
 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
 }

+static inline void lguest_set_pte_present(struct mm_struct *mm,
+unsigned long addr,
+pte_t *ptep, pte_t pte)
+{
+ptep-&gt;pte_low = 0;
+smp_wmb();
+ptep-&gt;pte_high = pte.pte_high;
+smp_wmb();
+ptep-&gt;pte_low = pte.pte_low;
+
+lazy_hcall(LHCALL_SET_PTE, __pa(mm-&gt;pgd), addr, pte.pte_low);
+}
+
+void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ptep-&gt;pte_low = 0;
+smp_wmb();
+ptep-&gt;pte_high = 0;
+
+lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+}
+
+void lguest_pmd_clear(pmd_t *pmdp)
+{
+lguest_set_pmd(pmdp, __pmd(0));
+}
+#endif
+
 /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
  * native page table operations.  On native hardware you can set a new page
  * table entry whenever you want, but if you want to remove one you have to do
&lt; at &gt;&lt; at &gt; -983,6 +1051,14 &lt; at &gt;&lt; at &gt; __init void lguest_init(void)
 pv_mmu_ops.set_pte = lguest_set_pte;
 pv_mmu_ops.set_pte_at = lguest_set_pte_at;
 pv_mmu_ops.set_pmd = lguest_set_pmd;
+
+#ifdef CONFIG_X86_PAE
+pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
+pv_mmu_ops.set_pte_present = lguest_set_pte_at;
+pv_mmu_ops.pte_clear = lguest_pte_clear;
+pv_mmu_ops.pmd_clear = lguest_pmd_clear;
+pv_mmu_ops.set_pud = lguest_set_pud;
+#endif
 pv_mmu_ops.read_cr2 = lguest_read_cr2;
 pv_mmu_ops.read_cr3 = lguest_read_cr3;
 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 76f2b36..5d491bb 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
&lt; at &gt;&lt; at &gt; -1,6 +1,6 &lt; at &gt;&lt; at &gt;
 config LGUEST
 tristate "Linux hypervisor example code"
-depends on X86_32 &amp;&amp; EXPERIMENTAL &amp;&amp; !X86_PAE &amp;&amp; FUTEX &amp;&amp; !X86_VOYAGER
+depends on X86_32 &amp;&amp; EXPERIMENTAL &amp;&amp; FUTEX &amp;&amp; !X86_VOYAGER
 select HVC_DRIVER
 ---help---
   This is a very simple module which allows you to run
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 54d66f0..c5d6678 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
&lt; at &gt;&lt; at &gt; -78,6 +78,11 &lt; at &gt;&lt; at &gt; static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 case LHCALL_SET_PMD:
 guest_set_pmd(cpu-&gt;lg, args-&gt;arg1, args-&gt;arg2);
 break;
+#ifdef CONFIG_X86_PAE
+case LHCALL_SET_PUD:
+guest_set_pud(cpu-&gt;lg, args-&gt;arg1, args-&gt;arg2);
+break;
+#endif
 case LHCALL_SET_CLOCKEVENT:
 guest_set_clockevent(cpu, args-&gt;arg1);
 break;
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index f2c641e..1549ba4 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
&lt; at &gt;&lt; at &gt; -18,7 +18,7 &lt; at &gt;&lt; at &gt; int init_pagetables(struct page **switcher_page, unsigned int pages);

 struct pgdir
 {
-unsigned long gpgdir;
+pgd_t *gpgdir;
 pgd_t *pgdir;
 };

&lt; at &gt;&lt; at &gt; -137,6 +137,8 &lt; at &gt;&lt; at &gt; int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
  * in the kernel. */
 #define pgd_flags(x)(pgd_val(x) &amp; ~PAGE_MASK)
 #define pgd_pfn(x)(pgd_val(x) &gt;&gt; PAGE_SHIFT)
+#define pmd_flags(x)    (pmd_val(x) &amp; ~PAGE_MASK)
+#define pmd_pfn(x)(pmd_val(x) &gt;&gt; PAGE_SHIFT)

 /* interrupts_and_traps.c: */
 void maybe_do_interrupt(struct lg_cpu *cpu);
&lt; at &gt;&lt; at &gt; -168,6 +170,9 &lt; at &gt;&lt; at &gt; int init_guest_pagetable(struct lguest *lg);
 void free_guest_pagetable(struct lguest *lg);
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
 void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
+#ifdef CONFIG_X86_PAE
+void guest_set_pud(struct lguest *lg, unsigned long gpgdir, u32 i);
+#endif
 void guest_pagetable_clear_all(struct lg_cpu *cpu);
 void guest_pagetable_flush_user(struct lg_cpu *cpu);
 void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 99b6f66..f606d68 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
&lt; at &gt;&lt; at &gt; -47,12 +47,17 &lt; at &gt;&lt; at &gt;
  *  (vii) Setting up the page tables initially.
  :*/

-
 /* 1024 entries in a page table page maps 1024 pages: 4MB.  The Switcher is
  * conveniently placed at the top 4MB, so it uses a separate, complete PTE
  * page.  */
 #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)

+/* For PAE we need the PMD index as well. We can use the last 2MB, so we
+ * will need the last pmd entry of the last pmd page. */
+#ifdef CONFIG_X86_PAE
+#define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1)
+#endif
+
 /* We actually need a separate PTE page for each CPU.  Remember that after the
  * Switcher code itself comes two pages for each CPU, and we don't want this
  * CPU's guest to see the pages of any other CPU. */
&lt; at &gt;&lt; at &gt; -73,39 +78,90 &lt; at &gt;&lt; at &gt; static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
 {
 unsigned int index = pgd_index(vaddr);

+#ifndef CONFIG_X86_PAE
 /* We kill any Guest trying to touch the Switcher addresses. */
 if (index &gt;= SWITCHER_PGD_INDEX) {
 kill_guest(cpu, "attempt to access switcher pages");
 index = 0;
 }
+#endif
 /* Return a pointer index'th pgd entry for the i'th page table. */
 return &amp;cpu-&gt;lg-&gt;pgdirs[i].pgdir[index];
 }

+#ifdef CONFIG_X86_PAE
+/* This routine then takes the PGD entry given above, which contains the
+ * address of the PMD page.  It then returns a pointer to the PMD entry for the
+ * given address. */
+static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
+{
+unsigned int index = pmd_index(vaddr);
+pmd_t *page;
+
+/* We kill any Guest trying to touch the Switcher addresses. */
+if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &amp;&amp;
+index &gt;= SWITCHER_PMD_INDEX) {
+kill_guest(cpu, "attempt to access switcher pages");
+index = 0;
+}
+
+/* You should never call this if the PGD entry wasn't valid */
+BUG_ON(!(pgd_flags(spgd) &amp; _PAGE_PRESENT));
+
+page = __va(pgd_pfn(spgd) &lt;&lt; PAGE_SHIFT);
+return &amp;page[index];
+}
+#endif
+
 /* This routine then takes the page directory entry returned above, which
  * contains the address of the page table entry (PTE) page.  It then returns a
  * pointer to the PTE entry for the given address. */
-static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
+static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
 {
+#ifdef CONFIG_X86_PAE
+pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);
+pte_t *page = __va(pmd_pfn(*pmd) &lt;&lt; PAGE_SHIFT);
+
+/* You should never call this if the PMD entry wasn't valid */
+BUG_ON(!(pmd_flags(*pmd) &amp; _PAGE_PRESENT));
+#else
 pte_t *page = __va(pgd_pfn(spgd) &lt;&lt; PAGE_SHIFT);
+
 /* You should never call this if the PGD entry wasn't valid */
 BUG_ON(!(pgd_flags(spgd) &amp; _PAGE_PRESENT));
-return &amp;page[(vaddr &gt;&gt; PAGE_SHIFT) % PTRS_PER_PTE];
+#endif
+return &amp;page[pte_index(vaddr)];
 }

 /* These two functions just like the above two, except they access the Guest
  * page tables.  Hence they return a Guest address. */
-static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
+static pgd_t *gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
 {
 unsigned int index = vaddr &gt;&gt; (PGDIR_SHIFT);
-return cpu-&gt;lg-&gt;pgdirs[cpu-&gt;cpu_pgd].gpgdir + index * sizeof(pgd_t);
+return cpu-&gt;lg-&gt;pgdirs[cpu-&gt;cpu_pgd].gpgdir + index;
+}
+
+#ifdef CONFIG_X86_PAE
+static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
+{
+unsigned long gpage = pgd_pfn(gpgd) &lt;&lt; PAGE_SHIFT;
+BUG_ON(!(pgd_flags(gpgd) &amp; _PAGE_PRESENT));
+return gpage + pmd_index(vaddr) * sizeof(pmd_t);
 }
+#endif

-static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
+static unsigned long gpte_addr(struct lg_cpu *cpu,
+pgd_t gpgd, unsigned long vaddr)
 {
+#ifdef CONFIG_X86_PAE
+pmd_t gpmd = lgread(cpu,
+(unsigned long) gpmd_addr(gpgd, vaddr), pmd_t);
+unsigned long gpage = pmd_pfn(gpmd) &lt;&lt; PAGE_SHIFT;
+#else
 unsigned long gpage = pgd_pfn(gpgd) &lt;&lt; PAGE_SHIFT;
 BUG_ON(!(pgd_flags(gpgd) &amp; _PAGE_PRESENT));
-return gpage + ((vaddr&gt;&gt;PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
+#endif
+return gpage + pte_index(vaddr) * sizeof(pte_t);
 }
 /*:*/

&lt; at &gt;&lt; at &gt; -184,11 +240,24 &lt; at &gt;&lt; at &gt; static void check_gpte(struct lg_cpu *cpu, pte_t gpte)

 static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
 {
+#ifdef CONFIG_X86_PAE
+if ((pgd_flags(gpgd) &amp; ~_PAGE_PRESENT) ||
+#else
 if ((pgd_flags(gpgd) &amp; ~_PAGE_TABLE) ||
+#endif
    (pgd_pfn(gpgd) &gt;= cpu-&gt;lg-&gt;pfn_limit))
 kill_guest(cpu, "bad page directory entry");
 }

+#ifdef CONFIG_X86_PAE
+static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
+{
+if ((pmd_flags(gpmd) &amp; ~_PAGE_TABLE) ||
+   (pmd_pfn(gpmd) &gt;= cpu-&gt;lg-&gt;pfn_limit))
+kill_guest(cpu, "bad page middle directory entry");
+}
+#endif
+
 /*H:330
  * (i) Looking up a page table entry when the Guest faults.
  *
&lt; at &gt;&lt; at &gt; -207,14 +276,21 &lt; at &gt;&lt; at &gt; int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 pte_t gpte;
 pte_t *spte;

+#ifdef CONFIG_X86_PAE
+pmd_t *spmd;
+pmd_t gpmd;
+#endif
+
 /* First step: get the top-level Guest page table entry. */
-gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+gpgd = lgread(cpu, (unsigned long) gpgd_addr(cpu, vaddr), pgd_t);
+
 /* Toplevel not present?  We can't map it in. */
 if (!(pgd_flags(gpgd) &amp; _PAGE_PRESENT))
 return 0;

 /* Now look at the matching shadow entry. */
 spgd = spgd_addr(cpu, cpu-&gt;cpu_pgd, vaddr);
+
 if (!(pgd_flags(*spgd) &amp; _PAGE_PRESENT)) {
 /* No shadow entry: allocate a new shadow PTE page. */
 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
&lt; at &gt;&lt; at &gt; -231,9 +307,38 &lt; at &gt;&lt; at &gt; int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
 }

+#ifdef CONFIG_X86_PAE
+gpmd = lgread(cpu, (unsigned long) gpmd_addr(gpgd, vaddr), pmd_t);
+/* middle level not present?  We can't map it in. */
+if (!(pmd_flags(gpmd) &amp; _PAGE_PRESENT))
+return 0;
+
+/* Now look at the matching shadow entry. */
+spmd = spmd_addr(cpu, *spgd, vaddr);
+
+if (!(pmd_flags(*spmd) &amp; _PAGE_PRESENT)) {
+/* No shadow entry: allocate a new shadow PTE page. */
+unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
+
+/* This is not really the Guest's fault, but killing it is
+* simple for this corner case. */
+if (!ptepage) {
+kill_guest(cpu, "out of memory allocating pte page");
+return 0;
+}
+
+/* We check that the Guest pmd is OK. */
+check_gpmd(cpu, gpmd);
+
+/* And we copy the flags to the shadow PMD entry.  The page
+ * number in the shadow PMD is the page we just allocated. */
+*spmd = __pmd(__pa(ptepage) | pmd_flags(gpmd));
+}
+#endif
+
 /* OK, now we look at the lower level in the Guest page table: keep its
  * address, because we might update it later. */
-gpte_ptr = gpte_addr(gpgd, vaddr);
+gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
 gpte = lgread(cpu, gpte_ptr, pte_t);

 /* If this page isn't in the Guest page tables, we can't page it in. */
&lt; at &gt;&lt; at &gt; -259,7 +364,7 &lt; at &gt;&lt; at &gt; int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 gpte = pte_mkdirty(gpte);

 /* Get the pointer to the shadow PTE entry we're going to set. */
-spte = spte_addr(*spgd, vaddr);
+spte = spte_addr(cpu, *spgd, vaddr);
 /* If there was a valid shadow PTE entry here before, we release it.
  * This can happen with a write to a previously read-only entry. */
 release_pte(*spte);
&lt; at &gt;&lt; at &gt; -300,15 +405,24 &lt; at &gt;&lt; at &gt; static int page_writable(struct lg_cpu *cpu, unsigned long vaddr)
 {
 pgd_t *spgd;
 unsigned long flags;
+#ifdef CONFIG_X86_PAE
+pmd_t *spmd;
+#endif

 /* Look at the current top level entry: is it present? */
 spgd = spgd_addr(cpu, cpu-&gt;cpu_pgd, vaddr);
 if (!(pgd_flags(*spgd) &amp; _PAGE_PRESENT))
 return 0;

+#ifdef CONFIG_X86_PAE
+spmd = spmd_addr(cpu, *spgd, vaddr);
+if (!(pmd_flags(*spmd) &amp; _PAGE_PRESENT))
+return 0;
+#endif
+
 /* Check the flags on the pte entry itself: it must be present and
  * writable. */
-flags = pte_flags(*(spte_addr(*spgd, vaddr)));
+flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));

 return (flags &amp; (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
 }
&lt; at &gt;&lt; at &gt; -322,8 +436,44 &lt; at &gt;&lt; at &gt; void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
 kill_guest(cpu, "bad stack page %#lx", vaddr);
 }

+#ifdef CONFIG_X86_PAE
+static void release_pmd(pmd_t *spmd)
+{
+/* If the entry's not present, there's nothing to release. */
+if (pmd_flags(*spmd) &amp; _PAGE_PRESENT) {
+unsigned int i;
+pte_t *ptepage = __va(pmd_pfn(*spmd) &lt;&lt; PAGE_SHIFT);
+/* For each entry in the page, we might need to release it. */
+for (i = 0; i &lt; PTRS_PER_PTE; i++)
+release_pte(ptepage[i]);
+/* Now we can free the page of PTEs */
+free_page((long)ptepage);
+/* And zero out the PMD entry so we never release it twice. */
+*spmd = __pmd(0);
+}
+}
+
 /*H:450 If we chase down the release_pgd() code, it looks like this: */
-static void release_pgd(struct lguest *lg, pgd_t *spgd)
+static void release_pgd(pgd_t *spgd)
+{
+/* If the entry's not present, there's nothing to release. */
+if (pgd_flags(*spgd) &amp; _PAGE_PRESENT) {
+unsigned int i;
+pmd_t *pmdpage = __va(pgd_pfn(*spgd) &lt;&lt; PAGE_SHIFT);
+for (i = 0; i &lt; PTRS_PER_PMD; i++)
+release_pmd(&amp;pmdpage[i]);
+
+/* Now we can free the page of PMDs */
+free_page((long)pmdpage);
+/* And zero out the PGD entry so we never release it twice. */
+*spgd = __pgd(0);
+}
+}
+
+#else /* !CONFIG_X86_PAE */
+
+/*H:450 If we chase down the release_pgd() code, it looks like this: */
+static void release_pgd(pgd_t *spgd)
 {
 /* If the entry's not present, there's nothing to release. */
 if (pgd_flags(*spgd) &amp; _PAGE_PRESENT) {
&lt; at &gt;&lt; at &gt; -342,6 +492,8 &lt; at &gt;&lt; at &gt; static void release_pgd(struct lguest *lg, pgd_t *spgd)
 }
 }

+#endif
+
 /*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings()
  * hypercall and once in new_pgdir() when we re-used a top-level pgdir page.
  * It simply releases every PTE page from 0 up to the Guest's kernel address. */
&lt; at &gt;&lt; at &gt; -350,7 +502,7 &lt; at &gt;&lt; at &gt; static void flush_user_mappings(struct lguest *lg, int idx)
 unsigned int i;
 /* Release every pgd entry up to the kernel's address. */
 for (i = 0; i &lt; pgd_index(lg-&gt;kernel_address); i++)
-release_pgd(lg, lg-&gt;pgdirs[idx].pgdir + i);
+release_pgd(lg-&gt;pgdirs[idx].pgdir + i);
 }

 /*H:440 (v) Flushing (throwing away) page tables,
&lt; at &gt;&lt; at &gt; -370,23 +522,34 &lt; at &gt;&lt; at &gt; unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 pgd_t gpgd;
 pte_t gpte;

+#ifdef CONFIG_X86_PAE
+pmd_t gpmd;
+#endif
+
 /* First step: get the top-level Guest page table entry. */
-gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+gpgd = lgread(cpu, (unsigned long) gpgd_addr(cpu, vaddr), pgd_t);
 /* Toplevel not present?  We can't map it in. */
 if (!(pgd_flags(gpgd) &amp; _PAGE_PRESENT))
 kill_guest(cpu, "Bad address %#lx", vaddr);

-gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
+#ifdef CONFIG_X86_PAE
+gpmd = lgread(cpu, (unsigned long) gpmd_addr(gpgd, vaddr), pmd_t);
+if (!(pmd_flags(gpmd) &amp; _PAGE_PRESENT))
+kill_guest(cpu, "Bad address %#lx", vaddr);
+#endif
+
+gpte = lgread(cpu, (unsigned long) gpte_addr(cpu, gpgd, vaddr), pte_t);
 if (!(pte_flags(gpte) &amp; _PAGE_PRESENT))
 kill_guest(cpu, "Bad address %#lx", vaddr);

 return pte_pfn(gpte) * PAGE_SIZE | (vaddr &amp; ~PAGE_MASK);
 }

+
 /* We keep several page tables.  This is a simple routine to find the page
  * table (if any) corresponding to this top-level address the Guest has given
  * us. */
-static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
+static unsigned int find_pgdir(struct lguest *lg, pgd_t *pgtable)
 {
 unsigned int i;
 for (i = 0; i &lt; ARRAY_SIZE(lg-&gt;pgdirs); i++)
&lt; at &gt;&lt; at &gt; -399,11 +562,13 &lt; at &gt;&lt; at &gt; static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
  * allocate a new one (and so the kernel parts are not there), we set
  * blank_pgdir. */
 static unsigned int new_pgdir(struct lg_cpu *cpu,
-      unsigned long gpgdir,
+      pgd_t *gpgdir,
       int *blank_pgdir)
 {
 unsigned int next;
-
+#ifdef CONFIG_X86_PAE
+pmd_t *pmd_table;
+#endif
 /* We pick one entry at random to throw out.  Choosing the Least
  * Recently Used might be better, but this is easy. */
 next = random32() % ARRAY_SIZE(cpu-&gt;lg-&gt;pgdirs);
&lt; at &gt;&lt; at &gt; -414,10 +579,27 &lt; at &gt;&lt; at &gt; static unsigned int new_pgdir(struct lg_cpu *cpu,
 /* If the allocation fails, just keep using the one we have */
 if (!cpu-&gt;lg-&gt;pgdirs[next].pgdir)
 next = cpu-&gt;cpu_pgd;
+#ifdef CONFIG_X86_PAE
+else {
+/* In PAE mode, allocate a pmd page and populate the
+ * last pgd entry. */
+pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
+if (!pmd_table)
+next = cpu-&gt;cpu_pgd;
+else {
+set_pgd(cpu-&gt;lg-&gt;pgdirs[next].pgdir +
+SWITCHER_PGD_INDEX,
+     __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+
+/* This is a blank page, so there are no kernel
+ * mappings: caller must map the stack! */
+*blank_pgdir = 1;
+}
+}
+#else
 else
-/* This is a blank page, so there are no kernel
- * mappings: caller must map the stack! */
 *blank_pgdir = 1;
+#endif
 }
 /* Record which Guest toplevel this shadows. */
 cpu-&gt;lg-&gt;pgdirs[next].gpgdir = gpgdir;
&lt; at &gt;&lt; at &gt; -437,11 +619,11 &lt; at &gt;&lt; at &gt; void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
 int newpgdir, repin = 0;

 /* Look to see if we have this one already. */
-newpgdir = find_pgdir(cpu-&gt;lg, pgtable);
+newpgdir = find_pgdir(cpu-&gt;lg, (pgd_t *)pgtable);
 /* If not, we allocate or mug an existing one: if it's a fresh one,
  * repin gets set to 1. */
 if (newpgdir == ARRAY_SIZE(cpu-&gt;lg-&gt;pgdirs))
-newpgdir = new_pgdir(cpu, pgtable, &amp;repin);
+newpgdir = new_pgdir(cpu, (pgd_t *)pgtable, &amp;repin);
 /* Change the current pgd index to the new one. */
 cpu-&gt;cpu_pgd = newpgdir;
 /* If it was completely blank, we map in the Guest kernel stack */
&lt; at &gt;&lt; at &gt; -456,12 +638,28 &lt; at &gt;&lt; at &gt; static void release_all_pagetables(struct lguest *lg)
 {
 unsigned int i, j;

+#ifdef CONFIG_X86_PAE
+pgd_t *spgd;
+pmd_t *pmdpage;
+#endif
+
 /* Every shadow pagetable this Guest has */
 for (i = 0; i &lt; ARRAY_SIZE(lg-&gt;pgdirs); i++)
-if (lg-&gt;pgdirs[i].pgdir)
+if (lg-&gt;pgdirs[i].pgdir) {
 /* Every PGD entry except the Switcher at the top */
 for (j = 0; j &lt; SWITCHER_PGD_INDEX; j++)
-release_pgd(lg, lg-&gt;pgdirs[i].pgdir + j);
+release_pgd(lg-&gt;pgdirs[i].pgdir + j);
+#ifdef CONFIG_X86_PAE
+/* Get the last pmd page. */
+spgd = lg-&gt;pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
+pmdpage = __va(pgd_pfn(*spgd) &lt;&lt; PAGE_SHIFT);
+
+/* And release the pmd entries of that pmd page,
+ * except for the switcher pmd. */
+for (i = 0; i &lt; SWITCHER_PMD_INDEX; i++)
+release_pmd(&amp;pmdpage[i]);
+#endif
+}
 }

 /* We also throw away everything when a Guest tells us it's changed a kernel
&lt; at &gt;&lt; at &gt; -503,23 +701,38 &lt; at &gt;&lt; at &gt; static void do_set_pte(struct lg_cpu *cpu, int idx,
 /* Look up the matching shadow page directory entry. */
 pgd_t *spgd = spgd_addr(cpu, idx, vaddr);

+#ifdef CONFIG_X86_PAE
+pmd_t *spmd;
+#endif
+
 /* If the top level isn't present, there's no entry to update. */
 if (pgd_flags(*spgd) &amp; _PAGE_PRESENT) {
-/* Otherwise, we start by releasing the existing entry. */
-pte_t *spte = spte_addr(*spgd, vaddr);
-release_pte(*spte);
-
-/* If they're setting this entry as dirty or accessed, we might
- * as well put that entry they've given us in now.  This shaves
- * 10% off a copy-on-write micro-benchmark. */
-if (pte_flags(gpte) &amp; (_PAGE_DIRTY | _PAGE_ACCESSED)) {
-check_gpte(cpu, gpte);
-*spte = gpte_to_spte(cpu, gpte,
-     pte_flags(gpte) &amp; _PAGE_DIRTY);
-} else
-/* Otherwise kill it and we can demand_page() it in
- * later. */
-*spte = __pte(0);
+
+#ifdef CONFIG_X86_PAE
+spmd = spmd_addr(cpu, *spgd, vaddr);
+if (pmd_flags(*spmd) &amp; _PAGE_PRESENT) {
+#endif
+
+/* Otherwise, we start by releasing
+ * the existing entry. */
+pte_t *spte = spte_addr(cpu, *spgd, vaddr);
+release_pte(*spte);
+
+/* If they're setting this entry as dirty or accessed,
+ *  we might as well put that entry they've given us
+ * in now.  This shaves 10% off a
+ * copy-on-write micro-benchmark. */
+if (pte_flags(gpte) &amp; (_PAGE_DIRTY | _PAGE_ACCESSED)) {
+check_gpte(cpu, gpte);
+*spte = gpte_to_spte(cpu, gpte,
+pte_flags(gpte) &amp; _PAGE_DIRTY);
+} else
+/* Otherwise kill it and we can demand_page()
+ * it in later. */
+*spte = __pte(0);
+#ifdef CONFIG_X86_PAE
+}
+#endif
 }
 }

&lt; at &gt;&lt; at &gt; -545,7 +758,7 &lt; at &gt;&lt; at &gt; void guest_set_pte(struct lg_cpu *cpu,
 do_set_pte(cpu, i, vaddr, gpte);
 } else {
 /* Is this page table one we have a shadow for? */
-int pgdir = find_pgdir(cpu-&gt;lg, gpgdir);
+int pgdir = find_pgdir(cpu-&gt;lg, (pgd_t *)gpgdir);
 if (pgdir != ARRAY_SIZE(cpu-&gt;lg-&gt;pgdirs))
 /* If so, do the update. */
 do_set_pte(cpu, pgdir, vaddr, gpte);
&lt; at &gt;&lt; at &gt; -566,9 +779,31 &lt; at &gt;&lt; at &gt; void guest_set_pte(struct lg_cpu *cpu,
  *
  * So with that in mind here's our code to to update a (top-level) PGD entry:
  */
-void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
+
+#ifdef CONFIG_X86_PAE
+void guest_set_pud(struct lguest *lg, unsigned long pudp, u32 idx)
+{
+int pgdir;
+pgd_t *gpgdir = (pgd_t *) pudp;
+
+/* If they're talking about a page table we have a shadow for... */
+pgdir = find_pgdir(lg, gpgdir);
+if (pgdir &lt; ARRAY_SIZE(lg-&gt;pgdirs))
+/* ... throw it away. */
+release_pgd(lg-&gt;pgdirs[pgdir].pgdir + idx);
+}
+
+void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
+{
+release_all_pagetables(lg);
+}
+
+#else /*!CONFIG_X86_PAE*/
+
+void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
 {
 int pgdir;
+pgd_t *gpgdir = (pgd_t *) pmdp;

 /* The kernel seems to try to initialize this early on: we ignore its
  * attempts to map over the Switcher. */
&lt; at &gt;&lt; at &gt; -579,8 +814,9 &lt; at &gt;&lt; at &gt; void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 pgdir = find_pgdir(lg, gpgdir);
 if (pgdir &lt; ARRAY_SIZE(lg-&gt;pgdirs))
 /* ... throw it away. */
-release_pgd(lg, lg-&gt;pgdirs[pgdir].pgdir + idx);
+release_pgd(lg-&gt;pgdirs[pgdir].pgdir + idx);
 }
+#endif

 /* Once we know how much memory we have we can construct simple identity
  * (which set virtual == physical) and linear mappings
&lt; at &gt;&lt; at &gt; -594,8 +830,14 &lt; at &gt;&lt; at &gt; static unsigned long setup_pagetables(unsigned long mem,
 {
 pgd_t *pgdir;
 pte_t *linear;
-unsigned int mapped_pages, i, linear_pages, phys_linear;
-
+unsigned int mapped_pages, i, linear_pages;
+
+#ifdef CONFIG_X86_PAE
+u64 *pmds;
+unsigned int j;
+#else
+unsigned int phys_linear;
+#endif
 /* We have mapped_pages frames to map, so we need
  * linear_pages page tables to map them. */
 mapped_pages = mem / PAGE_SIZE;
&lt; at &gt;&lt; at &gt; -607,14 +849,26 &lt; at &gt;&lt; at &gt; static unsigned long setup_pagetables(unsigned long mem,
 /* Now we use the next linear_pages pages as pte pages */
 linear = (void *)pgdir - linear_pages * PAGE_SIZE;

+#ifdef CONFIG_X86_PAE
+pmds = (void *)linear - PAGE_SIZE;
+ #endif
+
 /* Linear mapping is easy: put every page's address into the
  * mapping in order. */
 for (i = 0; i &lt; mapped_pages; i++)
 set_pte(&amp;linear[i], pfn_pte(i,
 __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)));

+#ifdef CONFIG_X86_PAE
 /* The top level points to the linear page table pages above.
  * We setup the identity and linear mappings here. */
+for (i = 0, j = 0; i &lt; mapped_pages; i += PTRS_PER_PTE, j++) {
+pmds[j] = ((unsigned long)(linear+i) - mem_base) |
+_PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
+}
+pgdir[0] = pgdir[3] = __pgd((((u32)pmds) - mem_base) | _PAGE_PRESENT);
+
+#else
 phys_linear = (unsigned long)linear - mem_base;
 for (i = 0; i &lt; mapped_pages; i += PTRS_PER_PTE)
 pgdir[i / PTRS_PER_PTE] =
&lt; at &gt;&lt; at &gt; -622,6 +876,8 &lt; at &gt;&lt; at &gt; static unsigned long setup_pagetables(unsigned long mem,
      __pgd((phys_linear + i * sizeof(pte_t)) |
  (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));

+#endif
+
 /* We return the top level (guest-physical) address: the kernel needs
  * to know where it is. */
 return (unsigned long)pgdir - mem_base;
&lt; at &gt;&lt; at &gt; -636,6 +892,11 &lt; at &gt;&lt; at &gt; int init_guest_pagetable(struct lguest *lg)
 unsigned long initrd_size = 0, mem = 0;
 struct boot_params *boot = (struct boot_params *) lg-&gt;mem_base;

+#ifdef CONFIG_X86_PAE
+pgd_t *pgd;
+pmd_t *pmd_table;
+#endif
+
 /* Get the guest memory size and the ramdisk size
  * from the boot header located at lg-&gt;mem_base*/
 if (copy_from_user(&amp;mem, &amp;boot-&gt;e820_map[0].size, 8))
&lt; at &gt;&lt; at &gt; -645,11 +906,22 &lt; at &gt;&lt; at &gt; int init_guest_pagetable(struct lguest *lg)

 /* We start on the first shadow page table, and give it a blank PGD
  * page. */
-lg-&gt;pgdirs[0].gpgdir = setup_pagetables(mem,
+lg-&gt;pgdirs[0].gpgdir = (pgd_t *) setup_pagetables(mem,
 initrd_size, (unsigned long) lg-&gt;mem_base);
 lg-&gt;pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
 if (!lg-&gt;pgdirs[0].pgdir)
 return -ENOMEM;
+#ifdef CONFIG_X86_PAE
+pgd = lg-&gt;pgdirs[0].pgdir;
+pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
+if (!pmd_table)
+return -ENOMEM;
+
+set_pgd(pgd + SWITCHER_PGD_INDEX, __pgd(__pa(pmd_table)
+| _PAGE_PRESENT));
+#endif
+
+
 lg-&gt;cpus[0].cpu_pgd = 0;
 return 0;
 }
&lt; at &gt;&lt; at &gt; -657,21 +929,36 &lt; at &gt;&lt; at &gt; int init_guest_pagetable(struct lguest *lg)
 /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
 void page_table_guest_data_init(struct lg_cpu *cpu)
 {
+#ifdef CONFIG_X86_PAE
+const unsigned long reserve_mb = 2;
+#else
+const unsigned long reserve_mb = 4;
+#endif
+
 /* We get the kernel address: above this is all kernel memory. */
 if (get_user(cpu-&gt;lg-&gt;kernel_address,
-     &amp;cpu-&gt;lg-&gt;lguest_data-&gt;kernel_address)
-    /* We tell the Guest that it can't use the top 4MB of virtual
-     * addresses used by the Switcher. */
-    || put_user(4U*1024*1024, &amp;cpu-&gt;lg-&gt;lguest_data-&gt;reserve_mem)
-    || put_user(cpu-&gt;lg-&gt;pgdirs[0].gpgdir, &amp;cpu-&gt;lg-&gt;lguest_data-&gt;pgdir))
+&amp;cpu-&gt;lg-&gt;lguest_data-&gt;kernel_address)
+/* We tell the Guest that it can't use the top 2 or 4 MB
+ * of virtual addresses used by the Switcher. */
+|| put_user(reserve_mb * 1024 * 1024,
+&amp;cpu-&gt;lg-&gt;lguest_data-&gt;reserve_mem)
+|| put_user((unsigned long) cpu-&gt;lg-&gt;pgdirs[0].gpgdir,
+&amp;cpu-&gt;lg-&gt;lguest_data-&gt;pgdir))
 kill_guest(cpu, "bad guest page %p", cpu-&gt;lg-&gt;lguest_data);

 /* In flush_user_mappings() we loop from 0 to
  * "pgd_index(lg-&gt;kernel_address)".  This assumes it won't hit the
  * Switcher mappings, so check that now. */
+#ifdef CONFIG_X86_PAE
+if (pgd_index(cpu-&gt;lg-&gt;kernel_address) == SWITCHER_PGD_INDEX)
+if (pmd_index(cpu-&gt;lg-&gt;kernel_address) == SWITCHER_PMD_INDEX)
+kill_guest(cpu, "bad kernel address %#lx",
+cpu-&gt;lg-&gt;kernel_address);
+#else
 if (pgd_index(cpu-&gt;lg-&gt;kernel_address) &gt;= SWITCHER_PGD_INDEX)
 kill_guest(cpu, "bad kernel address %#lx",
  cpu-&gt;lg-&gt;kernel_address);
+#endif
 }

 /* When a Guest dies, our cleanup is fairly simple. */
&lt; at &gt;&lt; at &gt; -695,15 +982,28 &lt; at &gt;&lt; at &gt; void free_guest_pagetable(struct lguest *lg)
 void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
-pgd_t switcher_pgd;
 pte_t regs_pte;
 unsigned long pfn;

+#ifdef CONFIG_X86_PAE
+pmd_t switcher_pmd;
+pmd_t *pmd_table;
+
+switcher_pmd = pfn_pmd(__pa(switcher_pte_page) &gt;&gt;
+PAGE_SHIFT, __pgprot(__PAGE_KERNEL));
+pmd_table = __va(pgd_pfn(cpu-&gt;lg-&gt;
+pgdirs[cpu-&gt;cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
+&lt;&lt; PAGE_SHIFT);
+pmd_table[SWITCHER_PMD_INDEX] = switcher_pmd;
+
+#else
+pgd_t switcher_pgd;
+
 /* Make the last PGD entry for this Guest point to the Switcher's PTE
  * page for this CPU (with appropriate flags). */
 switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL);
-
 cpu-&gt;lg-&gt;pgdirs[cpu-&gt;cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+#endif

 /* We also change the Switcher PTE page.  When we're running the Guest,
  * we want the Guest's "regs" page to appear where the first Switcher
&lt; at &gt;&lt; at &gt; -714,7 +1014,8 &lt; at &gt;&lt; at &gt; void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
  * again. */
 pfn = __pa(cpu-&gt;regs_page) &gt;&gt; PAGE_SHIFT;
 regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL));
-switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
+switcher_pte_page[(unsigned long)pages / PAGE_SIZE % PTRS_PER_PTE]
+= regs_pte;
 }
 /*:*/

&lt; at &gt;&lt; at &gt; -723,7 +1024,7 &lt; at &gt;&lt; at &gt; static void free_switcher_pte_pages(void)
 unsigned int i;

 for_each_possible_cpu(i)
-free_page((long)switcher_pte_page(i));
+free_page((long)switcher_pte_page(i));
 }

 /*H:520 Setting up the Switcher PTE page for given CPU is fairly easy, given
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index be4a724..ad08f70 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
&lt; at &gt;&lt; at &gt; -17,8 +17,13 &lt; at &gt;&lt; at &gt;
 /* Pages for switcher itself, then two pages per cpu */
 #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS)

+#ifdef CONFIG_X86_PAE
+/* We map at -2M for ease of mapping into the guest (one PTE page). */
+#define SWITCHER_ADDR 0xFFE00000
+#else
 /* We map at -4M for ease of mapping into the guest (one PTE page). */
 #define SWITCHER_ADDR 0xFFC00000
+#endif

 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h
index a3241f2..c0860dc 100644
--- a/include/asm-x86/lguest_hcall.h
+++ b/include/asm-x86/lguest_hcall.h
&lt; at &gt;&lt; at &gt; -17,6 +17,7 &lt; at &gt;&lt; at &gt;
 #define LHCALL_SET_PMD15
 #define LHCALL_LOAD_TLS16
 #define LHCALL_NOTIFY17
+#define LHCALL_SET_PUD18

 #define LGUEST_TRAP_ENTRY 0x1F

&lt; at &gt;&lt; at &gt; -32,7 +33,7 &lt; at &gt;&lt; at &gt;
  * to make requests of the Host Itself.
  *
  * Our hypercall mechanism uses the highest unused trap code (traps 32 and
- * above are used by real hardware interrupts).  Fifteen hypercalls are
+ * above are used by real hardware interrupts).  Eighteen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
  * arguments (when required) are placed in %edx, %ebx and %ecx.  If a return
  * value makes sense, it's returned in %eax.
</description>
    <dc:creator>Matias Zabaljauregui</dc:creator>
    <dc:date>2008-09-29T04:40:07</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/530">
    <title>[PATCH v2 3/3] lguest: Page Size Extension support</title>
    <link>http://comments.gmane.org/gmane.linux.kernel.virtualization.lguest/530</link>
    <description>This patch adds Page Size Extension support to lguest.

Signed-off-by: Matias Zabaljauregui &lt;zabaljauregui-Re5JQEeQqe8AvxtiuMwx3w&lt; at &gt;public.gmane.org&gt;

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index fc2331e..c6a6b75 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
&lt; at &gt;&lt; at &gt; -334,11 +334,11 &lt; at &gt;&lt; at &gt; static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 case 1:/* Basic feature request. */
 /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 *cx &amp;= 0x00002201;
-/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
+/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PSE, PAE. */
 #ifdef CONFIG_X86_PAE
-*dx &amp;= 0x07808151;
+*dx &amp;= 0x07808159;
 #else
-*dx &amp;= 0x07808111;
+*dx &amp;= 0x07808119;
 #endif
 /* The Host can do a nice optimization if it knows that the
  * kernel mappings (addresses above 0xC0000000 or whatever
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index f606d68..2a503c6 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
&lt; at &gt;&lt; at &gt; -273,21 +273,97 &lt; at &gt;&lt; at &gt; int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 pgd_t gpgd;
 pgd_t *spgd;
 unsigned long gpte_ptr;
+unsigned long gpgd_ptr;
 pte_t gpte;
 pte_t *spte;

 #ifdef CONFIG_X86_PAE
 pmd_t *spmd;
 pmd_t gpmd;
+unsigned long gpmd_ptr;
 #endif

+unsigned long ptepage;
+int i = 0;
+pte_t fake_gpte;
+pte_t *ptep;
+unsigned long frame;
+
 /* First step: get the top-level Guest page table entry. */
-gpgd = lgread(cpu, (unsigned long) gpgd_addr(cpu, vaddr), pgd_t);
+gpgd_ptr = (unsigned long) gpgd_addr(cpu, vaddr);
+gpgd = lgread(cpu, gpgd_ptr, pgd_t);

 /* Toplevel not present?  We can't map it in. */
 if (!(pgd_flags(gpgd) &amp; _PAGE_PRESENT))
 return 0;

+#ifndef CONFIG_X86_PAE
+/* If the gpgd is actually pointing to a 4MB page,
+ * instead of pointing to a pte page, we will back it
+ * with 4KB pages in the host */
+if (pgd_flags(gpgd) &amp; _PAGE_PSE) {
+/* Check they're not trying to write to a page the Guest wants
+ * read-only (bit 2 of errcode == write). */
+if ((errcode &amp; 2) &amp;&amp; !(pgd_flags(gpgd) &amp; _PAGE_RW))
+return 0;
+
+/* User access to a kernel-only page? (bit 3 == user access) */
+if ((errcode &amp; 4) &amp;&amp; !(pgd_flags(gpgd) &amp; _PAGE_USER))
+return 0;
+
+/* Is the 4MB page within the guest limits? */
+if (pgd_pfn(gpgd) + ((PGDIR_SIZE - PAGE_SIZE) &gt;&gt; PAGE_SHIFT) &gt;=
+    cpu-&gt;lg-&gt;pfn_limit)
+kill_guest(cpu, "large page out of limits");
+
+/* Now look at the matching shadow entry. */
+spgd = spgd_addr(cpu, cpu-&gt;cpu_pgd, vaddr);
+
+/* No shadow entry: allocate a new shadow PTE page. */
+if (!(pgd_flags(*spgd) &amp; _PAGE_PRESENT)) {
+ptepage = get_zeroed_page(GFP_KERNEL);
+if (!ptepage)
+kill_guest(cpu,
+   "out of memory allocating pte page");
+/* We build a shadow pgd, pointing to the PTE page */
+set_pgd(spgd, __pgd(__pa(ptepage) |
+      (pgd_flags(gpgd) &amp; ~_PAGE_PSE &amp;  _PAGE_TABLE)));
+}
+
+/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
+gpgd = __pgd(pgd_val(gpgd) | _PAGE_ACCESSED);
+if (errcode &amp; 2)
+gpgd = __pgd(pgd_val(gpgd) | _PAGE_DIRTY);
+
+/* We will use this pointer to populate
+ * the shadow PTE page */
+ptep = __va(pgd_pfn(*spgd) &lt;&lt; PAGE_SHIFT);
+
+/* We will create a fake gpte, so we can use
+ * gpte_to_spte function */
+frame = pgd_pfn(gpgd) &lt;&lt; PAGE_SHIFT;
+
+/* And here, we completely populate the shadow PTE page,
+ * so we map the 1024 4KB pages, backing the 4MB guest page */
+for (; i &lt; PTRS_PER_PTE; i++) {
+fake_gpte =
+    __pte(frame | (pgd_flags(gpgd) &amp; ~_PAGE_PSE));
+frame = frame + PAGE_SIZE;
+release_pte(ptep[i]);
+if (pgd_val(gpgd) &amp; _PAGE_DIRTY)
+ptep[i] = gpte_to_spte(cpu, fake_gpte, 1);
+else
+ptep[i] =
+    gpte_to_spte(cpu, pte_wrprotect(fake_gpte),
+ 0);
+}
+/* Finally, we write the Guest PGD entry back: we've set the
+ * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
+lgwrite(cpu, gpgd_ptr, pgd_t, gpgd);
+return 1;
+} /* (pgd_flags(gpgd) &amp; _PAGE_PSE) */
+#endif
+
 /* Now look at the matching shadow entry. */
 spgd = spgd_addr(cpu, cpu-&gt;cpu_pgd, vaddr);

&lt; at &gt;&lt; at &gt; -308,11 +384,78 &lt; at &gt;&lt; at &gt; int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 }

 #ifdef CONFIG_X86_PAE
-gpmd = lgread(cpu, (unsigned long) gpmd_addr(gpgd, vaddr), pmd_t);
+gpmd_ptr = gpmd_addr(gpgd, vaddr);
+gpmd = lgread(cpu, gpmd_ptr, pmd_t);
+
 /* middle level not present?  We can't map it in. */
 if (!(pmd_flags(gpmd) &amp; _PAGE_PRESENT))
 return 0;

+/* If the gpmd is actually pointing to a 2MB page,
+ * instead of pointing to a pte page, we will back it
+ * with 4KB pages in the host */
+if (pmd_flags(gpmd) &amp; _PAGE_PSE) {
+/* Check they're not trying to write to a page the Guest wants
+ * read-only (bit 2 of errcode == write). */
+if ((errcode &amp; 2) &amp;&amp; !(pmd_flags(gpmd) &amp; _PAGE_RW))
+return 0;
+
+/* User access to a kernel-only page? (bit 3 == user access) */
+if ((errcode &amp; 4) &amp;&amp; !(pmd_flags(gpmd) &amp; _PAGE_USER))
+return 0;
+
+/* Is the 2MB page within the guest limits? */
+if (pmd_pfn(gpmd) + ((PMD_SIZE - PAGE_SIZE) &gt;&gt; PAGE_SHIFT) &gt;=
+cpu-&gt;lg-&gt;pfn_limit)
+kill_guest(cpu, "large page out of limits");
+
+/* Now look at the matching shadow entry. */
+spmd = spmd_addr(cpu, *spgd, vaddr);
+
+/* No shadow entry: allocate a new shadow PTE page. */
+if (!(pmd_flags(*spmd) &amp; _PAGE_PRESENT)) {
+ptepage = get_zeroed_page(GFP_KERNEL);
+if (!ptepage)
+kill_guest(cpu,
+   "out of memory allocating pte page");
+/* We build a shadow pmd, pointing to the PTE page */
+set_pmd(spmd, __pmd(__pa(ptepage) |
+      (pmd_flags(gpmd) &amp; ~_PAGE_PSE &amp;  _PAGE_TABLE)));
+}
+
+/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
+gpmd = __pmd(pmd_val(gpmd) | _PAGE_ACCESSED);
+if (errcode &amp; 2)
+gpmd = __pmd(pmd_val(gpmd) | _PAGE_DIRTY);
+
+/* We will use this pointer to populate
+ * the shadow PTE page */
+ptep = __va(pmd_pfn(*spmd) &lt;&lt; PAGE_SHIFT);
+
+/* We will create a fake gpte, so we can use
+ * gpte_to_spte function */
+frame = pmd_pfn(gpmd) &lt;&lt; PAGE_SHIFT;
+
+/* And here, we completely populate the shadow PTE page,
+ * so we map the 512 4KB pages, backing the 2MB guest page */
+for (; i &lt; PTRS_PER_PMD; i++) {
+fake_gpte =
+    __pte(frame | (pmd_flags(gpmd) &amp; ~_PAGE_PSE));
+frame = frame + PAGE_SIZE;
+release_pte(ptep[i]);
+if (pmd_val(gpmd) &amp; _PAGE_DIRTY)
+ptep[i] = gpte_to_spte(cpu, fake_gpte, 1);
+else
+ptep[i] =
+    gpte_to_spte(cpu, pte_wrprotect(fake_gpte),
+ 0);
+}
+/* Finally, we write the Guest PMD entry back: we've set the
+ * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
+lgwrite(cpu, gpmd_ptr, pmd_t, gpmd);
+return 1;
+} /* (pmd_flags(gpmd) &amp; _PAGE_PSE) */
+
 /* Now look at the matching shadow entry. */
 spmd = spmd_addr(cpu, *spgd, vaddr);

@@ -532,10 +675,17 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 if (!(pgd_flags(gpgd) &amp; _PAGE_PRESENT))
 kill_guest(cpu, "Bad address %#lx", vaddr);

+/* Is it a large page? We don't need any gpte to return the address */
+if (pgd_flags(gpgd) &amp; _PAGE_PSE)
+return (pgd_val(gpgd) &amp; PGDIR_MASK) | (vaddr &amp; ~PGDIR_MASK);
+
 #ifdef CONFIG_X86_PAE
 gpmd = lgread(cpu, (unsigned long) gpmd_addr(gpgd, vaddr), pmd_t);
 if (!(pmd_flags(gpmd) &amp; _PAGE_PRESENT))
 kill_guest(cpu, "Bad address %#lx", vaddr);
+
+if (pmd_flags(gpmd) &amp; _PAGE_PSE)
+return (pmd_val(gpmd) &amp; PMD_MASK) | (vaddr &amp; ~PMD_MASK);
 #endif

 gpte = lgread(cpu, (unsigned long) gpte_addr(cpu, gpgd, vaddr), pte_t);
</description>
    <dc:creator>Matias Zabaljauregui</dc:creator>
    <dc:date>2008-09-29T04:40:19</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.kernel.v