author      2009-12-01 10:56:34 +0000
committer   2009-12-01 10:56:34 +0000
commit      0868c1fc43a4ce9db10f8a95034aa03757e989b5 (patch)
tree        b32334affd3b208ee416e16a1665b39963cfa45c /pypy
parent      Add str_w to the TinyObjSpace, to "fix" app-level tests of the locale module (diff)
Merge the branch/virtual-forcing so far, introducing proper forcing of
virtualizables, followed by a guard failure when we eventually return to
assembler code. This also changes the front-end so that it always aborts a
trace after the virtualizable has been forced.
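(For reference, the new operation sequence: the front-end now brackets a
residual call that may force the virtualizable between a FORCE_TOKEN and a
GUARD_NOT_FORCED. The sketch below is distilled from the new tests in
pypy/jit/backend/test/runner_test.py further down; funcbox, calldescr and
faildescr are set up as in those tests::

    i0 = BoxInt(); i1 = BoxInt(); tok = BoxInt()
    ops = [
        ResOperation(rop.FORCE_TOKEN, [], tok),
        ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], None,
                     descr=calldescr),
        ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
        ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0)),
    ]
    ops[2].fail_args = [i1, i0]

If the callee forces the virtualizable, cpu.force() fills in the fail args
of the GUARD_NOT_FORCED, and that guard fails as soon as the call returns
to assembler code.)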
The changes are not very ootype-friendly. The CLI tests are disabled
for now.
Fix the write analyzer to give the correct answer even when calling
external functions that may call back.
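(A sketch of the failure mode this fix addresses, using hypothetical names:
an external function is opaque to the write analyzer, but if it takes a
callback it can re-enter arbitrary RPython code, so its write effects must
be reported as unknown rather than empty::

    def callback(frame):
        frame.field = 42        # a write the analyzer cannot see by
                                # looking at the call site below alone

    def residual(frame):
        c_sort_with_callback(data, callback)   # external, may call back

For such calls the analyzer must answer with the 'top' set;
effectinfo_from_writeanalyze() in the diff below then returns None, and
do_residual_call() in pyjitpl.py conservatively emits CALL_MAY_FORCE for
any call that carries no EffectInfo.)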
svn merge -r69626:69792 svn+ssh://codespeak.net/svn/pypy/branch/virtual-forcing .
Diffstat (limited to 'pypy')
51 files changed, 1123 insertions(+), 641 deletions(-)
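(Forcing in a nutshell, as implemented below for the llgraph backend: a
FORCE_TOKEN is an address from which the executing frame can be recovered;
cpu.force() marks the frame as forced and fills in the fail args of the
GUARD_NOT_FORCED that must immediately follow the CALL_MAY_FORCE, skipping
the call's own result since it has not been computed yet. Condensed from
the llimpl.py/runner.py changes below, with comments added::

    def force(self, force_token):                     # llgraph runner.py
        token = self.cast_int_to_adr(force_token)
        frame = llimpl.get_forced_token_frame(token)  # token -> Frame
        fail_index = llimpl.force(frame)   # mark forced; fill fail_args
                                           # from the GUARD_NOT_FORCED that
                                           # follows the CALL_MAY_FORCE
        self.latest_frame = frame
        return self.get_fail_descr_from_number(fail_index)

The x86 backend does the equivalent with a word in the frame
(FORCE_INDEX_OFS) that genop_guard_call_may_force() checks after the call.)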
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py index dfa7a65999..e168064a38 100644 --- a/pypy/annotation/description.py +++ b/pypy/annotation/description.py @@ -202,6 +202,9 @@ class FunctionDesc(Desc): graph.name = alt_name return graph + def getgraphs(self): + return self._cache.values() + def getuniquegraph(self): if len(self._cache) != 1: raise NoStandardGraph(self) diff --git a/pypy/jit/backend/cli/test/conftest.py b/pypy/jit/backend/cli/test/conftest.py new file mode 100644 index 0000000000..c99fe66186 --- /dev/null +++ b/pypy/jit/backend/cli/test/conftest.py @@ -0,0 +1,5 @@ +import py + +class Directory(py.test.collect.Directory): + def collect(self): + py.test.skip("CLI backend tests skipped for now") diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py index 7d22893ef1..c988b50ca6 100644 --- a/pypy/jit/backend/llgraph/llimpl.py +++ b/pypy/jit/backend/llgraph/llimpl.py @@ -35,8 +35,13 @@ def _from_opaque(opq): _TO_OPAQUE = {} def _to_opaque(value): - return lltype.opaqueptr(_TO_OPAQUE[value.__class__], 'opaque', - externalobj=value) + try: + return value._the_opaque_pointer + except AttributeError: + op = lltype.opaqueptr(_TO_OPAQUE[value.__class__], 'opaque', + externalobj=value) + value._the_opaque_pointer = op + return op def from_opaque_string(s): if isinstance(s, str): @@ -145,6 +150,9 @@ TYPES = { 'unicodesetitem' : (('ref', 'int', 'int'), 'int'), 'cast_ptr_to_int' : (('ref',), 'int'), 'debug_merge_point': (('ref',), None), + 'force_token' : ((), 'int'), + 'call_may_force' : (('int', 'varargs'), 'intorptr'), + 'guard_not_forced': ((), None) #'getitem' : (('void', 'ref', 'int'), 'int'), #'setitem' : (('void', 'ref', 'int', 'int'), None), #'newlist' : (('void', 'varargs'), 'ref'), @@ -384,6 +392,9 @@ class Frame(object): def __init__(self, memocast): self.verbose = False self.memocast = memocast + self.opindex = 1 + self._forced = False + self._may_force = -1 def getenv(self, v): if isinstance(v, Constant): @@ -391,6 +402,19 @@ class Frame(object): else: return self.env[v] + def _populate_fail_args(self, op, skip=None): + fail_args = [] + if op.fail_args: + for fail_arg in op.fail_args: + if fail_arg is None: + fail_args.append(None) + elif fail_arg is skip: + fail_args.append(fail_arg.concretetype._defl()) + else: + fail_args.append(self.getenv(fail_arg)) + self.fail_args = fail_args + self.fail_index = op.fail_index + def execute(self): """Execute all operations in a loop, possibly following to other loops as well. 
@@ -401,6 +425,7 @@ class Frame(object): operations = self.loop.operations opindex = 0 while True: + self.opindex = opindex op = operations[opindex] args = [self.getenv(v) for v in op.args] if not op.is_final(): @@ -419,18 +444,11 @@ class Frame(object): opindex = 0 continue else: - fail_args = [] - if op.fail_args: - for fail_arg in op.fail_args: - if fail_arg is None: - fail_args.append(None) - else: - fail_args.append(self.getenv(fail_arg)) + self._populate_fail_args(op) # a non-patched guard if self.verbose: log.trace('failed: %s' % ( ', '.join(map(str, fail_args)),)) - self.fail_args = fail_args return op.fail_index #verbose = self.verbose assert (result is None) == (op.result is None) @@ -453,7 +471,8 @@ class Frame(object): if op.opnum == rop.JUMP: assert len(op.jump_target.inputargs) == len(args) self.env = dict(zip(op.jump_target.inputargs, args)) - operations = op.jump_target.operations + self.loop = op.jump_target + operations = self.loop.operations opindex = 0 _stats.exec_jumps += 1 elif op.opnum == rop.FINISH: @@ -484,7 +503,7 @@ class Frame(object): try: res = ophandler(self, descr, *values) finally: - if verbose: + if 0: # if verbose: argtypes, restype = TYPES[opname] if res is None: resdata = '' @@ -493,9 +512,9 @@ class Frame(object): else: resdata = '-> ' + repr1(res, restype, self.memocast) # fish the types - #log.cpu('\t%s %s %s' % (opname, repr_list(values, argtypes, - # self.memocast), - # resdata)) + log.cpu('\t%s %s %s' % (opname, repr_list(values, argtypes, + self.memocast), + resdata)) return res def as_int(self, x): @@ -776,6 +795,24 @@ class Frame(object): def op_uint_xor(self, descr, arg1, arg2): return arg1 ^ arg2 + def op_force_token(self, descr): + opaque_frame = _to_opaque(self) + return llmemory.cast_ptr_to_adr(opaque_frame) + + def op_call_may_force(self, calldescr, func, *args): + assert not self._forced + self._may_force = self.opindex + try: + return self.op_call(calldescr, func, *args) + finally: + self._may_force = -1 + + def op_guard_not_forced(self, descr): + forced = self._forced + self._forced = False + if forced: + raise GuardFailed + class OOFrame(Frame): @@ -1042,6 +1079,25 @@ def get_zero_division_error_value(): return lltype.cast_opaque_ptr(llmemory.GCREF, _get_error(ZeroDivisionError).args[1]) +def force(opaque_frame): + frame = _from_opaque(opaque_frame) + assert not frame._forced + frame._forced = True + assert frame._may_force >= 0 + call_op = frame.loop.operations[frame._may_force] + guard_op = frame.loop.operations[frame._may_force+1] + assert call_op.opnum == rop.CALL_MAY_FORCE + frame._populate_fail_args(guard_op, skip=call_op.result) + return frame.fail_index + +def get_forced_token_frame(force_token): + opaque_frame = llmemory.cast_adr_to_ptr(force_token, + lltype.Ptr(_TO_OPAQUE[Frame])) + return opaque_frame + +def get_frame_forced_token(opaque_frame): + return llmemory.cast_ptr_to_adr(opaque_frame) + class MemoCast(object): def __init__(self): self.addresses = [llmemory.NULL] @@ -1411,6 +1467,9 @@ setannotation(get_overflow_error, annmodel.SomeAddress()) setannotation(get_overflow_error_value, annmodel.SomePtr(llmemory.GCREF)) setannotation(get_zero_division_error, annmodel.SomeAddress()) setannotation(get_zero_division_error_value, annmodel.SomePtr(llmemory.GCREF)) +setannotation(force, annmodel.SomeInteger()) +setannotation(get_forced_token_frame, s_Frame) +setannotation(get_frame_forced_token, annmodel.SomeAddress()) setannotation(new_memo_cast, s_MemoCast) setannotation(cast_adr_to_int, annmodel.SomeInteger()) diff --git 
a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py index f6d37958b3..ab9ee647be 100644 --- a/pypy/jit/backend/llgraph/runner.py +++ b/pypy/jit/backend/llgraph/runner.py @@ -4,6 +4,7 @@ Minimal-API wrapper around the llinterpreter to run operations. import sys from pypy.rlib.unroll import unrolling_iterable +from pypy.rlib.objectmodel import we_are_translated from pypy.rpython.lltypesystem import lltype, llmemory, rclass from pypy.rpython.ootypesystem import ootype from pypy.rpython.llinterp import LLInterpreter @@ -20,31 +21,16 @@ class MiniStats: class Descr(history.AbstractDescr): - name = None - ofs = -1 - typeinfo = '?' - - def __init__(self, ofs, typeinfo='?', extrainfo=None): + + def __init__(self, ofs, typeinfo, extrainfo=None, name=None): self.ofs = ofs self.typeinfo = typeinfo self.extrainfo = extrainfo + self.name = name def get_extra_info(self): return self.extrainfo - def __hash__(self): - return hash((self.ofs, self.typeinfo)) - - def __eq__(self, other): - if not isinstance(other, Descr): - return NotImplemented - return self.ofs == other.ofs and self.typeinfo == other.typeinfo - - def __ne__(self, other): - if not isinstance(other, Descr): - return NotImplemented - return self.ofs != other.ofs or self.typeinfo != other.typeinfo - def sort_key(self): return self.ofs @@ -75,9 +61,12 @@ class Descr(history.AbstractDescr): raise TypeError("cannot use comparison on Descrs") def __repr__(self): + args = [repr(self.ofs), repr(self.typeinfo)] if self.name is not None: - return '<Descr %r, %r, %r>' % (self.ofs, self.typeinfo, self.name) - return '<Descr %r, %r>' % (self.ofs, self.typeinfo) + args.append(repr(self.name)) + if self.extrainfo is not None: + args.append('E') + return '<Descr %r>' % (', '.join(args),) history.TreeLoop._compiled_version = lltype.nullptr(llimpl.COMPILEDLOOP.TO) @@ -99,11 +88,21 @@ class BaseCPU(model.AbstractCPU): llimpl._stats = self.stats llimpl._llinterp = LLInterpreter(self.rtyper) self._future_values = [] + self._descrs = {} def _freeze_(self): assert self.translate_support_code return False + def getdescr(self, ofs, typeinfo='?', extrainfo=None, name=None): + key = (ofs, typeinfo, extrainfo, name) + try: + return self._descrs[key] + except KeyError: + descr = Descr(ofs, typeinfo, extrainfo, name) + self._descrs[key] = descr + return descr + def set_class_sizes(self, class_sizes): self.class_sizes = class_sizes for vtable, size in class_sizes.items(): @@ -233,6 +232,10 @@ class BaseCPU(model.AbstractCPU): def get_latest_value_float(self, index): return llimpl.frame_float_getvalue(self.latest_frame, index) + def get_latest_force_token(self): + token = llimpl.get_frame_forced_token(self.latest_frame) + return self.cast_adr_to_int(token) + # ---------- def get_exception(self): @@ -252,16 +255,9 @@ class BaseCPU(model.AbstractCPU): return (self.cast_adr_to_int(llimpl.get_zero_division_error()), llimpl.get_zero_division_error_value()) - @staticmethod - def sizeof(S): + def sizeof(self, S): assert not isinstance(S, lltype.Ptr) - return Descr(symbolic.get_size(S)) - - @staticmethod - def numof(S): - return 4 - - ##addresssuffix = '4' + return self.getdescr(symbolic.get_size(S)) def cast_adr_to_int(self, adr): return llimpl.cast_adr_to_int(self.memo_cast, adr) @@ -282,18 +278,14 @@ class LLtypeCPU(BaseCPU): BaseCPU.__init__(self, *args, **kwds) self.fielddescrof_vtable = self.fielddescrof(rclass.OBJECT, 'typeptr') - @staticmethod - def fielddescrof(S, fieldname): + def fielddescrof(self, S, fieldname): ofs, size = 
symbolic.get_field_token(S, fieldname) token = history.getkind(getattr(S, fieldname)) - res = Descr(ofs, token[0]) - res.name = fieldname - return res + return self.getdescr(ofs, token[0], name=fieldname) - @staticmethod - def calldescrof(FUNC, ARGS, RESULT, extrainfo=None): + def calldescrof(self, FUNC, ARGS, RESULT, extrainfo=None): token = history.getkind(RESULT) - return Descr(0, token[0], extrainfo=extrainfo) + return self.getdescr(0, token[0], extrainfo=extrainfo) def get_exception(self): return self.cast_adr_to_int(llimpl.get_exception()) @@ -301,13 +293,12 @@ class LLtypeCPU(BaseCPU): def get_exc_value(self): return llimpl.get_exc_value() - @staticmethod - def arraydescrof(A): + def arraydescrof(self, A): assert isinstance(A, lltype.GcArray) assert A.OF != lltype.Void size = symbolic.get_size(A) token = history.getkind(A.OF) - return Descr(size, token[0]) + return self.getdescr(size, token[0]) # ---------- the backend-dependent operations ---------- @@ -498,6 +489,14 @@ class LLtypeCPU(BaseCPU): return history.BoxInt(llimpl.cast_to_int(ptrbox.getref_base(), self.memo_cast)) + def force(self, force_token): + token = self.cast_int_to_adr(force_token) + frame = llimpl.get_forced_token_frame(token) + fail_index = llimpl.force(frame) + self.latest_frame = frame + return self.get_fail_descr_from_number(fail_index) + + class OOtypeCPU(BaseCPU): is_oo = True ts = oohelper diff --git a/pypy/jit/backend/llgraph/test/test_llgraph.py b/pypy/jit/backend/llgraph/test/test_llgraph.py index 858a2f8852..4191b3ef4b 100644 --- a/pypy/jit/backend/llgraph/test/test_llgraph.py +++ b/pypy/jit/backend/llgraph/test/test_llgraph.py @@ -10,6 +10,9 @@ from pypy.jit.metainterp.executor import execute from pypy.jit.backend.test.runner_test import LLtypeBackendTest class TestLLTypeLLGraph(LLtypeBackendTest): + # for individual tests see: + # ====> ../../test/runner_test.py + from pypy.jit.backend.llgraph.runner import LLtypeCPU as cpu_type def setup_method(self, _): diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py index f58022a72e..eb05dc959e 100644 --- a/pypy/jit/backend/llsupport/regalloc.py +++ b/pypy/jit/backend/llsupport/regalloc.py @@ -304,7 +304,7 @@ class RegisterManager(object): self.assembler.regalloc_mov(reg, to) # otherwise it's clean - def before_call(self, force_store=[]): + def before_call(self, force_store=[], save_all_regs=False): """ Spill registers before a call, as described by 'self.save_around_call_regs'. Registers are not spilled if they don't survive past the current operation, unless they @@ -316,8 +316,8 @@ class RegisterManager(object): del self.reg_bindings[v] self.free_regs.append(reg) continue - if reg not in self.save_around_call_regs: - # we don't need to + if not save_all_regs and reg not in self.save_around_call_regs: + # we don't have to continue self._sync_var(v) del self.reg_bindings[v] @@ -327,12 +327,12 @@ class RegisterManager(object): """ Adjust registers according to the result of the call, which is in variable v. 
""" - if v is not None: - self._check_type(v) - r = self.call_result_location(v) - self.reg_bindings[v] = r - self.free_regs = [fr for fr in self.free_regs if fr is not r] - + self._check_type(v) + r = self.call_result_location(v) + self.reg_bindings[v] = r + self.free_regs = [fr for fr in self.free_regs if fr is not r] + return r + # abstract methods, override def convert_to_imm(self, c): diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py index d5340433aa..418e3fb977 100644 --- a/pypy/jit/backend/llsupport/test/test_regalloc.py +++ b/pypy/jit/backend/llsupport/test/test_regalloc.py @@ -278,6 +278,30 @@ class TestRegalloc(object): assert len(rm.reg_bindings) == 3 rm._check_invariants() + def test_call_support_save_all_regs(self): + class XRegisterManager(RegisterManager): + save_around_call_regs = [r1, r2] + + def call_result_location(self, v): + return r1 + + sm = TStackManager() + asm = MockAsm() + boxes, longevity = boxes_and_longevity(5) + rm = XRegisterManager(longevity, stack_manager=sm, + assembler=asm) + for b in boxes[:-1]: + rm.force_allocate_reg(b) + rm.before_call(save_all_regs=True) + assert len(rm.reg_bindings) == 0 + assert sm.stack_depth == 4 + assert len(asm.moves) == 4 + rm._check_invariants() + rm.after_call(boxes[-1]) + assert len(rm.reg_bindings) == 1 + rm._check_invariants() + + def test_different_stack_width(self): class XRegisterManager(RegisterManager): reg_width = 2 diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py index 022d66d813..fb00711304 100644 --- a/pypy/jit/backend/model.py +++ b/pypy/jit/backend/model.py @@ -78,6 +78,11 @@ class AbstractCPU(object): or from 'args' if it was a FINISH). Returns a ptr or an obj.""" raise NotImplementedError + def get_latest_force_token(self): + """After a GUARD_NOT_FORCED fails, this function returns the + same FORCE_TOKEN result as the one in the just-failed loop.""" + raise NotImplementedError + def get_exception(self): raise NotImplementedError @@ -205,6 +210,16 @@ class AbstractCPU(object): def do_cast_ptr_to_int(self, ptrbox): raise NotImplementedError + def do_force_token(self): + # this should not be implemented at all by the backends + raise NotImplementedError + + def do_call_may_force(self, args, calldescr): + return self.do_call(args, calldescr) + + def force(self, force_token): + raise NotImplementedError + # ootype specific operations # -------------------------- diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py index 86561dcab6..307acdcac0 100644 --- a/pypy/jit/backend/test/runner_test.py +++ b/pypy/jit/backend/test/runner_test.py @@ -1221,6 +1221,139 @@ class LLtypeBackendTest(BaseBackendTest): else: assert record == [] + def test_force_operations_returning_void(self): + values = [] + def maybe_force(token, flag): + if flag: + descr = self.cpu.force(token) + values.append(descr) + values.append(self.cpu.get_latest_value_int(0)) + values.append(self.cpu.get_latest_value_int(1)) + + FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Void) + func_ptr = llhelper(lltype.Ptr(FUNC), maybe_force) + funcbox = self.get_funcbox(self.cpu, func_ptr).constbox() + calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT) + cpu = self.cpu + i0 = BoxInt() + i1 = BoxInt() + tok = BoxInt() + faildescr = BasicFailDescr(1) + ops = [ + ResOperation(rop.FORCE_TOKEN, [], tok), + ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], None, + descr=calldescr), + ResOperation(rop.GUARD_NOT_FORCED, [], None, 
descr=faildescr), + ResOperation(rop.FINISH, [i0], None, descr=BasicFailDescr(0)) + ] + ops[2].fail_args = [i1, i0] + looptoken = LoopToken() + self.cpu.compile_loop([i0, i1], ops, looptoken) + self.cpu.set_future_value_int(0, 20) + self.cpu.set_future_value_int(1, 0) + fail = self.cpu.execute_token(looptoken) + assert fail.identifier == 0 + assert self.cpu.get_latest_value_int(0) == 20 + assert values == [] + + self.cpu.set_future_value_int(0, 10) + self.cpu.set_future_value_int(1, 1) + fail = self.cpu.execute_token(looptoken) + assert fail.identifier == 1 + assert self.cpu.get_latest_value_int(0) == 1 + assert self.cpu.get_latest_value_int(1) == 10 + assert values == [faildescr, 1, 10] + + def test_force_operations_returning_int(self): + values = [] + def maybe_force(token, flag): + if flag: + self.cpu.force(token) + values.append(self.cpu.get_latest_value_int(0)) + values.append(self.cpu.get_latest_value_int(2)) + return 42 + + FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Signed) + func_ptr = llhelper(lltype.Ptr(FUNC), maybe_force) + funcbox = self.get_funcbox(self.cpu, func_ptr).constbox() + calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT) + cpu = self.cpu + i0 = BoxInt() + i1 = BoxInt() + i2 = BoxInt() + tok = BoxInt() + faildescr = BasicFailDescr(1) + ops = [ + ResOperation(rop.FORCE_TOKEN, [], tok), + ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], i2, + descr=calldescr), + ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr), + ResOperation(rop.FINISH, [i2], None, descr=BasicFailDescr(0)) + ] + ops[2].fail_args = [i1, i2, i0] + looptoken = LoopToken() + self.cpu.compile_loop([i0, i1], ops, looptoken) + self.cpu.set_future_value_int(0, 20) + self.cpu.set_future_value_int(1, 0) + fail = self.cpu.execute_token(looptoken) + assert fail.identifier == 0 + assert self.cpu.get_latest_value_int(0) == 42 + assert values == [] + + self.cpu.set_future_value_int(0, 10) + self.cpu.set_future_value_int(1, 1) + fail = self.cpu.execute_token(looptoken) + assert fail.identifier == 1 + assert self.cpu.get_latest_value_int(0) == 1 + assert self.cpu.get_latest_value_int(1) == 42 + assert self.cpu.get_latest_value_int(2) == 10 + assert values == [1, 10] + + def test_force_operations_returning_float(self): + values = [] + def maybe_force(token, flag): + if flag: + self.cpu.force(token) + values.append(self.cpu.get_latest_value_int(0)) + values.append(self.cpu.get_latest_value_int(2)) + return 42.5 + + FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Float) + func_ptr = llhelper(lltype.Ptr(FUNC), maybe_force) + funcbox = self.get_funcbox(self.cpu, func_ptr).constbox() + calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT) + cpu = self.cpu + i0 = BoxInt() + i1 = BoxInt() + f2 = BoxFloat() + tok = BoxInt() + faildescr = BasicFailDescr(1) + ops = [ + ResOperation(rop.FORCE_TOKEN, [], tok), + ResOperation(rop.CALL_MAY_FORCE, [funcbox, tok, i1], f2, + descr=calldescr), + ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr), + ResOperation(rop.FINISH, [f2], None, descr=BasicFailDescr(0)) + ] + ops[2].fail_args = [i1, f2, i0] + looptoken = LoopToken() + self.cpu.compile_loop([i0, i1], ops, looptoken) + self.cpu.set_future_value_int(0, 20) + self.cpu.set_future_value_int(1, 0) + fail = self.cpu.execute_token(looptoken) + assert fail.identifier == 0 + assert self.cpu.get_latest_value_float(0) == 42.5 + assert values == [] + + self.cpu.set_future_value_int(0, 10) + self.cpu.set_future_value_int(1, 1) + fail = self.cpu.execute_token(looptoken) 
+ assert fail.identifier == 1 + assert self.cpu.get_latest_value_int(0) == 1 + assert self.cpu.get_latest_value_float(1) == 42.5 + assert self.cpu.get_latest_value_int(2) == 10 + assert values == [1, 10] + # pure do_ / descr features def test_do_operations(self): diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py index d0ced87014..0824beb805 100644 --- a/pypy/jit/backend/x86/assembler.py +++ b/pypy/jit/backend/x86/assembler.py @@ -9,7 +9,8 @@ from pypy.rpython.lltypesystem.lloperation import llop from pypy.rpython.annlowlevel import llhelper from pypy.tool.uid import fixid from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte,\ - X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs + X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs, FRAME_FIXED_SIZE,\ + FORCE_INDEX_OFS from pypy.rlib.objectmodel import we_are_translated, specialize from pypy.jit.backend.x86 import codebuf from pypy.jit.backend.x86.ri386 import * @@ -22,8 +23,6 @@ from pypy.rlib.debug import debug_print # our calling convention - we pass first 6 args in registers # and the rest stays on the stack -RET_BP = 5 # ret ip + bp + bx + esi + edi = 5 words - if sys.platform == 'darwin': # darwin requires the stack to be 16 bytes aligned on calls CALL_ALIGN = 4 @@ -90,6 +89,7 @@ class Assembler386(object): self.fail_boxes_int = NonmovableGrowableArraySigned() self.fail_boxes_ptr = NonmovableGrowableArrayGCREF() self.fail_boxes_float = NonmovableGrowableArrayFloat() + self.fail_ebp = 0 self.setup_failure_recovery() def leave_jitted_hook(self): @@ -200,7 +200,11 @@ class Assembler386(object): # patch stack adjustment LEA # possibly align, e.g. for Mac OS X mc = codebuf.InMemoryCodeBuilder(adr_lea, adr_lea + 4) - mc.write(packimm32(-(stack_depth + RET_BP - 2) * WORD)) + # Compute the correct offset for the instruction LEA ESP, [EBP-4*words]. + # Given that [EBP] is where we saved EBP, i.e. in the last word + # of our fixed frame, then the 'words' value is: + words = (FRAME_FIXED_SIZE - 1) + stack_depth + mc.write(packimm32(-WORD * words)) mc.done() def _assemble_bootstrap_code(self, inputargs, arglocs): @@ -210,8 +214,8 @@ class Assembler386(object): self.mc.PUSH(ebx) self.mc.PUSH(esi) self.mc.PUSH(edi) - # NB. exactly 4 pushes above; if this changes, fix stack_pos(). - # You must also keep _get_callshape() in sync. + # NB. the shape of the frame is hard-coded in get_basic_shape() too. + # Also, make sure this is consistent with FRAME_FIXED_SIZE. adr_stackadjust = self._patchable_stackadjust() tmp = X86RegisterManager.all_regs[0] xmmtmp = X86XMMRegisterManager.all_regs[0] @@ -266,9 +270,6 @@ class Assembler386(object): regalloc_mov = mov # legacy interface - def regalloc_fstp(self, loc): - self.mc.FSTP(loc) - def regalloc_push(self, loc): if isinstance(loc, XMMREG): self.mc.SUB(esp, imm(2*WORD)) @@ -758,7 +759,8 @@ class Assembler386(object): def implement_guard_recovery(self, guard_opnum, faildescr, failargs, fail_locs): exc = (guard_opnum == rop.GUARD_EXCEPTION or - guard_opnum == rop.GUARD_NO_EXCEPTION) + guard_opnum == rop.GUARD_NO_EXCEPTION or + guard_opnum == rop.GUARD_NOT_FORCED) return self.generate_quick_failure(faildescr, failargs, fail_locs, exc) def generate_quick_failure(self, faildescr, failargs, fail_locs, exc): @@ -876,75 +878,79 @@ class Assembler386(object): arglocs.append(loc) return arglocs[:] + def grab_frame_values(self, bytecode, frame_addr, allregisters): + # no malloc allowed here!! 
+ self.fail_ebp = allregisters[16 + ebp.op] + num = 0 + value_hi = 0 + while 1: + # decode the next instruction from the bytecode + code = rffi.cast(lltype.Signed, bytecode[0]) + bytecode = rffi.ptradd(bytecode, 1) + if code >= 4*self.DESCR_FROMSTACK: + if code > 0x7F: + shift = 7 + code &= 0x7F + while True: + nextcode = rffi.cast(lltype.Signed, bytecode[0]) + bytecode = rffi.ptradd(bytecode, 1) + code |= (nextcode & 0x7F) << shift + shift += 7 + if nextcode <= 0x7F: + break + # load the value from the stack + kind = code & 3 + code = (code >> 2) - self.DESCR_FROMSTACK + stackloc = frame_addr + get_ebp_ofs(code) + value = rffi.cast(rffi.LONGP, stackloc)[0] + if kind == self.DESCR_FLOAT: + value_hi = value + value = rffi.cast(rffi.LONGP, stackloc - 4)[0] + else: + # 'code' identifies a register: load its value + kind = code & 3 + if kind == self.DESCR_SPECIAL: + if code == self.DESCR_HOLE: + num += 1 + continue + assert code == self.DESCR_STOP + break + code >>= 2 + if kind == self.DESCR_FLOAT: + value = allregisters[2*code] + value_hi = allregisters[2*code + 1] + else: + value = allregisters[16 + code] + + # store the loaded value into fail_boxes_<type> + if kind == self.DESCR_INT: + tgt = self.fail_boxes_int.get_addr_for_num(num) + elif kind == self.DESCR_REF: + tgt = self.fail_boxes_ptr.get_addr_for_num(num) + elif kind == self.DESCR_FLOAT: + tgt = self.fail_boxes_float.get_addr_for_num(num) + rffi.cast(rffi.LONGP, tgt)[1] = value_hi + else: + assert 0, "bogus kind" + rffi.cast(rffi.LONGP, tgt)[0] = value + num += 1 + # + if not we_are_translated(): + assert bytecode[4] == 0xCC + fail_index = rffi.cast(rffi.LONGP, bytecode)[0] + return fail_index + def setup_failure_recovery(self): def failure_recovery_func(registers): - # no malloc allowed here!! # 'registers' is a pointer to a structure containing the # original value of the registers, optionally the original # value of XMM registers, and finally a reference to the # recovery bytecode. See _build_failure_recovery() for details. 
stack_at_ebp = registers[ebp.op] bytecode = rffi.cast(rffi.UCHARP, registers[8]) - num = 0 - value_hi = 0 - while 1: - # decode the next instruction from the bytecode - code = rffi.cast(lltype.Signed, bytecode[0]) - bytecode = rffi.ptradd(bytecode, 1) - if code >= 4*self.DESCR_FROMSTACK: - if code > 0x7F: - shift = 7 - code &= 0x7F - while True: - nextcode = rffi.cast(lltype.Signed, bytecode[0]) - bytecode = rffi.ptradd(bytecode, 1) - code |= (nextcode & 0x7F) << shift - shift += 7 - if nextcode <= 0x7F: - break - # load the value from the stack - kind = code & 3 - code = (code >> 2) - self.DESCR_FROMSTACK - stackloc = stack_at_ebp + get_ebp_ofs(code) - value = rffi.cast(rffi.LONGP, stackloc)[0] - if kind == self.DESCR_FLOAT: - value_hi = value - value = rffi.cast(rffi.LONGP, stackloc - 4)[0] - else: - # 'code' identifies a register: load its value - kind = code & 3 - if kind == self.DESCR_SPECIAL: - if code == self.DESCR_HOLE: - num += 1 - continue - assert code == self.DESCR_STOP - break - code >>= 2 - if kind == self.DESCR_FLOAT: - xmmregisters = rffi.ptradd(registers, -16) - value = xmmregisters[2*code] - value_hi = xmmregisters[2*code + 1] - else: - value = registers[code] - - # store the loaded value into fail_boxes_<type> - if kind == self.DESCR_INT: - tgt = self.fail_boxes_int.get_addr_for_num(num) - elif kind == self.DESCR_REF: - tgt = self.fail_boxes_ptr.get_addr_for_num(num) - elif kind == self.DESCR_FLOAT: - tgt = self.fail_boxes_float.get_addr_for_num(num) - rffi.cast(rffi.LONGP, tgt)[1] = value_hi - else: - assert 0, "bogus kind" - rffi.cast(rffi.LONGP, tgt)[0] = value - num += 1 - # - if not we_are_translated(): - assert bytecode[4] == 0xCC - fail_index = rffi.cast(rffi.LONGP, bytecode)[0] - return fail_index + allregisters = rffi.ptradd(registers, -16) + return self.grab_frame_values(bytecode, stack_at_ebp, allregisters) self.failure_recovery_func = failure_recovery_func self.failure_recovery_code = [0, 0, 0, 0] @@ -997,11 +1003,11 @@ class Assembler386(object): # now we return from the complete frame, which starts from # _assemble_bootstrap_code(). The LEA below throws away most # of the frame, including all the PUSHes that we did just above. - mc.LEA(esp, addr_add(ebp, imm((-RET_BP + 2) * WORD))) - mc.POP(edi) - mc.POP(esi) - mc.POP(ebx) - mc.POP(ebp) + mc.LEA(esp, addr_add(ebp, imm(-3 * WORD))) + mc.POP(edi) # [ebp-12] + mc.POP(esi) # [ebp-8] + mc.POP(ebx) # [ebp-4] + mc.POP(ebp) # [ebp] mc.RET() self.mc2.done() self.failure_recovery_code[exc + 2 * withfloats] = recovery_addr @@ -1042,14 +1048,14 @@ class Assembler386(object): addr = self.cpu.get_on_leave_jitted_int(save_exception=exc) mc.CALL(rel32(addr)) - # don't break the following code sequence! + # don't break the following code sequence! xxx no reason any more? 
mc = mc._mc - mc.LEA(esp, addr_add(ebp, imm((-RET_BP + 2) * WORD))) + mc.LEA(esp, addr_add(ebp, imm(-3 * WORD))) mc.MOV(eax, imm(fail_index)) - mc.POP(edi) - mc.POP(esi) - mc.POP(ebx) - mc.POP(ebp) + mc.POP(edi) # [ebp-12] + mc.POP(esi) # [ebp-8] + mc.POP(ebx) # [ebp-4] + mc.POP(ebp) # [ebp] mc.RET() @specialize.arg(2) @@ -1098,12 +1104,23 @@ class Assembler386(object): self.mc.CALL(x) self.mark_gc_roots() self.mc.ADD(esp, imm(extra_on_stack)) - if size == 1: + if isinstance(resloc, MODRM64): + self.mc.FSTP(resloc) + elif size == 1: self.mc.AND(eax, imm(0xff)) elif size == 2: self.mc.AND(eax, imm(0xffff)) genop_call_pure = genop_call + + def genop_guard_call_may_force(self, op, guard_op, addr, + arglocs, result_loc): + faildescr = guard_op.descr + fail_index = self.cpu.get_fail_descr_number(faildescr) + self.mc.MOV(mem(ebp, FORCE_INDEX_OFS), imm(fail_index)) + self.genop_call(op, arglocs, result_loc) + self.mc.CMP(mem(ebp, FORCE_INDEX_OFS), imm(0)) + return self.implement_guard(addr, self.mc.JL) def genop_discard_cond_call_gc_wb(self, op, arglocs): # use 'mc._mc' directly instead of 'mc', to avoid @@ -1134,6 +1151,9 @@ class Assembler386(object): assert 0 < offset <= 127 mc.overwrite(jz_location-1, [chr(offset)]) + def genop_force_token(self, op, arglocs, resloc): + self.mc.LEA(resloc, mem(ebp, FORCE_INDEX_OFS)) + def not_implemented_op_discard(self, op, arglocs): msg = "not implemented operation: %s" % op.getopname() print msg diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py index 30fbd93da2..e64bb1b326 100644 --- a/pypy/jit/backend/x86/regalloc.py +++ b/pypy/jit/backend/x86/regalloc.py @@ -19,6 +19,8 @@ from pypy.jit.backend.llsupport.regalloc import StackManager, RegisterManager,\ TempBox WORD = 4 +FRAME_FIXED_SIZE = 5 # ebp + ebx + esi + edi + force_index = 5 words +FORCE_INDEX_OFS = -4*WORD width_of_type = { INT : 1, @@ -98,10 +100,9 @@ class X86XMMRegisterManager(RegisterManager): def after_call(self, v): # the result is stored in st0, but we don't have this around, - # so we move it to some stack location - if v is not None: - loc = self.stack_manager.loc(v, 2) - self.assembler.regalloc_fstp(loc) + # so genop_call will move it to some stack location immediately + # after the call + return self.stack_manager.loc(v, 2) class X86StackManager(StackManager): @@ -287,7 +288,8 @@ class RegAlloc(object): self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs, arglocs, result_loc, self.sm.stack_depth) - self.rm.possibly_free_var(op.result) + if op.result is not None: + self.possibly_free_var(op.result) self.possibly_free_vars(guard_op.fail_args) def perform_guard(self, guard_op, arglocs, result_loc): @@ -308,7 +310,10 @@ class RegAlloc(object): self.assembler.dump('%s(%s)' % (op, arglocs)) self.assembler.regalloc_perform_discard(op, arglocs) - def can_optimize_cmp_op(self, op, i, operations): + def can_merge_with_next_guard(self, op, i, operations): + if op.opnum == rop.CALL_MAY_FORCE: + assert operations[i + 1].opnum == rop.GUARD_NOT_FORCED + return True if not op.is_comparison(): return False if (operations[i + 1].opnum != rop.GUARD_TRUE and @@ -332,7 +337,7 @@ class RegAlloc(object): i += 1 self.possibly_free_vars(op.args) continue - if self.can_optimize_cmp_op(op, i, operations): + if self.can_merge_with_next_guard(op, i, operations): oplist[op.opnum](self, op, operations[i + 1]) i += 1 else: @@ -604,25 +609,38 @@ class RegAlloc(object): self.Perform(op, [loc0], loc1) self.rm.possibly_free_var(op.args[0]) - def _call(self, op, arglocs, 
force_store=[]): - self.rm.before_call(force_store) - self.xrm.before_call(force_store) - self.Perform(op, arglocs, eax) + def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None): + save_all_regs = guard_not_forced_op is not None + self.rm.before_call(force_store, save_all_regs=save_all_regs) + self.xrm.before_call(force_store, save_all_regs=save_all_regs) if op.result is not None: if op.result.type == FLOAT: - self.xrm.after_call(op.result) + resloc = self.xrm.after_call(op.result) else: - self.rm.after_call(op.result) + resloc = self.rm.after_call(op.result) + else: + resloc = None + if guard_not_forced_op is not None: + self.perform_with_guard(op, guard_not_forced_op, arglocs, resloc) + else: + self.Perform(op, arglocs, resloc) - def consider_call(self, op, ignored): + def _consider_call(self, op, guard_not_forced_op=None): calldescr = op.descr assert isinstance(calldescr, BaseCallDescr) assert len(calldescr.arg_classes) == len(op.args) - 1 size = calldescr.get_result_size(self.translate_support_code) - self._call(op, [imm(size)] + [self.loc(arg) for arg in op.args]) + self._call(op, [imm(size)] + [self.loc(arg) for arg in op.args], + guard_not_forced_op=guard_not_forced_op) + def consider_call(self, op, ignored): + self._consider_call(op) consider_call_pure = consider_call + def consider_call_may_force(self, op, guard_op): + assert guard_op is not None + self._consider_call(op, guard_op) + def consider_cond_call_gc_wb(self, op, ignored): assert op.result is None arglocs = [self.loc(arg) for arg in op.args] @@ -927,6 +945,10 @@ class RegAlloc(object): assert reg is eax # ok to ignore this one return gcrootmap.compress_callshape(shape) + def consider_force_token(self, op, ignored): + loc = self.rm.force_allocate_reg(op.result) + self.Perform(op, [], loc) + def not_implemented_op(self, op, ignored): msg = "[regalloc] Not implemented operation: %s" % op.getopname() print msg @@ -942,10 +964,9 @@ for name, value in RegAlloc.__dict__.iteritems(): def get_ebp_ofs(position): # Argument is a stack position (0, 1, 2...). - # Returns (ebp-16), (ebp-20), (ebp-24)... - # This depends on the fact that our function prologue contains - # exactly 4 PUSHes. - return -WORD * (4 + position) + # Returns (ebp-20), (ebp-24), (ebp-28)... + # i.e. the n'th word beyond the fixed frame size. 
+ return -WORD * (FRAME_FIXED_SIZE + position) def lower_byte(reg): # argh, kill, use lowest8bits instead diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py index a0c4110158..387315a5f9 100644 --- a/pypy/jit/backend/x86/runner.py +++ b/pypy/jit/backend/x86/runner.py @@ -6,9 +6,9 @@ from pypy.rpython.llinterp import LLInterpreter from pypy.rlib.objectmodel import we_are_translated from pypy.jit.metainterp import history from pypy.jit.backend.x86.assembler import Assembler386 +from pypy.jit.backend.x86.regalloc import FORCE_INDEX_OFS from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU - class CPU386(AbstractLLCPU): debug = True supports_floats = True @@ -59,6 +59,9 @@ class CPU386(AbstractLLCPU): llmemory.GCREF.TO)) return ptrvalue + def get_latest_force_token(self): + return self.assembler.fail_ebp + FORCE_INDEX_OFS + def execute_token(self, executable_token): addr = executable_token._x86_bootstrap_code func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr) @@ -87,6 +90,27 @@ class CPU386(AbstractLLCPU): adr = llmemory.cast_ptr_to_adr(x) return CPU386.cast_adr_to_int(adr) + all_null_registers = lltype.malloc(rffi.LONGP.TO, 24, + flavor='raw', zero=True) + + def force(self, addr_of_force_index): + TP = rffi.CArrayPtr(lltype.Signed) + fail_index = rffi.cast(TP, addr_of_force_index)[0] + assert fail_index >= 0, "already forced!" + faildescr = self.get_fail_descr_from_number(fail_index) + rffi.cast(TP, addr_of_force_index)[0] = -1 + bytecode = rffi.cast(rffi.UCHARP, + faildescr._x86_failure_recovery_bytecode) + # start of "no gc operation!" block + fail_index_2 = self.assembler.grab_frame_values( + bytecode, + addr_of_force_index - FORCE_INDEX_OFS, + self.all_null_registers) + self.assembler.leave_jitted_hook() + # end of "no gc operation!" 
block + assert fail_index == fail_index_2 + return faildescr + class CPU386_NO_SSE2(CPU386): supports_floats = False diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py index 934cf40cca..5a1e8f337b 100644 --- a/pypy/jit/backend/x86/test/test_gc_integration.py +++ b/pypy/jit/backend/x86/test/test_gc_integration.py @@ -9,7 +9,7 @@ from pypy.jit.metainterp.resoperation import rop, ResOperation from pypy.jit.backend.llsupport.descr import GcCache from pypy.jit.backend.llsupport.gc import GcLLDescription from pypy.jit.backend.x86.runner import CPU -from pypy.jit.backend.x86.regalloc import RegAlloc, WORD +from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, FRAME_FIXED_SIZE from pypy.jit.metainterp.test.oparser import parse from pypy.rpython.lltypesystem import lltype, llmemory, rffi from pypy.rpython.annlowlevel import llhelper @@ -83,7 +83,8 @@ class TestRegallocDirectGcIntegration(object): # mark = regalloc.get_mark_gc_roots(cpu.gc_ll_descr.gcrootmap) assert mark[0] == 'compressed' - expected = ['ebx', 'esi', 'edi', -16, -20, -24] + base = -WORD * FRAME_FIXED_SIZE + expected = ['ebx', 'esi', 'edi', base, base-4, base-8] assert dict.fromkeys(mark[1:]) == dict.fromkeys(expected) class TestRegallocGcIntegration(BaseTestRegalloc): diff --git a/pypy/jit/metainterp/codewriter.py b/pypy/jit/metainterp/codewriter.py index 2c08eb9eab..17d5a0f972 100644 --- a/pypy/jit/metainterp/codewriter.py +++ b/pypy/jit/metainterp/codewriter.py @@ -14,6 +14,7 @@ from pypy.translator.backendopt.canraise import RaiseAnalyzer from pypy.translator.backendopt.writeanalyze import WriteAnalyzer from pypy.jit.metainterp.typesystem import deref, arrayItem, fieldType from pypy.jit.metainterp.effectinfo import effectinfo_from_writeanalyze +from pypy.jit.metainterp.effectinfo import VirtualizableAnalyzer import py, sys from pypy.tool.ansi_print import ansi_log @@ -182,8 +183,10 @@ class CodeWriter(object): self.metainterp_sd = metainterp_sd self.cpu = metainterp_sd.cpu self.portal_runner_ptr = portal_runner_ptr - self.raise_analyzer = RaiseAnalyzer(self.rtyper.annotator.translator) - self.write_analyzer = WriteAnalyzer(self.rtyper.annotator.translator) + translator = self.rtyper.annotator.translator + self.raise_analyzer = RaiseAnalyzer(translator) + self.write_analyzer = WriteAnalyzer(translator) + self.virtualizable_analyzer = VirtualizableAnalyzer(translator) def make_portal_bytecode(self, graph): log.info("making JitCodes...") @@ -323,7 +326,9 @@ class CodeWriter(object): # ok if consider_effects_of is not None: effectinfo = effectinfo_from_writeanalyze( - self.write_analyzer.analyze(consider_effects_of), self.cpu) + self.write_analyzer.analyze(consider_effects_of), + self.cpu, + self.virtualizable_analyzer.analyze(consider_effects_of)) calldescr = self.cpu.calldescrof(FUNC, tuple(NON_VOID_ARGS), RESULT, effectinfo) else: calldescr = self.cpu.calldescrof(FUNC, tuple(NON_VOID_ARGS), RESULT) @@ -1203,12 +1208,19 @@ class BytecodeMaker(object): if op.opname == "direct_call": func = getattr(get_funcobj(op.args[0].value), '_callable', None) pure = getattr(func, "_pure_function_", False) + all_promoted_args = getattr(func, + "_pure_function_with_all_promoted_args_", False) + if pure and not all_promoted_args: + effectinfo = calldescr.get_extra_info() + assert (effectinfo is not None and + not effectinfo.promotes_virtualizables) try: canraise = self.codewriter.raise_analyzer.can_raise(op) except lltype.DelayedPointer: canraise = True # if we need to look into the 
delayed ptr that is # the portal, then it's certainly going to raise if pure: + # XXX check what to do about exceptions (also MemoryError?) self.emit('residual_call_pure') elif canraise: self.emit('residual_call') @@ -1236,9 +1248,8 @@ class BytecodeMaker(object): def handle_regular_indirect_call(self, op): self.codewriter.register_indirect_call_targets(op) args = op.args[1:-1] - calldescr, non_void_args = self.codewriter.getcalldescr(op.args[0], - args, - op.result) + calldescr, non_void_args = self.codewriter.getcalldescr( + op.args[0], args, op.result, consider_effects_of=op) self.minimize_variables() self.emit('indirect_call') self.emit(self.get_position(calldescr)) diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py index eb51f0316c..22c32f4645 100644 --- a/pypy/jit/metainterp/compile.py +++ b/pypy/jit/metainterp/compile.py @@ -221,6 +221,7 @@ class ResumeGuardDescr(ResumeDescr): if box: fail_arg_types[i] = box.type self.fail_arg_types = fail_arg_types + # XXX ^^^ kill this attribute def handle_fail(self, metainterp_sd): from pypy.jit.metainterp.pyjitpl import MetaInterp @@ -236,6 +237,41 @@ class ResumeGuardDescr(ResumeDescr): send_bridge_to_backend(metainterp.staticdata, self, inputargs, new_loop.operations) + +class ResumeGuardForcedDescr(ResumeGuardDescr): + + def handle_fail(self, metainterp_sd): + from pypy.jit.metainterp.pyjitpl import MetaInterp + metainterp = MetaInterp(metainterp_sd) + token = metainterp_sd.cpu.get_latest_force_token() + metainterp._already_allocated_resume_virtuals = self.fetch_data(token) + self.counter = -2 # never compile + return metainterp.handle_guard_failure(self) + + def force_virtualizable(self, vinfo, virtualizable, force_token): + from pypy.jit.metainterp.pyjitpl import MetaInterp + from pypy.jit.metainterp.resume import force_from_resumedata + metainterp = MetaInterp(self.metainterp_sd) + metainterp.history = None # blackholing + liveboxes = metainterp.load_values_from_failure(self) + virtualizable_boxes, data = force_from_resumedata(metainterp, + liveboxes, self) + vinfo.write_boxes(virtualizable, virtualizable_boxes) + self.save_data(force_token, data) + + def save_data(self, key, value): + globaldata = self.metainterp_sd.globaldata + assert key not in globaldata.resume_virtuals + globaldata.resume_virtuals[key] = value + + def fetch_data(self, key): + globaldata = self.metainterp_sd.globaldata + assert key in globaldata.resume_virtuals + data = globaldata.resume_virtuals[key] + del globaldata.resume_virtuals[key] + return data + + class ResumeFromInterpDescr(ResumeDescr): def __init__(self, original_greenkey, redkey): ResumeDescr.__init__(self, original_greenkey) diff --git a/pypy/jit/metainterp/doc/jitpl5.txt b/pypy/jit/metainterp/doc/jitpl5.txt index 9102751858..decede3d89 100644 --- a/pypy/jit/metainterp/doc/jitpl5.txt +++ b/pypy/jit/metainterp/doc/jitpl5.txt @@ -78,16 +78,11 @@ we already saw that loop; and if we did, if the specialization pattern matches the real data -- but this is delicate because of the non-escaping flag. -Instead, this is done by doing tracing from the start of the loop again. -At the end, we don't do perfect specialization (for now), but simply -check that the already-computed specialization still applies, and then -jump to the already-compiled loop. (If it does not match, for now we -just cancel everything.) 
- -If the loop is not only executed but *entered* often enough, then after -this tracing, we generate a second copy of the loop (a "bridge") that -starts with all variables unspecialized, and ends with a jump to the -real loop. From this point on, we can just jump directly to the bridge +Instead, this is done by "entry bridges": we do tracing from +the start of the loop again, and at the end, we try to compile +the recorded trace as a "bridge" that comes from the +interpreter (i.e. with no virtuals at all) and goes to the old +loop. Later on, we can just jump directly to the entry bridge from the JUMP_ABSOLUTE bytecode. @@ -105,10 +100,7 @@ When we decide instead to compile more code for this guard failure, we take the set of live values and put them back into boxes, and proceed with tracing for the rest of the loop. -For now, we just check at the end of the loop that it matches the -already-computed specialization. If not, we cancel creating the -compiled version of it (and mark the guard so that future failures -always fall back to interpretation). To do this, when we created the -original loop, at every guard, we needed to record the set of live -values (mostly in which register or stack location they are) as well as -an "escaped-so-far" flag for each pointer. +At the end of the loop, we check that it matches an already-computed +specialization. If not, we go on tracing. This might unroll the loop +once. (Note that there is a global limit on the length of the recorded +trace, to avoid tracing forever.) diff --git a/pypy/jit/metainterp/doc/linking.txt b/pypy/jit/metainterp/doc/linking.txt deleted file mode 100644 index 27ca7ef8f5..0000000000 --- a/pypy/jit/metainterp/doc/linking.txt +++ /dev/null @@ -1,53 +0,0 @@ -============================================================== -Linking between the interpreter, the JIT, and the machine code -============================================================== - -In a pypy-c that contains a generated JIT, the execution of app-level -code initially uses the normal interpreter, but invokes the JIT if the -JUMP_ABSOLUTE bytecode is seen often enough (it's the bytecode that -closes an app-level loop). This part of the code of pypy-c looks like -this (in pseudo-C code coming from RPython):: - - void dispatch_JUMP_ABSOLUTE(Frame *frame, int arg) - { - frame->next_op = arg; - can_enter_jit(frame); - } - - void can_enter_jit(Frame *frame) - { - if (!position_seen_often_enough(frame->f_code, frame->next_op)) - return; - machine_code_ptr = lookup_machine_code_for(frame->f_code, - frame->next_op); - machine_code_ptr(frame); - /* at this point, the machine code has updated the frame - so that it points to the next bytecode to execute */ - } - -The jit_compile() function comes from RPython sources written in the -JIT support code (warmspot.py). It does tracing and generally ends up -compiling an extra loop to machine code. - -Then jit_compile() itself needs to transfer execution to the newly -compiled loop. Instead of calling the loop, jit_compile() returns a -small data structure (a continuation) that points to the loop and -contains values for the input arguments. A pointer to this data arrives -in REG3 in the guard recovery code, which contains the necessary -assembler to set up the real registers and actually jump to the loop. - -The reason to organize the control flow transfers in this way is because -when the jit_compile() function executes, the frame from the machine -code is always still available -- it is below in the stack. 
The machine -code didn't do a RET before it CALLed the JIT. This has two advantages. -First, it lets the JIT read values directly out of the old frame, to -find the values that were current when the guard failed. Second, it -tells the JIT where the *next* machine code frame will be: it will be at -the same position as the old frame (because the machine code and the -guard recovery code just jump to each other). This is useful to -implement virtualizables: a virtualizable object living in the heap -(e.g. a PyFrame instance in pypy-c) needs to contain a pointer to the -machine code frame's base pointer, and the cleanest approach is to let -the JIT write this pointer into the virtualizable heap object just -before it lets the guard recovery code transfer control to the machine -code. diff --git a/pypy/jit/metainterp/doc/loop.txt b/pypy/jit/metainterp/doc/loop.txt index 34f8ffcd86..e982026be6 100644 --- a/pypy/jit/metainterp/doc/loop.txt +++ b/pypy/jit/metainterp/doc/loop.txt @@ -36,32 +36,11 @@ the level of specialization:: . VirtualSpec(cls, name1=spec1, ...) | - VirtualizableSpec(cls, name1=spec1, ...) - | - FixedClassSpec(cls) - | NotSpec -For (a simplified) example, ``VirtualizableSpec(PyFrame, x = -VirtualSpec(W_IntObject, value = NotSpec))`` describes the virtualizable -frame for a loop in which the only used variable is ``x``, which is a -virtual ``W_IntObject``. - -The intersection rules are: - -* the intersection of two ``VirtualSpec`` of the same ``cls`` is a - further ``VirtualSpec``, and we proceed with the intersection of - each field. - -* the intersection of two ``VirtualizableSpec`` of the same ``cls`` is - like the previous case, except that some names may be omitted - completely from a given ``VirtualizableSpec``; in the case a name is - present in only one of the ``VirtualizableSpec``, we just keep it - unmodified in the intersection. - -* in other cases, the result is ``FixedClassSpec`` if the two specnodes - have the same class, or ``NotSpec`` if any is a ``NotSpec`` or if the - two classes differ. +For example, ``VirtualSpec(W_IntObject, value = NotSpec))`` describes a +variable which is a virtual ``W_IntObject``, containing a value that is +a real integer. Overall Approach diff --git a/pypy/jit/metainterp/doc/matching_rules.txt b/pypy/jit/metainterp/doc/matching_rules.txt deleted file mode 100644 index 7c541d601b..0000000000 --- a/pypy/jit/metainterp/doc/matching_rules.txt +++ /dev/null @@ -1,24 +0,0 @@ - -How does perfect specialization match nodes -============================================ - -Categories of spec nodes: - -NotSpecNode - nothing special - -FixedClassSpecNode - specnode with no know fields, matches the same class - specnode, less general than NotSpecNode. - -VirtualSpecNode - a virtual. matches only specnode that uses exactly - the same fields in the identical manner. - -VirtualListSpecNode - same as virtual - -VirtualizableSpecNode - virtualizable. This one is a little bit more complex: - for matching, VirtualizableSpecNode matches NotSpecNode (it was not used - at all, hence no guard_nonvirtualized) or VirtualizableSpecNode with a - common subset of fields. - -Say v0 (Virtualizable) has fields a, b and v1 to match has fields b, c -means that b need to have the exact same shape, but a and c can stay -whatever they are. 
diff --git a/pypy/jit/metainterp/doc/virtualizables.txt b/pypy/jit/metainterp/doc/virtualizables.txt deleted file mode 100644 index da0840accb..0000000000 --- a/pypy/jit/metainterp/doc/virtualizables.txt +++ /dev/null @@ -1,59 +0,0 @@ -Simplified virtualizables -========================= - -Let's start with some definitions: - -* Virtuals are objects which are known not to escape from jit code, hence - they're not allocated at all and their fields are stored in registers and or - on the stack. - -* Virtualizables are objects that are known to escape (for example the frame - object), but they're stored anyway on the stack with a way to access and - modify from outside the jit code. So the jit knows where they're and have - a way to reconstruct them if necessary. - -A couple of observations, in terms of a python interpreter: - -Usually we pass a virtualizable around everywhere (this is a frame -object) which is stored on a framestack and allocated before each next -call to portal (portal is a bytecode dispatch loop). Almost everything -is stored on top of this virtualizable. There is a valuestack and locals -which usually are most commonly accessed fields. - -A typical loop, for example for adding integers (the best benchmark ever) -will look like this: - -for a code: - - while i < 10000: - i += 1 - -v1 = getfield_gc(frame, "locals") -v2 = getarrayitem_gc(v1, 0) # or some other element -v3 = getfield_gc(frame, "valuestack") -setarrayitem_gc(v3, 0, v2) -setarrayitem_gc(v3, 1, Constant(1)) -v4 = getarrayitem_gc(v3, 0) -v5 = getarrayitem_gc(v3, 1) -i0 = getfield_gc(v4, "intval") -i1 = getfield_gc(v5, "intval") -v3 = new_with_vtable(W_IntObject) -i2 = int_add(i0, i1) -setfield_gc(v3, "intval", i2) -.... store into valuestack, load and store in locals - -clearly, what we really want is: - -i1 = int_add(i0, 1) - -In order to achieve this, we need: - -* Make sure that frame is not used - -* Make sure that things on the frame are virtual, so they don't get - allocated until needed. - -So the real loop will pass around virtualizable and intval of local variable i. -We can achieve that by unpacking W_IntObject read from locals before the loop -and carefully rebuilding this for each guard failure, by a small bit of -assembler code. 
diff --git a/pypy/jit/metainterp/effectinfo.py b/pypy/jit/metainterp/effectinfo.py index 2a2bbb1d57..56424af1d3 100644 --- a/pypy/jit/metainterp/effectinfo.py +++ b/pypy/jit/metainterp/effectinfo.py @@ -2,21 +2,25 @@ from pypy.jit.metainterp.typesystem import deref, fieldType, arrayItem from pypy.rpython.lltypesystem.rclass import OBJECT from pypy.rpython.lltypesystem import lltype from pypy.rpython.ootypesystem import ootype +from pypy.translator.backendopt.graphanalyze import BoolGraphAnalyzer class EffectInfo(object): _cache = {} - def __new__(cls, write_descrs_fields, write_descrs_arrays): - key = frozenset(write_descrs_fields), frozenset(write_descrs_arrays) + def __new__(cls, write_descrs_fields, write_descrs_arrays, + promotes_virtualizables=False): + key = (frozenset(write_descrs_fields), frozenset(write_descrs_arrays), + promotes_virtualizables) if key in cls._cache: return cls._cache[key] result = object.__new__(cls) result.write_descrs_fields = write_descrs_fields result.write_descrs_arrays = write_descrs_arrays + result.promotes_virtualizables = promotes_virtualizables cls._cache[key] = result return result -def effectinfo_from_writeanalyze(effects, cpu): +def effectinfo_from_writeanalyze(effects, cpu, promotes_virtualizables=False): from pypy.translator.backendopt.writeanalyze import top_set if effects is top_set: return None @@ -39,7 +43,8 @@ def effectinfo_from_writeanalyze(effects, cpu): write_descrs_arrays.append(descr) else: assert 0 - return EffectInfo(write_descrs_fields, write_descrs_arrays) + return EffectInfo(write_descrs_fields, write_descrs_arrays, + promotes_virtualizables) def consider_struct(TYPE, fieldname): if fieldType(TYPE, fieldname) is lltype.Void: @@ -55,7 +60,6 @@ def consider_struct(TYPE, fieldname): return False return True - def consider_array(ARRAY): if arrayItem(ARRAY) is lltype.Void: return False @@ -64,3 +68,9 @@ def consider_array(ARRAY): if not isinstance(ARRAY, lltype.GcArray): # can be a non-GC-array return False return True + +# ____________________________________________________________ + +class VirtualizableAnalyzer(BoolGraphAnalyzer): + def analyze_simple_operation(self, op): + return op.opname == 'promote_virtualizable' diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py index 29139b8534..b0b600f35e 100644 --- a/pypy/jit/metainterp/pyjitpl.py +++ b/pypy/jit/metainterp/pyjitpl.py @@ -632,6 +632,7 @@ class MIFrame(object): varargs = [jitcode.cfnptr] + varargs res = self.execute_varargs(rop.CALL, varargs, descr=jitcode.calldescr, exc=True) + self.metainterp.load_fields_from_virtualizable() else: # for oosends (ootype only): calldescr is a MethDescr res = self.execute_varargs(rop.OOSEND, varargs, @@ -651,7 +652,7 @@ class MIFrame(object): @arguments("descr", "varargs") def opimpl_residual_call(self, calldescr, varargs): - return self.execute_varargs(rop.CALL, varargs, descr=calldescr, exc=True) + return self.do_residual_call(varargs, descr=calldescr, exc=True) @arguments("varargs") def opimpl_recursion_leave_prep(self, varargs): @@ -675,11 +676,11 @@ class MIFrame(object): greenkey = varargs[1:num_green_args + 1] if warmrunnerstate.can_inline_callable(greenkey): return self.perform_call(portal_code, varargs[1:], greenkey) - return self.execute_varargs(rop.CALL, varargs, descr=calldescr, exc=True) + return self.do_residual_call(varargs, descr=calldescr, exc=True) @arguments("descr", "varargs") def opimpl_residual_call_noexception(self, calldescr, varargs): - self.execute_varargs(rop.CALL, varargs, descr=calldescr, 
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
index 29139b8534..b0b600f35e 100644
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -632,6 +632,7 @@ class MIFrame(object):
             varargs = [jitcode.cfnptr] + varargs
             res = self.execute_varargs(rop.CALL, varargs,
                                        descr=jitcode.calldescr, exc=True)
+            self.metainterp.load_fields_from_virtualizable()
         else:
             # for oosends (ootype only): calldescr is a MethDescr
             res = self.execute_varargs(rop.OOSEND, varargs,
@@ -651,7 +652,7 @@ class MIFrame(object):

     @arguments("descr", "varargs")
     def opimpl_residual_call(self, calldescr, varargs):
-        return self.execute_varargs(rop.CALL, varargs, descr=calldescr, exc=True)
+        return self.do_residual_call(varargs, descr=calldescr, exc=True)

     @arguments("varargs")
     def opimpl_recursion_leave_prep(self, varargs):
@@ -675,11 +676,11 @@ class MIFrame(object):
             greenkey = varargs[1:num_green_args + 1]
             if warmrunnerstate.can_inline_callable(greenkey):
                 return self.perform_call(portal_code, varargs[1:], greenkey)
-        return self.execute_varargs(rop.CALL, varargs, descr=calldescr, exc=True)
+        return self.do_residual_call(varargs, descr=calldescr, exc=True)

     @arguments("descr", "varargs")
     def opimpl_residual_call_noexception(self, calldescr, varargs):
-        self.execute_varargs(rop.CALL, varargs, descr=calldescr,
-                             exc=False)
+        self.do_residual_call(varargs, descr=calldescr, exc=False)

     @arguments("descr", "varargs")
     def opimpl_residual_call_pure(self, calldescr, varargs):
@@ -696,8 +697,8 @@ class MIFrame(object):
             return self.perform_call(jitcode, varargs)
         else:
             # but we should not follow calls to that graph
-            return self.execute_varargs(rop.CALL, [box] + varargs,
-                                        descr=calldescr, exc=True)
+            return self.do_residual_call([box] + varargs,
+                                          descr=calldescr, exc=True)

     @arguments("orgpc", "methdescr", "varargs")
     def opimpl_oosend(self, pc, methdescr, varargs):
@@ -924,7 +925,6 @@ class MIFrame(object):
         if isinstance(box, Const):    # no need for a guard
             return
         metainterp = self.metainterp
-        metainterp_sd = metainterp.staticdata
        if metainterp.is_blackholing():
             return
         saved_pc = self.pc
@@ -933,8 +933,14 @@ class MIFrame(object):
             moreargs = [box] + extraargs
         else:
             moreargs = list(extraargs)
+        metainterp_sd = metainterp.staticdata
         original_greenkey = metainterp.resumekey.original_greenkey
-        resumedescr = compile.ResumeGuardDescr(metainterp_sd, original_greenkey)
+        if opnum == rop.GUARD_NOT_FORCED:
+            resumedescr = compile.ResumeGuardForcedDescr(metainterp_sd,
+                                                         original_greenkey)
+        else:
+            resumedescr = compile.ResumeGuardDescr(metainterp_sd,
+                                                   original_greenkey)
         guard_op = metainterp.history.record(opnum, moreargs, None,
                                              descr=resumedescr)
         virtualizable_boxes = None
@@ -980,6 +986,24 @@ class MIFrame(object):
             return self.metainterp.handle_exception()
         return False

+    def do_residual_call(self, argboxes, descr, exc):
+        effectinfo = descr.get_extra_info()
+        if effectinfo is None or effectinfo.promotes_virtualizables:
+            # residual calls require attention to keep virtualizables in-sync
+            self.metainterp.vable_before_residual_call()
+            # xxx do something about code duplication
+            resbox = self.metainterp.execute_and_record_varargs(
+                rop.CALL_MAY_FORCE, argboxes, descr=descr)
+            self.metainterp.vable_after_residual_call()
+            if resbox is not None:
+                self.make_result_box(resbox)
+            self.generate_guard(self.pc, rop.GUARD_NOT_FORCED, None, [])
+            if exc:
+                return self.metainterp.handle_exception()
+            return False
+        else:
+            return self.execute_varargs(rop.CALL, argboxes, descr, exc)
+
 # ____________________________________________________________

 class MetaInterpStaticData(object):
@@ -1142,6 +1166,7 @@ class MetaInterpGlobalData(object):
         self.indirectcall_dict = None
         self.addr2name = None
         self.loopnumbering = 0
+        self.resume_virtuals = {}
         #
         state = staticdata.state
         if state is not None:
@@ -1168,6 +1193,8 @@ class MetaInterpGlobalData(object):
 class MetaInterp(object):
     in_recursion = 0

+    _already_allocated_resume_virtuals = None
+
     def __init__(self, staticdata):
         self.staticdata = staticdata
         self.cpu = staticdata.cpu
@@ -1298,7 +1325,8 @@ class MetaInterp(object):
     @specialize.arg(1)
     def execute_and_record(self, opnum, descr, *argboxes):
         history.check_descr(descr)
-        assert opnum != rop.CALL and opnum != rop.OOSEND
+        assert (opnum != rop.CALL and opnum != rop.CALL_MAY_FORCE
+                and opnum != rop.OOSEND)
         # execute the operation
         profiler = self.staticdata.profiler
         profiler.count_ops(opnum)
@@ -1315,12 +1343,6 @@ class MetaInterp(object):
     @specialize.arg(1)
     def execute_and_record_varargs(self, opnum, argboxes, descr=None):
         history.check_descr(descr)
-        # residual calls require attention to keep virtualizables in-sync.
-        # CALL_PURE doesn't need it because so far 'promote_virtualizable'
-        # as an operation is enough to make the called function non-pure.
-        require_attention = (opnum == rop.CALL or opnum == rop.OOSEND)
-        if require_attention and not self.is_blackholing():
-            self.before_residual_call()
         # execute the operation
         profiler = self.staticdata.profiler
         profiler.count_ops(opnum)
@@ -1328,17 +1350,12 @@ class MetaInterp(object):
         if self.is_blackholing():
             profiler.count_ops(opnum, BLACKHOLED_OPS)
         else:
-            if require_attention:
-                require_attention = self.after_residual_call()
             # check if the operation can be constant-folded away
             argboxes = list(argboxes)
             if rop._ALWAYS_PURE_FIRST <= opnum <= rop._ALWAYS_PURE_LAST:
                 resbox = self._record_helper_pure_varargs(opnum, resbox, descr, argboxes)
             else:
                 resbox = self._record_helper_nonpure_varargs(opnum, resbox, descr, argboxes)
-        # if we are blackholing require_attention has the initial meaning
-        if require_attention:
-            self.after_generate_residual_call()
         return resbox

     def _record_helper_pure(self, opnum, resbox, descr, *argboxes):
@@ -1572,7 +1589,8 @@ class MetaInterp(object):
             self.framestack[-1].follow_jump()
         elif opnum == rop.GUARD_FALSE:     # a goto_if_not that stops jumping
             self.framestack[-1].dont_follow_jump()
-        elif opnum == rop.GUARD_NO_EXCEPTION or opnum == rop.GUARD_EXCEPTION:
+        elif (opnum == rop.GUARD_NO_EXCEPTION or opnum == rop.GUARD_EXCEPTION
+              or opnum == rop.GUARD_NOT_FORCED):
             self.handle_exception()
         elif opnum == rop.GUARD_NO_OVERFLOW:   # an overflow now detected
             self.raise_overflow_error()
@@ -1722,36 +1740,39 @@ class MetaInterp(object):
         vinfo = self.staticdata.virtualizable_info
         virtualizable_box = self.virtualizable_boxes[-1]
         virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-        vinfo.clear_vable_rti(virtualizable)
+        vinfo.clear_vable_token(virtualizable)

-    def before_residual_call(self):
+    def vable_before_residual_call(self):
+        if self.is_blackholing():
+            return
         vinfo = self.staticdata.virtualizable_info
         if vinfo is not None:
             virtualizable_box = self.virtualizable_boxes[-1]
             virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
             vinfo.tracing_before_residual_call(virtualizable)
-
-    def after_residual_call(self):
-        vinfo = self.staticdata.virtualizable_info
-        if vinfo is not None:
-            virtualizable_box = self.virtualizable_boxes[-1]
-            virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-            if vinfo.tracing_after_residual_call(virtualizable):
-                # This is after the residual call is done, but before it
-                # is actually generated.  We first generate a store-
-                # everything-back, *without actually performing it now*
-                # as it contains the old values (before the call)!
-                self.gen_store_back_in_virtualizable_no_perform()
-                return True    # must call after_generate_residual_call()
-        # otherwise, don't call after_generate_residual_call()
-        return False
-
-    def after_generate_residual_call(self):
-        # Called after generating a residual call, and only if
-        # after_residual_call() returned True, i.e. if code in the residual
-        # call causes the virtualizable to escape.  Reload the modified
-        # fields of the virtualizable.
-        self.gen_load_fields_from_virtualizable()
+            #
+            force_token_box = history.BoxInt()
+            self.history.record(rop.FORCE_TOKEN, [], force_token_box)
+            self.history.record(rop.SETFIELD_GC, [virtualizable_box,
+                                                  force_token_box],
+                                None, descr=vinfo.vable_token_descr)
+
+    def vable_after_residual_call(self):
+        if self.is_blackholing():
+            vable_escapes = True
+        else:
+            vable_escapes = False
+            vinfo = self.staticdata.virtualizable_info
+            if vinfo is not None:
+                virtualizable_box = self.virtualizable_boxes[-1]
+                virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
+                if vinfo.tracing_after_residual_call(virtualizable):
+                    # We just did the residual call, and it shows that the
+                    # virtualizable escapes.
+                    self.switch_to_blackhole()
+                    vable_escapes = True
+        if vable_escapes:
+            self.load_fields_from_virtualizable()

     def handle_exception(self):
         etype = self.cpu.get_exception()
@@ -1793,7 +1814,7 @@ class MetaInterp(object):
             # is and stays NULL.
             virtualizable_box = self.virtualizable_boxes[-1]
             virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
-            assert not virtualizable.vable_rti
+            assert not virtualizable.vable_token
         self.synchronize_virtualizable()

     def check_synchronized_virtualizable(self):
@@ -1809,27 +1830,17 @@ class MetaInterp(object):
         virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
         vinfo.write_boxes(virtualizable, self.virtualizable_boxes)

-    def gen_load_fields_from_virtualizable(self):
+    def load_fields_from_virtualizable(self):
+        # Force a reload of the virtualizable fields into the local
+        # boxes (called only in escaping cases)
+        assert self.is_blackholing()
         vinfo = self.staticdata.virtualizable_info
         if vinfo is not None:
-            vbox = self.virtualizable_boxes[-1]
-            for i in range(vinfo.num_static_extra_boxes):
-                descr = vinfo.static_field_descrs[i]
-                fieldbox = self.execute_and_record(rop.GETFIELD_GC, descr,
-                                                   vbox)
-                self.virtualizable_boxes[i] = fieldbox
-            i = vinfo.num_static_extra_boxes
-            virtualizable = vinfo.unwrap_virtualizable_box(vbox)
-            for k in range(vinfo.num_arrays):
-                descr = vinfo.array_field_descrs[k]
-                abox = self.execute_and_record(rop.GETFIELD_GC, descr, vbox)
-                descr = vinfo.array_descrs[k]
-                for j in range(vinfo.get_array_length(virtualizable, k)):
-                    itembox = self.execute_and_record(rop.GETARRAYITEM_GC,
-                                                      descr, abox, ConstInt(j))
-                    self.virtualizable_boxes[i] = itembox
-                    i += 1
-            assert i + 1 == len(self.virtualizable_boxes)
+            virtualizable_box = self.virtualizable_boxes[-1]
+            virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box)
+            self.virtualizable_boxes = vinfo.read_boxes(self.cpu,
+                                                        virtualizable)
+            self.virtualizable_boxes.append(virtualizable_box)

     def gen_store_back_in_virtualizable(self):
         vinfo = self.staticdata.virtualizable_info
@@ -1853,29 +1864,6 @@ class MetaInterp(object):
                                             abox, ConstInt(j), itembox)
         assert i + 1 == len(self.virtualizable_boxes)

-    def gen_store_back_in_virtualizable_no_perform(self):
-        vinfo = self.staticdata.virtualizable_info
-        # xxx only write back the fields really modified
-        vbox = self.virtualizable_boxes[-1]
-        for i in range(vinfo.num_static_extra_boxes):
-            fieldbox = self.virtualizable_boxes[i]
-            self.history.record(rop.SETFIELD_GC, [vbox, fieldbox], None,
-                                descr=vinfo.static_field_descrs[i])
-        i = vinfo.num_static_extra_boxes
-        virtualizable = vinfo.unwrap_virtualizable_box(vbox)
-        for k in range(vinfo.num_arrays):
-            abox = vinfo.BoxArray()
-            self.history.record(rop.GETFIELD_GC, [vbox], abox,
-                                descr=vinfo.array_field_descrs[k])
-            for j in range(vinfo.get_array_length(virtualizable, k)):
-                itembox = self.virtualizable_boxes[i]
-                i += 1
-                self.history.record(rop.SETARRAYITEM_GC,
-                                    [abox, ConstInt(j), itembox],
-                                    None,
-                                    descr=vinfo.array_descrs[k])
-        assert i + 1 == len(self.virtualizable_boxes)
-
     def replace_box(self, oldbox, newbox):
         for frame in self.framestack:
             boxes = frame.env
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
index 6976633551..332e0fcc7d 100644
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -125,6 +125,7 @@ _oplist = [
     'GUARD_EXCEPTION',
     'GUARD_NO_OVERFLOW',
     'GUARD_OVERFLOW',
+    'GUARD_NOT_FORCED',
     '_GUARD_LAST', # ----- end of guard operations -----

     '_NOSIDEEFFECT_FIRST', # ----- start of no_side_effect operations -----
@@ -220,9 +221,11 @@ _oplist = [
     'COND_CALL_GC_MALLOC',  # [a, b, if_(a<=b)_result, if_(a>b)_call, args...]
                             #        => result      (for mallocs)
     'DEBUG_MERGE_POINT/1',      # debugging only
+    'FORCE_TOKEN/0',

     '_CANRAISE_FIRST', # ----- start of can_raise operations -----
     'CALL',
+    'CALL_MAY_FORCE',
     'OOSEND',                     # ootype operation
     '_CANRAISE_LAST', # ----- end of can_raise operations -----
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
index 140806f542..d74b476a8d 100644
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -455,6 +455,10 @@ def rebuild_from_resumedata(metainterp, newboxes, storage, expects_virtualizable
     metainterp.framestack.reverse()
     return virtualizable_boxes

+def force_from_resumedata(metainterp, newboxes, storage):
+    resumereader = ResumeDataReader(storage, newboxes, metainterp)
+    return resumereader.consume_boxes(), resumereader.virtuals
+
 class ResumeDataReader(object):
     virtuals = None

@@ -468,6 +472,10 @@ class ResumeDataReader(object):
     def _prepare_virtuals(self, metainterp, virtuals):
         if virtuals:
+            v = metainterp._already_allocated_resume_virtuals
+            if v is not None:
+                self.virtuals = v
+                return
             self.virtuals = [None] * len(virtuals)
             for i in range(len(virtuals)):
                 vinfo = virtuals[i]
@@ -476,7 +484,8 @@ class ResumeDataReader(object):
             for i in range(len(virtuals)):
                 vinfo = virtuals[i]
                 if vinfo is not None:
-                    vinfo.setfields(metainterp, self.virtuals[i], self._decode_box)
+                    vinfo.setfields(metainterp, self.virtuals[i],
+                                    self._decode_box)

     def consume_boxes(self):
         numb = self.cur_numb
diff --git a/pypy/jit/metainterp/test/test_basic.py b/pypy/jit/metainterp/test/test_basic.py
index dfdb83b2da..b36f385150 100644
--- a/pypy/jit/metainterp/test/test_basic.py
+++ b/pypy/jit/metainterp/test/test_basic.py
@@ -50,8 +50,8 @@ class JitMixin:
         assert get_stats().enter_count <= count
     def check_jumps(self, maxcount):
         assert get_stats().exec_jumps <= maxcount
-    def check_aborted_count(self, maxcount):
-        assert get_stats().aborted_count == maxcount
+    def check_aborted_count(self, count):
+        assert get_stats().aborted_count == count

     def meta_interp(self, *args, **kwds):
         kwds['CPUClass'] = self.CPUClass
@@ -148,6 +148,9 @@ class OOJitMixin(JitMixin):
     type_system = 'ootype'
     CPUClass = runner.OOtypeCPU

+    def setup_class(cls):
+        py.test.skip("ootype tests skipped for now")
+
     @staticmethod
     def Ptr(T):
         return T
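The pyjitpl and resoperation changes above define a small recording protocol for residual calls: when the callee's effect info is unknown or the callee promotes a virtualizable, the tracer emits FORCE_TOKEN, a SETFIELD_GC into vable_token, CALL_MAY_FORCE, and GUARD_NOT_FORCED instead of a plain CALL. A condensed, runnable sketch of the branching in `do_residual_call` — `FakeTrace` and the string opnames are stand-ins for the metainterp's history machinery:

```python
class FakeTrace(object):
    """Stand-in for the metainterp's history; records opnames in order."""
    def __init__(self):
        self.ops = []
    def record(self, opname, argboxes, descr=None):
        self.ops.append(opname)
        return 'box-%d' % len(self.ops)

def record_residual_call(trace, effectinfo, argboxes, calldescr):
    # Mirrors the branching in do_residual_call(): an unknown callee
    # (effectinfo is None) or one that promotes a virtualizable gets
    # the forcing-aware sequence; a known-harmless callee gets CALL.
    if effectinfo is None or effectinfo.promotes_virtualizables:
        token = trace.record('FORCE_TOKEN', [])
        trace.record('SETFIELD_GC', [token])   # store into vable_token
        res = trace.record('CALL_MAY_FORCE', argboxes, descr=calldescr)
        trace.record('GUARD_NOT_FORCED', [])
        return res
    return trace.record('CALL', argboxes, descr=calldescr)

trace = FakeTrace()
record_residual_call(trace, None, ['arg'], 'calldescr-g')
assert trace.ops == ['FORCE_TOKEN', 'SETFIELD_GC', 'CALL_MAY_FORCE',
                     'GUARD_NOT_FORCED']
```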
diff --git a/pypy/jit/metainterp/test/test_codewriter.py b/pypy/jit/metainterp/test/test_codewriter.py
index 7c50b3d40a..04969c95e9 100644
--- a/pypy/jit/metainterp/test/test_codewriter.py
+++ b/pypy/jit/metainterp/test/test_codewriter.py
@@ -121,8 +121,8 @@ class TestCodeWriter:
             supports_floats = False
             def fielddescrof(self, STRUCT, fieldname):
                 return ('fielddescr', STRUCT, fieldname)
-            def calldescrof(self, FUNC, NON_VOID_ARGS, RESULT, stuff=None):
-                return ('calldescr', FUNC, NON_VOID_ARGS, RESULT)
+            def calldescrof(self, FUNC, NON_VOID_ARGS, RESULT, effectinfo=None):
+                return ('calldescr', FUNC, NON_VOID_ARGS, RESULT, effectinfo)
             def typedescrof(self, CLASS):
                 return ('typedescr', CLASS)
             def methdescrof(self, CLASS, methname):
@@ -273,9 +273,17 @@ class TestCodeWriter:
         cw._start(self.metainterp_sd, None)
         jitcode = cw.make_one_bytecode((graphs[0], None), False)
         assert len(self.metainterp_sd.indirectcalls) == 1
-        names = [jitcode.name for (fnaddress, jitcode)
+        names = [jitcode1.name for (fnaddress, jitcode1)
                  in self.metainterp_sd.indirectcalls]
         assert dict.fromkeys(names) == {'g': None}
+        calldescrs = [calldescr for calldescr in jitcode.constants
+                      if isinstance(calldescr, tuple) and
+                         calldescr[0] == 'calldescr']
+        assert len(calldescrs) == 1
+        assert calldescrs[0][4] is not None
+        assert not calldescrs[0][4].write_descrs_fields
+        assert not calldescrs[0][4].write_descrs_arrays
+        assert not calldescrs[0][4].promotes_virtualizables

     def test_oosend_look_inside_only_one(self):
         class A:
@@ -386,6 +394,47 @@ class TestCodeWriter:
         assert cw.list_of_addr2name[0][1].endswith('.A1')
         assert cw.list_of_addr2name[1][1] == 'A1.g'

+    def test_promote_virtualizable_effectinfo(self):
+        class Frame(object):
+            _virtualizable2_ = ['x']
+
+            def __init__(self, x, y):
+                self.x = x
+                self.y = y
+
+        def g1(f):
+            f.x += 1
+
+        def g2(f):
+            return f.x
+
+        def h(f):
+            f.y -= 1
+
+        def f(n):
+            f_inst = Frame(n+1, n+2)
+            g1(f_inst)
+            r = g2(f_inst)
+            h(f_inst)
+            return r
+
+        graphs = self.make_graphs(f, [5])
+        cw = CodeWriter(self.rtyper)
+        cw.candidate_graphs = [graphs[0]]
+        cw._start(self.metainterp_sd, None)
+        jitcode = cw.make_one_bytecode((graphs[0], None), False)
+        calldescrs = [calldescr for calldescr in jitcode.constants
+                      if isinstance(calldescr, tuple) and
+                         calldescr[0] == 'calldescr']
+        assert len(calldescrs) == 4    # for __init__, g1, g2, h.
+        effectinfo_g1 = calldescrs[1][4]
+        effectinfo_g2 = calldescrs[2][4]
+        effectinfo_h = calldescrs[3][4]
+        assert effectinfo_g1.promotes_virtualizables
+        assert effectinfo_g2.promotes_virtualizables
+        assert not effectinfo_h.promotes_virtualizables
+
+
 class ImmutableFieldsTests:

     def test_fields(self):
diff --git a/pypy/jit/metainterp/test/test_recursive.py b/pypy/jit/metainterp/test/test_recursive.py
index 7de8a0ccf6..a79d11cd42 100644
--- a/pypy/jit/metainterp/test/test_recursive.py
+++ b/pypy/jit/metainterp/test/test_recursive.py
@@ -144,10 +144,11 @@ class RecursiveTests:
         f = self.get_interpreter(codes)

         assert self.meta_interp(f, [0, 0, 0], optimizer=OPTIMIZER_SIMPLE) == 42
-        self.check_loops(int_add = 1, call = 1)
+        self.check_loops(int_add = 1, call_may_force = 1, call = 0)
         assert self.meta_interp(f, [0, 0, 0], optimizer=OPTIMIZER_SIMPLE,
                                 inline=True) == 42
-        self.check_loops(int_add = 2, call = 0, guard_no_exception = 0)
+        self.check_loops(int_add = 2, call_may_force = 0, call = 0,
+                         guard_no_exception = 0)

     def test_inline_jitdriver_check(self):
         code = "021"
@@ -158,7 +159,7 @@ class RecursiveTests:

         assert self.meta_interp(f, [0, 0, 0], optimizer=OPTIMIZER_SIMPLE,
                                 inline=True) == 42
-        self.check_loops(call = 1)
+        self.check_loops(call_may_force = 1, call = 0)

     def test_inline_faulty_can_inline(self):
         code = "021"
@@ -488,10 +489,10 @@ class RecursiveTests:
             return loop(100)
         res = self.meta_interp(main, [0], optimizer=OPTIMIZER_SIMPLE,
                                trace_limit=TRACE_LIMIT)
-        self.check_loops(call=1)
+        self.check_loops(call_may_force=1, call=0)
         res = self.meta_interp(main, [1], optimizer=OPTIMIZER_SIMPLE,
                                trace_limit=TRACE_LIMIT)
-        self.check_loops(call=0)
+        self.check_loops(call_may_force=0, call=0)

     def test_leave_jit_hook(self):
         from pypy.rpython.annlowlevel import hlstr
@@ -645,7 +646,7 @@ class RecursiveTests:
                 result += f('-c-----------l-', i+100)
         self.meta_interp(g, [10], backendopt=True)
         self.check_aborted_count(1)
-        self.check_history(call=1)
+        self.check_history(call_may_force=1, call=0)
         self.check_tree_loop_count(3)
diff --git a/pypy/jit/metainterp/test/test_resume.py b/pypy/jit/metainterp/test/test_resume.py
index 268e715b16..a9d4fc77ce 100644
--- a/pypy/jit/metainterp/test/test_resume.py
+++ b/pypy/jit/metainterp/test/test_resume.py
@@ -41,6 +41,8 @@ def test_tagged_list_eq():
     assert not tagged_list_eq([tag(1, TAGBOX), tag(-2, TAGBOX)], [tag(1, TAGBOX)])

 class MyMetaInterp:
+    _already_allocated_resume_virtuals = None
+
     def __init__(self, cpu=None):
         if cpu is None:
             cpu = LLtypeMixin.cpu
@@ -124,6 +126,7 @@ def test_prepare_virtuals():
         rd_numb = []
         rd_consts = []
     class FakeMetainterp(object):
+        _already_allocated_resume_virtuals = None
         cpu = None
     reader = ResumeDataReader(FakeStorage(), [], FakeMetainterp())
     assert reader.virtuals == ["allocated", None]
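All the virtualizable tests touched by this patch rely on the same RPython idiom, shown in isolation below. `JitDriver` and `_virtualizable2_` are the real PyPy APIs used throughout these files, but the `Frame` class and loop here are a made-up minimal example; it needs the PyPy source tree on the path to run:

```python
from pypy.rlib.jit import JitDriver

class Frame(object):
    _virtualizable2_ = ['x', 'y']     # fields the JIT may keep virtual

    def __init__(self, x):
        self.x = x
        self.y = 0

driver = JitDriver(greens=[], reds=['frame'], virtualizables=['frame'])

def interp_loop(n):
    frame = Frame(n)
    while frame.x > 0:
        driver.can_enter_jit(frame=frame)
        driver.jit_merge_point(frame=frame)
        frame.y += frame.x    # reads/writes stay virtual inside the loop
        frame.x -= 1
    return frame.y
```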
diff --git a/pypy/jit/metainterp/test/test_virtualizable.py b/pypy/jit/metainterp/test/test_virtualizable.py
index 8db1ba795b..09cd334bce 100644
--- a/pypy/jit/metainterp/test/test_virtualizable.py
+++ b/pypy/jit/metainterp/test/test_virtualizable.py
@@ -7,7 +7,6 @@ from pypy.rlib.jit import JitDriver, hint, dont_look_inside
 from pypy.rlib.jit import OPTIMIZER_SIMPLE, OPTIMIZER_FULL
 from pypy.rlib.rarithmetic import intmask
 from pypy.jit.metainterp.test.test_basic import LLJitMixin, OOJitMixin
-from pypy.rpython.lltypesystem.rvirtualizable2 import VABLERTIPTR
 from pypy.rpython.rclass import FieldListAccessor
 from pypy.jit.metainterp.warmspot import get_stats, get_translator
 from pypy.jit.metainterp import history, heaptracker
@@ -41,8 +40,7 @@ class ExplicitVirtualizableTests:

     XY = lltype.GcStruct(
         'XY',
         ('parent', rclass.OBJECT),
-        ('vable_base', llmemory.Address),
-        ('vable_rti', VABLERTIPTR),
+        ('vable_token', lltype.Signed),
         ('inst_x', lltype.Signed),
         ('inst_node', lltype.Ptr(LLtypeMixin.NODE)),
         hints = {'virtualizable2_accessor': FieldListAccessor()})
@@ -57,7 +55,7 @@ class ExplicitVirtualizableTests:

     def setup(self):
         xy = lltype.malloc(self.XY)
-        xy.vable_rti = lltype.nullptr(VABLERTIPTR.TO)
+        xy.vable_token = 0
         xy.parent.typeptr = self.xy_vtable
         return xy
@@ -200,79 +198,12 @@ class ExplicitVirtualizableTests:
         assert res == 134
         self.check_loops(getfield_gc=1, setfield_gc=1)

-    def test_external_read_while_tracing(self):
-        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'xy'],
-                                virtualizables = ['xy'])
-        class Outer:
-            pass
-        outer = Outer()
-        def ext():
-            xy = outer.xy
-            promote_virtualizable(xy, 'inst_x')
-            return xy.inst_x + 2
-        def f(n):
-            xy = self.setup()
-            xy.inst_x = 10
-            outer.xy = xy
-            m = 0
-            while n > 0:
-                myjitdriver.can_enter_jit(xy=xy, n=n, m=m)
-                myjitdriver.jit_merge_point(xy=xy, n=n, m=m)
-                promote_virtualizable(xy, 'inst_x')
-                xy.inst_x = n + 9998     # virtualized away
-                m += ext()               # 2x setfield_gc, 2x getfield_gc
-                promote_virtualizable(xy, 'inst_x')
-                xy.inst_x = 10           # virtualized away
-                n -= 1
-            return m
-        assert f(20) == 10000*20 + (20*21)/2
-        res = self.meta_interp(f, [20], policy=StopAtXPolicy(ext))
-        assert res == 10000*20 + (20*21)/2
-        # there are no getfields because the optimizer gets rid of them
-        self.check_loops(call=1, getfield_gc=0, setfield_gc=2)
-        # xxx for now a call that forces the virtualizable during tracing
-        # is supposed to always force it later too.
-
-    def test_external_write_while_tracing(self):
-        myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'xy'],
-                                virtualizables = ['xy'])
-        class Outer:
-            pass
-        outer = Outer()
-        def ext():
-            xy = outer.xy
-            promote_virtualizable(xy, 'inst_x')
-            xy.inst_x += 2
-        def f(n):
-            xy = self.setup()
-            xy.inst_x = 10
-            outer.xy = xy
-            m = 0
-            while n > 0:
-                myjitdriver.can_enter_jit(xy=xy, n=n, m=m)
-                myjitdriver.jit_merge_point(xy=xy, n=n, m=m)
-                promote_virtualizable(xy, 'inst_x')
-                xy.inst_x = n + 9998     # virtualized away
-                ext()                    # 2x setfield_gc, 2x getfield_gc
-                promote_virtualizable(xy, 'inst_x')
-                m += xy.inst_x           # virtualized away
-                n -= 1
-            return m
-        res = self.meta_interp(f, [20], policy=StopAtXPolicy(ext))
-        assert res == f(20)
-        # the getfield_gc of inst_node is optimized away, because ext does not
-        # write to it
-        self.check_loops(call=1, getfield_gc=1, setfield_gc=2)
-        # xxx for now a call that forces the virtualizable during tracing
-        # is supposed to always force it later too.
-
     # ------------------------------

     XY2 = lltype.GcStruct(
         'XY2',
         ('parent', rclass.OBJECT),
-        ('vable_base', llmemory.Address),
-        ('vable_rti', VABLERTIPTR),
+        ('vable_token', lltype.Signed),
         ('inst_x', lltype.Signed),
         ('inst_l1', lltype.Ptr(lltype.GcArray(lltype.Signed))),
         ('inst_l2', lltype.Ptr(lltype.GcArray(lltype.Signed))),
@@ -285,7 +216,7 @@ class ExplicitVirtualizableTests:

     def setup2(self):
         xy2 = lltype.malloc(self.XY2)
-        xy2.vable_rti = lltype.nullptr(VABLERTIPTR.TO)
+        xy2.vable_token = 0
         xy2.parent.typeptr = self.xy2_vtable
         return xy2
@@ -458,7 +389,7 @@ class ExplicitVirtualizableTests:

     def setup2sub(self):
         xy2 = lltype.malloc(self.XY2SUB)
-        xy2.parent.vable_rti = lltype.nullptr(VABLERTIPTR.TO)
+        xy2.parent.vable_token = 0
         xy2.parent.parent.typeptr = self.xy2_vtable
         return xy2
@@ -649,7 +580,8 @@ class ImplicitVirtualizableTests:

         res = self.meta_interp(f, [123], policy=StopAtXPolicy(g))
         assert res == f(123)
-
+        self.check_aborted_count(2)
+        self.check_tree_loop_count(0)

     def test_external_write(self):
         jitdriver = JitDriver(greens = [], reds = ['frame'],
@@ -680,10 +612,10 @@ class ImplicitVirtualizableTests:

         res = self.meta_interp(f, [240], policy=StopAtXPolicy(g))
         assert res == f(240)
+        self.check_aborted_count(3)
+        self.check_tree_loop_count(0)

     def test_external_read_sometimes(self):
-        py.test.skip("known bug: access the frame in a residual call but"
-                     " only sometimes, so that it's not seen during tracing")
         jitdriver = JitDriver(greens = [], reds = ['frame'],
                               virtualizables = ['frame'])
@@ -719,6 +651,226 @@ class ImplicitVirtualizableTests:

         res = self.meta_interp(f, [123], policy=StopAtXPolicy(g))
         assert res == f(123)

+    def test_external_read_sometimes_with_virtuals(self):
+        jitdriver = JitDriver(greens = [], reds = ['frame'],
+                              virtualizables = ['frame'])
+
+        class Frame(object):
+            _virtualizable2_ = ['x', 'y']
+        class Y:
+            pass
+        class SomewhereElse:
+            pass
+        somewhere_else = SomewhereElse()
+
+        def g():
+            somewhere_else.counter += 1
+            if somewhere_else.counter == 70:
+                y = somewhere_else.top_frame.y     # external read
+                debug_print(lltype.Void, '-+-+-+-+- external read')
+            else:
+                y = None
+            return y
+
+        def f(n):
+            frame = Frame()
+            frame.x = n
+            somewhere_else.counter = 0
+            somewhere_else.top_frame = frame
+            while frame.x > 0:
+                jitdriver.can_enter_jit(frame=frame)
+                jitdriver.jit_merge_point(frame=frame)
+                frame.y = y = Y()
+                result = g()
+                if frame.y is not y:
+                    return -660
+                if result:
+                    if result is not y:
+                        return -661
+                frame.y = None
+                frame.x -= 1
+            return frame.x
+
+        res = self.meta_interp(f, [123], policy=StopAtXPolicy(g))
+        assert res == f(123)
+
+    def test_external_read_sometimes_changing_virtuals(self):
+        jitdriver = JitDriver(greens = [], reds = ['frame'],
+                              virtualizables = ['frame'])
+
+        class Frame(object):
+            _virtualizable2_ = ['x', 'y']
+        class Y:
+            pass
+        class SomewhereElse:
+            pass
+        somewhere_else = SomewhereElse()
+
+        def g():
+            somewhere_else.counter += 1
+            if somewhere_else.counter == 70:
+                y = somewhere_else.top_frame.y     # external read
+                debug_print(lltype.Void, '-+-+-+-+- external virtual write')
+                assert y.num == 123
+                y.num += 2
+            else:
+                y = None
+            return y
+
+        def f(n):
+            frame = Frame()
+            frame.x = n
+            somewhere_else.counter = 0
+            somewhere_else.top_frame = frame
+            while frame.x > 0:
+                jitdriver.can_enter_jit(frame=frame)
+                jitdriver.jit_merge_point(frame=frame)
+                frame.y = y = Y()
+                y.num = 123
+                result = g()
+                if frame.y is not y:
+                    return -660
+                if result:
+                    if result is not y:
+                        return -661
+                    if y.num != 125:
+                        return -662
+                frame.y = None
+                frame.x -= 1
+            return frame.x
+
+        res = self.meta_interp(f, [123], policy=StopAtXPolicy(g))
+        assert res == f(123)
+
+    def test_external_read_sometimes_with_exception(self):
+        jitdriver = JitDriver(greens = [], reds = ['frame'],
+                              virtualizables = ['frame'])
+
+        class Frame(object):
+            _virtualizable2_ = ['x', 'y']
+        class FooBarError(Exception):
+            pass
+        class SomewhereElse:
+            pass
+        somewhere_else = SomewhereElse()
+
+        def g():
+            somewhere_else.counter += 1
+            if somewhere_else.counter == 70:
+                result = somewhere_else.top_frame.y     # external read
+                debug_print(lltype.Void, '-+-+-+-+- external read:', result)
+                assert result == 79
+                raise FooBarError
+            else:
+                result = 1
+            return result
+
+        def f(n):
+            frame = Frame()
+            frame.x = n
+            frame.y = 10
+            somewhere_else.counter = 0
+            somewhere_else.top_frame = frame
+            try:
+                while frame.x > 0:
+                    jitdriver.can_enter_jit(frame=frame)
+                    jitdriver.jit_merge_point(frame=frame)
+                    frame.x -= g()
+                    frame.y += 1
+            except FooBarError:
+                pass
+            return frame.x
+
+        res = self.meta_interp(f, [123], policy=StopAtXPolicy(g))
+        assert res == f(123)
+
+    def test_external_read_sometimes_dont_compile_guard(self):
+        jitdriver = JitDriver(greens = [], reds = ['frame'],
+                              virtualizables = ['frame'])
+
+        class Frame(object):
+            _virtualizable2_ = ['x', 'y']
+        class SomewhereElse:
+            pass
+        somewhere_else = SomewhereElse()
+
+        def g():
+            somewhere_else.counter += 1
+            if somewhere_else.counter == 70:
+                result = somewhere_else.top_frame.y     # external read
+                debug_print(lltype.Void, '-+-+-+-+- external read:', result)
+                assert result == 79
+            else:
+                result = 1
+            return result
+
+        def f(n):
+            frame = Frame()
+            frame.x = n
+            frame.y = 10
+            somewhere_else.counter = 0
+            somewhere_else.top_frame = frame
+            while frame.x > 0:
+                jitdriver.can_enter_jit(frame=frame)
+                jitdriver.jit_merge_point(frame=frame)
+                frame.x -= g()
+                frame.y += 1
+            return frame.x
+
+        res = self.meta_interp(f, [123], policy=StopAtXPolicy(g), repeat=7)
+        assert res == f(123)
+
+    def test_external_read_sometimes_recursive(self):
+        jitdriver = JitDriver(greens = [], reds = ['frame', 'rec'],
+                              virtualizables = ['frame'])
+
+        class Frame(object):
+            _virtualizable2_ = ['x', 'y']
+        class SomewhereElse:
+            pass
+        somewhere_else = SomewhereElse()
+
+        def g(rec):
+            somewhere_else.counter += 1
+            if somewhere_else.counter == 70:
+                frame = somewhere_else.top_frame
+                result1 = frame.y          # external read
+                result2 = frame.back.y     # external read
+                debug_print(lltype.Void, '-+-+-+-+- external read:',
+                            result1, result2)
+                assert result1 == 13
+                assert result2 == 1023
+                result = 2
+            elif rec:
+                res = f(4, False)
+                assert res == 0 or res == -1
+                result = 1
+            else:
+                result = 1
+            return result
+
+        def f(n, rec):
+            frame = Frame()
+            frame.x = n
+            frame.y = 10 + 1000 * rec
+            frame.back = somewhere_else.top_frame
+            somewhere_else.top_frame = frame
+            while frame.x > 0:
+                jitdriver.can_enter_jit(frame=frame, rec=rec)
+                jitdriver.jit_merge_point(frame=frame, rec=rec)
+                frame.x -= g(rec)
+                frame.y += 1
+            somewhere_else.top_frame = frame.back
+            return frame.x
+
+        def main(n):
+            somewhere_else.counter = 0
+            somewhere_else.top_frame = None
+            return f(n, True)
+
+        res = self.meta_interp(main, [123], policy=StopAtXPolicy(g))
+        assert res == main(123)
+
     def test_promote_index_in_virtualizable_list(self):
         jitdriver = JitDriver(greens = [], reds = ['frame', 'n'],
                               virtualizables = ['frame'])
@@ -829,26 +981,26 @@ class ImplicitVirtualizableTests:
         assert res == 55
         self.check_loops(new_with_vtable=0)

-    def test_check_for_nonstandardness_only_once(self):
-        myjitdriver = JitDriver(greens = [], reds = ['frame'],
-                                virtualizables = ['frame'])
-
-        class Frame(object):
-            _virtualizable2_ = ['x', 'y', 'z']
-
-            def __init__(self, x, y, z=1):
-                self = hint(self, access_directly=True)
-                self.x = x
-                self.y = y
-                self.z = z
-
-        class SomewhereElse:
-            pass
-        somewhere_else = SomewhereElse()
-
-        def f(n):
-            frame = Frame(n, 0)
-            somewhere_else.top_frame = frame # escapes
+    def test_check_for_nonstandardness_only_once(self):
+        myjitdriver = JitDriver(greens = [], reds = ['frame'],
+                                virtualizables = ['frame'])
+
+        class Frame(object):
+            _virtualizable2_ = ['x', 'y', 'z']
+
+            def __init__(self, x, y, z=1):
+                self = hint(self, access_directly=True)
+                self.x = x
+                self.y = y
+                self.z = z
+
+        class SomewhereElse:
+            pass
+        somewhere_else = SomewhereElse()
+
+        def f(n):
+            frame = Frame(n, 0)
+            somewhere_else.top_frame = frame # escapes
             frame = hint(frame, access_directly=True)
             while frame.x > 0:
                 myjitdriver.can_enter_jit(frame=frame)
diff --git a/pypy/jit/metainterp/typesystem.py b/pypy/jit/metainterp/typesystem.py
index 4db034ad01..8ac1b14606 100644
--- a/pypy/jit/metainterp/typesystem.py
+++ b/pypy/jit/metainterp/typesystem.py
@@ -49,10 +49,6 @@ class LLTypeHelper(TypeSystemHelper):
     CONST_NULL = history.ConstPtr(history.ConstPtr.value)
     CVAL_NULLREF = None # patched by optimizeopt.py

-    def get_VABLERTI(self):
-        from pypy.rpython.lltypesystem.rvirtualizable2 import VABLERTIPTR
-        return VABLERTIPTR
-
     def new_ConstRef(self, x):
         ptrval = lltype.cast_opaque_ptr(llmemory.GCREF, x)
         return history.ConstPtr(ptrval)
@@ -159,10 +155,6 @@ class OOTypeHelper(TypeSystemHelper):
     loops_done_with_this_frame_ref = None # patched by compile.py
     CONST_NULL = history.ConstObj(history.ConstObj.value)
     CVAL_NULLREF = None # patched by optimizeopt.py
-
-    def get_VABLERTI(self):
-        from pypy.rpython.ootypesystem.rvirtualizable2 import VABLERTI
-        return VABLERTI

     def new_ConstRef(self, x):
         obj = ootype.cast_to_object(x)
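The new `test_external_read_sometimes*` tests all share one recipe: a residual function that reaches the frame through a side reference only on some iterations, so tracing never observes the escape and only the runtime GUARD_NOT_FORCED can catch it. Reduced to its core (illustrative names, plain Python):

```python
# Core shape of the 'external read' tests: the residual call reaches
# the virtualizable frame through a global, so the JIT cannot prove the
# frame stays virtual and must be able to force it while running
# machine code.
class SomewhereElse(object):
    top_frame = None
    counter = 0

somewhere_else = SomewhereElse()

def g():
    # Invisible to the tracer: only *sometimes* touches the frame.
    somewhere_else.counter += 1
    if somewhere_else.counter == 70:
        return somewhere_else.top_frame.y   # external read: forces
    return None
```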
diff --git a/pypy/jit/metainterp/virtualizable.py b/pypy/jit/metainterp/virtualizable.py
index 18632c6ed7..000ce4fb66 100644
--- a/pypy/jit/metainterp/virtualizable.py
+++ b/pypy/jit/metainterp/virtualizable.py
@@ -4,19 +4,24 @@ from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
 from pypy.rpython import rvirtualizable2
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.nonconst import NonConstant
 from pypy.jit.metainterp.typesystem import deref, fieldType, arrayItem
 from pypy.jit.metainterp import history
 from pypy.jit.metainterp.warmstate import wrap, unwrap

 class VirtualizableInfo:
+    token_none = 0
+    token_tracing = -1
+
     def __init__(self, warmrunnerdesc):
         self.warmrunnerdesc = warmrunnerdesc
         jitdriver = warmrunnerdesc.jitdriver
         cpu = warmrunnerdesc.cpu
+        if cpu.ts.name == 'ootype':
+            import py
+            py.test.skip("ootype: fix virtualizables")
         self.cpu = cpu
-        self.VABLERTI = cpu.ts.get_VABLERTI()
-        self.null_vable_rti = cpu.ts.nullptr(deref(self.VABLERTI))
         self.BoxArray = cpu.ts.BoxRef
         #
         assert len(jitdriver.virtualizables) == 1    # for now
@@ -29,6 +34,7 @@ class VirtualizableInfo:
         self.VTYPEPTR = VTYPEPTR
         self.VTYPE = VTYPE = deref(VTYPEPTR)
         self.null_vable = cpu.ts.nullptr(VTYPE)
+        self.vable_token_descr = cpu.fielddescrof(VTYPE, 'vable_token')
         #
         accessor = VTYPE._hints['virtualizable2_accessor']
         all_fields = accessor.fields
@@ -148,7 +154,7 @@ class VirtualizableInfo:
     def finish(self):
         #
         def force_if_necessary(virtualizable):
-            if virtualizable.vable_rti:
+            if virtualizable.vable_token:
                 self.force_now(virtualizable)
         force_if_necessary._always_inline_ = True
         #
@@ -169,72 +175,57 @@ class VirtualizableInfo:
     def is_vtypeptr(self, TYPE):
         return rvirtualizable2.match_virtualizable_type(TYPE, self.VTYPEPTR)

-    def cast_instance_to_base_ptr(self, vable_rti):
-        if we_are_translated():
-            return self.cpu.ts.cast_instance_to_base_ref(vable_rti)
-        else:
-            vable_rti._TYPE = self.VABLERTI   # hack for non-translated mode
-            return vable_rti
+    def reset_vable_token(self, virtualizable):
+        virtualizable.vable_token = self.token_none

-    def clear_vable_rti(self, virtualizable):
-        if virtualizable.vable_rti:
+    def clear_vable_token(self, virtualizable):
+        if virtualizable.vable_token:
             self.force_now(virtualizable)
-            assert not virtualizable.vable_rti
+            assert not virtualizable.vable_token

     def tracing_before_residual_call(self, virtualizable):
-        assert not virtualizable.vable_rti
-        ptr = self.cast_instance_to_base_ptr(tracing_vable_rti)
-        virtualizable.vable_rti = ptr
+        assert not virtualizable.vable_token
+        virtualizable.vable_token = self.token_tracing

     def tracing_after_residual_call(self, virtualizable):
-        if virtualizable.vable_rti:
+        if virtualizable.vable_token:
             # not modified by the residual call; assert that it is still
             # set to 'tracing_vable_rti' and clear it.
-            ptr = self.cast_instance_to_base_ptr(tracing_vable_rti)
-            assert virtualizable.vable_rti == ptr
-            virtualizable.vable_rti = self.null_vable_rti
+            assert virtualizable.vable_token == self.token_tracing
+            virtualizable.vable_token = self.token_none
             return False
         else:
             # marker "modified during residual call" set.
             return True

     def force_now(self, virtualizable):
-        rti = virtualizable.vable_rti
-        virtualizable.vable_rti = self.null_vable_rti
-        if we_are_translated():
-            rti = cast_base_ptr_to_instance(AbstractVableRti, rti)
-        rti.force_now(virtualizable)
+        token = virtualizable.vable_token
+        virtualizable.vable_token = self.token_none
+        if token == self.token_tracing:
+            # The values in the virtualizable are always correct during
+            # tracing.  We only need to reset vable_token to token_none
+            # as a marker for the tracing, to tell it that this
+            # virtualizable escapes.
+            pass
+        else:
+            from pypy.jit.metainterp.compile import ResumeGuardForcedDescr
+            faildescr = self.cpu.force(token)
+            assert isinstance(faildescr, ResumeGuardForcedDescr)
+            faildescr.force_virtualizable(self, virtualizable, token)
     force_now._dont_inline_ = True

 # ____________________________________________________________
 #
-# The 'vable_rti' field of a virtualizable is either NULL or points
-# to an instance of the following classes.  It is:
+# The 'vable_token' field of a virtualizable is either 0, -1, or points
+# into the CPU stack to a particular field in the current frame.  It is:
 #
-# 1. NULL if not in the JIT at all, except as described below.
+# 1. 0 (token_none) if not in the JIT at all, except as described below.
 #
-# 2. always NULL when tracing is in progress.
+# 2. equal to 0 when tracing is in progress; except:
 #
-# 3. 'tracing_vable_rti' during tracing when we do a residual call,
+# 3. equal to -1 (token_tracing) during tracing when we do a residual call,
 #    calling random unknown other parts of the interpreter; it is
-#    reset to NULL as soon as something occurs to the virtualizable.
+#    reset to 0 as soon as something occurs to the virtualizable.
 #
-# 4. NULL for now when running the machine code with a virtualizable;
-#    later it will be a RunningVableRti().
-
-
-class AbstractVableRti(object):
-
-    def force_now(self, virtualizable):
-        raise NotImplementedError
-
-
-class TracingVableRti(AbstractVableRti):
-
-    def force_now(self, virtualizable):
-        # The values if the virtualizable are always correct during tracing.
-        # We only need to set a marker to tell that forcing occurred.
-        # As the caller resets vable_rti to NULL, it plays the role of marker.
-        pass
-
-tracing_vable_rti = TracingVableRti()
+# 4. when running the machine code with a virtualizable, it is set
+#    to the address in the CPU stack by the FORCE_TOKEN operation.
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
index 589e5b8e3f..67471dd34e 100644
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -213,6 +213,8 @@ class WarmEnterState(object):
                 virtualizable = vinfo.cast_to_vtype(virtualizable)
                 assert virtualizable != globaldata.blackhole_virtualizable, (
                     "reentering same frame via blackhole")
+            else:
+                virtualizable = None

             # look for the cell corresponding to the current greenargs
             greenargs = args[:num_green_args]
@@ -247,6 +249,8 @@ class WarmEnterState(object):
                 fail_descr = metainterp_sd.cpu.execute_token(loop_token)
                 debug_stop("jit-running")
                 metainterp_sd.profiler.end_running()
+                if vinfo is not None:
+                    vinfo.reset_vable_token(virtualizable)
                 loop_token = fail_descr.handle_fail(metainterp_sd)

     maybe_compile_and_run._dont_inline_ = True
diff --git a/pypy/rlib/jit.py b/pypy/rlib/jit.py
index 6fc41c1b52..cff697d986 100644
--- a/pypy/rlib/jit.py
+++ b/pypy/rlib/jit.py
@@ -22,6 +22,7 @@ def unroll_safe(func):
 def purefunction_promote(func):
     import inspect
     purefunction(func)
+    func._pure_function_with_all_promoted_args_ = True
     args, varargs, varkw, defaults = inspect.getargspec(func)
     args = ["v%s" % (i, ) for i in range(len(args))]
     assert varargs is None and varkw is None
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
index 2fd890e8f8..75c56fe8c6 100644
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -807,9 +807,6 @@ class LLFrame(object):
     def op_gc__collect(self, *gen):
         self.heap.collect(*gen)

-    def op_gc_assume_young_pointers(self, addr):
-        raise NotImplementedError
-
     def op_gc_heap_stats(self):
         raise NotImplementedError
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
index 70bf7a8871..570ae08e30 100644
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -448,6 +448,9 @@ class _array_of_unknown_length(_parentable_mixin, lltype._parentable):
         self._storage._setitem(index, value, boundscheck=False)

     def getitems(self):
+        if self._TYPE.OF != lltype.Char:
+            raise Exception("cannot get all items of an unknown-length "
+                            "array of %r" % self._TYPE.OF)
         _items = []
         i = 0
         while 1:
diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
index 251c941742..7ef29dc0b9 100644
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -460,7 +460,7 @@ LL_OPERATIONS = {
     # allocating non-GC structures only
     'gc_thread_run'       : LLOp(),
     'gc_thread_die'       : LLOp(),
-    'gc_assume_young_pointers': LLOp(),
+    'gc_assume_young_pointers': LLOp(canrun=True),
     'gc_heap_stats'       : LLOp(canunwindgc=True),

     # ------- JIT & GC interaction, only for some GCs ----------
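A runnable restatement of the vable_token protocol documented in the comment block above; the constants mirror `VirtualizableInfo.token_none`/`token_tracing`, everything else here is illustrative:

```python
TOKEN_NONE = 0       # not in the JIT / nothing to force
TOKEN_TRACING = -1   # set around residual calls while tracing

class FakeVirtualizable(object):
    vable_token = TOKEN_NONE

def tracing_before_residual_call(vable):
    assert vable.vable_token == TOKEN_NONE
    vable.vable_token = TOKEN_TRACING

def tracing_after_residual_call(vable):
    # Returns True if the residual call touched (forced) the
    # virtualizable, which in this commit aborts the trace.
    if vable.vable_token == TOKEN_TRACING:
        vable.vable_token = TOKEN_NONE
        return False        # untouched by the call
    return True             # forced during the call

v = FakeVirtualizable()
tracing_before_residual_call(v)
v.vable_token = TOKEN_NONE          # simulate a forcing escape
assert tracing_after_residual_call(v)
```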
diff --git a/pypy/rpython/lltypesystem/opimpl.py b/pypy/rpython/lltypesystem/opimpl.py
index 691352054b..2596a70565 100644
--- a/pypy/rpython/lltypesystem/opimpl.py
+++ b/pypy/rpython/lltypesystem/opimpl.py
@@ -486,6 +486,9 @@ op_gc_gettypeptr_group.need_result_type = True
 def op_get_member_index(memberoffset):
     raise NotImplementedError

+def op_gc_assume_young_pointers(addr):
+    pass
+
 # ____________________________________________________________

 def get_op_impl(opname):
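The llinterp/lloperation/opimpl changes above follow the existing convention that an llop declared with `canrun=True` gets a default module-level `op_<name>` implementation which the llinterpreter looks up by name. A toy version of that lookup (it only loosely mirrors `opimpl.get_op_impl`):

```python
# Toy version of the canrun-op dispatch convention: operations marked
# canrun get a module-level op_<name> implementation that a generic
# interpreter finds by name instead of a hand-written frame method.
def op_gc_assume_young_pointers(addr):
    pass    # default: a no-op outside a real generational GC

def get_op_impl(opname, namespace=None):
    ns = namespace if namespace is not None else globals()
    try:
        return ns['op_%s' % opname]
    except KeyError:
        raise AttributeError("No such op: %r" % opname)

get_op_impl('gc_assume_young_pointers')(addr=None)   # runs the no-op
```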
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
index c6ef3777ad..be34f96871 100644
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -57,7 +57,7 @@ class _IsLLPtrEntry(ExtRegistryEntry):
 def llexternal(name, args, result, _callable=None,
                compilation_info=ExternalCompilationInfo(),
                sandboxsafe=False, threadsafe='auto',
-               canraise=False, _nowrapper=False, calling_conv='c',
+               _nowrapper=False, calling_conv='c',
                oo_primitive=None, pure_function=False):
     """Build an external function that will invoke the C function 'name'
     with the given 'args' types and 'result' type.
@@ -68,6 +68,10 @@ def llexternal(name, args, result, _callable=None,
                   pointing to a read-only null-terminated character of arrays,
                   as usual for C.

+    The C function can have callbacks, but they must be specified explicitly
+    as constant RPython functions.  We don't support yet C functions that
+    invoke callbacks passed otherwise (e.g. set by a previous C call).
+
     threadsafe: whether it's ok to release the GIL around the call.
                 Default is yes, unless sandboxsafe is set, in which case
                 we consider that the function is really short-running and
@@ -84,12 +88,22 @@ def llexternal(name, args, result, _callable=None,
     kwds = {}
     if oo_primitive:
         kwds['oo_primitive'] = oo_primitive
+
+    has_callback = False
+    for ARG in args:
+        if _isfunctype(ARG):
+            has_callback = True
+    if has_callback:
+        kwds['_callbacks'] = callbackholder = CallbackHolder()
+    else:
+        callbackholder = None
+
     funcptr = lltype.functionptr(ext_type, name, external='C',
                                  compilation_info=compilation_info,
                                  _callable=_callable,
                                  _safe_not_sandboxed=sandboxsafe,
                                  _debugexc=True, # on top of llinterp
-                                 canraise=canraise,
+                                 canraise=False,
                                  **kwds)
     if isinstance(_callable, ll2ctypes.LL2CtypesCallable):
         _callable.funcptr = funcptr
@@ -170,9 +184,11 @@ def llexternal(name, args, result, _callable=None,
                 # XXX pass additional arguments
                 if invoke_around_handlers:
                     arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg,
+                                                             callbackholder,
                                                              aroundstate))
                 else:
-                    arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg))
+                    arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg,
+                                                             callbackholder))
             else:
                 SOURCE = lltype.typeOf(arg)
                 if SOURCE != TARGET:
@@ -202,7 +218,11 @@ def llexternal(name, args, result, _callable=None,

     return func_with_new_name(wrapper, name)

-def _make_wrapper_for(TP, callable, aroundstate=None):
+class CallbackHolder:
+    def __init__(self):
+        self.callbacks = {}
+
+def _make_wrapper_for(TP, callable, callbackholder, aroundstate=None):
     """ Function creating wrappers for callbacks. Note that this is
     cheating as we assume constant callbacks and we just memoize wrappers
     """
@@ -213,6 +233,7 @@ def _make_wrapper_for(TP, callable, aroundstate=None):
     else:
         errorcode = TP.TO.RESULT._example()
     callable_name = getattr(callable, '__name__', '?')
+    callbackholder.callbacks[callable] = True
     args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))])
     source = py.code.Source(r"""
         def wrapper(%s):    # no *args - no GIL for mallocing the tuple
diff --git a/pypy/rpython/lltypesystem/rvirtualizable2.py b/pypy/rpython/lltypesystem/rvirtualizable2.py
index cd7b835ccd..cd6dce1f4a 100644
--- a/pypy/rpython/lltypesystem/rvirtualizable2.py
+++ b/pypy/rpython/lltypesystem/rvirtualizable2.py
@@ -3,23 +3,21 @@ from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.lltypesystem.rclass import InstanceRepr, OBJECTPTR
 from pypy.rpython.rvirtualizable2 import AbstractVirtualizable2InstanceRepr

-VABLERTIPTR = OBJECTPTR

 class Virtualizable2InstanceRepr(AbstractVirtualizable2InstanceRepr, InstanceRepr):

     def _setup_repr_llfields(self):
         llfields = []
         if self.top_of_virtualizable_hierarchy:
-            llfields.append(('vable_base', llmemory.Address))
-            llfields.append(('vable_rti', VABLERTIPTR))
+            llfields.append(('vable_token', lltype.Signed))
         return llfields

     def set_vable(self, llops, vinst, force_cast=False):
         if self.top_of_virtualizable_hierarchy:
             if force_cast:
                 vinst = llops.genop('cast_pointer', [vinst], resulttype=self)
-            cname = inputconst(lltype.Void, 'vable_rti')
-            vvalue = inputconst(VABLERTIPTR, lltype.nullptr(VABLERTIPTR.TO))
-            llops.genop('setfield', [vinst, cname, vvalue])
+            cname = inputconst(lltype.Void, 'vable_token')
+            cvalue = inputconst(lltype.Signed, 0)
+            llops.genop('setfield', [vinst, cname, cvalue])
         else:
             self.rbase.set_vable(llops, vinst, force_cast=True)
diff --git a/pypy/rpython/lltypesystem/test/test_rffi.py b/pypy/rpython/lltypesystem/test/test_rffi.py
index 989e9a0f26..8d6b4e03ac 100644
--- a/pypy/rpython/lltypesystem/test/test_rffi.py
+++ b/pypy/rpython/lltypesystem/test/test_rffi.py
@@ -387,6 +387,7 @@ class BaseTestRffi:

         fn = self.compile(f, [])
         assert fn() == 6
+        assert eating_callback._ptr._obj._callbacks.callbacks == {g: True}

     def test_double_callback(self):
         eating_callback = self.eating_callback()
@@ -406,6 +407,8 @@ class BaseTestRffi:
         fn = self.compile(f, [int])
         assert fn(4) == 4
         assert fn(1) == 3
+        assert eating_callback._ptr._obj._callbacks.callbacks == {one: True,
+                                                                  two: True}

     def test_exception_callback(self):
         eating_callback = self.eating_callback()
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
index a6c39d54db..c5564cb3cc 100644
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -29,12 +29,15 @@ class CollectAnalyzer(graphanalyze.BoolGraphAnalyzer):
     def analyze_direct_call(self, graph, seen=None):
         try:
             func = graph.func
+        except AttributeError:
+            pass
+        else:
             if func is rstack.stack_check:
                 return self.translator.config.translation.stackless
-            if func._gctransformer_hint_cannot_collect_:
+            if getattr(func, '_gctransformer_hint_cannot_collect_', False):
                 return False
-        except AttributeError:
-            pass
+            if getattr(func, '_gctransformer_hint_close_stack_', False):
+                return True
         return graphanalyze.GraphAnalyzer.analyze_direct_call(self, graph,
                                                               seen)
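What the CallbackHolder buys: analyzers no longer have to treat an external call with callback arguments as a black box. A minimal sketch, assuming a boolean analysis; all names here are illustrative, not the rffi internals:

```python
class CallbackHolder(object):
    """Collects the constant RPython callbacks attached to one external
    function declaration (mirrors the CallbackHolder added above)."""
    def __init__(self):
        self.callbacks = {}

def analyze_external_call(holder, analyze_one):
    # Instead of answering "top" (assume the worst) for every external
    # call, join the analysis results of exactly the callbacks that the
    # declaration says may be invoked.
    result = False                      # bottom of a boolean lattice
    for func in (holder.callbacks if holder else ()):
        result = result or analyze_one(func)
    return result

def harmless(): pass
def collects(): pass                    # pretend this one may collect

holder = CallbackHolder()
holder.callbacks[harmless] = True
assert not analyze_external_call(holder, lambda f: f is collects)
holder.callbacks[collects] = True
assert analyze_external_call(holder, lambda f: f is collects)
```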
diff --git a/pypy/rpython/memory/gctransform/test/test_framework.py b/pypy/rpython/memory/gctransform/test/test_framework.py
index f8e30eefb3..b47f14f72b 100644
--- a/pypy/rpython/memory/gctransform/test/test_framework.py
+++ b/pypy/rpython/memory/gctransform/test/test_framework.py
@@ -6,7 +6,7 @@ from pypy.rpython.memory.gctransform.test.test_transform import rtype, \
 from pypy.rpython.memory.gctransform.transform import GcHighLevelOp
 from pypy.rpython.memory.gctransform.framework import FrameworkGCTransformer, \
      CollectAnalyzer, find_initializing_stores, find_clean_setarrayitems
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rpython.rtyper import LowLevelOpList
 from pypy.translator.c.gc import FrameworkGcPolicy
 from pypy.translator.translator import TranslationContext, graphof
@@ -87,6 +87,33 @@ def test_cancollect_stack_check():
     can_collect = CollectAnalyzer(t).analyze_direct_call(with_check_graph)
     assert can_collect

+def test_cancollect_external():
+    fext1 = rffi.llexternal('fext1', [], lltype.Void, threadsafe=False)
+    def g():
+        fext1()
+    t = rtype(g, [])
+    gg = graphof(t, g)
+    assert not CollectAnalyzer(t).analyze_direct_call(gg)
+
+    fext2 = rffi.llexternal('fext2', [], lltype.Void, threadsafe=True)
+    def g():
+        fext2()
+    t = rtype(g, [])
+    gg = graphof(t, g)
+    assert CollectAnalyzer(t).analyze_direct_call(gg)
+
+    S = lltype.GcStruct('S', ('x', lltype.Signed))
+    FUNC = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Void))
+    fext3 = rffi.llexternal('fext3', [FUNC], lltype.Void, threadsafe=False)
+    def h(x):
+        lltype.malloc(S, zero=True)
+    def g():
+        fext3(h)
+    t = rtype(g, [])
+    gg = graphof(t, g)
+    assert CollectAnalyzer(t).analyze_direct_call(gg)
+
+
 class WriteBarrierTransformer(FrameworkGCTransformer):
     clean_sets = {}
     GC_PARAMS = {}
diff --git a/pypy/rpython/ootypesystem/rvirtualizable2.py b/pypy/rpython/ootypesystem/rvirtualizable2.py
index 9e3a0c62b7..be3df371b3 100644
--- a/pypy/rpython/ootypesystem/rvirtualizable2.py
+++ b/pypy/rpython/ootypesystem/rvirtualizable2.py
@@ -11,7 +11,7 @@ class Virtualizable2InstanceRepr(AbstractVirtualizable2InstanceRepr, InstanceRep
     def _setup_repr_llfields(self):
         llfields = []
         if self.top_of_virtualizable_hierarchy:
-            llfields.append(('vable_rti', VABLERTI))
+            llfields.append(('vable_token', VABLERTI))
         return llfields

     def set_vable(self, llops, vinst, force_cast=False):
diff --git a/pypy/rpython/test/test_rvirtualizable2.py b/pypy/rpython/test/test_rvirtualizable2.py
index 3545a3b698..b128e02e2f 100644
--- a/pypy/rpython/test/test_rvirtualizable2.py
+++ b/pypy/rpython/test/test_rvirtualizable2.py
@@ -361,7 +361,7 @@ class TestLLtype(LLRtypeMixin, BaseTest):
         assert res.item1 == 42
         res = lltype.normalizeptr(res.item0)
         assert res.inst_v == 42
-        assert not res.vable_rti
+        assert res.vable_token == 0

 class TestOOtype(OORtypeMixin, BaseTest):
     prefix = 'o'
diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py
index eb108dc92e..c9e11536aa 100644
--- a/pypy/testrunner_cfg.py
+++ b/pypy/testrunner_cfg.py
@@ -7,7 +7,8 @@ def collect_one_testdir(testdirs, reldir, tests):
         reldir.startswith('rlib/test') or
         reldir.startswith('rpython/memory/') or
         reldir.startswith('jit/backend/x86/') or
-        reldir.startswith('jit/backend/cli')):
+        #reldir.startswith('jit/backend/cli') or
+        0):
         testdirs.extend(tests)
     else:
         testdirs.append(reldir)
diff --git a/pypy/translator/backendopt/canraise.py b/pypy/translator/backendopt/canraise.py
index db890643ba..4ed3ae0d95 100644
--- a/pypy/translator/backendopt/canraise.py
+++ b/pypy/translator/backendopt/canraise.py
@@ -17,7 +17,7 @@ class RaiseAnalyzer(graphanalyze.BoolGraphAnalyzer):
         log.WARNING("Unknown operation: %s" % op.opname)
         return True

-    def analyze_external_call(self, op):
+    def analyze_external_call(self, op, seen=None):
         fnobj = get_funcobj(op.args[0].value)
         return getattr(fnobj, 'canraise', True)
diff --git a/pypy/translator/backendopt/graphanalyze.py b/pypy/translator/backendopt/graphanalyze.py
index 61f8ae8807..eeed4f65e9 100644
--- a/pypy/translator/backendopt/graphanalyze.py
+++ b/pypy/translator/backendopt/graphanalyze.py
@@ -1,4 +1,4 @@
-from pypy.translator.simplify import get_graph
+from pypy.translator.simplify import get_graph, get_funcobj
 from pypy.rpython.lltypesystem.lloperation import llop, LL_OPERATIONS
 from pypy.rpython.lltypesystem import lltype

@@ -38,8 +38,17 @@ class GraphAnalyzer(object):
     def analyze_startblock(self, block, seen=None):
         return self.bottom_result()

-    def analyze_external_call(self, op):
-        return self.top_result()
+    def analyze_external_call(self, op, seen=None):
+        funcobj = get_funcobj(op.args[0].value)
+        result = self.bottom_result()
+        if hasattr(funcobj, '_callbacks'):
+            bk = self.translator.annotator.bookkeeper
+            for function in funcobj._callbacks.callbacks:
+                desc = bk.getdesc(function)
+                for graph in desc.getgraphs():
+                    result = self.join_two_results(
+                        result, self.analyze_direct_call(graph, seen))
+        return result

     def analyze_external_method(self, op, TYPE, meth):
         return self.top_result()
@@ -59,7 +68,7 @@ class GraphAnalyzer(object):
         if op.opname == "direct_call":
             graph = get_graph(op.args[0], self.translator)
             if graph is None:
-                return self.analyze_external_call(op)
+                return self.analyze_external_call(op, seen)
             return self.analyze_direct_call(graph, seen)
         elif op.opname == "indirect_call":
             if op.args[-1].value is None:
diff --git a/pypy/translator/backendopt/test/test_canraise.py b/pypy/translator/backendopt/test/test_canraise.py
index 2546994e2a..b82c6bd789 100644
--- a/pypy/translator/backendopt/test/test_canraise.py
+++ b/pypy/translator/backendopt/test/test_canraise.py
@@ -189,7 +189,8 @@ class TestLLType(LLRtypeMixin, BaseTestCanRaise):
         result = ra.can_raise(fgraph.startblock.operations[0])
         assert not result

-        z = llexternal('z', [lltype.Signed], lltype.Signed, canraise=True)
+        z = lltype.functionptr(lltype.FuncType([lltype.Signed], lltype.Signed),
+                               'foobar')
         def g(x):
             return z(x)
         t, ra = self.translate(g, [int])
diff --git a/pypy/translator/backendopt/test/test_writeanalyze.py b/pypy/translator/backendopt/test/test_writeanalyze.py
index 7944ae4f07..a9488d0a3f 100644
--- a/pypy/translator/backendopt/test/test_writeanalyze.py
+++ b/pypy/translator/backendopt/test/test_writeanalyze.py
@@ -178,6 +178,31 @@ class TestLLtype(BaseTestCanRaise):
         assert name == "length"
         assert S1 is S2

+    def test_llexternal_with_callback(self):
+        from pypy.rpython.lltypesystem.rffi import llexternal
+        from pypy.rpython.lltypesystem import lltype
+
+        class Abc:
+            pass
+        abc = Abc()
+
+        FUNC = lltype.FuncType([lltype.Signed], lltype.Signed)
+        z = llexternal('z', [lltype.Ptr(FUNC)], lltype.Signed)
+        def g(n):
+            abc.foobar = n
+            return n + 1
+        def f(x):
+            return z(g)
+        t, wa = self.translate(f, [int])
+        fgraph = graphof(t, f)
+        backend_optimizations(t)
+        assert fgraph.startblock.operations[0].opname == 'direct_call'
+
+        result = wa.analyze(fgraph.startblock.operations[0])
+        assert len(result) == 1
+        (struct, T, name), = result
+        assert struct == "struct"
+        assert name.endswith("foobar")

 class TestOOtype(BaseTestCanRaise):
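The write-analysis results asserted above live in a simple lattice: `top_set` means "may write anything", the empty frozenset means "writes nothing", and everything else is a union of `('struct', TYPE, fieldname)` / `('array', TYPE)` entries. A tiny runnable illustration; `TOP` and `join` are stand-ins for writeanalyze's `top_set` handling:

```python
TOP = object()    # unknown external call: may write anything

def join(a, b):
    # joining with TOP stays TOP; otherwise take the set union
    if a is TOP or b is TOP:
        return TOP
    return a | b

effect_g = frozenset([('struct', 'Abc', 'inst_foobar')])
effect_h = frozenset()                    # writes nothing
assert join(effect_g, effect_h) == effect_g
assert join(effect_g, TOP) is TOP
```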
diff --git a/pypy/translator/backendopt/writeanalyze.py b/pypy/translator/backendopt/writeanalyze.py
index 4f7d979c98..b8a4b096bb 100644
--- a/pypy/translator/backendopt/writeanalyze.py
+++ b/pypy/translator/backendopt/writeanalyze.py
@@ -1,6 +1,5 @@
 from pypy.translator.backendopt import graphanalyze
 from pypy.rpython.ootypesystem import ootype
-reload(graphanalyze)

 top_set = object()
 empty_set = frozenset()
@@ -38,9 +37,6 @@ class WriteAnalyzer(graphanalyze.GraphAnalyzer):
     def _array_result(self, TYPE):
         return frozenset([("array", TYPE)])

-    def analyze_external_call(self, op):
-        return self.bottom_result()   # an external call cannot change anything
-
     def analyze_external_method(self, op, TYPE, meth):
         if isinstance(TYPE, ootype.Array):
             methname = op.args[0].value
diff --git a/pypy/translator/stackless/transform.py b/pypy/translator/stackless/transform.py
index 789a5d7cd6..6518f40c30 100644
--- a/pypy/translator/stackless/transform.py
+++ b/pypy/translator/stackless/transform.py
@@ -260,7 +260,7 @@ class StacklessAnalyzer(graphanalyze.BoolGraphAnalyzer):
             return LL_OPERATIONS[op.opname].canunwindgc
         return False

-    def analyze_external_call(self, op):
+    def analyze_external_call(self, op, seen=None):
         # An external call cannot cause a stack unwind
         # Note that this is essential to get good performance in framework GCs
         # because there is a pseudo-external call to ROUND_UP_FOR_ALLOCATION