Implement GLB algorithm. (Issue #2263)

r=brson
2012-12-05 15:13:24 -08:00 · 2012-12-05 15:13:24 -08:00 · c3a74d87bd
commit c3a74d87bd
parent 3b71d14442
15 changed files with 511 additions and 101 deletions
--- a/src/librustc/metadata/tyencode.rs
+++ b/src/librustc/metadata/tyencode.rs
@ -188,6 +188,9 @@ fn enc_bound_region(w: io::Writer, cx: @ctxt, br: ty::bound_region) {
        w.write_char('|');
        enc_bound_region(w, cx, *br);
      }
      ty::br_fresh(id) => {
        w.write_uint(id);
      }
    }
 }
--- a/src/librustc/middle/astencode.rs
+++ b/src/librustc/middle/astencode.rs
@ -418,7 +418,8 @@ impl ty::Region: tr {
 impl ty::bound_region: tr {
    fn tr(xcx: extended_decode_ctxt) -> ty::bound_region {
        match self {
-            ty::br_anon(_) | ty::br_named(_) | ty::br_self => self,
+            ty::br_anon(_) | ty::br_named(_) | ty::br_self |
            ty::br_fresh(_) => self,
            ty::br_cap_avoid(id, br) => ty::br_cap_avoid(xcx.tr_id(id),
                                                         @br.tr(xcx))
        }
--- a/src/librustc/middle/ty.rs
+++ b/src/librustc/middle/ty.rs
@ -72,7 +72,7 @@ export expr_ty_params_and_ty;
 export expr_is_lval, expr_kind;
 export ExprKind, LvalueExpr, RvalueDatumExpr, RvalueDpsExpr, RvalueStmtExpr;
 export field_ty;
-export fold_ty, fold_sty_to_ty, fold_region, fold_regions;
+export fold_ty, fold_sty_to_ty, fold_region, fold_regions, fold_sig;
 export apply_op_on_t_to_ty_fn;
 export fold_regions_and_ty, walk_regions_and_ty;
 export field;
@ -145,7 +145,7 @@ export ty_struct;
 export Region, bound_region, encl_region;
 export re_bound, re_free, re_scope, re_static, re_infer;
 export ReVar, ReSkolemized;
-export br_self, br_anon, br_named, br_cap_avoid;
+export br_self, br_anon, br_named, br_cap_avoid, br_fresh;
 export get, type_has_params, type_needs_infer, type_has_regions;
 export type_is_region_ptr;
 export type_id;
@ -606,6 +606,9 @@ enum bound_region {
    /// Named region parameters for functions (a in &a/T)
    br_named(ast::ident),
    /// Fresh bound identifiers created during GLB computations.
    br_fresh(uint),
    /**
     * Handles capture-avoiding substitution in a rather subtle case.  If you
     * have a closure whose argument types are being inferred based on the
@ -1311,6 +1314,17 @@ fn fold_sty_to_ty(tcx: ty::ctxt, sty: &sty, foldop: fn(t) -> t) -> t {
    mk_t(tcx, fold_sty(sty, foldop))
 }
 fn fold_sig(sig: &FnSig, fldop: fn(t) -> t) -> FnSig {
    let args = do sig.inputs.map |arg| {
        { mode: arg.mode, ty: fldop(arg.ty) }
    };
    FnSig {
        inputs: move args,
        output: fldop(sig.output)
    }
 }
 fn fold_sty(sty: &sty, fldop: fn(t) -> t) -> sty {
    fn fold_substs(substs: &substs, fldop: fn(t) -> t) -> substs {
        {self_r: substs.self_r,
@ -1489,8 +1503,8 @@ fn apply_op_on_t_to_ty_fn(
 fn fold_regions(
    cx: ctxt,
    ty: t,
-    fldr: fn(r: Region, in_fn: bool) -> Region) -> t {
+    fldr: fn(r: Region, in_fn: bool) -> Region) -> t
-
+{
    fn do_fold(cx: ctxt, ty: t, in_fn: bool,
               fldr: fn(Region, bool) -> Region) -> t {
        if !type_has_regions(ty) { return ty; }
@ -2744,7 +2758,10 @@ impl bound_region : to_bytes::IterBytes {
          to_bytes::iter_bytes_2(&2u8, ident, lsb0, f),
          ty::br_cap_avoid(ref id, ref br) =>
-          to_bytes::iter_bytes_3(&3u8, id, br, lsb0, f)
+          to_bytes::iter_bytes_3(&3u8, id, br, lsb0, f),
          ty::br_fresh(ref x) =>
          to_bytes::iter_bytes_2(&4u8, x, lsb0, f)
        }
    }
 }
@ -4493,6 +4510,12 @@ impl bound_region : cmp::Eq {
                    _ => false
                }
            }
            br_fresh(e0a) => {
                match (*other) {
                    br_fresh(e0b) => e0a == e0b,
                    _ => false
                }
            }
        }
    }
    pure fn ne(&self, other: &bound_region) -> bool { !(*self).eq(other) }
--- a/src/librustc/middle/typeck/infer/assignment.rs
+++ b/src/librustc/middle/typeck/infer/assignment.rs
@ -80,7 +80,7 @@ enum Assign = combine_fields;
 impl Assign {
    fn tys(a: ty::t, b: ty::t) -> ares {
-        debug!("Assign.tys(%s -> %s)",
+        debug!("Assign.tys(%s => %s)",
               a.to_str(self.infcx),
               b.to_str(self.infcx));
        let _r = indenter();
@ -139,7 +139,7 @@ priv impl Assign {
        a: ty::t, b: ty::t,
        +a_bnd: Option<ty::t>, +b_bnd: Option<ty::t>) -> ares {
-        debug!("Assign.assign_tys_or_sub(%s -> %s, %s -> %s)",
+        debug!("Assign.assign_tys_or_sub(%s => %s, %s => %s)",
               a.to_str(self.infcx), b.to_str(self.infcx),
               a_bnd.to_str(self.infcx), b_bnd.to_str(self.infcx));
        let _r = indenter();
--- a/src/librustc/middle/typeck/infer/combine.rs
+++ b/src/librustc/middle/typeck/infer/combine.rs
@ -69,6 +69,7 @@ trait combine {
    fn infcx() -> infer_ctxt;
    fn tag() -> ~str;
    fn a_is_expected() -> bool;
    fn span() -> span;
    fn sub() -> Sub;
    fn lub() -> Lub;
--- a/src/librustc/middle/typeck/infer/glb.rs
+++ b/src/librustc/middle/typeck/infer/glb.rs
@ -15,6 +15,8 @@ use middle::typeck::infer::lattice::*;
 use middle::typeck::infer::sub::Sub;
 use middle::typeck::infer::to_str::ToStr;
 use std::list;
 use syntax::ast::{Many, Once};
 enum Glb = combine_fields;  // "greatest lower bound" (common subtype)
@ -23,6 +25,7 @@ impl Glb: combine {
    fn infcx() -> infer_ctxt { self.infcx }
    fn tag() -> ~str { ~"glb" }
    fn a_is_expected() -> bool { self.a_is_expected }
    fn span() -> span { self.span }
    fn sub() -> Sub { Sub(*self) }
    fn lub() -> Lub { Lub(*self) }
@ -144,7 +147,122 @@ impl Glb: combine {
    }
    fn fns(a: &ty::FnTy, b: &ty::FnTy) -> cres<ty::FnTy> {
-        super_fns(&self, a, b)
+        // Note: this is a subtle algorithm.  For a full explanation,
        // please see the large comment in `region_inference.rs`.
        debug!("%s.fns(%?, %?)",
               self.tag(), a.to_str(self.infcx), b.to_str(self.infcx));
        let _indenter = indenter();
        // Take a snapshot.  We'll never roll this back, but in later
        // phases we do want to be able to examine "all bindings that
        // were created as part of this type comparison", and making a
        // snapshot is a convenient way to do that.
        let snapshot = self.infcx.region_vars.start_snapshot();
        // Instantiate each bound region with a fresh region variable.
        let (a_with_fresh, a_isr) =
            self.infcx.replace_bound_regions_with_fresh_regions(
                self.span, a);
        let a_vars = var_ids(&self, a_isr);
        let (b_with_fresh, b_isr) =
            self.infcx.replace_bound_regions_with_fresh_regions(
                self.span, b);
        let b_vars = var_ids(&self, b_isr);
        // Collect constraints.
        let fn_ty0 = if_ok!(super_fns(&self, &a_with_fresh, &b_with_fresh));
        debug!("fn_ty0 = %s", fn_ty0.to_str(self.infcx));
        // Generalize the regions appearing in fn_ty0 if possible
        let new_vars =
            self.infcx.region_vars.vars_created_since_snapshot(snapshot);
        let fn_ty1 =
            self.infcx.fold_regions_in_sig(
                &fn_ty0,
                |r, _in_fn| generalize_region(&self, snapshot,
                                              new_vars, a_isr, a_vars, b_vars,
                                              r));
        debug!("fn_ty1 = %s", fn_ty1.to_str(self.infcx));
        return Ok(move fn_ty1);
        fn generalize_region(self: &Glb,
                             snapshot: uint,
                             new_vars: &[RegionVid],
                             a_isr: isr_alist,
                             a_vars: &[RegionVid],
                             b_vars: &[RegionVid],
                             r0: ty::Region) -> ty::Region {
            if !is_var_in_set(new_vars, r0) {
                return r0;
            }
            let tainted = self.infcx.region_vars.tainted(snapshot, r0);
            let mut a_r = None, b_r = None, only_new_vars = true;
            for tainted.each |r| {
                if is_var_in_set(a_vars, *r) {
                    if a_r.is_some() {
                        return fresh_bound_variable(self);
                    } else {
                        a_r = Some(*r);
                    }
                } else if is_var_in_set(b_vars, *r) {
                    if b_r.is_some() {
                        return fresh_bound_variable(self);
                    } else {
                        b_r = Some(*r);
                    }
                } else if !is_var_in_set(new_vars, *r) {
                    only_new_vars = false;
                }
            }
                // NB---I do not believe this algorithm computes
                // (necessarily) the GLB.  As written it can
                // spuriously fail.  In particular, if there is a case
                // like: fn(fn(&a)) and fn(fn(&b)), where a and b are
                // free, it will return fn(&c) where c = GLB(a,b).  If
                // however this GLB is not defined, then the result is
                // an error, even though something like
                // "fn<X>(fn(&X))" where X is bound would be a
                // subtype of both of those.
                //
                // The problem is that if we were to return a bound
                // variable, we'd be computing a lower-bound, but not
                // necessarily the *greatest* lower-bound.
            if a_r.is_some() && b_r.is_some() && only_new_vars {
                // Related to exactly one bound variable from each fn:
                return rev_lookup(self, a_isr, a_r.get());
            } else if a_r.is_none() && b_r.is_none() {
                // Not related to bound variables from either fn:
                return r0;
            } else {
                // Other:
                return fresh_bound_variable(self);
            }
        }
        fn rev_lookup(self: &Glb,
                      a_isr: isr_alist,
                      r: ty::Region) -> ty::Region
        {
            for list::each(a_isr) |pair| {
                let (a_br, a_r) = *pair;
                if a_r == r {
                    return ty::re_bound(a_br);
                }
            }
            self.infcx.tcx.sess.span_bug(
                self.span,
                fmt!("could not find original bound region for %?", r));
        }
        fn fresh_bound_variable(self: &Glb) -> ty::Region {
            self.infcx.region_vars.new_bound()
        }
    }
    fn fn_metas(a: &ty::FnMeta, b: &ty::FnMeta) -> cres<ty::FnMeta> {
--- a/src/librustc/middle/typeck/infer/lattice.rs
+++ b/src/librustc/middle/typeck/infer/lattice.rs
@ -14,6 +14,8 @@ use middle::typeck::infer::combine::*;
 use middle::typeck::infer::unify::*;
 use middle::typeck::infer::to_str::ToStr;
 use std::list;
 // ______________________________________________________________________
 // Lattice operations on variables
 //
@ -158,3 +160,29 @@ fn lattice_var_and_t<L:lattice_ops combine>(
      }
    }
 }
 // ___________________________________________________________________________
 // Random utility functions used by LUB/GLB when computing LUB/GLB of
 // fn types
 fn var_ids<T: combine>(self: &T, isr: isr_alist) -> ~[RegionVid] {
    let mut result = ~[];
    for list::each(isr) |pair| {
        match pair.second() {
            ty::re_infer(ty::ReVar(r)) => { result.push(r); }
            r => {
                self.infcx().tcx.sess.span_bug(
                    self.span(),
                    fmt!("Found non-region-vid: %?", r));
            }
        }
    }
    return result;
 }
 fn is_var_in_set(new_vars: &[RegionVid], r: ty::Region) -> bool {
    match r {
        ty::re_infer(ty::ReVar(ref v)) => new_vars.contains(v),
        _ => false
    }
 }
--- a/src/librustc/middle/typeck/infer/lub.rs
+++ b/src/librustc/middle/typeck/infer/lub.rs
@ -31,6 +31,7 @@ impl Lub: combine {
    fn infcx() -> infer_ctxt { self.infcx }
    fn tag() -> ~str { ~"lub" }
    fn a_is_expected() -> bool { self.a_is_expected }
    fn span() -> span { self.span }
    fn sub() -> Sub { Sub(*self) }
    fn lub() -> Lub { Lub(*self) }
@ -139,12 +140,10 @@ impl Lub: combine {
        let new_vars =
            self.infcx.region_vars.vars_created_since_snapshot(snapshot);
        let fn_ty1 =
-            ty::apply_op_on_t_to_ty_fn(
+            self.infcx.fold_regions_in_sig(
-                self.infcx.tcx, &fn_ty0,
+                &fn_ty0,
-                |t| ty::fold_regions(
+                |r, _in_fn| generalize_region(&self, snapshot, new_vars,
-                    self.infcx.tcx, t,
+                                              a_isr, r));
                    |r, _in_fn| generalize_region(&self, snapshot,
                                                  new_vars, a_isr, r)));
        return Ok(move fn_ty1);
        fn generalize_region(self: &Lub,
@ -153,7 +152,7 @@ impl Lub: combine {
                             a_isr: isr_alist,
                             r0: ty::Region) -> ty::Region {
            // Regions that pre-dated the LUB computation stay as they are.
-            if !is_new_var(new_vars, r0) {
+            if !is_var_in_set(new_vars, r0) {
                debug!("generalize_region(r0=%?): not new variable", r0);
                return r0;
            }
@ -163,7 +162,7 @@ impl Lub: combine {
            // Variables created during LUB computation which are
            // *related* to regions that pre-date the LUB computation
            // stay as they are.
-            if !tainted.all(|r| is_new_var(new_vars, *r)) {
+            if !tainted.all(|r| is_var_in_set(new_vars, *r)) {
                debug!("generalize_region(r0=%?): \
                        non-new-variables found in %?",
                       r0, tainted);
@ -190,13 +189,6 @@ impl Lub: combine {
                fmt!("Region %? is not associated with \
                      any bound region from A!", r0));
        }
        fn is_new_var(new_vars: &[RegionVid], r: ty::Region) -> bool {
            match r {
                ty::re_infer(ty::ReVar(ref v)) => new_vars.contains(v),
                _ => false
            }
        }
    }
    fn fn_metas(a: &ty::FnMeta, b: &ty::FnMeta) -> cres<ty::FnMeta> {
--- a/src/librustc/middle/typeck/infer/mod.rs
+++ b/src/librustc/middle/typeck/infer/mod.rs
@ -799,5 +799,16 @@ impl infer_ctxt {
        (fn_ty, isr)
    }
    fn fold_regions_in_sig(
        &self,
        fn_ty: &ty::FnTy,
        fldr: &fn(r: ty::Region, in_fn: bool) -> ty::Region) -> ty::FnTy
    {
        let sig = do ty::fold_sig(&fn_ty.sig) |t| {
            ty::fold_regions(self.tcx, t, fldr)
        };
        ty::FnTyBase {meta: fn_ty.meta, sig: sig}
    }
 }
--- a/src/librustc/middle/typeck/infer/region_inference.rs
+++ b/src/librustc/middle/typeck/infer/region_inference.rs
@ -337,6 +337,61 @@ therefore considering subtyping and in particular does not consider
 LUB or GLB computation.  We have to consider this.  Here is the
 algorithm I implemented.
 First though, let's discuss what we are trying to compute in more
 detail.  The LUB is basically the "common supertype" and the GLB is
 "common subtype"; one catch is that the LUB should be the
 *most-specific* common supertype and the GLB should be *most general*
 common subtype (as opposed to any common supertype or any common
 subtype).
 Anyway, to help clarify, here is a table containing some
 function pairs and their LUB/GLB:
 ```
 Type 1              Type 2              LUB               GLB
 fn<a>(&a)           fn(&X)              fn(&X)            fn<a>(&a)
 fn(&A)              fn(&X)              --                fn<a>(&a)
 fn<a,b>(&a, &b)     fn<x>(&x, &x)       fn<a>(&a, &a)     fn<a,b>(&a, &b)
 fn<a,b>(&a, &b, &a) fn<x,y>(&x, &y, &y) fn<a>(&a, &a, &a) fn<a,b,c>(&a,&b,&c)
 ```
 ### Conventions
 I use lower-case letters (e.g., `&a`) for bound regions and upper-case
 letters for free regions (`&A`).  Region variables written with a
 dollar-sign (e.g., `$a`).  I will try to remember to enumerate the
 bound-regions on the fn type as well (e.g., `fn<a>(&a)`).
 ### High-level summary
 Both the LUB and the GLB algorithms work in a similar fashion.  They
 begin by replacing all bound regions (on both sides) with fresh region
 inference variables.  Therefore, both functions are converted to types
 that contain only free regions.  We can then compute the LUB/GLB in a
 straightforward way, as described in `combine.rs`.  This results in an
 interim type T.  The algorithms then examine the regions that appear
 in T and try to, in some cases, replace them with bound regions to
 yield the final result.
 To decide whether to replace a region `R` that appears in `T` with a
 bound region, the algorithms make use of two bits of information.
 First is a set `V` that contains all region variables created as part
 of the LUB/GLB computation. `V` will contain the region variables
 created to replace the bound regions in the input types, but it also
 contains 'intermediate' variables created to represent the LUB/GLB of
 individual regions.  Basically, when asked to compute the LUB/GLB of a
 region variable with another region, the inferencer cannot oblige
 immediately since the valuese of that variables are not known.
 Therefore, it creates a new variable that is related to the two
 regions.  For example, the LUB of two variables `$x` and `$y` is a
 fresh variable `$z` that is constrained such that `$x <= $z` and `$y
 <= $z`.  So `V` will contain these intermediate variables as well.
 The other important factor in deciding how to replace a region in T is
 the function `Tainted($r)` which, for a region variable, identifies
 all regions that the region variable is related to in some way
 (`Tainted()` made an appearance in the subtype computation as well).
 ### LUB
 The LUB algorithm proceeds in three steps:
@ -345,47 +400,28 @@ The LUB algorithm proceeds in three steps:
   inference variables.
 2. Compute the LUB "as normal", meaning compute the GLB of each
   pair of argument types and the LUB of the return types and
-   so forth.  Combine those to a new function type F.
+   so forth.  Combine those to a new function type `F`.
-3. Map the regions appearing in `F` using the procedure described below.
+3. Replace each region `R` that appears in `F` as follows:
   - Let `V` be the set of variables created during the LUB
     computational steps 1 and 2, as described in the previous section.
   - If `R` is not in `V`, replace `R` with itself.
   - If `Tainted(R)` contains a region that is not in `V`,
     replace `R` with itself.
   - Otherwise, select the earliest variable in `Tainted(R)` that originates
     from the left-hand side and replace `R` with the bound region that
     this variable was a replacement for.
-For each region `R` that appears in `F`, we may need to replace it
+So, let's work through the simplest example: `fn(&A)` and `fn<a>(&a)`.
-with a bound region.  Let `V` be the set of fresh variables created as
+In this case, `&a` will be replaced with `$a` and the interim LUB type
-part of the LUB procedure (either in step 1 or step 2).  You may be
+`fn($b)` will be computed, where `$b=GLB(&A,$a)`.  Therefore, `V =
-wondering how variables can be created in step 2.  The answer is that
+{$a, $b}` and `Tainted($b) = { $b, $a, &A }`.  When we go to replace
-when we are asked to compute the LUB or GLB of two region variables,
+`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`,
-we do so by producing a new region variable that is related to those
+we leave `$b` as is.  When region inference happens, `$b` will be
-two variables.  i.e., The LUB of two variables `$x` and `$y` is a
+resolved to `&A`, as we wanted.
 fresh variable `$z` that is constrained such that `$x <= $z` and `$y
 <= $z`.
-To decide how to replace a region `R`, we must examine `Tainted(R)`.
+Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`.  In
-This function searches through the constraints which were generated
+this case, we'll end up with a (pre-replacement) LUB type of `fn(&g,
-when computing the bounds of all the argument and return types and
+&h)` and a graph that looks like:
 produces a list of all regions to which `R` is related, directly or
 indirectly.
 If `R` is not in `V` or `Tainted(R)` contains any region that is not
 in `V`, then `R` is not replaced (that is, `R` is mapped to itself).
 Otherwise, if `Tainted(R)` is a subset of `V`, then we select the
 earliest variable in `Tainted(R)` that originates from the left-hand
 side and replace `R` with a bound version of that variable.
 So, let's work through the simplest example: `fn(&A)` and `fn(&a)`.
 In this case, `&a` will be replaced with `$a` (the $ indicates an
 inference variable) which will be linked to the free region `&A`, and
 hence `V = { $a }` and `Tainted($a) = { &A }`.  Since `$a` is not a
 member of `V`, we leave `$a` as is.  When region inference happens,
 `$a` will be resolved to `&A`, as we wanted.
 So, let's work through the simplest example: `fn(&A)` and `fn(&a)`.
 In this case, `&a` will be replaced with `$a` (the $ indicates an
 inference variable) which will be linked to the free region `&A`, and
 hence `V = { $a }` and `Tainted($a) = { $a, &A }`.  Since `&A` is not a
 member of `V`, we leave `$a` as is.  When region inference happens,
 `$a` will be resolved to `&A`, as we wanted.
 Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`.
 In this case, we'll end up with a graph that looks like:
 ```
     $a        $b     *--$x
@ -399,8 +435,8 @@ the LUB/GLB of things requiring inference.  This means that `V` and
 `Tainted` will look like:
 ```
-V = {$a, $b, $x}
+V = {$a, $b, $g, $h, $x}
-Tainted($g) = Tainted($h) = { $a, $b, $h, $x }
+Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x }
 ```
 Therefore we replace both `$g` and `$h` with `$a`, and end up
@ -413,40 +449,90 @@ in computing the replacements for the various variables. For each
 region `R` that appears in the type `F`, we again compute `Tainted(R)`
 and examine the results:
-1. If `Tainted(R) = {R}` is a singleton set, replace `R` with itself.
+1. If `R` is not in `V`, it is not replaced.
 2. Else, if `Tainted(R)` contains only variables in `V`, and it
   contains exactly one variable from the LHS and one variable from
   the RHS, then `R` can be mapped to the bound version of the
   variable from the LHS.
-3. Else, `R` is mapped to a fresh bound variable.
+3. Else, if `Tainted(R)` contains no variable from the LHS and no
   variable from the RHS, then `R` can be mapped to itself.
 4. Else, `R` is mapped to a fresh bound variable.
 These rules are pretty complex.  Let's look at some examples to see
 how they play out.
-Out first example was `fn(&a)` and `fn(&X)`---in
+Out first example was `fn(&a)` and `fn(&X)`.  In this case, `&a` will
-this case, the LUB will be a variable `$g`, and `Tainted($g) =
+be replaced with `$a` and we will ultimately compute a
-{$g,$a,$x}`.  By these rules, we'll replace `$g` with a fresh bound
+(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`.
-variable, so the result is `fn(&z)`, which is fine.
+Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}.  To find the
 replacement for `$g` we consult the rules above:
 - Rule (1) does not apply because `$g \in V`
 - Rule (2) does not apply because `&X \in Tainted($g)`
 - Rule (3) does not apply because `$a \in Tainted($g)`
 - Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`.
 So our final result is `fn(&z)`, which is correct.
-The next example is `fn(&A)` and `fn(&Z)`. XXX
+The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again
 have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`.
 Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`.  In this case,
 by rule (3), `$g` is mapped to itself, and hence the result is
 `fn($g)`.  This result is correct (in this case, at least), but it is
 indicative of a case that *can* lead us into concluding that there is
 no GLB when in fact a GLB does exist.  See the section "Questionable
 Results" below for more details.
-The next example is `fn(&a, &b)` and `fn(&x, &x)`. In this case, as
+The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as
-before, we'll end up with `F=fn(&g, &h)` where `Tainted($g) =
+before, we'll end up with `F=fn($g, $h)` where `Tainted($g) =
-Tainted($h) = {$g, $a, $b, $x}`.  This means that we'll select fresh
+Tainted($h) = {$g, $h, $a, $b, $c}`.  Only rule (4) applies and hence
-bound varibales `g` and `h` and wind up with `fn(&g, &h)`.
+we'll select fresh bound variables `y` and `z` and wind up with
 `fn(&y, &z)`.
 For the last example, let's consider what may seem trivial, but is
-not: `fn(&a, &a)` and `fn(&x, &x)`.  In this case, we'll get `F=fn(&g,
+not: `fn(&a, &a)` and `fn(&b, &b)`.  In this case, we'll get `F=fn($g,
-&h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a,
+$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a,
 $x}`.  Both of these sets contain exactly one bound variable from each
-side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`.
+side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which
-Horray!
+is the desired result.
-### Why are these correct?
+### Shortcomings and correctness
-You may be wondering whether this algorithm is correct.  So am I.  But
+You may be wondering whether this algorithm is correct.  The answer is
-I believe it is.  (Justification forthcoming, haven't had time to
+"sort of".  There are definitely cases where they fail to compute a
-write it)
+result even though a correct result exists.  I believe, though, that
 if they succeed, then the result is valid, and I will attempt to
 convince you.  The basic argument is that the "pre-replacement" step
 computes a set of constraints.  The replacements, then, attempt to
 satisfy those constraints, using bound identifiers where needed.
 For now I will briefly go over the cases for LUB/GLB and identify
 their intent:
 - LUB:
  - The region variables that are substituted in place of bound regions
    are intended to collect constraints on those bound regions.
  - If Tainted(R) contains only values in V, then this region is unconstrained
    and can therefore be generalized, otherwise it cannot.
 - GLB:
  - The region variables that are substituted in place of bound regions
    are intended to collect constraints on those bound regions.
  - If Tainted(R) contains exactly one variable from each side, and
    only variables in V, that indicates that those two bound regions
    must be equated.
  - Otherwise, if Tainted(R) references any variables from left or right
    side, then it is trying to combine a bound region with a free one or
    multiple bound regions, so we need to select fresh bound regions.
 Sorry this is more of a shorthand to myself.  I will try to write up something
 more convincing in the future.
 #### Where are the algorithms wrong?
 - The pre-replacement computation can fail even though using a
  bound-region would have succeeded.
 - We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the
  GLB of $a and $b.  But if inference finds that $a and $b must be mapped
  to regions without a GLB, then this is effectively a failure to compute
  the GLB.  However, the result `fn<$c>(fn($c))` is a valid GLB.
 */
@ -457,7 +543,7 @@ use middle::region::is_subregion_of;
 use middle::region;
 use middle::ty;
 use middle::ty::{Region, RegionVid, re_static, re_infer, re_free, re_bound};
-use middle::ty::{re_scope, ReVar, ReSkolemized};
+use middle::ty::{re_scope, ReVar, ReSkolemized, br_fresh};
 use middle::typeck::infer::to_str::ToStr;
 use syntax::codemap;
 use util::ppaux::note_and_explain_region;
@ -553,6 +639,7 @@ struct RegionVarBindings {
    lubs: CombineMap,
    glbs: CombineMap,
    mut skolemization_count: uint,
    mut bound_count: uint,
    // The undo log records actions that might later be undone.
    //
@ -579,6 +666,7 @@ fn RegionVarBindings(tcx: ty::ctxt) -> RegionVarBindings {
        lubs: CombineMap(),
        glbs: CombineMap(),
        skolemization_count: 0,
        bound_count: 0,
        undo_log: ~[]
    }
 }
@ -655,6 +743,26 @@ impl RegionVarBindings {
        re_infer(ReSkolemized(sc, br))
    }
    fn new_bound(&self) -> Region {
        // Creates a fresh bound variable for use in GLB computations.
        // See discussion of GLB computation in the large comment at
        // the top of this file for more details.
        //
        // This computation is mildly wrong in the face of rollover.
        // It's conceivable, if unlikely, that one might wind up with
        // accidental capture for nested functions in that case, if
        // the outer function had bound regions created a very long
        // time before and the inner function somehow wound up rolling
        // over such that supposedly fresh identifiers were in fact
        // shadowed.  We should convert our bound_region
        // representation to use deBruijn indices or something like
        // that to eliminate that possibility.
        let sc = self.bound_count;
        self.bound_count += 1;
        re_bound(br_fresh(sc))
    }
    fn add_constraint(&self, +constraint: Constraint, span: span) {
        // cannot add constraints once regions are resolved
        assert self.values.is_empty();
@ -687,6 +795,16 @@ impl RegionVarBindings {
            self.add_constraint(ConstrainVarSubReg(sub_id, r), span);
            Ok(())
          }
          (re_bound(br), _) => {
            self.tcx.sess.span_bug(
                span,
                fmt!("Cannot relate bound region as subregion: %?", br));
          }
          (_, re_bound(br)) => {
            self.tcx.sess.span_bug(
                span,
                fmt!("Cannot relate bound region as superregion: %?", br));
          }
          _ => {
            if self.is_subregion_of(sub, sup) {
                Ok(())
--- a/src/librustc/middle/typeck/infer/sub.rs
+++ b/src/librustc/middle/typeck/infer/sub.rs
@ -24,6 +24,7 @@ impl Sub: combine {
    fn infcx() -> infer_ctxt { self.infcx }
    fn tag() -> ~str { ~"sub" }
    fn a_is_expected() -> bool { self.a_is_expected }
    fn span() -> span { self.span }
    fn sub() -> Sub { Sub(*self) }
    fn lub() -> Lub { Lub(*self) }
--- a/src/librustc/middle/typeck/infer/test.rs
+++ b/src/librustc/middle/typeck/infer/test.rs
@ -16,7 +16,7 @@ Note: This module is only compiled when doing unit testing.
 */
-
+use dvec::DVec;
 use std::getopts;
 use std::map::HashMap;
 use std::getopts;
@ -36,7 +36,8 @@ use middle::ty::{FnTyBase, FnMeta, FnSig};
 struct Env {
    crate: @ast::crate,
    tcx: ty::ctxt,
-    infcx: infer::infer_ctxt
+    infcx: infer::infer_ctxt,
    err_messages: @DVec<~str>
 }
 struct RH {
@ -44,10 +45,14 @@ struct RH {
    sub: &[RH]
 }
 const EMPTY_SOURCE_STR: &str = "/* Hello, world! */";
 fn setup_env(test_name: &str, source_string: &str) -> Env {
-    let matches = &getopts(~[~"-Z", ~"verbose"], optgroups()).get();
+    let messages = @DVec();
-    let sessopts = build_session_options(~"rustc", matches, diagnostic::emit);
+    let matches = getopts(~[~"-Z", ~"verbose"], optgroups()).get();
-    let sess = build_session(sessopts, diagnostic::emit);
+    let diag = diagnostic::collect(messages);
    let sessopts = build_session_options(~"rustc", &matches, diag);
    let sess = build_session(sessopts, diag);
    let cfg = build_configuration(sess, ~"whatever", str_input(~""));
    let dm = HashMap();
    let amap = HashMap();
@ -66,7 +71,10 @@ fn setup_env(test_name: &str, source_string: &str) -> Env {
    let infcx = infer::new_infer_ctxt(tcx);
-    return Env { crate: crate, tcx: tcx, infcx: infcx };
+    return Env {crate: crate,
                tcx: tcx,
                infcx: infcx,
                err_messages: messages};
 }
 impl Env {
@ -210,6 +218,22 @@ impl Env {
    fn lub() -> Lub { Lub(self.infcx.combine_fields(true, dummy_sp())) }
    fn glb() -> Glb { Glb(self.infcx.combine_fields(true, dummy_sp())) }
    fn resolve_regions(exp_count: uint) {
        debug!("resolve_regions(%u)", exp_count);
        self.infcx.resolve_regions();
        if self.err_messages.len() != exp_count {
            for self.err_messages.each |msg| {
                debug!("Error encountered: %s", *msg);
            }
            fmt!("Resolving regions encountered %u errors but expected %u!",
                 self.err_messages.len(),
                 exp_count);
        }
    }
    /// Checks that `LUB(t1,t2) == t_lub`
    fn check_lub(&self, t1: ty::t, t2: ty::t, t_lub: ty::t) {
        match self.lub().tys(t1, t2) {
@ -222,6 +246,30 @@ impl Env {
                // sanity check for good measure:
                self.assert_subtype(t1, t);
                self.assert_subtype(t2, t);
                self.resolve_regions(0);
            }
        }
    }
    /// Checks that `GLB(t1,t2) == t_glb`
    fn check_glb(&self, t1: ty::t, t2: ty::t, t_glb: ty::t) {
        debug!("check_glb(t1=%s, t2=%s, t_glb=%s)",
               self.ty_to_str(t1),
               self.ty_to_str(t2),
               self.ty_to_str(t_glb));
        match self.glb().tys(t1, t2) {
            Err(e) => {
                fail fmt!("Unexpected error computing LUB: %?", e)
            }
            Ok(t) => {
                self.assert_eq(t, t_glb);
                // sanity check for good measure:
                self.assert_subtype(t, t1);
                self.assert_subtype(t, t2);
                self.resolve_regions(0);
            }
        }
    }
@ -236,11 +284,22 @@ impl Env {
            }
        }
    }
    /// Checks that `GLB(t1,t2)` is undefined
    fn check_no_glb(&self, t1: ty::t, t2: ty::t) {
        match self.glb().tys(t1, t2) {
            Err(_) => {}
            Ok(t) => {
                fail fmt!("Unexpected success computing GLB: %?",
                          self.ty_to_str(t))
            }
        }
    }
 }
 #[test]
 fn contravariant_region_ptr() {
-    let env = setup_env("contravariant_region_ptr", "");
+    let env = setup_env("contravariant_region_ptr", EMPTY_SOURCE_STR);
    env.create_simple_region_hierarchy();
    let t_rptr1 = env.t_rptr_scope(1);
    let t_rptr10 = env.t_rptr_scope(10);
@ -251,7 +310,7 @@ fn contravariant_region_ptr() {
 #[test]
 fn lub_bound_bound() {
-    let env = setup_env("lub_bound_bound", "");
+    let env = setup_env("lub_bound_bound", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_bound2 = env.t_rptr_bound(2);
    env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()),
@ -261,7 +320,7 @@ fn lub_bound_bound() {
 #[test]
 fn lub_bound_free() {
-    let env = setup_env("lub_bound_free", "");
+    let env = setup_env("lub_bound_free", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_free1 = env.t_rptr_free(0, 1);
    env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()),
@ -271,7 +330,7 @@ fn lub_bound_free() {
 #[test]
 fn lub_bound_static() {
-    let env = setup_env("lub_bound_static", "");
+    let env = setup_env("lub_bound_static", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_static = env.t_rptr_static();
    env.check_lub(env.t_fn([t_rptr_bound1], env.t_int()),
@ -281,7 +340,7 @@ fn lub_bound_static() {
 #[test]
 fn lub_bound_bound_inverse_order() {
-    let env = setup_env("lub_bound_bound_inverse_order", "");
+    let env = setup_env("lub_bound_bound_inverse_order", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_bound2 = env.t_rptr_bound(2);
    env.check_lub(env.t_fn([t_rptr_bound1, t_rptr_bound2], t_rptr_bound1),
@ -291,7 +350,7 @@ fn lub_bound_bound_inverse_order() {
 #[test]
 fn lub_free_free() {
-    let env = setup_env("lub_free_free", "");
+    let env = setup_env("lub_free_free", EMPTY_SOURCE_STR);
    let t_rptr_free1 = env.t_rptr_free(0, 1);
    let t_rptr_free2 = env.t_rptr_free(0, 2);
    let t_rptr_static = env.t_rptr_static();
@ -302,10 +361,50 @@ fn lub_free_free() {
 #[test]
 fn lub_returning_scope() {
-    let env = setup_env("lub_returning_scope", "");
+    let env = setup_env("lub_returning_scope", EMPTY_SOURCE_STR);
    let t_rptr_scope10 = env.t_rptr_scope(10);
    let t_rptr_scope11 = env.t_rptr_scope(11);
    env.check_no_lub(env.t_fn([], t_rptr_scope10),
                     env.t_fn([], t_rptr_scope11));
 }
 #[test]
 fn glb_free_free_with_common_scope() {
    let env = setup_env("glb_free_free", EMPTY_SOURCE_STR);
    let t_rptr_free1 = env.t_rptr_free(0, 1);
    let t_rptr_free2 = env.t_rptr_free(0, 2);
    let t_rptr_scope = env.t_rptr_scope(0);
    env.check_glb(env.t_fn([t_rptr_free1], env.t_int()),
                  env.t_fn([t_rptr_free2], env.t_int()),
                  env.t_fn([t_rptr_scope], env.t_int()));
 }
 #[test]
 fn glb_bound_bound() {
    let env = setup_env("glb_bound_bound", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_bound2 = env.t_rptr_bound(2);
    env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()),
                  env.t_fn([t_rptr_bound2], env.t_int()),
                  env.t_fn([t_rptr_bound1], env.t_int()));
 }
 #[test]
 fn glb_bound_free() {
    let env = setup_env("glb_bound_free", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_free1 = env.t_rptr_free(0, 1);
    env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()),
                  env.t_fn([t_rptr_free1], env.t_int()),
                  env.t_fn([t_rptr_bound1], env.t_int()));
 }
 #[test]
 fn glb_bound_static() {
    let env = setup_env("glb_bound_static", EMPTY_SOURCE_STR);
    let t_rptr_bound1 = env.t_rptr_bound(1);
    let t_rptr_static = env.t_rptr_static();
    env.check_glb(env.t_fn([t_rptr_bound1], env.t_int()),
                  env.t_fn([t_rptr_static], env.t_int()),
                  env.t_fn([t_rptr_bound1], env.t_int()));
 }
--- a/src/librustc/util/ppaux.rs
+++ b/src/librustc/util/ppaux.rs
@ -13,7 +13,8 @@ use middle::ty;
 use middle::ty::{arg, canon_mode};
 use middle::ty::{bound_copy, bound_const, bound_durable, bound_owned,
        bound_trait};
-use middle::ty::{bound_region, br_anon, br_named, br_self, br_cap_avoid};
+use middle::ty::{bound_region, br_anon, br_named, br_self, br_cap_avoid,
                 br_fresh};
 use middle::ty::{ctxt, field, method};
 use middle::ty::{mt, t, param_bound};
 use middle::ty::{re_bound, re_free, re_scope, re_infer, re_static, Region};
@ -94,6 +95,7 @@ fn explain_region_and_span(cx: ctxt, region: ty::Region)
        let prefix = match br {
          br_anon(idx) => fmt!("the anonymous lifetime #%u defined on",
                               idx + 1),
          br_fresh(_) => fmt!("an anonymous lifetime defined on"),
          _ => fmt!("the lifetime %s as defined on",
                    bound_region_to_str(cx, br))
        };
@ -140,6 +142,7 @@ fn bound_region_to_str_adorned(cx: ctxt, prefix: &str,
      br_named(id)         => fmt!("%s%s%s", prefix, cx.sess.str_of(id), sep),
      br_self              => fmt!("%sself%s", prefix, sep),
      br_anon(_)           => prefix.to_str(),
      br_fresh(_)          => prefix.to_str(),
      br_cap_avoid(_, br)  => bound_region_to_str_adorned(cx, prefix,
                                                          *br, sep)
    }
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@ -315,6 +315,10 @@ pub impl CodeMap {
    }
    pub fn span_to_str(&self, sp: span) -> ~str {
        if self.files.len() == 0 && sp == ast_util::dummy_sp() {
            return ~"no-location";
        }
        let lo = self.lookup_char_pos_adj(sp.lo);
        let hi = self.lookup_char_pos_adj(sp.hi);
        return fmt!("%s:%u:%u: %u:%u", lo.filename,
--- a/src/libsyntax/diagnostic.rs
+++ b/src/libsyntax/diagnostic.rs
@ -17,9 +17,11 @@ use core::io;
 use core::option;
 use core::str;
 use core::vec;
 use core::dvec::DVec;
 use std::term;
-export emitter, emit;
+export emitter, collect, emit;
 export level, fatal, error, warning, note;
 export span_handler, handler, mk_span_handler, mk_handler;
 export codemap_span_handler, codemap_handler;
@ -206,6 +208,12 @@ fn print_diagnostic(topic: ~str, lvl: level, msg: &str) {
    io::stderr().write_str(fmt!(" %s\n", msg));
 }
 fn collect(messages: @DVec<~str>)
    -> fn@(Option<(@codemap::CodeMap, span)>, &str, level)
 {
    |_o, msg: &str, _l| { messages.push(msg.to_str()); }
 }
 fn emit(cmsp: Option<(@codemap::CodeMap, span)>, msg: &str, lvl: level) {
    match cmsp {
      Some((cm, sp)) => {