1
Fork 0

Switch map-reduce control protocol to use pipes. This exposed a bug in the pipe compiler, which is now fixed.

Use hashmaps in MapReduce

Tweak word-count difficulty
This commit is contained in:
Eric Holk 2012-07-11 14:50:39 -07:00
parent d5b8bbb4b2
commit 1a276dba52
2 changed files with 101 additions and 65 deletions

View file

@ -56,34 +56,11 @@ impl methods for message {
// Return the type parameters actually used by this message // Return the type parameters actually used by this message
fn get_params() -> ~[ast::ty_param] { fn get_params() -> ~[ast::ty_param] {
let mut used = ~[];
alt self { alt self {
message(_, tys, this, _, next_tys) { message(_, _, this, _, _) {
let parms = this.ty_params; this.ty_params
for vec::append(tys, next_tys).each |ty| {
alt ty.node {
ast::ty_path(path, _) {
if path.idents.len() == 1 {
let id = path.idents[0];
let found = parms.find(|p| id == p.ident);
alt found {
some(p) {
if !used.contains(p) {
vec::push(used, p);
}
}
none { }
}
}
}
_ { }
}
}
} }
} }
used
} }
fn gen_send(cx: ext_ctxt) -> @ast::item { fn gen_send(cx: ext_ctxt) -> @ast::item {

View file

@ -14,7 +14,8 @@ import option = option;
import option::some; import option::some;
import option::none; import option::none;
import str; import str;
import std::treemap; import std::map;
import std::map::hashmap;
import vec; import vec;
import io; import io;
import io::{reader_util, writer_util}; import io::{reader_util, writer_util};
@ -30,10 +31,30 @@ import comm::recv;
import comm::send; import comm::send;
import comm::methods; import comm::methods;
macro_rules! move {
{ $x:expr } => { unsafe { let y <- *ptr::addr_of($x); y } }
}
trait word_reader { trait word_reader {
fn read_word() -> option<str>; fn read_word() -> option<str>;
} }
trait hash_key {
fn hash() -> uint;
fn eq(self) -> bool;
}
fn mk_hash<K: const hash_key, V: copy>() -> map::hashmap<K, V> {
fn hashfn<K: const hash_key>(k: K) -> uint { k.hash() }
map::hashmap(hashfn::<K>, |x, y| x.eq(y))
}
impl of hash_key for str {
fn hash() -> uint { str::hash(self) }
fn eq(&&x: str) -> bool { str::eq(self, x) }
}
// These used to be in task, but they disappeard. // These used to be in task, but they disappeard.
type joinable_task = port<()>; type joinable_task = port<()>;
fn spawn_joinable(+f: fn~()) -> joinable_task { fn spawn_joinable(+f: fn~()) -> joinable_task {
@ -79,6 +100,23 @@ fn reduce(&&word: str, get: map_reduce::getter<int>) {
io::println(#fmt("%s\t%?", word, count)); io::println(#fmt("%s\t%?", word, count));
} }
class box<T> {
let mut contents: option<T>;
new(+x: T) { self.contents = some(x); }
fn swap(f: fn(+T) -> T) {
let mut tmp = none;
self.contents <-> tmp;
self.contents = some(f(option::unwrap(tmp)));
}
fn unwrap() -> T {
let mut tmp = none;
self.contents <-> tmp;
option::unwrap(tmp)
}
}
mod map_reduce { mod map_reduce {
export putter; export putter;
export getter; export getter;
@ -99,54 +137,74 @@ mod map_reduce {
mapper_done mapper_done
} }
proto! ctrl_proto {
open: send<K: copy send, V: copy send> {
find_reducer(K) -> reducer_response<K, V>,
mapper_done -> terminated
}
reducer_response: recv<K: copy send, V: copy send> {
reducer(chan<reduce_proto<V>>) -> open<K, V>
}
terminated: send { }
}
enum reduce_proto<V: copy send> { emit_val(V), done, ref, release } enum reduce_proto<V: copy send> { emit_val(V), done, ref, release }
fn start_mappers<K1: copy send, K2: copy send, V: copy send>( fn start_mappers<K1: copy send, K2: const copy send hash_key,
V: copy send>(
map: mapper<K1, K2, V>, map: mapper<K1, K2, V>,
ctrl: chan<ctrl_proto<K2, V>>, inputs: ~[K1]) &ctrls: ~[ctrl_proto::server::open<K2, V>],
inputs: ~[K1])
-> ~[joinable_task] -> ~[joinable_task]
{ {
let mut tasks = ~[]; let mut tasks = ~[];
for inputs.each |i| { for inputs.each |i| {
let (ctrl, ctrl_server) = ctrl_proto::init();
let ctrl = box(ctrl);
vec::push(tasks, spawn_joinable(|| map_task(map, ctrl, i) )); vec::push(tasks, spawn_joinable(|| map_task(map, ctrl, i) ));
vec::push(ctrls, ctrl_server);
} }
ret tasks; ret tasks;
} }
fn map_task<K1: copy send, K2: copy send, V: copy send>( fn map_task<K1: copy send, K2: const copy send hash_key, V: copy send>(
map: mapper<K1, K2, V>, map: mapper<K1, K2, V>,
ctrl: chan<ctrl_proto<K2, V>>, ctrl: box<ctrl_proto::client::open<K2, V>>,
input: K1) input: K1)
{ {
// log(error, "map_task " + input); // log(error, "map_task " + input);
let intermediates = treemap::treemap(); let intermediates = mk_hash();
fn emit<K2: copy send, V: copy send>( do map(input) |key, val| {
im: treemap::treemap<K2, chan<reduce_proto<V>>>, let mut c = none;
ctrl: chan<ctrl_proto<K2, V>>, key: K2, val: V) alt intermediates.find(key) {
{ some(_c) { c = some(_c); }
let c;
alt treemap::find(im, key) {
some(_c) { c = _c; }
none { none {
let p = port(); do ctrl.swap |ctrl| {
send(ctrl, find_reducer(key, chan(p))); let ctrl = ctrl_proto::client::find_reducer(ctrl, key);
c = recv(p); alt pipes::recv(ctrl) {
treemap::insert(im, key, c); ctrl_proto::reducer(c_, ctrl) {
send(c, ref); c = some(c_);
move!{ctrl}
}
}
}
intermediates.insert(key, c.get());
send(c.get(), ref);
} }
} }
send(c, emit_val(val)); send(c.get(), emit_val(val));
} }
map(input, {|a,b|emit(intermediates, ctrl, a, b)});
fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>) fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
{ {
send(v, release); send(v, release);
} }
treemap::traverse(intermediates, finish); for intermediates.each_value |v| { send(v, release) }
send(ctrl, mapper_done); ctrl_proto::client::mapper_done(ctrl.unwrap());
} }
fn reduce_task<K: copy send, V: copy send>( fn reduce_task<K: copy send, V: copy send>(
@ -184,30 +242,32 @@ mod map_reduce {
reduce(key, || get(p, ref_count, is_done) ); reduce(key, || get(p, ref_count, is_done) );
} }
fn map_reduce<K1: copy send, K2: copy send, V: copy send>( fn map_reduce<K1: copy send, K2: const copy send hash_key, V: copy send>(
map: mapper<K1, K2, V>, map: mapper<K1, K2, V>,
reduce: reducer<K2, V>, reduce: reducer<K2, V>,
inputs: ~[K1]) inputs: ~[K1])
{ {
let ctrl = port(); let mut ctrl = ~[];
// This task becomes the master control task. It task::_spawns // This task becomes the master control task. It task::_spawns
// to do the rest. // to do the rest.
let reducers = treemap::treemap(); let reducers = mk_hash();
let mut tasks = start_mappers(map, chan(ctrl), inputs); let mut tasks = start_mappers(map, ctrl, inputs);
let mut num_mappers = vec::len(inputs) as int; let mut num_mappers = vec::len(inputs) as int;
while num_mappers > 0 { while num_mappers > 0 {
alt recv(ctrl) { let (_ready, message, ctrls) = pipes::select(ctrl);
mapper_done { alt option::unwrap(message) {
ctrl_proto::mapper_done(_) {
// #error("received mapper terminated."); // #error("received mapper terminated.");
num_mappers -= 1; num_mappers -= 1;
ctrl = ctrls;
} }
find_reducer(k, cc) { ctrl_proto::find_reducer(k, cc) {
let c; let c;
// log(error, "finding reducer for " + k); // log(error, "finding reducer for " + k);
alt treemap::find(reducers, k) { alt reducers.find(k) {
some(_c) { some(_c) {
// log(error, // log(error,
// "reusing existing reducer for " + k); // "reusing existing reducer for " + k);
@ -221,19 +281,17 @@ mod map_reduce {
vec::push(tasks, vec::push(tasks,
spawn_joinable(|| reduce_task(r, kk, ch) )); spawn_joinable(|| reduce_task(r, kk, ch) ));
c = recv(p); c = recv(p);
treemap::insert(reducers, k, c); reducers.insert(k, c);
} }
} }
send(cc, c); ctrl = vec::append_one(
ctrls,
ctrl_proto::server::reducer(move!{cc}, c));
} }
} }
} }
fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>) for reducers.each_value |v| { send(v, done) }
{
send(v, done);
}
treemap::traverse(reducers, finish);
for tasks.each |t| { join(t); } for tasks.each |t| { join(t); }
} }
@ -254,7 +312,7 @@ fn main(argv: ~[str]) {
} }
else { else {
let num_readers = 50; let num_readers = 50;
let words_per_reader = 1000; let words_per_reader = 600;
vec::from_fn( vec::from_fn(
num_readers, num_readers,
|_i| fn~() -> word_reader { |_i| fn~() -> word_reader {
@ -301,7 +359,8 @@ class random_word_reader: word_reader {
fn read_word() -> option<str> { fn read_word() -> option<str> {
if self.remaining > 0 { if self.remaining > 0 {
self.remaining -= 1; self.remaining -= 1;
some(self.rng.gen_str(5)) let len = self.rng.gen_uint_range(1, 4);
some(self.rng.gen_str(len))
} }
else { none } else { none }
} }