Switch map-reduce control protocol to use pipes. This exposed a bug in the pipe compiler, which is now fixed.
Use hashmaps in MapReduce Tweak word-count difficulty
This commit is contained in:
parent
d5b8bbb4b2
commit
1a276dba52
2 changed files with 101 additions and 65 deletions
|
@ -56,34 +56,11 @@ impl methods for message {
|
||||||
|
|
||||||
// Return the type parameters actually used by this message
|
// Return the type parameters actually used by this message
|
||||||
fn get_params() -> ~[ast::ty_param] {
|
fn get_params() -> ~[ast::ty_param] {
|
||||||
let mut used = ~[];
|
|
||||||
alt self {
|
alt self {
|
||||||
message(_, tys, this, _, next_tys) {
|
message(_, _, this, _, _) {
|
||||||
let parms = this.ty_params;
|
this.ty_params
|
||||||
for vec::append(tys, next_tys).each |ty| {
|
|
||||||
alt ty.node {
|
|
||||||
ast::ty_path(path, _) {
|
|
||||||
if path.idents.len() == 1 {
|
|
||||||
let id = path.idents[0];
|
|
||||||
|
|
||||||
let found = parms.find(|p| id == p.ident);
|
|
||||||
|
|
||||||
alt found {
|
|
||||||
some(p) {
|
|
||||||
if !used.contains(p) {
|
|
||||||
vec::push(used, p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
none { }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ { }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
used
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_send(cx: ext_ctxt) -> @ast::item {
|
fn gen_send(cx: ext_ctxt) -> @ast::item {
|
||||||
|
|
|
@ -14,7 +14,8 @@ import option = option;
|
||||||
import option::some;
|
import option::some;
|
||||||
import option::none;
|
import option::none;
|
||||||
import str;
|
import str;
|
||||||
import std::treemap;
|
import std::map;
|
||||||
|
import std::map::hashmap;
|
||||||
import vec;
|
import vec;
|
||||||
import io;
|
import io;
|
||||||
import io::{reader_util, writer_util};
|
import io::{reader_util, writer_util};
|
||||||
|
@ -30,10 +31,30 @@ import comm::recv;
|
||||||
import comm::send;
|
import comm::send;
|
||||||
import comm::methods;
|
import comm::methods;
|
||||||
|
|
||||||
|
macro_rules! move {
|
||||||
|
{ $x:expr } => { unsafe { let y <- *ptr::addr_of($x); y } }
|
||||||
|
}
|
||||||
|
|
||||||
trait word_reader {
|
trait word_reader {
|
||||||
fn read_word() -> option<str>;
|
fn read_word() -> option<str>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
trait hash_key {
|
||||||
|
fn hash() -> uint;
|
||||||
|
fn eq(self) -> bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mk_hash<K: const hash_key, V: copy>() -> map::hashmap<K, V> {
|
||||||
|
fn hashfn<K: const hash_key>(k: K) -> uint { k.hash() }
|
||||||
|
|
||||||
|
map::hashmap(hashfn::<K>, |x, y| x.eq(y))
|
||||||
|
}
|
||||||
|
|
||||||
|
impl of hash_key for str {
|
||||||
|
fn hash() -> uint { str::hash(self) }
|
||||||
|
fn eq(&&x: str) -> bool { str::eq(self, x) }
|
||||||
|
}
|
||||||
|
|
||||||
// These used to be in task, but they disappeard.
|
// These used to be in task, but they disappeard.
|
||||||
type joinable_task = port<()>;
|
type joinable_task = port<()>;
|
||||||
fn spawn_joinable(+f: fn~()) -> joinable_task {
|
fn spawn_joinable(+f: fn~()) -> joinable_task {
|
||||||
|
@ -79,6 +100,23 @@ fn reduce(&&word: str, get: map_reduce::getter<int>) {
|
||||||
io::println(#fmt("%s\t%?", word, count));
|
io::println(#fmt("%s\t%?", word, count));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class box<T> {
|
||||||
|
let mut contents: option<T>;
|
||||||
|
new(+x: T) { self.contents = some(x); }
|
||||||
|
|
||||||
|
fn swap(f: fn(+T) -> T) {
|
||||||
|
let mut tmp = none;
|
||||||
|
self.contents <-> tmp;
|
||||||
|
self.contents = some(f(option::unwrap(tmp)));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unwrap() -> T {
|
||||||
|
let mut tmp = none;
|
||||||
|
self.contents <-> tmp;
|
||||||
|
option::unwrap(tmp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mod map_reduce {
|
mod map_reduce {
|
||||||
export putter;
|
export putter;
|
||||||
export getter;
|
export getter;
|
||||||
|
@ -99,54 +137,74 @@ mod map_reduce {
|
||||||
mapper_done
|
mapper_done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proto! ctrl_proto {
|
||||||
|
open: send<K: copy send, V: copy send> {
|
||||||
|
find_reducer(K) -> reducer_response<K, V>,
|
||||||
|
mapper_done -> terminated
|
||||||
|
}
|
||||||
|
|
||||||
|
reducer_response: recv<K: copy send, V: copy send> {
|
||||||
|
reducer(chan<reduce_proto<V>>) -> open<K, V>
|
||||||
|
}
|
||||||
|
|
||||||
|
terminated: send { }
|
||||||
|
}
|
||||||
|
|
||||||
enum reduce_proto<V: copy send> { emit_val(V), done, ref, release }
|
enum reduce_proto<V: copy send> { emit_val(V), done, ref, release }
|
||||||
|
|
||||||
fn start_mappers<K1: copy send, K2: copy send, V: copy send>(
|
fn start_mappers<K1: copy send, K2: const copy send hash_key,
|
||||||
|
V: copy send>(
|
||||||
map: mapper<K1, K2, V>,
|
map: mapper<K1, K2, V>,
|
||||||
ctrl: chan<ctrl_proto<K2, V>>, inputs: ~[K1])
|
&ctrls: ~[ctrl_proto::server::open<K2, V>],
|
||||||
|
inputs: ~[K1])
|
||||||
-> ~[joinable_task]
|
-> ~[joinable_task]
|
||||||
{
|
{
|
||||||
let mut tasks = ~[];
|
let mut tasks = ~[];
|
||||||
for inputs.each |i| {
|
for inputs.each |i| {
|
||||||
|
let (ctrl, ctrl_server) = ctrl_proto::init();
|
||||||
|
let ctrl = box(ctrl);
|
||||||
vec::push(tasks, spawn_joinable(|| map_task(map, ctrl, i) ));
|
vec::push(tasks, spawn_joinable(|| map_task(map, ctrl, i) ));
|
||||||
|
vec::push(ctrls, ctrl_server);
|
||||||
}
|
}
|
||||||
ret tasks;
|
ret tasks;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn map_task<K1: copy send, K2: copy send, V: copy send>(
|
fn map_task<K1: copy send, K2: const copy send hash_key, V: copy send>(
|
||||||
map: mapper<K1, K2, V>,
|
map: mapper<K1, K2, V>,
|
||||||
ctrl: chan<ctrl_proto<K2, V>>,
|
ctrl: box<ctrl_proto::client::open<K2, V>>,
|
||||||
input: K1)
|
input: K1)
|
||||||
{
|
{
|
||||||
// log(error, "map_task " + input);
|
// log(error, "map_task " + input);
|
||||||
let intermediates = treemap::treemap();
|
let intermediates = mk_hash();
|
||||||
|
|
||||||
fn emit<K2: copy send, V: copy send>(
|
do map(input) |key, val| {
|
||||||
im: treemap::treemap<K2, chan<reduce_proto<V>>>,
|
let mut c = none;
|
||||||
ctrl: chan<ctrl_proto<K2, V>>, key: K2, val: V)
|
alt intermediates.find(key) {
|
||||||
{
|
some(_c) { c = some(_c); }
|
||||||
let c;
|
|
||||||
alt treemap::find(im, key) {
|
|
||||||
some(_c) { c = _c; }
|
|
||||||
none {
|
none {
|
||||||
let p = port();
|
do ctrl.swap |ctrl| {
|
||||||
send(ctrl, find_reducer(key, chan(p)));
|
let ctrl = ctrl_proto::client::find_reducer(ctrl, key);
|
||||||
c = recv(p);
|
alt pipes::recv(ctrl) {
|
||||||
treemap::insert(im, key, c);
|
ctrl_proto::reducer(c_, ctrl) {
|
||||||
send(c, ref);
|
c = some(c_);
|
||||||
|
move!{ctrl}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
intermediates.insert(key, c.get());
|
||||||
|
send(c.get(), ref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
send(c, emit_val(val));
|
send(c.get(), emit_val(val));
|
||||||
}
|
}
|
||||||
|
|
||||||
map(input, {|a,b|emit(intermediates, ctrl, a, b)});
|
|
||||||
|
|
||||||
fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
|
fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
|
||||||
{
|
{
|
||||||
send(v, release);
|
send(v, release);
|
||||||
}
|
}
|
||||||
treemap::traverse(intermediates, finish);
|
for intermediates.each_value |v| { send(v, release) }
|
||||||
send(ctrl, mapper_done);
|
ctrl_proto::client::mapper_done(ctrl.unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reduce_task<K: copy send, V: copy send>(
|
fn reduce_task<K: copy send, V: copy send>(
|
||||||
|
@ -184,30 +242,32 @@ mod map_reduce {
|
||||||
reduce(key, || get(p, ref_count, is_done) );
|
reduce(key, || get(p, ref_count, is_done) );
|
||||||
}
|
}
|
||||||
|
|
||||||
fn map_reduce<K1: copy send, K2: copy send, V: copy send>(
|
fn map_reduce<K1: copy send, K2: const copy send hash_key, V: copy send>(
|
||||||
map: mapper<K1, K2, V>,
|
map: mapper<K1, K2, V>,
|
||||||
reduce: reducer<K2, V>,
|
reduce: reducer<K2, V>,
|
||||||
inputs: ~[K1])
|
inputs: ~[K1])
|
||||||
{
|
{
|
||||||
let ctrl = port();
|
let mut ctrl = ~[];
|
||||||
|
|
||||||
// This task becomes the master control task. It task::_spawns
|
// This task becomes the master control task. It task::_spawns
|
||||||
// to do the rest.
|
// to do the rest.
|
||||||
|
|
||||||
let reducers = treemap::treemap();
|
let reducers = mk_hash();
|
||||||
let mut tasks = start_mappers(map, chan(ctrl), inputs);
|
let mut tasks = start_mappers(map, ctrl, inputs);
|
||||||
let mut num_mappers = vec::len(inputs) as int;
|
let mut num_mappers = vec::len(inputs) as int;
|
||||||
|
|
||||||
while num_mappers > 0 {
|
while num_mappers > 0 {
|
||||||
alt recv(ctrl) {
|
let (_ready, message, ctrls) = pipes::select(ctrl);
|
||||||
mapper_done {
|
alt option::unwrap(message) {
|
||||||
|
ctrl_proto::mapper_done(_) {
|
||||||
// #error("received mapper terminated.");
|
// #error("received mapper terminated.");
|
||||||
num_mappers -= 1;
|
num_mappers -= 1;
|
||||||
|
ctrl = ctrls;
|
||||||
}
|
}
|
||||||
find_reducer(k, cc) {
|
ctrl_proto::find_reducer(k, cc) {
|
||||||
let c;
|
let c;
|
||||||
// log(error, "finding reducer for " + k);
|
// log(error, "finding reducer for " + k);
|
||||||
alt treemap::find(reducers, k) {
|
alt reducers.find(k) {
|
||||||
some(_c) {
|
some(_c) {
|
||||||
// log(error,
|
// log(error,
|
||||||
// "reusing existing reducer for " + k);
|
// "reusing existing reducer for " + k);
|
||||||
|
@ -221,19 +281,17 @@ mod map_reduce {
|
||||||
vec::push(tasks,
|
vec::push(tasks,
|
||||||
spawn_joinable(|| reduce_task(r, kk, ch) ));
|
spawn_joinable(|| reduce_task(r, kk, ch) ));
|
||||||
c = recv(p);
|
c = recv(p);
|
||||||
treemap::insert(reducers, k, c);
|
reducers.insert(k, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
send(cc, c);
|
ctrl = vec::append_one(
|
||||||
|
ctrls,
|
||||||
|
ctrl_proto::server::reducer(move!{cc}, c));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
|
for reducers.each_value |v| { send(v, done) }
|
||||||
{
|
|
||||||
send(v, done);
|
|
||||||
}
|
|
||||||
treemap::traverse(reducers, finish);
|
|
||||||
|
|
||||||
for tasks.each |t| { join(t); }
|
for tasks.each |t| { join(t); }
|
||||||
}
|
}
|
||||||
|
@ -254,7 +312,7 @@ fn main(argv: ~[str]) {
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
let num_readers = 50;
|
let num_readers = 50;
|
||||||
let words_per_reader = 1000;
|
let words_per_reader = 600;
|
||||||
vec::from_fn(
|
vec::from_fn(
|
||||||
num_readers,
|
num_readers,
|
||||||
|_i| fn~() -> word_reader {
|
|_i| fn~() -> word_reader {
|
||||||
|
@ -301,7 +359,8 @@ class random_word_reader: word_reader {
|
||||||
fn read_word() -> option<str> {
|
fn read_word() -> option<str> {
|
||||||
if self.remaining > 0 {
|
if self.remaining > 0 {
|
||||||
self.remaining -= 1;
|
self.remaining -= 1;
|
||||||
some(self.rng.gen_str(5))
|
let len = self.rng.gen_uint_range(1, 4);
|
||||||
|
some(self.rng.gen_str(len))
|
||||||
}
|
}
|
||||||
else { none }
|
else { none }
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue