all stats

vintium's stats

guessed the most

name | correct guesses | games together | ratio

were guessed the most by

name | correct guesses | games together | ratio

entries

round #15

5 likes

guesses
comments 0

post a comment


code-guessing-fifteen.zig ASCII text
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
const std = @import("std");

const stdout = std.io.getStdOut().writer();
const assert = std.debug.assert;

// positions start in the top left corner of the card,
// counting from zero. x increases rightwards, y increases downwards.
// x and y must be in the range [0, 4].
const Pos = struct {
    x: u3, // column index (0 = leftmost column)
    y: u3, // row index (0 = topmost row)
};

const Card = struct {
    // the 5x5 grid of numbers on the card, indexed as c[y][x] (row first, then column).
    c: [5][5]u7,

    // search the card for `needle`, scanning the rows top to bottom and every row left to right.
    // returns the position of the first match, or null when the number is not on the card.
    fn find(self: Card, needle: u7) ?Pos {
        var y: usize = 0;
        while (y < self.c.len) : (y += 1) {
            var x: usize = 0;
            while (x < self.c[y].len) : (x += 1) {
                if (self.c[y][x] != needle) continue;
                // both indices are below 5, so they always fit into a u3.
                return Pos{ .x = @intCast(u3, x), .y = @intCast(u3, y) };
            }
        }
        return null;
    }

    // card shared by the tests below. 70 sits at (4, 2); 69 does not appear anywhere.
    // zig fmt: off
    const sample_card = Card{
        .c = [5][5]u7{
            [5]u7{ 67, 64, 11, 28, 16 },
            [5]u7{ 63, 26, 20, 15, 10 },
            [5]u7{ 68, 44,  0, 53, 70 },
            [5]u7{ 22, 56, 38, 51,  9 },
            [5]u7{ 47, 33, 17, 39, 59 },
        },
    };
    // zig fmt: on

    test "find a number in a card" {
        const expected = Pos{ .x = 4, .y = 2 };
        try std.testing.expectEqual(sample_card.find(70), expected);
    }

    test "don't find a number in a card" {
        try std.testing.expectEqual(sample_card.find(69), null);
    }
};

const Marks = struct {
    // bit mask storing the marked squares. square (x, y) lives in bit `x + 5 * y`, so the
    // lowest bit is (0, 0) and the highest bit is (4, 4).
    msk: u25,
    // mapping diagram:
    // 0b_00000_00000_00000_00000_00000
    // y: 44444 33333 22222 11111 00000
    // x: 43210 43210 43210 43210 43210

    // set the bit that belongs to square (x, y). marking a square twice is harmless.
    fn mark(self: *Marks, x: u3, y: u3) void {
        assert(x <= 4 and y <= 4);
        // at most 4 + 4 * 5 = 24, which fits into the u5 shift amount of a u25.
        const bit: u5 = @as(u5, y) * 5 + x;
        self.msk |= @as(u25, 1) << bit;
    }

    // same as `mark`, but takes a `Pos`. `mark` keeps its (x, y) signature because the
    // tests call it that way.
    fn markPos(self: *Marks, p: Pos) void {
        self.mark(p.x, p.y);
    }

    // true when every bit of at least one of the given line masks is set in `msk`.
    fn coversAny(self: Marks, lines: []const u25) bool {
        for (lines) |line| {
            if ((self.msk & line) == line) return true;
        }
        return false;
    }

    // true when all five squares of at least one row are marked.
    fn row(self: Marks) bool {
        // zig fmt: off
        const full_rows = [_]u25{
            0b00000_00000_00000_00000_11111, // y = 0
            0b00000_00000_00000_11111_00000, // y = 1
            0b00000_00000_11111_00000_00000, // y = 2
            0b00000_11111_00000_00000_00000, // y = 3
            0b11111_00000_00000_00000_00000, // y = 4
        };
        // zig fmt: on
        return self.coversAny(&full_rows);
    }

    test "detect five-in-a-row" {
        var m = Marks{ .msk = 0 };
        // O O O O O
        // O O O O O
        // O O O O O
        // X X X X X
        // O O O O O
        m.mark(0, 3);
        m.mark(1, 3);
        m.mark(2, 3);
        m.mark(3, 3);
        m.mark(4, 3);
        try std.testing.expect(m.row());
        try std.testing.expect(m.wins());
    }

    test "noisy row" {
        var m = Marks{ .msk = 0 };
        // X X O O O
        // O O O O O
        // O O O O O
        // X X X X X
        // O X O O O
        m.mark(0, 3);
        m.mark(1, 0);
        m.mark(1, 3);
        m.mark(2, 3);
        m.mark(3, 3);
        m.mark(4, 3);
        m.mark(0, 0);
        m.mark(1, 0); // already marked above; marking again changes nothing
        m.mark(1, 4);
        try std.testing.expect(m.row());
        try std.testing.expect(m.wins());
    }

    // true when all five squares of at least one column are marked.
    fn col(self: Marks) bool {
        // zig fmt: off
        const full_cols = [_]u25{
            0b00001_00001_00001_00001_00001, // x = 0
            0b00010_00010_00010_00010_00010, // x = 1
            0b00100_00100_00100_00100_00100, // x = 2
            0b01000_01000_01000_01000_01000, // x = 3
            0b10000_10000_10000_10000_10000, // x = 4
        };
        // zig fmt: on
        return self.coversAny(&full_cols);
    }

    test "detect five-in-a-col" {
        var m = Marks{ .msk = 0 };
        // O O X O O
        // O O X O O
        // O O X O O
        // O O X O O
        // O O X O O
        m.mark(2, 0);
        m.mark(2, 1);
        m.mark(2, 2);
        m.mark(2, 3);
        m.mark(2, 4);
        try std.testing.expect(m.col());
        try std.testing.expect(m.wins());
    }

    test "noisy col" {
        var m = Marks{ .msk = 0 };
        // X O X O O
        // O O X O O
        // O O X O X
        // O O X O O
        // O O X O O
        m.mark(2, 0);
        m.mark(2, 1);
        m.mark(2, 2);
        m.mark(2, 3);
        m.mark(2, 4);
        m.mark(4, 2);
        m.mark(0, 0);
        try std.testing.expect(m.col());
        try std.testing.expect(m.wins());
    }

    // true when all five squares of either diagonal are marked.
    fn diag(self: Marks) bool {
        // zig fmt: off
        const full_diags = [_]u25{
            0b00001_00010_00100_01000_10000, // (4, 0) down to (0, 4)
            0b10000_01000_00100_00010_00001, // (0, 0) down to (4, 4)
        };
        // zig fmt: on
        return self.coversAny(&full_diags);
    }

    test "detect diagonal" {
        var m = Marks{ .msk = 0 };
        // X O O O O
        // O X O O O
        // O O X O O
        // O O O X O
        // O O O O X
        m.mark(0, 0);
        m.mark(1, 1);
        m.mark(2, 2);
        m.mark(3, 3);
        m.mark(4, 4);
        try std.testing.expect(m.diag());
        try std.testing.expect(m.wins());
    }

    test "noisy diag" {
        var m = Marks{ .msk = 0 };
        // X O O O X
        // O X O O O
        // O O X O O
        // O O X X O
        // O O X O X
        m.mark(0, 0);
        m.mark(1, 1);
        m.mark(2, 2);
        m.mark(2, 3);
        m.mark(3, 3);
        m.mark(4, 4);
        m.mark(2, 4);
        m.mark(4, 0);
        try std.testing.expect(m.diag());
        try std.testing.expect(m.wins());
    }

    // a card wins as soon as any row, column or diagonal is completely marked.
    fn wins(self: Marks) bool {
        if (self.row()) return true;
        if (self.col()) return true;
        return self.diag();
    }

    test "loses 1" {
        var m = Marks{ .msk = 0 };
        // X O O X O
        // O O O X O
        // O O X O O
        // O O X O O
        // O O X O X
        m.mark(0, 0);
        m.mark(3, 1);
        m.mark(2, 2);
        m.mark(2, 3);
        m.mark(3, 0);
        m.mark(4, 4);
        m.mark(2, 4);
        try std.testing.expect(!m.wins());
    }

    test "loses 2" {
        var m = Marks{ .msk = 0 };
        // X O O X O
        // O X O X O
        // X O X O X
        // O X O O O
        // O O X O X
        m.mark(0, 0);
        m.mark(3, 0);
        m.mark(1, 1);
        m.mark(3, 1);
        m.mark(0, 2);
        m.mark(2, 2);
        m.mark(4, 2);
        m.mark(1, 3);
        m.mark(4, 4);
        m.mark(2, 4);
        try std.testing.expect(!m.wins());
    }
};

const Player = struct {
    id: usize, // value handed back by `play` once this player has won
    card: Card, // the numbers this player holds
    marked: Marks, // which squares of `card` have been called so far

    // process one called number: mark it if it is on the card, then report the outcome.
    // returns this player's id when the card has a complete line, null otherwise.
    fn play(self: *Player, call: u7) ?usize {
        if (self.card.find(call)) |p| self.marked.markPos(p);
        return if (self.marked.wins()) self.id else null;
    }
};

test "a test game" {
    // zig fmt: off
    const card = Card{
        .c = [5][5]u7{
            [5]u7{ 67, 64, 11, 28, 16 },
            [5]u7{ 63, 26, 20, 15, 10 },
            [5]u7{ 68, 44,  0, 53, 70 },
            [5]u7{ 22, 56, 38, 51,  9 },
            [5]u7{ 47, 33, 17, 39, 59 },
        },
    };
    // zig fmt: on
    var me = Player{ .id = 0, .card = card, .marked = Marks{ .msk = 0 } };

    // none of these calls completes a line: 50 is not on the card, the others leave
    // column 2 one square short.
    const quiet_calls = [_]u7{ 0, 11, 38, 50, 10, 17 };
    for (quiet_calls) |call| {
        try std.testing.expectEqual(me.play(call), null);
    }

    // 20 sits at (2, 1) and fills the last gap in column 2, so the player reports its id.
    try std.testing.expectEqual(me.play(20), 0);
}

// returns the index of the card that wins the game, i.e. the card that completes a row,
// column or diagonal after the fewest calls. when several cards win on the same call, the
// one with the lowest index is returned.
//
// every card is replayed on its own, but only against the calls that come before the best
// winning call found so far. a later card can therefore only take over by winning strictly
// earlier. (replaying every card against the full call list and returning the first card
// that ever wins would pick card 0 even if another card had finished long before it.)
//
// the caller must guarantee that at least one card wins; otherwise `unreachable` is hit.
pub fn entry(cards: []const [5][5]u7, calls: []const u7) usize {
    var winner: ?usize = null;
    // number of leading calls that can still produce a better result.
    var horizon: usize = calls.len;
    for (cards) |crd, i| {
        var player = Player{
            .id = i,
            .card = Card{ .c = crd },
            .marked = Marks{ .msk = 0 },
        };
        for (calls[0..horizon]) |call, turn| {
            if (player.play(call) != null) {
                winner = i;
                horizon = turn;
                break;
            }
        }
    }
    return winner orelse unreachable;
}

test "example in prompt" {
    // zig fmt: off
    const calls = [_]u7{0, 72, 3, 8, 59, 66, 61, 58, 23, 14, 16, 42, 10, 17, 2, 48, 44, 26, 70, 21, 31, 19, 9};
    const cards = [_][5][5]u7{
        [5][5]u7{
            [5]u7{67, 64, 11, 28, 16},
            [5]u7{63, 26, 20, 15, 10},
            [5]u7{68, 44, 00, 53, 70},
            [5]u7{22, 56, 38, 51, 09},
            [5]u7{47, 33, 17, 39, 59},
        },
        [5][5]u7{
            [5]u7{51, 24, 53, 70, 62},
            [5]u7{54, 44, 57, 72, 35},
            [5]u7{32, 05, 00, 20, 38},
            [5]u7{36, 04, 73, 29, 69},
            [5]u7{63, 42, 07, 08, 58},
        },
    };
    // zig fmt: on

    // the same game is solved 1000 times so that the printed duration is measurable.
    const started = try std.time.Instant.now();
    var round: usize = 0;
    while (round < 1000) : (round += 1) {
        try std.testing.expectEqual(entry(&cards, &calls), 0);
    }
    const ended = try std.time.Instant.now();
    std.debug.print("took: {} ", .{ended.since(started)});
}

// this file is meant to be used as a library through `entry`. running it as an
// application only prints a notice to stderr (via std.debug.print) and exits.
pub fn main() anyerror!void {
    std.debug.print("It was as if an occult hand brought you to run this program as an application. It is a library, yet lack of zig expertise gently drew me towards creating a application project.", .{});
}

round #14

submitted at
8 likes

guesses
comments 0

post a comment


Cargo.toml ASCII text
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
[package]
name = "cg14-rust"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[build-dependencies]
cc = "1.0"

[dependencies]
rand = "0.8.5"
rayon = "1.5.1"
libc = "0.2.119"
lazy_static = "1.4.0"
IMPORTANT_NOTE ASCII text
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
In order for this to build properly, (I think) you must put the
included files into this directory structure:
```
./
 | - src
      | - lib.rs
      | - hello.c
      | - tests
         | - data.rs
 | - Cargo.toml
 | - build.rs
```

and then you can run it with `cargo test`
if you want to see performance information, I'd recommend using this command:

```
cargo test --release -- --show-output
```
or if you're on x86_64 and want to see how it works with the cpu features,

```
RUSTFLAGS='-C target_feature=+avx,+fma' cargo test --release -- --show-output
```
build.rs ASCII text
1
2
3
4
5
6
7
8
use cc;

// build script: hands src/hello.c to the `cc` crate so that it is compiled into a
// library called "hello" for the Rust code to link against.
fn main() {
    // only re-run this script when the C source changes
    println!("cargo:rerun-if-changed=src/hello.c");

    let mut build = cc::Build::new();
    build.file("src/hello.c");
    build.compile("hello");
}
data.rs ASCII text, with very long lines (65536), with no line terminators
hello.c ASCII text
1
2
3
4
5
6
7
8
9
#include <stddef.h>

/* Dot product of the first `len` elements of `a` and `b`.
 * The products are accumulated left to right, starting from 0.0.
 * Both arrays must hold at least `len` doubles; nothing is read when `len` is 0. */
double c_raw_dotprod(const double *a, const double *b, size_t len) {
    double total = 0.0;
    size_t remaining;
    for (remaining = len; remaining > 0; remaining--) {
        total += *a * *b;
        a++;
        b++;
    }
    return total;
}
lib.rs ASCII text
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
#![allow(dead_code)]
#![feature(portable_simd)]

// the pool `entry` draws from. `Ok` holds an exact dot product implementation,
// `Err(n)` stands for a solution that does not fit the plain two-slice signature
// (`Err(0)` is `aproximate_dotprod`, which needs an extra `skip` argument).
const SOLUTIONS: &[Result<fn(&[f64], &[f64]) -> f64, usize>] = &[
    Ok(idiomatic_dotprod),
    Ok(simple_dotprod),
    Ok(c_dotprod),
    Ok(par_dotprod),
    Ok(lame_simd_dotprod),
    Ok(simd_dotprod),
    Ok(fma_dotprod),
    Err(0),
];

// TODO: figure out how to have a lazy static threadpool so that i can use 
// simd_par_dotprod without making a zillion threads and being slow asf
use rand::seq::SliceRandom;
// computes the dot product of `a` and `b` with a randomly picked solution from
// `SOLUTIONS`. when the approximate solution is drawn, its `skip` is a random value
// between 0.0 and 2.55, so the result may differ from the exact dot product.
pub fn entry(a: &[f64], b: &[f64]) -> f64 {
    let mut rng = rand::thread_rng();
    let picked = SOLUTIONS.choose(&mut rng).unwrap();
    match *picked {
        Ok(solution) => solution(a, b),
        Err(0) => {
            let skip = rand::random::<u8>() as f64 / 100.0;
            aproximate_dotprod(a, b, skip)
        }
        Err(_) => unreachable!(),
    }
}

/* how rust is supposed to be. this is the way that ferris would be proud of. */
use std::iter::zip;
// pairs the elements up, multiplies each pair and adds the products from left to right,
// starting at 0.0. if the slices differ in length, the extra elements of the longer one
// are ignored.
fn idiomatic_dotprod(a: &[f64], b: &[f64]) -> f64 {
    let mut total = 0.0;
    for (x, y) in a.iter().zip(b.iter()) {
        total += x * y;
    }
    total
}

// simple and fast. basically the same as the c implementation.
// walks over the indices of `a` and accumulates a[i] * b[i]. panics (index out of
// bounds) when `b` is shorter than `a`; extra elements of `b` are ignored.
fn simple_dotprod(a: &[f64], b: &[f64]) -> f64 {
    let mut total = 0.0;
    let mut i = 0;
    while i < a.len() {
        total += a[i] * b[i];
        i += 1;
    }
    total
}


// simple and fast. basically the same as the simple rust implementation.
use libc::{size_t, c_double};
extern {
    fn c_raw_dotprod(a: *const c_double, b: *const c_double, len: size_t) -> c_double;
}

// safe wrapper around `c_raw_dotprod`. panics when the slices differ in length.
fn c_dotprod(a: &[f64], b: &[f64]) -> f64 {
    assert_eq!(a.len(), b.len());
    let len = a.len();
    let (a_ptr, b_ptr) = (a.as_ptr(), b.as_ptr());
    // SAFETY: both pointers come from live slices that hold exactly `len` elements
    // (checked above), which is all the C side reads.
    unsafe { c_raw_dotprod(a_ptr, b_ptr, len) }
}


// the zoomer way to solve this problem. use a concurrency dependency and hope that
// parallel iterators are the answer!!
use rayon::prelude::*;
fn par_dotprod(a: &[f64], b: &[f64]) -> f64 {
    let zipped: Vec<_> = zip(a, b).collect();
    zipped.par_iter().map(|(x, y)| **x * **y).sum()
}


// this was my first attempt to make a simd vectorized version. it's pretty bad and slow.
use std::ops::Add;
use std::simd::f64x4;
fn simd_sum(x: &[f64]) -> f64 {
   let (prefix, middle, suffix) = x.as_simd();
   let sums = f64x4::from_array([
        prefix.iter().copied().sum(),
        0.0,
        0.0,
        suffix.iter().copied().sum(),
    ]);
    let sums = middle.iter().copied().fold(sums, f64x4::add);
    sums.horizontal_sum()
}

fn simd_product(x: &[f64], y: &[f64], out: &mut [f64]) {
    assert_eq!(x.len(), y.len());
    assert_eq!(y.len(), out.len());
    let (p1, m1, s1) = x.as_simd::<4>();
    let (p2, m2, s2) = y.as_simd::<4>();
    let p1 = [p1, s1].concat();
    let p2 = [p2, s2].concat();
    let (p, m) = (
        zip(p1, p2).map(|(x, y)| x * y),
        zip(m1, m2).map(|(x, y)| x * y),
    );
    let mut idx: usize = 0;
    let _ = p.map(|q| { out[idx] = q; idx += 1; }).count();
    let _ = m.map(|q| { let _ = q.as_array().iter().copied().map(|elem| { out[idx] = elem; idx += 1; }).count(); }).count();
}

// two-pass dot product: first write every product into a scratch buffer with
// `simd_product`, then add the buffer up with `simd_sum`.
fn lame_simd_dotprod(a: &[f64], b: &[f64]) -> f64 {
    let mut products = vec![0.0; a.len()];
    simd_product(a, b, &mut products);
    simd_sum(&products)
}


// this is an actually good simd approach. somehow, it's faster than fma_dotprod.
use std::simd::Simd;
// single-pass simd dot product. while at least four elements are left, they are loaded
// into 4-lane vectors, multiplied lane-wise and their horizontal sum is added to the
// accumulator; the remaining (up to three) elements are handled one at a time.
// panics when `b` is shorter than `a`; extra elements of `b` are ignored.
fn simd_dotprod(a: &[f64], b: &[f64]) -> f64 {
    let mut idx: usize = 0;
    let mut acc: f64 = 0.0;
    while idx < a.len() {
        // `>=`, not `>`: a final group of exactly four elements still fits a vector
        if a.len() - idx >= 4 {
            let x = Simd::<f64, 4>::from_slice(&a[idx..]);
            let y = Simd::<f64, 4>::from_slice(&b[idx..]);
            acc += (x * y).horizontal_sum();
            idx += 4;
        } else {
            acc += a[idx] * b[idx];
            idx += 1;
        }
    }
    acc
}

/* fma_dotprod calculates the dot product using x86_64 specific 
   simd intrinsics. this gives it access to the _mm256d_fmadd_pd (wrf is this naming convention btw :vomit:)
   intrinsic, which can be used to reduce the multiplication of vectors and the
   adding into the accumulator into one instruction. this instruction is not 
   available in rust's portable_simd. */
// NOTE: in order for these features to be used, rustc must know to enable them statically! 
// compile with RUSTFLAGS="-C target_feature=+avx,+fma" to enable them.
// dot product using AVX/FMA intrinsics. only compiled on x86 / x86_64 when both the
// `avx` and `fma` target features are enabled at build time.
// panics (index out of bounds) when `b` is shorter than `a`.
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), target_feature = "avx", target_feature = "fma"))]
fn fma_dotprod(a: &[f64], b: &[f64]) -> f64 {
    // SAFETY: 
    // - transmuting a __m256d into a [f64; 4] should always work since
    // the two types have the same memory layout. 
    // - the proper precautions have been taken to call these intrinsics,
    // so calling them will be safe.
    unsafe {
        // the same four names are imported from whichever arch module matches the target
        #[cfg(target_arch = "x86_64")]
        use std::arch::x86_64::{
            __m256d as f64x4,
            _mm256_fmadd_pd as fma,
            _mm256_setzero_pd as f64x4_zero,
            _mm256_set_pd as f64x4_from_parts,
        };
        #[cfg(target_arch = "x86")]
        use std::arch::x86::{
            __m256d as f64x4,
            _mm256_fmadd_pd as fma,
            _mm256_setzero_pd as f64x4_zero,
            _mm256_set_pd as f64x4_from_parts,
        };
    
        use std::mem::transmute;
        let mut idx: usize = 0;
        // four independent partial sums, one per lane
        let mut acc: f64x4 = f64x4_zero();
        // scalar partial sum for the elements that do not go through a vector
        let mut rest: f64 = 0.0;
        while idx < a.len() {
            // NOTE(review): with `> 4`, a final group of exactly four elements takes the
            // scalar branch below; `>= 4` would vectorize it as well.
            if (a.len() - idx) > 4 {
                /* if we have enough elements left, create simd vectors of four doubles */
                // _mm256_set_pd takes its arguments highest lane first, so the elements
                // end up in reverse lane order. x and y are reversed the same way, so
                // the lane-wise products are unaffected.
                let x = f64x4_from_parts(
                    a[idx + 0],
                    a[idx + 1],
                    a[idx + 2],
                    a[idx + 3]);
                let y = f64x4_from_parts(
                    b[idx + 0],
                    b[idx + 1],
                    b[idx + 2],
                    b[idx + 3]);
                /* do a fused-multiply-add on them and the accumulator */
                acc = fma(x, y, acc);
                idx += 4;
            } else {
                /* if there are leftover elements, 
                   multiply them and add them to a scalar accumulator */
                rest += a[idx] * b[idx];
                idx += 1;
            }
        }
        /* reinterpret the simd vector as an array, sum it up, and add the 
        scalar accumulator to get the full total */
        transmute::<f64x4, [f64; 4]>(acc).iter().sum::<f64>() + rest
    }

}

// fallback to portable simd implementation if fma is not available.
// compiled instead of the intrinsics version whenever the target is not x86 / x86_64
// or the `avx` / `fma` target features are not enabled; simply forwards to `simd_dotprod`.
#[cfg(not(all(any(target_arch = "x86_64", target_arch = "x86"), target_feature = "avx", target_feature = "fma")))]
fn fma_dotprod(a: &[f64], b: &[f64]) -> f64 {
    simd_dotprod(a, b)
}


// this approach is similar to the rayon one, but a bit better.
// the overhead of managing work queues and work stealing does not make
// sense for tasks as small as a multiply and an add, so the rayon one is really slow.
// instead, what works better is to give each thread in a pool a chunk of the two arrays
// to take the dot product of, because this will actually take time. Interestingly, it's
// still not enough time to make this faster than singlethreaded simd on even 100,000 floats.
// probably it would be the fastest implementation here asymptotically though.
use std::sync::mpsc::{Sender, Receiver, channel};
use std::sync::Arc;
use std::thread;
// spawns `amt` worker threads and returns one (task sender, result receiver) pair per
// worker. a worker waits for a pair of slices, answers with their `simd_dotprod`, and
// repeats until its task sender is dropped. a worker panics if nobody is left to
// receive its result.
fn setup_threads(amt: usize) -> Vec<(Sender<(Arc<[f64]>, Arc<[f64]>)>, Receiver<f64>)> {
    (0..amt)
        .map(|_| {
            let (task_tx, task_rx) = channel::<(Arc<[f64]>, Arc<[f64]>)>();
            let (result_tx, result_rx) = channel::<f64>();
            thread::spawn(move || {
                while let Ok((xs, ys)) = task_rx.recv() {
                    result_tx.send(simd_dotprod(&xs, &ys)).unwrap();
                }
            });
            (task_tx, result_rx)
        })
        .collect()
}

// i decided to see if i could get any good performance out of doing a (definitely bad) type of approximation.
// what it does is it basically downscales the array and calculates that array's dot product instead.
// it's quite inaccurate, but it's kinda fun and definitely funny.
// samples the inputs every `skip + 1` positions (the fractional position is truncated
// to an index) and lets each sampled product stand in for the positions it skips over.
// each sample is weighted with the step width, except near the end of `a`, where the
// weight is cut down to the distance that is left. with `skip == 0.0` every element is
// visited with weight 1, which gives the exact dot product.
// panics when `skip` is negative, NaN, subnormal or infinite.
fn aproximate_dotprod(a: &[f64], b: &[f64], skip: f64) -> f64 {
    assert!(skip.is_sign_positive() && !skip.is_nan() && !skip.is_subnormal() && !skip.is_infinite());
    let step = skip + 1.0;
    let mut pos: f64 = 0.0;
    let mut total: f64 = 0.0;
    loop {
        let i = pos as usize;
        let (x, y) = match (a.get(i), b.get(i)) {
            (Some(x), Some(y)) => (x, y),
            // ran off the end of either slice
            _ => break,
        };
        let weight = if (step + pos) as usize > a.len() {
            a.len() as f64 - pos
        } else {
            step
        };
        total += (x * y) * weight;
        pos += step;
    }
    total
}

// dot product spread over the workers created by `setup_threads`: the inputs are cut
// into at most `threads.len()` chunks, each chunk is copied into an `Arc` and sent to
// its own worker, and the partial results are added up in worker order.
//
// panics when `threads` is empty, when there are not more elements than workers, or
// when a worker has hung up.
fn simd_par_dotprod(a: &[f64], b: &[f64], threads: &[(Sender<(Arc<[f64]>, Arc<[f64]>)>, Receiver<f64>)]) -> f64 {
    assert!(!threads.is_empty(), "simd_par_dotprod needs at least one worker thread");
    assert!(threads.len() < a.len());
    let chunk_size = (a.len() as f64 / threads.len() as f64).ceil() as usize;
    let chunks = a.chunks(chunk_size).zip(b.chunks(chunk_size));

    // rounding the chunk size up can leave fewer chunks than workers (5 elements on
    // 4 workers give chunks of 2, 2 and 1), so count how many workers actually got a task.
    let mut dispatched = 0;
    for (worker, (xs, ys)) in threads.iter().zip(chunks) {
        worker.0.send((Arc::from(xs), Arc::from(ys))).unwrap();
        dispatched += 1;
    }

    // only wait for workers that were given work; waiting on an idle one blocks forever.
    let mut acc: f64 = 0.0;
    for worker in &threads[..dispatched] {
        acc += worker.1.recv().unwrap();
    }
    acc
}

// TESTS TO MAKE SURE THIS SHIT WORKS!!! :::;)
#[cfg(test)]
mod tests {
    mod data;
    use super::*;
    use rand::Fill;

    use std::time::Instant;

    // inputs shared by all `small_*` tests; their exact dot product is 12.0.
    const SMALL_A: [f64; 3] = [0.5, 0.5, 2.0];
    const SMALL_B: [f64; 3] = [4.0, 4.0, 4.0];

    // checks that `solution` computes exactly 12.0 for the small inputs.
    fn check_small(solution: impl Fn(&[f64], &[f64]) -> f64) {
        assert_eq!(solution(&SMALL_A, &SMALL_B), 12.0);
    }

    // runs `solution` ten times on the big data set and prints duration and result of
    // every run. nothing is asserted: this is for performance numbers only.
    fn time_big(solution: impl Fn(&[f64], &[f64]) -> f64) {
        for _ in 0..10 {
            let start = Instant::now();
            let result = solution(data::BIG1.as_slice(), data::BIG2.as_slice());
            let end = start.elapsed();
            println!("(took {:?}) {}", end, result);
        }
    }

    #[test]
    fn small() {
        assert_eq!(entry(&SMALL_A, &SMALL_B), 12.0, "This test might fail! if aproximate_dotprod is the chosen solution, it could be off. rerun the test.");
    }

    #[test]
    fn small_idiomatic_dotprod() {
        check_small(idiomatic_dotprod);
    }

    #[test]
    fn small_simple_dotprod() {
        check_small(simple_dotprod);
    }

    #[test]
    fn small_c_dotprod() {
        check_small(c_dotprod);
    }

    #[test]
    fn small_par_dotprod() {
        check_small(par_dotprod);
    }

    #[test]
    fn small_lame_simd_dotprod() {
        check_small(lame_simd_dotprod);
    }

    #[test]
    fn small_simd_dotprod() {
        check_small(simd_dotprod);
    }

    #[test]
    fn small_fma_dotprod() {
        check_small(fma_dotprod);
    }

    #[test]
    fn small_aproximate_dotprod() {
        // with a skip of 0.0 every element is sampled, so the result is exact
        check_small(|a, b| aproximate_dotprod(a, b, 0.0));
    }

    #[test]
    fn small_simd_par_dotprod() {
        let ths = setup_threads(2);
        check_small(|a, b| simd_par_dotprod(a, b, &ths));
    }

    // this is not a test. i used it to generate the data for the tests.
    // the printed consts are `pub` so that the tests can reach them as `data::BIG1` and
    // `data::BIG2` once the output is saved as the `data` module.
    fn generate_big() {
        let mut rng = rand::thread_rng();
        let mut a: [f64; 100_000] = [0.0; 100_000];
        let mut b: [f64; 100_000] = [0.0; 100_000];
        assert!(a.try_fill(&mut rng).is_ok());
        assert!(b.try_fill(&mut rng).is_ok());
        println!("pub const BIG1: [f64; 100_000] = {:?};\n\npub const BIG2: [f64; 100_000] = {:?};", a, b);
    }

    /* the following tests do not test the soundness of the functions! they are for
       performance testing only! refer to the small_* tests to check soundness. */
    #[test]
    fn big() {
        time_big(entry);
    }

    #[test]
    fn big_idiomatic_dotprod() {
        time_big(idiomatic_dotprod);
    }

    #[test]
    fn big_simple_dotprod() {
        time_big(simple_dotprod);
    }

    #[test]
    fn big_c_dotprod() {
        time_big(c_dotprod);
    }

    #[test]
    fn big_par_dotprod() {
        time_big(par_dotprod);
    }

    #[test]
    fn big_lame_simd_dotprod() {
        time_big(lame_simd_dotprod);
    }

    #[test]
    fn big_simd_dotprod() {
        time_big(simd_dotprod);
    }

    #[test]
    fn big_fma_dotprod() {
        time_big(fma_dotprod);
    }

    #[test]
    fn big_aproximate_dotprod() {
        time_big(|a, b| aproximate_dotprod(a, b, 0.25));
    }

    #[test]
    fn big_simd_par_dotprod() {
        // the workers are created once, outside the timed runs
        let ths = setup_threads(4);
        time_big(|a, b| simd_par_dotprod(a, b, &ths));
    }
}