1 files changed, 256 insertions, 157 deletions
diff --git a/Math_Ext.jai b/Math_Ext.jai
index e2bba35..bf1e8d6 100644
--- a/Math_Ext.jai
+++ b/Math_Ext.jai
@@ -2,182 +2,281 @@
 #import "Compiler";
 #import "Math";
 
-#run test_math_ext();
+// #run test_math_ext();
 
-test_math_ext :: () {
+ // TODO Compare implementations using #dump.
 
-    set_build_options_dc(.{do_output=false});
+// test_math_ext :: () { set_build_options_dc(.{do_output=false});
+main :: () {
 
     write_strings("=====================\n", "--- Test Math_Ext ---\n");
 
-    Test_Inputs :: struct(ia: $T, ib: T, ir: T, is: bool) {
-        // t: Type;
-        a := ia;
-        b := ib;
-        r := ir;
-        s := is;
-    };
-
-/*
-    tests := Test_Inputs.[
-        // .{1, 2, 3, false},
-    ];
-
-    for * tests {
-        result, saturated := add(cast(it.t)it.a, cast(it.t)it.b);
-        assert(result == it.r && saturated == it.s, "Failed: % %\n", result, saturated);
+    // Mixed signedness: only works if the signed operand has the larger type.
+    /*
+    #run cena();
+    cena :: () {
+        a: s64 = -232;
+        b: u32 = 4;
+        c := a+b;
+        print("\n\n--- --- ---\ntttt : % : % + % = %\n--- --- ---\n\n", type_of(c), a, b, c);
     }
-*/
+    */
+
+    #import "Random";
+    add_test :: (x: $Tx, y: $Ty, r: $Tr, t: Type, o: bool) { // Runs add(x, y) and prints one line per mismatch against the expected value r, result type t and overflow flag o.
+        got_value, got_overflow := add(cast(Tx)x, cast(Ty)y);
+        print("add(%): % + % = % : %\n", t, x, y, r, o); // Echoes the expectation (r, o), not the computed result.
+        if r != got_value           print(" > incorrect result value: got % expected %\n", got_value, r);
+        if t != type_of(got_value)  print(" > incorrect result type: got % expected %\n", type_of(got_value), t);
+        if o != got_overflow        print(" > incorrect overflow flag: got % expected %\n", got_overflow, o);
+    }
+
+    // add_test(cast(u8)1,       cast(u8)2,   3, u8, false);
+    // add_test(cast(u8)255,       cast(u8)1,   255, u8, true);
+
+    add_test(cast(s8)S8_MAX,       cast(s8)1,   S8_MAX, s8, true);
+    // add_test(cast(s32)66,       cast(s64)-2,   64, s64, false);
+    // add_test(cast(u32)66,       cast(s64)4,    70, s64, false);
+    // add_test(cast(s32)S32_MAX,  cast(s64)1,    2147483648, s64, false);
+    // add_test(cast(s32)S32_MAX,  cast(s32)1,    S32_MAX, s32, true);
+    // add_test(cast(s64)S64_MAX,  cast(s64)0,    S64_MAX, s64, false);
+    // add_test(cast(s64)9223372036854775806,  cast(s64)1,    S64_MAX, s64, false);
+    // add_test(cast(s64)9223372036854775806,  cast(s64)2,    S64_MAX, s64, true);
+
+    // add_test(cast(u8)7, cast(u8)1, 8, u8, false);
+    // add_test(cast(u8)U8_MAX, cast(u8)1, U8_MAX, u8, true);
+
+    // add_test(cast(u16)10, cast(u8)3, 13, u16, false);
+    // add_test(cast(u8)1, cast(u16)U16_MAX, U16_MAX, u16, true);
+
+    return;
     
 
-    t1a := S64_MAX;
-    t1b := 0;
-    t1v, t1r := add(t1a, t1b);
-    assert(t1v == S64_MAX && t1r == false, "Failed: % %\n", t1v, t1r);
-    write_string("t1: OK\n");
+    best_generic: float; // Highest elements-per-microsecond figure seen for the old_add pass.
+    best_asm: float; // Highest elements-per-microsecond figure seen for the add pass.
+    for 0..100 { // Repeats the measurement and keeps only the best run of each implementation. NOTE: this sits after the bare `return;` above, so it does not execute at present.
+        size, time_generic, time_asm := performance_test();
+        perf_generic    := cast(float)size/cast(float)to_microseconds(time_generic); // Elements summed per microsecond.
+        perf_asm        := cast(float)size/cast(float)to_microseconds(time_asm); // NOTE(review): a measured time of 0 microseconds would divide by zero — confirm this cannot happen.
+        best_generic    = max(best_generic, perf_generic);
+        best_asm        = max(best_asm, perf_asm);
+    }
 
-    // t2a := S64_MAX;
-    // t2b := 1;
-    // t2v, t2r := add(t2a, t2b);
-    // assert(t2v == S64_MAX && t2r == true, "Failed: % %\n", t2v, t2r);
-}
+    print("generic : %\nasm     : %\n", best_generic, best_asm);
+
+    performance_test :: () -> sum_size: s64, time_generic: Apollo_Time, time_asm: Apollo_Time {
+        // Sums SUM_SIZE random values once with old_add and once with add; returns the element count and the elapsed time of each pass.
+        SUM_SIZE := 2000000;
+        numbers: [..] s64;
+        array_reserve(*numbers, SUM_SIZE);
+        defer array_free(numbers); // Release the buffer on return: the benchmark loop calls this procedure repeatedly, so without this every call leaks the whole array.
+        for 0..SUM_SIZE-1 {
+            array_add(*numbers, cast(s64)random_get()); // NOTE(review): if random_get() returns u64, this checked cast can fail for values above S64_MAX — confirm.
+        }
 
+        sum := 0;
+        start := current_time_monotonic();
+        for numbers sum = old_add(sum, it); // Only the first return value (the saturated sum) is kept.
+        time := current_time_monotonic() - start;
 
-add_int64 :: (x :s64, y: s64) -> s64 #dump { // TODO Comparing implementations.
-    return
-        ifx (y > 0 && x > S64_MAX - y) then S64_MAX else
-        ifx (y < 0 && x < S64_MIN - y) then S64_MIN else
-        x + y;
-}
+        sum_asm := 0;
+        start_asm := current_time_monotonic();
+        for numbers sum_asm = add(sum_asm, it);
+        time_asm := current_time_monotonic() - start_asm;
+
+        assert(sum == sum_asm); // Both implementations must produce the same total.
+
+        return SUM_SIZE, time, time_asm;
+    }
 
-sub_int64 :: (x :s64, y :s64) -> s64 {
-    return
-        ifx (y < 0 && x > S64_MAX + y) then S64_MAX else
-        ifx (y > 0 && x < S64_MIN + y) then S64_MIN else
-        x - y;
 }
 
-add :: (x: s64, y: s64) -> result: s64, saturated: bool #dump { // TODO Comparing implementaitons using dump
-    #if CPU == .X64 {
-        saturated := false;
-        ...test this case...
-        if (y > 0 && x > S64_MAX - y) then return S64_MAX, true;
-        if (y < 0 && x < S64_MIN - y) then return S64_MIN, true;
-                return x + y, false;
-        // if (y > 0 && x > S64_MAX - y) then return S64_MAX, true;
-        // if (y < 0 && x < S64_MIN - y) then return S64_MIN, true;
-        // return x + y, false;
-    } else {
-        result: s64 = ---;
-        flag: bool = ---;
-        #asm {
-            mov     d: gpr === d, 9223372036854775807;
-            mov     a: gpr === a, x;
-            mov     b: gpr === b, y;
-            add     a, b;
-            seto    flag;                  // Flag overflow.
-            mov     result, a;
-            mov     a, x;
-            shr     a, 63;
-            add     a, d;
-            mov     c: gpr, x;
-            xor     c, b;
-            xor     b, result;
-            not     b;
-            or      c, b;
-            test    c, c;
-            cmovns  result, a;
-        }
-        return result, flag;
+old_add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump -- Saturating integer add written with plain comparisons; returns the clamped sum and whether clamping happened.
+    #modify { // Resolves Tx, Ty and Tr to one common integer type, or rejects the call with a message.
+        type_info_x := cast(*Type_Info)Tx;
+        type_info_y := cast(*Type_Info)Ty;
+        if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed.";
+        tx := cast(*Type_Info_Integer)type_info_x;
+        ty := cast(*Type_Info_Integer)type_info_y;
+        // Pick the wider operand type; on equal width take Tx when signedness matches, otherwise void (no common type).
+        largest_type :=
+            ifx tx.runtime_size > ty.runtime_size then Tx else
+            ifx ty.runtime_size > tx.runtime_size then Ty else
+            ifx tx.signed == ty.signed then Tx else
+            void;
+        // Three accepted cases follow; anything else is reported as a signedness mismatch.
+        // Operands of different signedness are only accepted when the larger type is the signed one (as in Jai).
+        if tx.signed == ty.signed {
+            Tx = largest_type;
+            Ty = largest_type;
+            Tr = largest_type;
+        }
+        else if tx.signed && Tx == largest_type { // x is signed and wider: widen y to Tx.
+            Ty = largest_type;
+            Tr = largest_type;
+        }
+        else if ty.signed && Ty == largest_type { // y is signed and wider: widen x to Ty.
+            Tx = largest_type;
+            Tr = largest_type;
+        }
+        else return false, "Number signedness mismatch.";
+        // Debug trace of the resolved operand types; emitted each time this #modify block runs.
+        print("old>tx:ty:%:%\n", Tx, Ty);
+        return true;
     }
+{ // Exactly one of the #if lines below matches Tr and declares MAX, MIN and BITS for it.
+    #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; }
+    #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; }
+    #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; }
+    #if Tr ==  u8 { MAX ::  U8_MAX; MIN :: 0; BITS ::  8; }
+    // For the signed types BITS is one less than the type width. BITS is not used in this procedure.
+    #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; }
+    #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; }
+    #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; }
+    #if Tr ==  s8 { MAX ::  S8_MAX; MIN ::  S8_MIN; BITS ::  7; }
+    // The range checks are rearranged (MAX - y, MIN - y) so the comparison itself never computes x + y.
+    if (y > 0 && x > MAX - y) then return MAX, true;
+    if (y < 0 && x < MIN - y) then return MIN, true;
+    return x + y, false;
 }
 
-/*
-// value_a: s64 = 2;
-// value_b: s64 = S64_MAX-1;
-value_a: s64 = -2;
-value_b: s64 = S64_MIN+1;
-print(">%\n", S64_MAX);
-argx :=  get_command_line_arguments();
-if argx.count > 1
-    value_a = parse_int(*argx[1]);
-if argx.count > 2
-    value_b = parse_int(*argx[2]);
+add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump -- Saturating integer add; returns the clamped sum and whether clamping happened.
+    #modify { // Resolves Tx, Ty and Tr to one common integer type, or rejects the call with a message.
+        type_info_x := cast(*Type_Info)Tx;
+        type_info_y := cast(*Type_Info)Ty;
+        if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed.";
+        tx := cast(*Type_Info_Integer)type_info_x;
+        ty := cast(*Type_Info_Integer)type_info_y;
 
-result: s64 = ---;
-flags: s64;// = ---;
-#asm LAHF_SAHF { // TODO Remove LAHF_SAHF it not required.
+        largest_type := // The wider operand type; on equal width Tx when signedness matches, otherwise void (no common type).
+            ifx tx.runtime_size > ty.runtime_size then Tx else
+            ifx ty.runtime_size > tx.runtime_size then Ty else
+            ifx tx.signed == ty.signed then Tx else
+            void;
 
-    // Code from https://locklessinc.com/articles/sat_arithmetic/
+        // Operands of different signedness are only accepted when the larger type is the signed one (as in Jai).
+        if tx.signed == ty.signed {
+            Tx = largest_type;
+            Ty = largest_type;
+            Tr = largest_type;
+        }
+        else if tx.signed && Tx == largest_type { // x is signed and wider: widen y to Tx.
+            Ty = largest_type;
+            Tr = largest_type;
+        }
+        else if ty.signed && Ty == largest_type { // y is signed and wider: widen x to Ty.
+            Tx = largest_type;
+            Tr = largest_type;
+        }
+        else return false, "Number signedness mismatch.";
+        // Debug trace of the resolved operand types; emitted each time this #modify block runs.
+        print(">tx:ty:%:%\n", Tx, Ty);
+        return true;
+    }
+{
+    // Exactly one of the #if lines below matches Tr and declares MAX, MIN and BITS for it.
+    #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; }
+    #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; }
+    #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; }
+    #if Tr ==  u8 { MAX ::  U8_MAX; MIN :: 0; BITS ::  8; }
 
+    #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; } // For signed types BITS is one less than the type width.
+    #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; }
+    #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; }
+    #if Tr ==  s8 { MAX ::  S8_MAX; MIN ::  S8_MIN; BITS ::  7; }
     
-    // value_a === b;
-    // value_b === c;
-    // add     value_a, value_b;
-    // mov     result, value_a;
-
-    // Version 1
-    // mov     b: gpr === b, value_a;
-    // mov     c: gpr === c, value_b;
-    // add     b, c;
-    // mov     result, b;
-    // seto    flags;
-    // cmovns  b, c;
-
-//        s64b sat_adds64b(s64b x, s64b y)
-//        {
-//        	u64b ux = x;
-//        	u64b uy = y;
-//        	u64b res = ux + uy;
-//        	
-//        	ux = (ux >> 63) + LONG_MAX;
-//        	
-//        	/* Force compiler to use cmovns instruction */
-//        	if ((s64b) ((ux ^ uy) | ~(uy ^ res)) >= 0)
-//        	{
-//        		res = ux;
-//        	}
-//        		
-//        	return res;
-//        }
-    // Version 2 - WORKS
-    mov     d: gpr === d, 9223372036854775807;
-    mov     a: gpr === a, value_a;
-    mov     b: gpr === b, value_b;
-    add     a, b;
-    seto    flags;                  // Flag overflow.
-    mov     result, a;
-    mov     a, value_a;
-    shr     a, 63;
-    add     a, d;
-    mov     c: gpr, value_a;
-    xor     c, b;
-    xor     b, result;
-    not     b;
-    or      c, b;
-    test    c, c;
-    cmovns  result, a;
-
-//        s64b sat_subs64b(s64b x, s64b y)
-//        {
-//        	u64b ux = x;
-//        	u64b uy = y;
-//        	u64b res = ux - uy;
-//        	
-//        	ux = (ux >> 63) + LONG_MAX;
-//        	
-//        	// Force compiler to use cmovns instruction
-//        	if ((s64b)((ux ^ uy) & (ux ^ res)) < 0)
-//        	{
-//        		res = ux;
-//        	}
-//        		
-//        	return res;
-//        }
-    // TODO Use https://godbolt.org/ to help
-	
+    #if CPU != .X64 { // Portable path: clamp to MAX / MIN when x + y would leave the range of Tr.
+        if (y > 0 && x > MAX - y) then return MAX, true;
+        if (y < 0 && x < MIN - y) then return MIN, true;
+        return x + y, false;
+    } else { // X64 path: one #asm block per result type; types without one use the portable check at the end of the chain.
+        result: Tr = ---;
+        overflow: bool = ---;
+        #if Tr == s8 {
+        #asm { // s8
+            x === a;
+            y === b;
+            mov     d: gpr === d, MAX;
+            mov.b     c: gpr === c, x; // TODO Not using .b was erroring. NOTE(review): only the low byte of c is written — confirm the unsized shr below cannot see stale upper bits.
+            shr     c, BITS; // Intended to leave the sign bit of x in c.
+            add     c, d; // c = MAX + sign bit: the value to saturate to for the sign of x.
+            add.b   x, y; // add.b for 8bits
+            cmovo   x, c; // Overflow flag set: replace the wrapped sum with the saturation value.
+            seto    overflow;
+            mov     result, x;
+        }
+        }
+        else #if Tr == s32 {
+        #asm { // s32
+            x === a;
+            y === b;
+            mov     d: gpr === d, MAX;
+            mov.d     c: gpr === c, x;
+            shr     c, BITS; // Intended to leave the sign bit of x in c.
+            add     c, d; // c = MAX + sign bit: the value to saturate to for the sign of x.
+            add.d   x, y; // add.d for 32bits
+            cmovo   x, c; // Overflow flag set: replace the wrapped sum with the saturation value.
+            seto    overflow;
+            mov     result, x;
+        }
+        }
+        else #if Tr == s64 {
+        #asm { // s64
+            x === a;
+            y === b;
+            mov     d: gpr === d, MAX;
+            mov     c: gpr === c, x;
+            shr     c, BITS; // Leaves the sign bit of x in c.
+            add     c, d; // c = MAX + sign bit: the value to saturate to for the sign of x.
+            add     x, y; // No suffix here; the sized forms are add.b (8bits), add.w (16bits), add.d (32bits), add.q (64bits).
+            cmovo   x, c; // Overflow flag set: replace the wrapped sum with the saturation value.
+            seto    overflow;
+            mov     result, x;
+        }
+        }
+        else #if Tr == u8 {
+        #asm { // u8
+            x === a;
+            y === b;
+            d: gpr;
+            add.b   x, y; // add.b for 8bits
+            setc    overflow; // Carry out of the unsigned add means the sum did not fit.
+            sbb     d, d; // d - d - carry: all ones when the add carried, zero otherwise.
+            or      d, x; // All ones (the unsigned maximum) on carry, otherwise the sum.
+            mov     result, d;
+        }
+        }
+        else #if Tr == u16 {
+        #asm { // u16
+            x === a;
+            y === b;
+            d: gpr;
+            add.w   x, y; // add.w for 16bits
+            setc    overflow; // Carry out of the unsigned add means the sum did not fit.
+            sbb     d, d; // d - d - carry: all ones when the add carried, zero otherwise.
+            or      d, x; // All ones (the unsigned maximum) on carry, otherwise the sum.
+            mov     result, d;
+        }
+        }
+        else { // s16, u32 and u64 have no #asm block yet: use the portable check instead of returning the uninitialized result / overflow.
+            if (y > 0 && x > MAX - y) then return MAX, true;
+            if (y < 0 && x < MIN - y) then return MIN, true;
+            return x + y, false;
+        }
+        return result, overflow;
+    }
+}
+
+// Saturating s64 subtraction: returns x - y clamped to [S64_MIN, S64_MAX]; overflow is true when the result was clamped.
+sub :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool //#dump
+    #modify {
+        // The limits used in the body are s64-only, so bind all three polymorphic types to s64 (Tr was otherwise never bound).
+        Tx = s64; Ty = s64; Tr = s64;
+        return true;
+    }
+{
+    // One portable path for every CPU; an X64 #asm fast path is still TODO.
+    if (y < 0 && x > S64_MAX + y) then return S64_MAX, true;
+    if (y > 0 && x < S64_MIN + y) then return S64_MIN, true;
+    return x - y, false;
 }
-print("% + % = %\n", value_a, value_b, result);
-print("flag: %\n", flags);
-return;
-*/
-\ No newline at end of file