1 files changed, 29 insertions, 220 deletions
diff --git a/Math_Ext.jai b/Math_Ext.jai
index c4f5c47..9ec34d0 100644
--- a/Math_Ext.jai
+++ b/Math_Ext.jai
@@ -1,206 +1,10 @@
-// Integer saturating arighmetic (with some branch-free procedures on x64).
+// Integer saturating arithmetic (with branch-free procedures on x64).
+// Expects signed values in two's complement.
 
 #import "Basic";
 #import "Compiler";
 #import "Math";
 
-// TODO Comparing implementaitons using dump
-
-// #run test_math_ext();
-
-// test_math_ext :: () { set_build_options_dc(.{do_output=false});
-main :: () {
-
-    write_strings("=====================\n", "--- Test Math_Ext ---\n");
-
-    test_op :: (operation: string, x: $Tx, y: $Ty, result: $Tr, type: Type, saturated: bool, remainder: Tr = 0) -> errors_found: int #expand {
-
-        print_test_call :: (operation: string) -> string {
-            #import "String";
-            str: string = ---;
-            if operation != "div" {
-                TEST_CALL :: #string DONE
-                t_result, t_saturated := OP(cast(Tx)x, cast(Ty)y);
-                print("%_%(%, %) = %0%0\n", operation, type, x, y, result, ifx saturated then " : saturated");
-                DONE
-                str = replace(TEST_CALL, "OP", operation);
-            } else {
-                TEST_CALL :: #string DONE
-                t_result, t_remainder, t_saturated := OP(cast(Tx)x, cast(Ty)y);
-                print("%_%(%, %) = % + %0%0\n", operation, type, x, y, result, remainder, ifx saturated then " : saturated");
-                DONE
-                str = replace(TEST_CALL, "OP", operation);
-            }
-            return str;
-        }
-
-        #insert #run print_test_call(operation);
-
-        errors := 0;
-        if result != t_result           { errors += 1; print(" > incorrect result value: got % expected %\n", t_result, result); };
-        if type != type_of(t_result)    { errors += 1; print(" > incorrect result type: got % expected %\n", type_of(t_result), type); };
-        if saturated != t_saturated     { errors += 1; print(" > incorrect saturated flag: got % expected %\n", t_saturated, saturated); };
-        #if operation == "div" {
-            if remainder != t_remainder { errors += 1; print(" > incorrect remainder value: got % expected %\n", t_remainder, remainder); };
-        }
-        return errors;
-    }
-
-    errors := 0;
-
-    // Test signed add.
-    errors += test_op("add", cast( s8) S8_MAX,  cast( s8)1,          S8_MAX,     s8, true);
-    errors += test_op("add", cast(s16)S16_MAX,  cast( u8)1,         S16_MAX,    s16, true);
-    errors += test_op("add", cast(s32)S32_MAX,  cast(s32)1,         S32_MAX,    s32, true);
-    errors += test_op("add", cast(s64)S64_MAX,  cast(u32)1,         S64_MAX,    s64, true);
-
-    errors += test_op("add", cast( s8) S8_MAX,  cast( s8) S8_MIN,   -1,          s8, false);
-    errors += test_op("add", cast(s16)S16_MAX,  cast(s16)S16_MIN,   -1,         s16, false);
-    errors += test_op("add", cast(s32)S32_MAX,  cast(s32)S32_MIN,   -1,         s32, false);
-    errors += test_op("add", cast(s64)S64_MAX,  cast(s64)S64_MIN,   -1,         s64, false);
-
-    // Test unsigned add.
-    errors += test_op("add", cast( u8) U8_MAX,  cast( u8)1,          U8_MAX,     u8, true);
-    errors += test_op("add", cast(u16)U16_MAX,  cast(u16)1,         U16_MAX,    u16, true);
-    errors += test_op("add", cast(u32)U32_MAX,  cast(u32)1,         U32_MAX,    u32, true);
-    errors += test_op("add", cast(u64)U64_MAX,  cast(u64)1,         U64_MAX,    u64, true);
-
-    errors += test_op("add", cast( u8) U8_MAX,  cast( u8)0,          U8_MAX,     u8, false);
-    errors += test_op("add", cast(u16)U16_MAX,  cast(u16)0,         U16_MAX,    u16, false);
-    errors += test_op("add", cast(u32)U32_MAX,  cast(u32)0,         U32_MAX,    u32, false);
-    errors += test_op("add", cast(u64)U64_MAX,  cast(u64)0,         U64_MAX,    u64, false);
-
-    // Test signed sub.
-    errors += test_op("sub", cast( s8) S8_MIN,  cast( s8)1,          S8_MIN,     s8, true);
-    errors += test_op("sub", cast(s16)S16_MIN,  cast( u8)1,         S16_MIN,    s16, true);
-    errors += test_op("sub", cast(s32)S32_MIN,  cast(s32)1,         S32_MIN,    s32, true);
-    errors += test_op("sub", cast(s64)S64_MIN,  cast(u32)1,         S64_MIN,    s64, true);
-
-    errors += test_op("sub", cast( s8)-1,       cast( s8) S8_MAX,    S8_MIN,     s8, false);
-    errors += test_op("sub", cast(s16)-1,       cast(s16)S16_MAX,   S16_MIN,    s16, false);
-    errors += test_op("sub", cast(s32)-1,       cast(s32)S32_MAX,   S32_MIN,    s32, false);
-    errors += test_op("sub", cast(s64)-1,       cast(s64)S64_MAX,   S64_MIN,    s64, false);
-
-    // Test unsigned sub.
-    errors += test_op("sub", cast( u8)1,        cast( u8) U8_MAX,   0,           u8, true);
-    errors += test_op("sub", cast( u8)1,        cast(u16)U16_MAX,   0,          u16, true);
-    errors += test_op("sub", cast(u32)1,        cast(u32)U32_MAX,   0,          u32, true);
-    errors += test_op("sub", cast(u32)1,        cast(u64)U64_MAX,   0,          u64, true);
-
-    errors += test_op("sub", cast( u8) U8_MAX,  cast( u8)0,          U8_MAX,     u8, false);
-    errors += test_op("sub", cast(u16)U16_MAX,  cast( u8)0,         U16_MAX,    u16, false);
-    errors += test_op("sub", cast(u32)U32_MAX,  cast(u32)0,         U32_MAX,    u32, false);
-    errors += test_op("sub", cast(u64)U64_MAX,  cast(u32)0,         U64_MAX,    u64, false);
-
-    // Test signed mul.
-    errors += test_op("mul", cast( s8) S8_MIN,  cast( s8)-1,         S8_MAX,     s8, true);
-    errors += test_op("mul", cast(s16)S16_MIN,  cast( s8)-1,        S16_MAX,    s16, true);
-    errors += test_op("mul", cast(s32)S32_MIN,  cast(s32)-1,        S32_MAX,    s32, true);
-    errors += test_op("mul", cast(s64)S64_MIN,  cast(s32)-1,        S64_MAX,    s64, true);
-
-    errors += test_op("mul", cast( s8) S8_MAX,  cast( s8)-2,         S8_MIN,     s8, true);
-    errors += test_op("mul", cast(s16)S16_MAX,  cast( s8)-2,        S16_MIN,    s16, true);
-    errors += test_op("mul", cast(s32)S32_MAX,  cast(s32)-2,        S32_MIN,    s32, true);
-    errors += test_op("mul", cast(s64)S64_MAX,  cast(s32)-2,        S64_MIN,    s64, true);
-
-    errors += test_op("mul", cast( s8)-2,       cast( s8) S8_MAX,   S8_MIN,     s8, true);
-    errors += test_op("mul", cast( s8)-2,       cast(s16)S16_MAX,   S16_MIN,    s16, true);
-    errors += test_op("mul", cast(s32)-2,       cast(s32)S32_MAX,   S32_MIN,    s32, true);
-    errors += test_op("mul", cast(s32)-2,       cast(s64)S64_MAX,   S64_MIN,    s64, true);
-
-    errors += test_op("mul", cast( s8) S8_MAX,  cast( s8)2,          S8_MAX,     s8, true);
-    errors += test_op("mul", cast(s16)S16_MAX,  cast( s8)2,         S16_MAX,    s16, true);
-    errors += test_op("mul", cast(s32)S32_MAX,  cast(s32)2,         S32_MAX,    s32, true);
-    errors += test_op("mul", cast(s64)S64_MAX,  cast(s32)2,         S64_MAX,    s64, true);
-
-    errors += test_op("mul", cast( s8) S8_MAX,  cast( s8)-1,         -S8_MAX,    s8, false);
-    errors += test_op("mul", cast(s16)S16_MAX,  cast( s8)-1,        -S16_MAX,   s16, false);
-    errors += test_op("mul", cast(s32)S32_MAX,  cast(s32)-1,        -S32_MAX,   s32, false);
-    errors += test_op("mul", cast(s64)S64_MAX,  cast(s32)-1,        -S64_MAX,   s64, false);
-
-    errors += test_op("mul", cast( s8) S8_MAX,  cast( s8)0,         0,           s8, false);
-    errors += test_op("mul", cast(s16)S16_MAX,  cast( u8)0,         0,          s16, false);
-    errors += test_op("mul", cast(s32)S32_MAX,  cast(s32)0,         0,          s32, false);
-    errors += test_op("mul", cast(s64)S64_MAX,  cast(u32)0,         0,          s64, false);
-
-    // Test unsigned mul.
-    errors += test_op("mul", cast( u8) U8_MAX,  cast( u8)1,          U8_MAX,     u8, false);
-    errors += test_op("mul", cast(u16)U16_MAX,  cast( u8)1,         U16_MAX,    u16, false);
-    errors += test_op("mul", cast(u32)U32_MAX,  cast(u32)1,         U32_MAX,    u32, false);
-    errors += test_op("mul", cast(u64)U64_MAX,  cast(u32)1,         U64_MAX,    u64, false);
-
-    errors += test_op("mul", cast( u8) U8_MAX,  cast( u8)2,          U8_MAX,     u8, true);
-    errors += test_op("mul", cast(u16)U16_MAX,  cast( u8)2,         U16_MAX,    u16, true);
-    errors += test_op("mul", cast(u32)U32_MAX,  cast(u32)2,         U32_MAX,    u32, true);
-    errors += test_op("mul", cast(u64)U64_MAX,  cast(u32)2,         U64_MAX,    u64, true);
-
-    // Test signed div.
-    errors += test_op("div", cast( s8) S8_MIN,  cast( s8)-1,         S8_MAX,     s8, true,  -1);
-    errors += test_op("div", cast(s16)S16_MIN,  cast( s8)-1,        S16_MAX,    s16, true,  -1);
-    errors += test_op("div", cast(s32)S32_MIN,  cast(s32)-1,        S32_MAX,    s32, true,  -1);
-    errors += test_op("div", cast(s64)S64_MIN,  cast(s32)-1,        S64_MAX,    s64, true,  -1);
-
-    errors += test_op("div", cast( s8) S8_MAX,  cast( s8)-2,        - S8_MAX/2,  s8, false, 1);
-    errors += test_op("div", cast(s16)S16_MAX,  cast( s8)-2,        -S16_MAX/2, s16, false, 1);
-    errors += test_op("div", cast(s32)S32_MAX,  cast(s32)-2,        -S32_MAX/2, s32, false, 1);
-    errors += test_op("div", cast(s64)S64_MAX,  cast(s32)-2,        -S64_MAX/2, s64, false, 1);
-
-    errors += test_op("div", cast( s8)15,       cast( s8)5,         3,          s8, false,  0);
-    errors += test_op("div", cast( u8)15,       cast(s16)7,         2,          s16, false, 1);
-    errors += test_op("div", cast(s16)15,       cast(s32)13,        1,          s32, false, 2);
-    errors += test_op("div", cast(u16)100,      cast(s64)3,         33,         s64, false, 1);
-
-    // Test unsigned div.
-    errors += test_op("div", cast( u8) U8_MAX,  cast( u8)2,          U8_MAX/2,   u8, false, 1);
-    errors += test_op("div", cast(u16)U16_MAX,  cast( u8)2,         U16_MAX/2,  u16, false, 1);
-    errors += test_op("div", cast(u32)U32_MAX,  cast(u32)2,         U32_MAX/2,  u32, false, 1);
-    errors += test_op("div", cast(u64)U64_MAX,  cast(u32)2,         U64_MAX/2,  u64, false, 1);
-
-
-    if errors > 0 print("# Found % %!\n", errors, ifx errors == 1 then "error" else "errors"); else print("  No errors found.\n");
-
-/*
-    // Performance test.
-    #import "Random";
-    best_generic: float;
-    best_asm: float;
-    for 0..100 {
-        size, time_generic, time_asm := performance_test();
-        perf_generic    := cast(float)size/cast(float)to_microseconds(time_generic);
-        perf_asm        := cast(float)size/cast(float)to_microseconds(time_asm);
-        best_generic    = max(best_generic, perf_generic);
-        best_asm        = max(best_asm, perf_asm);
-    }
-
-    print("method  : ops/usec\ngeneric : %\nasm     : %\n", best_generic, best_asm);
-
-    performance_test :: () -> sum_size: s64, time_generic: Apollo_Time, time_asm: Apollo_Time {
-    
-        SUM_SIZE := 200;//0000;
-        numbers: [..] s64;
-        array_reserve(*numbers, SUM_SIZE);
-        
-        for 0..SUM_SIZE-1 {
-            array_add(*numbers, cast(s64)random_get());
-        }
-
-        sum := 0;
-        start := current_time_monotonic();
-        for numbers sum = add(sum, it, true);
-        time := current_time_monotonic() - start;
-
-        sum_asm := 0;
-        start_asm := current_time_monotonic();
-        for numbers sum_asm = add(sum_asm, it);
-        time_asm := current_time_monotonic() - start_asm;
-
-        assert(sum == sum_asm);
-
-        return SUM_SIZE, time, time_asm;
-    }
-*/
-}
-
 is_signed :: ($t: Type) -> bool { return (cast(*Type_Info_Integer)type_info(t)).signed; }
 
 INTEGER_ARITHMETIC_TYPES_CHECK :: #string DONE
@@ -235,10 +39,10 @@ INTEGER_ARITHMETIC_TYPES_CHECK :: #string DONE
     return true;
 DONE
 
-add :: (x: $Tx, y: $Ty) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump
+add :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump
 {
 
-    #if CPU != .X64 {
+    #if USE_GENERIC || CPU != .X64 {
 
         // #if #run is_signed(Tr) { // TODO Maybe use this?
         #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
@@ -278,7 +82,7 @@ add :: (x: $Tx, y: $Ty) -> result: $Tr, saturated: bool #modify { #insert INTEGE
             mov         sign: gpr, x;
             shr.SIZE    sign, BITS;
             add.SIZE    limit, sign;            // If sign is 1, then limit will overflow from MAX to MIN.
-
+            
             mov         result, x;
             add.SIZE    result, y;
             seto        saturated;
@@ -298,11 +102,11 @@ add :: (x: $Tx, y: $Ty) -> result: $Tr, saturated: bool #modify { #insert INTEGE
         
         U_ADD_ASM :: #string DONE
         #asm {
-            mov         max: gpr, MAX;
+            mov         limit: gpr, MAX;
             mov         result, x;
             add.SIZE    result, y;
             setc        saturated;
-            cmovc       result, max;
+            cmovc       result, limit;
         }
         DONE
 
@@ -321,10 +125,10 @@ add :: (x: $Tx, y: $Ty) -> result: $Tr, saturated: bool #modify { #insert INTEGE
     }
 }
 
-sub :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump
+sub :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump
 {
 
-    #if CPU != .X64 {
+    #if USE_GENERIC || CPU != .X64 {
         
         #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
 
@@ -401,10 +205,10 @@ sub :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER
     
 }
 
-mul :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump
+mul :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump
 {
 
-    #if CPU != .X64 {
+    #if USE_GENERIC || CPU != .X64 {
 
         // #if #run is_signed(Tr) { // TODO Maybe use this?
         #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
@@ -417,7 +221,7 @@ mul :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER
             if x == 0 || y == 0 then return 0, false;
             if x > 0 && y > 0 && x > MAX / y then return MAX, true;
             if x < 0 && y < 0 && x < MAX / y then return MAX, true;
-            if (y < 0 && y < MIN / x) || (x < 0 && x < MIN / y) then return MIN, true;
+            if (y < 0 && x > 0 && y < MIN / x) || (x < 0 && y > 0 && x < MIN / y) then return MIN, true;
             
         } else {
 
@@ -435,7 +239,7 @@ mul :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER
 
         S_MUL_ASM :: #string DONE
         #asm {
-            result === a;
+            result === a;    // TODO Try changing to a non-`a` register to see if we're using the single-argument version of imul.
         
             // Calculate limit based on (x^y)'s sign.
             mov         limit: gpr, MAX;
@@ -466,10 +270,10 @@ mul :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER
             result === a;
             
             mov         result, x;
-            mul.SIZE    r_d:, result, y;
+            mul.SIZE    r_d:, result, y;      // TODO Try to use same as below (remove r_d)
             setc        saturated;
-            sbb         max:, max;              // If CF: max = -1 (all bits set); otherwise: max = 0.
-            or          result, max;
+            sbb         limit:, limit;              // If CF: limit = -1 (all bits set); otherwise: limit = 0.
+            or          result, limit;
         }
         DONE
 
@@ -480,8 +284,8 @@ mul :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER
             mov         result, x;
             mul.SIZE    result, y;
             setc        saturated;
-            sbb         max:, max;              // If CF: max = -1 (all bits set); otherwise: max = 0.
-            or          result, max;
+            sbb         limit:, limit;              // If CF: limit = -1 (all bits set); otherwise: limit = 0.
+            or          result, limit;
         }
         DONE
 
@@ -500,10 +304,10 @@ mul :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #modify { #insert INTEGER
     }
 }
 
-div :: (x: $Tx, y: $Ty) -> result: $Tr, remainder: Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump
+div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } //#dump
 {
 
-    #if CPU != .X64 {
+    #if USE_GENERIC || CPU != .X64 {
 
         // #if #run is_signed(Tr) { // TODO Maybe use this?
         #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
@@ -516,7 +320,7 @@ div :: (x: $Tx, y: $Ty) -> result: $Tr, remainder: Tr, saturated: bool #modify {
             if x == MIN && y == -1 then return MAX, -1, true;
             
         }
-
+        
         result := x / y;
         remainder := x - (y * result);
         return result, remainder, false;
@@ -527,15 +331,15 @@ div :: (x: $Tx, y: $Ty) -> result: $Tr, remainder: Tr, saturated: bool #modify {
         result: Tr = ---;
         remainder: Tr = ---;
         saturated: bool = ---;
-
+        
         S_DIV_ASM :: #string DONE
         #asm {
             result === a;
             remainder === d;
 
             // Detect div(MIN/-1) and flag it on ZF.
-            mov         xT: gpr, MIN;
-            mov         xV: gpr, x;
+            mov         xT: gpr, MIN;   // TODO Rename xT to x_test
+            mov         xV: gpr, x;     // TODO Rename xV to x_val
             xor.SIZE    xT, xV;
             mov         yT: gpr, y;
             xor.SIZE    yT, -1;
@@ -544,6 +348,7 @@ div :: (x: $Tx, y: $Ty) -> result: $Tr, remainder: Tr, saturated: bool #modify {
             mov         limit: gpr, LIMIT;
             mov         result, x;
             cmovz       result, limit;          // If ZF: limit dividend to MIN-1.
+            mov.SIZE       saturated, 0;        // Clear register up to the size used on last operation "sub.SIZE".
             setz        saturated;
             SIGN_EXT    remainder, result;      // Prepare dividend high bits.
             idiv.SIZE   remainder, result, y;
@@ -555,6 +360,7 @@ div :: (x: $Tx, y: $Ty) -> result: $Tr, remainder: Tr, saturated: bool #modify {
 
         S_DIV_ASM_8BITS :: #string DONE
         #asm {
+
             result === a;
             remainder === d;
 
@@ -565,9 +371,11 @@ div :: (x: $Tx, y: $Ty) -> result: $Tr, remainder: Tr, saturated: bool #modify {
             xor.SIZE    t_y, -1;
             or.SIZE     t_x, t_y;
 
+            
             mov         limit: gpr, LIMIT;
             mov         result, x;
             cmovz       result, limit;          // If ZF: limit dividend to MIN-1.
+            cbw         result;
             setz        saturated;
             idiv.SIZE   result, y;
 
@@ -577,6 +385,7 @@ div :: (x: $Tx, y: $Ty) -> result: $Tr, remainder: Tr, saturated: bool #modify {
 
             // If saturated: remainder = 0 - 1; otherwise: remainder = x - 0.
             sub.SIZE    remainder, saturated;
+
         }
         DONE