From 108e59b2b649caf3e995a14a2aa5d59f5d916f37 Mon Sep 17 00:00:00 2001 From: dam Date: Wed, 5 Jul 2023 21:13:21 +0100 Subject: Finalized add, sub, and mul. --- Math_Ext.jai | 257 +++++++++++++++++++++++++--------------------------------- Math_Test.jai | 64 +++++++-------- 2 files changed, 141 insertions(+), 180 deletions(-) diff --git a/Math_Ext.jai b/Math_Ext.jai index a56b174..d42b1bd 100644 --- a/Math_Ext.jai +++ b/Math_Ext.jai @@ -39,80 +39,61 @@ DONE add :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump { - + #if USE_GENERIC || CPU != .X64 { - + #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { - + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; } #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } - + if (y > 0 && x > MAX - y) then return MAX, true; if (y < 0 && x < MIN - y) then return MIN, true; - + } else { - + #if Tr == u8 { MAX :: U8_MAX; } #if Tr == u16 { MAX :: U16_MAX; } #if Tr == u32 { MAX :: U32_MAX; } #if Tr == u64 { MAX :: U64_MAX; } - + if (x > MAX - y) then return MAX, true; - + } - + return x + y, false; - + } else { - + #import "String"; result: Tr = ---; saturated: bool = ---; - - + + S_ADD_ASM :: #string DONE - #asm { - - // Performance - // s8 | s16 | s32 | s64 - // 1.243 | 1.242 | 1.215 | 1.210 - // - mov result, -1; // Pre-set result with signed maximum: set all ones. - shr.SIZE result, 1; // Pre-set result with signed maximum: insert zero on MSB. - bt x, SHIFT; // Test signal bit (affect CF). + #asm { + mov result, -1; // Pre-set result with signed maximum (set all ones... + shr.SIZE result, 1; // ...then, insert zero on MSB). + bt x, SIGN_BIT; // Test sign bit (affect CF). adc result, 0; // Overflow signed maximum to signed minimum if CF is set. add.SIZE x, y; // Add values (affect OF). - seto saturated; // Set saturated flat if OF. 
+ seto saturated; // Set saturated flag if OF. cmovno result, x; // Move add-result to result if NOT OF. - - // Performance - a bit of improvement... with some more code. - // s8 | s16 | s32 | s64 - // 1.336 | 1.305 | 1.217 | 1.210 - // - // mov sign: gpr, x; // Copy x value to sign variable. - // mov limit: gpr, MAX; // Pre-set limit with signed maximum. - // shr.SIZE sign, SHIFT; // Get sign of x value. - // add.SIZE limit, sign; // If sign is 1, overflow from signed maximum to signed minimum. - // - // mov result, x; // Copy x value to result. - // add.SIZE result, y; // Add values (affect OF). - // seto saturated; // Set saturated flag if OF. - // cmovo result, limit; // Move limit to result if OF. } DONE - + #if Tr == s8 - #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".b"), "SHIFT", "7"); // , "MAX", "127") + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".b"), "SIGN_BIT", "7"); #if Tr == s16 - #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".w"), "SHIFT", "15"); // , "MAX", "32767") + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".w"), "SIGN_BIT", "15"); #if Tr == s32 - #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".d"), "SHIFT", "31"); // , "MAX", "2147483647") + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".d"), "SIGN_BIT", "31"); #if Tr == s64 - #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".q"), "SHIFT", "63"); // , "MAX", "9223372036854775807") - + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".q"), "SIGN_BIT", "63"); + U_ADD_ASM :: #string DONE #asm { @@ -122,7 +103,7 @@ add :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, saturated: b cmovnc result, x; // Move add-result to result if NOT CF. 
} DONE - + #if Tr == u8 #insert #run replace(U_ADD_ASM, ".SIZE", ".b"); #if Tr == u16 @@ -131,8 +112,8 @@ add :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, saturated: b #insert #run replace(U_ADD_ASM, ".SIZE", ".d"); #if Tr == u64 #insert #run replace(U_ADD_ASM, ".SIZE", ".q"); - - + + return result, saturated; } @@ -140,7 +121,7 @@ add :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, saturated: b sub :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump { - + #if USE_GENERIC || CPU != .X64 { #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { @@ -149,16 +130,16 @@ sub :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bo #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } - + if (y < 0 && x > MAX + y) then return MAX, true; if (y > 0 && x < MIN + y) then return MIN, true; } else { - + if (y > x) then return 0, true; - + } - + return x - y, false; } else { @@ -166,40 +147,37 @@ sub :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bo #import "String"; result: Tr = ---; saturated: bool = ---; - - + + S_SUB_ASM :: #string DONE #asm { - // Calculate limit based on x's sign. - mov limit: gpr, MAX; - mov sign: gpr, x; - shr.SIZE sign, BITS; - add.SIZE limit, sign; // If sign is 1, then limit will overflow from MAX to MIN. - - mov result, x; - sub.SIZE result, y; - seto saturated; - cmovo result, limit; + mov result, -1; // Pre-set result with signed maximum (set all ones... + shr.SIZE result, 1; // ...then, insert zero on MSB). + bt x, SIGN_BIT; // Test sign bit (affect CF). + adc result, 0; // Overflow signed maximum to signed minimum if CF is set. + + sub.SIZE x, y; // Subtract values (affect OF). + seto saturated; // Set saturated flag if OF. 
+ cmovno result, x; // Move subtract-result to result if NOT OF. } DONE #if Tr == s8 - #insert #run replace(replace(replace(S_SUB_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7"); + #insert #run replace(replace(S_SUB_ASM, ".SIZE", ".b"), "SIGN_BIT", "7"); #if Tr == s16 - #insert #run replace(replace(replace(S_SUB_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15"); + #insert #run replace(replace(S_SUB_ASM, ".SIZE", ".w"), "SIGN_BIT", "15"); #if Tr == s32 - #insert #run replace(replace(replace(S_SUB_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31"); + #insert #run replace(replace(S_SUB_ASM, ".SIZE", ".d"), "SIGN_BIT", "31"); #if Tr == s64 - #insert #run replace(replace(replace(S_SUB_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63"); - - + #insert #run replace(replace(S_SUB_ASM, ".SIZE", ".q"), "SIGN_BIT", "63"); + + U_SUB_ASM :: #string DONE #asm { - mov limit: gpr, 0; - mov result, x; - sub.SIZE result, y; - setc saturated; - cmovc result, limit; + mov result, 0; // Pre-set result with unsigned minimum. + sub.SIZE x, y; // Subtract values (affect CF). + setc saturated; // Set saturated flag if CF. + cmovnc result, x; // Move subtract-result to result if NOT CF. 
} DONE @@ -211,105 +189,92 @@ sub :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bo #insert #run replace(U_SUB_ASM, ".SIZE", ".d"); #if Tr == u64 #insert #run replace(U_SUB_ASM, ".SIZE", ".q"); - - + + return result, saturated; } - + } mul :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } // #dump { - + #if USE_GENERIC || CPU != .X64 { #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { - + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; } #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } - + if x == 0 || y == 0 then return 0, false; if x > 0 && y > 0 && x > MAX / y then return MAX, true; if x < 0 && y < 0 && x < MAX / y then return MAX, true; if (y < 0 && x > 0 && y < MIN / x) || (x < 0 && y > 0 && x < MIN / y) then return MIN, true; } else { - + #if Tr == u8 { MAX :: U8_MAX; } #if Tr == u16 { MAX :: U16_MAX; } #if Tr == u32 { MAX :: U32_MAX; } #if Tr == u64 { MAX :: U64_MAX; } - + if x == 0 || y == 0 then return 0, false; if x > MAX / y then return MAX, true; } - + return x * y, false; - + } else { #import "String"; result: Tr = ---; saturated: bool = ---; - + S_MUL_ASM :: #string DONE #asm { - result === a; // TODO Try changing to non-aregister to see if we're using the single-argument version of imul. - - // Calculate limit based on (x^y)'s sign. - mov limit: gpr, MAX; - mov sign: gpr, x; - xor sign, y; - shr.SIZE sign, BITS; - add.SIZE limit, sign; // If sign is 1, then limit will overflow from MAX to MIN. - - mov result, x; - imul.SIZE result, y; - seto saturated; - cmovo result, limit; + mov x_: gpr === a, x; // Pin copy of x value to register A (don't know why... but seems faster this way). + + mov result, -1; // Pre-set result with signed maximum (set all ones... + shr.SIZE result, 1; // ...then, insert zero on MSB). 
+ mov sign:, x; // Use copy of x (don't know why... but seems faster this way). + xor sign, y; // Calculate result sign bit using xor. + bt sign, SIGN_BIT; // Test sign bit (affect CF). + adc result, 0; // Overflow signed maximum to signed minimum if CF is set. + + imul.SIZE x_, y; // Multiply values (affect OF). + seto saturated; // Set saturated flag if OF. + cmovno result, x_; // Move multiply-result to result if NOT OF. } DONE #if Tr == s8 - #insert #run replace(replace(replace(S_MUL_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7"); + #insert #run replace(replace(S_MUL_ASM, ".SIZE", ".b"), "SIGN_BIT", "7"); #if Tr == s16 - #insert #run replace(replace(replace(S_MUL_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15"); + #insert #run replace(replace(S_MUL_ASM, ".SIZE", ".w"), "SIGN_BIT", "15"); #if Tr == s32 - #insert #run replace(replace(replace(S_MUL_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31"); + #insert #run replace(replace(S_MUL_ASM, ".SIZE", ".d"), "SIGN_BIT", "31"); #if Tr == s64 - #insert #run replace(replace(replace(S_MUL_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63"); + #insert #run replace(replace(S_MUL_ASM, ".SIZE", ".q"), "SIGN_BIT", "63"); U_MUL_ASM :: #string DONE #asm { - result === a; - - mov result, x; - mul.SIZE r_d:, result, y; // TODO Try to use same as below (remove r_d) - setc saturated; - sbb limit:, limit; // If CF: limit = -1 (all bits set); otherwise: limit = 0. - or result, limit; - } - DONE - - U_MUL_ASM_8BITS :: #string DONE - #asm { - result === a; + result === a; // Pin result to register A. - mov result, x; - mul.SIZE result, y; - setc saturated; - sbb limit:, limit; // If CF: limit = -1 (all bits set); otherwise: limit = 0. - or result, limit; + mov result, x; // Move value x to result. + mul.SIZE reg_d:, result, y; // Multiply values (affect CF). + setc saturated; // Set saturated flag if CF. + sbb mask:, mask; // If CF: mask = -1 (all bits set); else: mask = 0. 
+ or result, mask; // If CF was set, then result will be set to unsigned maximum (all bits set). } DONE - + #if Tr == u8 - #insert #run replace(U_MUL_ASM_8BITS, ".SIZE", ".b"); + #insert #run replace(replace(U_MUL_ASM, ".SIZE", ".b"), "reg_d:,", ""); // For 8bits mul, we do not need D register. #if Tr == u16 #insert #run replace(U_MUL_ASM, ".SIZE", ".w"); #if Tr == u32 @@ -325,16 +290,16 @@ mul :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bo div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } //#dump { - + #if USE_GENERIC || CPU != .X64 { - + #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { - + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; } #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } - + if x == MIN && y == -1 then return MAX, -1, true; } @@ -342,7 +307,7 @@ div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: T result := x / y; remainder := x - (y * result); return result, remainder, false; - + } else { #import "String"; @@ -354,7 +319,7 @@ div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: T #asm { result === a; remainder === d; - + // Detect div(MIN/-1) and flag it on ZF. mov xT: gpr, MIN; // TODO Rename xT to x_test mov xV: gpr, x; // TODO Rename xV to x_val @@ -362,7 +327,7 @@ div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: T mov yT: gpr, y; xor.SIZE yT, -1; or.SIZE xT, yT; - + mov limit: gpr, LIMIT; mov result, x; cmovz result, limit; // If ZF: limit dividend to MIN-1. @@ -370,25 +335,25 @@ div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: T setz saturated; SIGN_EXT remainder, result; // Prepare dividend high bits. 
idiv.SIZE remainder, result, y; - + // If saturated: remainder = 0 - 1; otherwise: remainder = x - 0. sub.SIZE remainder, saturated; } DONE - + S_DIV_ASM_8BITS :: #string DONE #asm { - + result === a; remainder === d; - + // Detect div(MIN/-1) and flag it on ZF. mov t_x: gpr, x; mov t_y: gpr, y; xor.SIZE t_x, MIN; xor.SIZE t_y, -1; or.SIZE t_x, t_y; - + mov limit: gpr, LIMIT; mov result, x; @@ -396,17 +361,17 @@ div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: T cbw result; // Sign-extension. setz saturated; idiv.SIZE result, y; - + // Extract remainder from result's high bits. mov remainder, result; sar remainder, 8; - + // If saturated: remainder = 0 - 1; otherwise: remainder = x - 0. sub.SIZE remainder, saturated; - + } DONE - + #if Tr == s8 #insert #run replace(replace(replace(S_DIV_ASM_8BITS, ".SIZE", ".b"), "MIN", "-128"), "LIMIT", "-127"); #if Tr == s16 @@ -415,35 +380,35 @@ div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: T #insert #run replace(replace(replace(replace(S_DIV_ASM, ".SIZE", ".d"), "MIN", "-2147483648"), "LIMIT", "-2147483647"), "SIGN_EXT", "cdq"); #if Tr == s64 #insert #run replace(replace(replace(replace(S_DIV_ASM, ".SIZE", ".q"), "MIN", "-9223372036854775808"), "LIMIT", "-9223372036854775807"), "SIGN_EXT", "cqo"); - - + + U_DIV_ASM :: #string DONE #asm { result === a; remainder === d; - + mov saturated, 0; mov result, x; mov remainder, 0; // Prepare dividend high bits. div.SIZE remainder, result, y; } DONE - + U_DIV_ASM_8BITS :: #string DONE #asm { result === a; remainder === d; - + mov saturated, 0; movzxbw result, x; // Move zero-extended byte to word. div.SIZE result, y; - + // Extract remainder from result's high bits. 
mov remainder, result; sar remainder, 8; } DONE - + #if Tr == u8 #insert #run replace(U_DIV_ASM_8BITS, ".SIZE", ".b"); #if Tr == u16 @@ -452,7 +417,7 @@ div :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, remainder: T #insert #run replace(U_DIV_ASM, ".SIZE", ".d"); #if Tr == u64 #insert #run replace(U_DIV_ASM, ".SIZE", ".q"); - + return result, remainder, saturated; diff --git a/Math_Test.jai b/Math_Test.jai index bd9990b..5fe87c5 100644 --- a/Math_Test.jai +++ b/Math_Test.jai @@ -8,10 +8,10 @@ #load "Math_Ext.jai"; main :: () { - + write_strings( "#=======================#\n", - "# Unit tests #\n" + "# Basic tests #\n" ); test_op :: (operation: string, x: $Tx, y: $Ty, result: $Tr, type: Type, saturated: bool, remainder: Tr = 0) -> errors_found: int #expand { @@ -162,12 +162,22 @@ main :: () { "#=======================#\n", "# Benchmarks #\n" ); - + #import "Random"; + performance_test :: ($operation: string, $type: Type, print_result: bool = true) -> ops_per_us_gen: float, ops_per_us_asm: float { - NUM_TESTS :: 5000; - DATA_SIZE_BITS :: 64*1024*8/2; + #if type == u8 { MIN :: 0; MAX :: U8_MAX; } + #if type == u16 { MIN :: 0; MAX :: U16_MAX; } + #if type == u32 { MIN :: 0; MAX :: U32_MAX; } + #if type == u64 { MIN :: 0; MAX :: U64_MAX; } + #if type == s8 { MIN :: S8_MIN; MAX :: S8_MAX; } + #if type == s16 { MIN :: S16_MIN; MAX :: S16_MAX; } + #if type == s32 { MIN :: S32_MIN; MAX :: S32_MAX; } + #if type == s64 { MIN :: S64_MIN; MAX :: S64_MAX; } + + NUM_TESTS :: 50000; + DATA_SIZE_BITS :: 64*1024*8; #if type == s8 || type == u8 then DATA_SIZE :: DATA_SIZE_BITS/8; else #if type == s16 || type == u16 then @@ -176,27 +186,15 @@ main :: () { DATA_SIZE :: DATA_SIZE_BITS/32; else #if type == s64 || type == u64 then DATA_SIZE :: DATA_SIZE_BITS/64; - + best_gen := 0.0; best_asm := 0.0; - - numbers_xgen: [..] type; - numbers_ygen: [..] type; - numbers_xasm: [..] type; - numbers_yasm: [..] 
type; - array_reserve(*numbers_xgen, DATA_SIZE); - array_reserve(*numbers_ygen, DATA_SIZE); - array_reserve(*numbers_xasm, DATA_SIZE); - array_reserve(*numbers_yasm, DATA_SIZE); - - #if type == u8 { MIN :: 0; MAX :: U8_MAX; } - #if type == u16 { MIN :: 0; MAX :: U16_MAX; } - #if type == u32 { MIN :: 0; MAX :: U32_MAX; } - #if type == u64 { MIN :: 0; MAX :: U64_MAX; } - #if type == s8 { MIN :: S8_MIN; MAX :: S8_MAX; } - #if type == s16 { MIN :: S16_MIN; MAX :: S16_MAX; } - #if type == s32 { MIN :: S32_MIN; MAX :: S32_MAX; } - #if type == s64 { MIN :: S64_MIN; MAX :: S64_MAX; } + numbers_x: [..] type; + numbers_y: [..] type; + array_reserve(*numbers_x, DATA_SIZE); + array_reserve(*numbers_y, DATA_SIZE); + + random_seed(cast(u64)to_nanoseconds(current_time_monotonic())); for 0..DATA_SIZE-1 { x := cast(type) random_get_within_range(xx MIN, xx MAX); @@ -204,27 +202,25 @@ main :: () { if y == 0 && operation == "div" { y = 1; } - array_add(*numbers_xgen, x); - array_add(*numbers_ygen, y); - array_add(*numbers_xasm, x); - array_add(*numbers_yasm, y); + array_add(*numbers_x, x); + array_add(*numbers_y, y); } for 0..NUM_TESTS-1 { - + r_gen: type = 0; r_asm: type = 0; time_gen := current_time_monotonic(); - for 0..DATA_SIZE-1 #insert #run replace("r_gen ^= OP(numbers_xgen[it], numbers_ygen[it], true);", "OP", operation); + for idx: 0..DATA_SIZE-1 #insert #run replace("r_gen ^= OP(numbers_x[idx], numbers_y[idx], true);", "OP", operation); time_gen = current_time_monotonic() - time_gen; - + time_asm := current_time_monotonic(); - for 0..DATA_SIZE-1 #insert #run replace("r_asm ^= OP(numbers_xasm[it], numbers_yasm[it]);", "OP", operation); + for idx: 0..DATA_SIZE-1 #insert #run replace("r_asm ^= OP(numbers_x[idx], numbers_y[idx]);", "OP", operation); time_asm = current_time_monotonic() - time_asm; assert(r_gen == r_asm); - + perf_gen := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_gen); perf_asm := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_asm); best_gen = 
max(best_gen, perf_gen); @@ -236,7 +232,7 @@ main :: () { ff := *context.print_style.default_format_float; ff.zero_removal = .NO; ff.width = 7; - ff.trailing_width = 3; + ff.trailing_width = 2; fi := *context.print_style.default_format_int; fi.minimum_digits = 3; -- cgit v1.2.3