aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordam <dam@gudinoff>2023-07-02 20:02:26 +0100
committerdam <dam@gudinoff>2023-07-02 20:02:26 +0100
commit4d1a0ca8c06ee558a92c9981a572bb962f73a6f6 (patch)
tree876d07b0cdd1a6ec9d3c88cfc07d0ec971c910dc
parente33e8c5d19f56e2c48f116c2e36e6e413e1d5fc5 (diff)
downloadtask-time-tracker-4d1a0ca8c06ee558a92c9981a572bb962f73a6f6.tar.zst
task-time-tracker-4d1a0ca8c06ee558a92c9981a572bb962f73a6f6.zip
Tweaked signed add.
-rw-r--r--Math_Ext.jai47
-rw-r--r--Math_Test.jai57
2 files changed, 64 insertions, 40 deletions
diff --git a/Math_Ext.jai b/Math_Ext.jai
index 500962f..368779f 100644
--- a/Math_Ext.jai
+++ b/Math_Ext.jai
@@ -74,35 +74,53 @@ add :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, saturated: b
result: Tr = ---;
saturated: bool = ---;
+
S_ADD_ASM :: #string DONE
#asm {
- // Calculate limit based on x's sign.
- mov limit: gpr, MAX;
- mov sign: gpr, x;
- shr.SIZE sign, BITS;
- add.SIZE limit, sign; // If sign is 1, then limit will overflow from MAX to MIN.
- mov result, x;
- add.SIZE result, y;
- seto saturated;
- cmovo result, limit;
+ // Performance
+ // s8 | s16 | s32 | s64
+ // 1.243 | 1.242 | 1.215 | 1.210
+ //
+ mov result, -1; // Pre-set result with signed maximum: set all ones.
+ shr.SIZE result, 1; // Pre-set result with signed maximum: insert zero on MSB.
+ bt x, SHIFT; // Test sign bit (affect CF).
+ adc result, 0; // Overflow signed maximum to signed minimum if CF is set.
+
+ add.SIZE x, y; // Add values (affect OF).
+ seto saturated; // Set saturated flag if OF.
+ cmovno result, x; // Move add-result to result if NOT OF.
+
+ // Performance - a bit of improvement... with some more code.
+ // s8 | s16 | s32 | s64
+ // 1.336 | 1.305 | 1.217 | 1.210
+ //
+ // mov sign: gpr, x; // Copy x value to sign variable.
+ // mov limit: gpr, MAX; // Pre-set limit with signed maximum.
+ // shr.SIZE sign, SHIFT; // Get sign of x value.
+ // add.SIZE limit, sign; // If sign is 1, overflow from signed maximum to signed minimum.
+ //
+ // mov result, x; // Copy x value to result.
+ // add.SIZE result, y; // Add values (affect OF).
+ // seto saturated; // Set saturated flag if OF.
+ // cmovo result, limit; // Move limit to result if OF.
}
DONE
#if Tr == s8
- #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7");
+ #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".b"), "SHIFT", "7"); // , "MAX", "127")
#if Tr == s16
- #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15");
+ #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".w"), "SHIFT", "15"); // , "MAX", "32767")
#if Tr == s32
- #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31");
+ #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".d"), "SHIFT", "31"); // , "MAX", "2147483647")
#if Tr == s64
- #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63");
+ #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".q"), "SHIFT", "63"); // , "MAX", "9223372036854775807")
U_ADD_ASM :: #string DONE
#asm {
- add.SIZE x, y; // Add values.
mov result, -1; // Pre-set result with unsigned maximum.
+ add.SIZE x, y; // Add values (affect CF).
setc saturated; // Set saturated flag if CF.
cmovnc result, x; // Move add-result to result if NOT CF.
}
@@ -152,6 +170,7 @@ sub :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bo
result: Tr = ---;
saturated: bool = ---;
+
S_SUB_ASM :: #string DONE
#asm {
// Calculate limit based on x's sign.
diff --git a/Math_Test.jai b/Math_Test.jai
index d03bd32..bd9990b 100644
--- a/Math_Test.jai
+++ b/Math_Test.jai
@@ -167,26 +167,23 @@ main :: () {
performance_test :: ($operation: string, $type: Type, print_result: bool = true) -> ops_per_us_gen: float, ops_per_us_asm: float {
NUM_TESTS :: 5000;
- // DATA_SIZE :: 32768;
- DATA_SIZE_BITS :: 96*1024*8;
+ DATA_SIZE_BITS :: 64*1024*8/2;
#if type == s8 || type == u8 then
- DATA_SIZE :: DATA_SIZE_BITS/3/8;
+ DATA_SIZE :: DATA_SIZE_BITS/8;
else #if type == s16 || type == u16 then
- DATA_SIZE :: DATA_SIZE_BITS/3/16;
+ DATA_SIZE :: DATA_SIZE_BITS/16;
else #if type == s32 || type == u32 then
- DATA_SIZE :: DATA_SIZE_BITS/3/32;
+ DATA_SIZE :: DATA_SIZE_BITS/32;
else #if type == s64 || type == u64 then
- DATA_SIZE :: DATA_SIZE_BITS/3/64;
+ DATA_SIZE :: DATA_SIZE_BITS/64;
best_gen := 0.0;
best_asm := 0.0;
numbers_xgen: [..] type;
numbers_ygen: [..] type;
- numbers_zgen: [DATA_SIZE] type;
numbers_xasm: [..] type;
numbers_yasm: [..] type;
- numbers_zasm: [DATA_SIZE] type;
array_reserve(*numbers_xgen, DATA_SIZE);
array_reserve(*numbers_ygen, DATA_SIZE);
array_reserve(*numbers_xasm, DATA_SIZE);
@@ -212,18 +209,21 @@ main :: () {
array_add(*numbers_xasm, x);
array_add(*numbers_yasm, y);
}
-
+
for 0..NUM_TESTS-1 {
+ r_gen: type = 0;
+ r_asm: type = 0;
+
time_gen := current_time_monotonic();
- for 0..DATA_SIZE-1 #insert #run replace("numbers_zgen[it] = OP(numbers_xgen[it], numbers_ygen[it], true);", "OP", operation);
+ for 0..DATA_SIZE-1 #insert #run replace("r_gen ^= OP(numbers_xgen[it], numbers_ygen[it], true);", "OP", operation);
time_gen = current_time_monotonic() - time_gen;
time_asm := current_time_monotonic();
- for 0..DATA_SIZE-1 #insert #run replace("numbers_zasm[it] = OP(numbers_xasm[it], numbers_yasm[it]);", "OP", operation);
+ for 0..DATA_SIZE-1 #insert #run replace("r_asm ^= OP(numbers_xasm[it], numbers_yasm[it]);", "OP", operation);
time_asm = current_time_monotonic() - time_asm;
- for 0..DATA_SIZE-1 assert(numbers_zgen[it] == numbers_zasm[it]);
+ assert(r_gen == r_asm);
perf_gen := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_gen);
perf_asm := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_asm);
@@ -231,26 +231,31 @@ main :: () {
best_asm = max(best_asm, perf_asm);
}
- if print_result {
- if type == s8 || type == u8 write_string(" ");
- print("% | % | % | %\n", type, best_gen, best_asm, DATA_SIZE);
+ tmp_context := context;
+ push_context tmp_context {
+ ff := *context.print_style.default_format_float;
+ ff.zero_removal = .NO;
+ ff.width = 7;
+ ff.trailing_width = 3;
+
+ fi := *context.print_style.default_format_int;
+ fi.minimum_digits = 3;
+
+ if print_result {
+ if type == s8 || type == u8 write_string(" ");
+ print("% | % | % | %\n", type, best_gen, best_asm, cast(int)(100*best_asm/best_gen));
+ }
}
return best_gen, best_asm;
}
- ff := context.print_style.default_format_float;
- ff.zero_removal = .NO;
- ff.width = 7;
- ff.trailing_width = 3;
- context.print_style.default_format_float = ff;
-
write_strings(
" | (ops / nsec) |\n",
- " | generic | x64 asm |\n"
+ " T | generic | x64 asm | %\n"
);
write_strings(
- "--- | ----------------- |\n",
+ "--- | ----------------- | ---\n",
" | add |\n"
);
performance_test("add", u8);
@@ -263,7 +268,7 @@ main :: () {
performance_test("add", s64);
write_strings(
- "--- | ----------------- |\n",
+ "--- | ----------------- | ---\n",
" | sub |\n"
);
performance_test("sub", u8);
@@ -276,7 +281,7 @@ main :: () {
performance_test("sub", s64);
write_strings(
- "--- | ----------------- |\n",
+ "--- | ----------------- | ---\n",
" | mul |\n"
);
performance_test("mul", u8);
@@ -289,7 +294,7 @@ main :: () {
performance_test("mul", s64);
write_strings(
- "--- | ----------------- |\n",
+ "--- | ----------------- | ---\n",
" | div |\n"
);
performance_test("div", u8);