aboutsummaryrefslogtreecommitdiff
path: root/ttt.jai
diff options
context:
space:
mode:
author dam <dam@gudinoff> 2023-03-17 10:23:58 +0000
committer dam <dam@gudinoff> 2023-03-17 10:23:58 +0000
commit7bd617825b0edca20bd4ef5ad4e49c4fd2ffde76 (patch)
tree45925bb2233672b448d28dab7518b7e8e71fd26c /ttt.jai
parentefb6c8606d7e39a6ac9776bb1e4e28340aba2e29 (diff)
downloadtask-time-tracker-7bd617825b0edca20bd4ef5ad4e49c4fd2ffde76.tar.zst
task-time-tracker-7bd617825b0edca20bd4ef5ad4e49c4fd2ffde76.zip
Improve truncate_string to be aware of ASCII and UTF8 encodings.
Diffstat (limited to 'ttt.jai')
-rw-r--r--ttt.jai53
1 files changed, 30 insertions, 23 deletions
diff --git a/ttt.jai b/ttt.jai
index 8a75568..d1e52ab 100644
--- a/ttt.jai
+++ b/ttt.jai
@@ -177,35 +177,42 @@ is_equal_to_any :: (to_compare :string, test_a :string, test_b :string) -> bool
// The string should have capacity for at least length + 1.
// The terminating null byte ('\0') is not included in length.
// Returns the truncated string length.
-truncate_string_utf8 :: (str: string, length: s64) -> length: s64 {
+
+Text_Encoding :: enum u8 #specified {
+ ASCII :: 1;
+ UTF8 :: 2;
+}
+
+// WIP TODO Use compile-time code to see the auto bake being used... just for fun, once! :D
+truncate_string :: (str: string, length: s64, $encoding: Text_Encoding = .UTF8) -> length: s64 #no_abc { // TODO Should I use #no_abc ?
assert(str.data != null);
assert(str.count >= length);
data := str.data;
count := str.count;
- // WIP simplify this if/else and try to use only idx or length
-
- // Find index of first continuation byte.
- idx := length;
- while (idx > 0 && ((data[idx - 1] & 0xC0) == 0x80)) {
- idx -= 1;
- }
- continuation_bytes := length - idx;
+ #if encoding == .UTF8 { // WIP simplify this if/else and try to use only idx or length
+ // Find index of first continuation byte.
+ idx := length;
+ while (idx > 0 && ((data[idx - 1] & 0xC0) == 0x80)) {
+ idx -= 1;
+ }
+ continuation_bytes := length - idx;
- // If string starts with continuation bytes, it's an invalid UTF8 string.
- if (idx == 0 && continuation_bytes > 0) {
- length = 0;
- }
- // If length truncates some continuation bytes, remove incomplete UTF8 character.
- else if (idx > 0 // string is not empty
- // continuation bytes are not complete
- && !(continuation_bytes == 0 && (data[idx - 1] & 0x80) == 0x00)
- && !(continuation_bytes == 1 && (data[idx - 1] & 0xE0) == 0xC0)
- && !(continuation_bytes == 2 && (data[idx - 1] & 0xF0) == 0xE0)
- && !(continuation_bytes == 3 && (data[idx - 1] & 0xF8) == 0xF0)
- ) {
- length -= (continuation_bytes + 1); // Remove '+ 1' start byte.
+ // If string starts with continuation bytes, it's an invalid UTF8 string.
+ if (idx == 0 && continuation_bytes > 0) {
+ length = 0;
+ }
+ // If length truncates some continuation bytes, remove incomplete UTF8 character.
+ else if (idx > 0 // string is not empty
+ // continuation bytes are not complete
+ && !(continuation_bytes == 0 && (data[idx - 1] & 0x80) == 0x00)
+ && !(continuation_bytes == 1 && (data[idx - 1] & 0xE0) == 0xC0)
+ && !(continuation_bytes == 2 && (data[idx - 1] & 0xF0) == 0xE0)
+ && !(continuation_bytes == 3 && (data[idx - 1] & 0xF8) == 0xF0)
+ ) {
+ length -= (continuation_bytes + 1); // Remove '+ 1' start byte.
+ }
}
memset(data + length, 0, count - length);
@@ -1300,7 +1307,7 @@ main :: () {
print("--- --- ---\n");
xpto: string = copy_string("ç€dam");
memcpy(vb.data, xpto.data, xpto.count);
- truncate_string_utf8(xx vb, 5);
+ truncate_string(xx vb, 4);
// memset(xpto.data, 0, 5);
print(">'%'\n", xpto);
print(">'%'\n", cast(string)vb);