Added UTF8 support to read_input_line.

author: dam <dam@gudinoff> 2024-04-11 02:46:03 +0100
committer: dam <dam@gudinoff> 2024-04-11 02:46:03 +0100
commit: 52c375a02e663a87140ef34f140de16c71f7af5f (patch)
tree: fba3eb7940946abe78193c92cc3dbf4ff287acc9 /modules/UTF8.jai
parent: 3491fff478a7ebf7e9281595230528726828f48c (diff)
download: task-time-tracker-52c375a02e663a87140ef34f140de16c71f7af5f.tar.zst
task-time-tracker-52c375a02e663a87140ef34f140de16c71f7af5f.zip
1 files changed, 47 insertions, 1 deletions
diff --git a/modules/UTF8.jai b/modules/UTF8.jai
index fac1326..eba4585 100644
--- a/modules/UTF8.jai
+++ b/modules/UTF8.jai
@@ -1,4 +1,5 @@
 // BBBB BBBB & 1100 0000 == 10XX XXXX -> is continuation byte
+// Returns true when byte matches the 10XX XXXX pattern. TODO Maybe rename to: is_continuation_byte
 is_utf8_continuation_byte :: inline (byte: u8) -> bool {
    return (byte & 0xC0) == 0x80;
 }
@@ -6,6 +7,7 @@ is_utf8_continuation_byte :: inline (byte: u8) -> bool {
 // BBBB BBBB & 1110 0000 == 110X XXXX -> 1 initial + 1 continuation byte
 // BBBB BBBB & 1111 0000 == 1110 XXXX -> 1 initial + 2 continuation byte
 // BBBB BBBB & 1111 1000 == 1111 0XXX -> 1 initial + 3 continuation byte
+// TODO Maybe rename to: count_character_bytes
 count_utf8_bytes :: inline (byte: u8) -> int {
     if (byte & 0xE0) == 0xC0 return 1+1;
     if (byte & 0xF0) == 0xE0 return 1+2;
@@ -16,6 +18,7 @@ count_utf8_bytes :: inline (byte: u8) -> int {
 // Truncates the string to the length provided or shorter, in case of UTF8 strings that require so.
 // Truncation is done by zeroing the tail of the string in place.
 // Returns length of truncated string.
+// TODO Maybe rename to: truncate
 truncate_string :: (str: string, length: int) -> length: int {
     if str.data == null     then return -1;
     
@@ -26,7 +29,6 @@ truncate_string :: (str: string, length: int) -> length: int {
 
     // Find index of first continuation byte.
     idx := length;
-    // while (idx > 0 && ((data[idx - 1] & 0xC0) == 0x80)) { TODO REMOVE AFTER TESTING
     while (idx > 0 && is_utf8_continuation_byte(data[idx - 1])) {
         idx -= 1;
     }
@@ -48,10 +50,12 @@ truncate_string :: (str: string, length: int) -> length: int {
     }
 
     memset(data + length, 0, count - length);
+    // str.count = length;  TODO We should be doing this...
     return length;
 }
 
 // Returns true when the string is empty or consists of space characters.
+// TODO Maybe rename to: is_empty
 is_empty_string :: (str: string) -> bool {
     for 0..str.count-1 {
         if str[it] == {
@@ -69,3 +73,45 @@ is_empty_string :: (str: string) -> bool {
     }
     return true;
 }
+
+// Returns how many characters str holds, hopping from lead byte to lead byte via count_utf8_bytes.
+count_characters :: (str: string) -> int {
+    total := 0;
+    cursor := 0;
+    while cursor < str.count {
+        total += 1;
+        cursor += count_utf8_bytes(str[cursor]);
+    }
+    return total;
+}
+
+// Deletes the character at character_idx in place: shifts the tail left, zeroes the freed bytes, shrinks str.count.
+delete_character :: (str: *string, character_idx: int) {
+    buffer_idx, success := map_character_to_buffer_idx(str.*, character_idx);
+    if !success || buffer_idx >= str.count    then return;  // Out of range (including one-past-the-end): nothing to delete.
+    bytes_to_delete := count_utf8_bytes(str.data[buffer_idx]);
+    if bytes_to_delete > str.count - buffer_idx    then bytes_to_delete = str.count - buffer_idx;  // Truncated trailing sequence.
+    for buffer_idx..str.count-1-bytes_to_delete {
+        str.data[it] = str.data[it+bytes_to_delete];
+    }
+    for str.count-bytes_to_delete..str.count-1 {
+        str.data[it] = 0;
+    }
+    str.count -= bytes_to_delete;
+}
+
+// Maps a character index to the byte offset in str where that character starts (str.count for one-past-the-end).
+// TODO Maybe rename to: map_character_to_byte_idx or get_character_byte_idx
+map_character_to_buffer_idx :: (str: string, character_idx: int) -> buffer_idx: int, success: bool {
+    if character_idx < 0            then return -1, false;
+    if character_idx > str.count    then return -2, false;  // Cheap upper bound: every character takes at least one byte.
+    
+    buff_idx := 0;
+    char_idx := 0;
+    while buff_idx < str.count && char_idx != character_idx {
+        buff_idx += count_utf8_bytes(str[buff_idx]);
+        char_idx += 1;
+    }
+    if buff_idx > str.count         then buff_idx = str.count;  // Truncated trailing sequence: never report an offset past the end.
+    return buff_idx, char_idx == character_idx;
+}
author	dam <dam@gudinoff>	2024-04-11 02:46:03 +0100
committer	dam <dam@gudinoff>	2024-04-11 02:46:03 +0100
commit	52c375a02e663a87140ef34f140de16c71f7af5f (patch)
tree	fba3eb7940946abe78193c92cc3dbf4ff287acc9 /modules/UTF8.jai
parent	3491fff478a7ebf7e9281595230528726828f48c (diff)
download	task-time-tracker-52c375a02e663a87140ef34f140de16c71f7af5f.tar.zst task-time-tracker-52c375a02e663a87140ef34f140de16c71f7af5f.zip