-
Notifications
You must be signed in to change notification settings - Fork 1
/
string_encoding.c
100 lines (90 loc) · 2.78 KB
/
string_encoding.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Characters the encoder recognises. main() passes this string to
 * encode_string() as the vocabulary, so a character's position in it
 * is the code produced for that character. */
#define VOCABULARY "abcdefghijklmnopqrstuvwxyz,._ -1234567890"
/* NOTE(review): not referenced by any code visible in this file. */
#define MAX_LENGTH 256
/* Length-tagged character buffer passed by value between the helpers
 * in this file. */
typedef struct {
char *data;  /* character bytes; main() points this at string literals */
int length;  /* character count, excluding any terminating NUL (as used in this file) */
} string_t;
/* Integer buffer with an explicit element count. The functions in this
 * file that return one obtain `data` from malloc(). */
typedef struct {
int *data;   /* element storage */
int length;  /* number of ints stored at data */
} array_t;
/*
 * Concatenate two length-tagged strings into a newly allocated buffer.
 *
 * Exactly a.length bytes are copied from a.data followed by b.length
 * bytes from b.data, and a terminating NUL is appended. The length
 * fields are authoritative: the inputs do not need to be NUL-terminated,
 * but each data pointer must reference at least `length` readable bytes.
 *
 * Returns a string whose data the caller must free(). On invalid
 * (negative or overflowing) lengths or allocation failure the result is
 * {NULL, 0}.
 */
string_t string_concat(string_t a, string_t b) {
    string_t result = {NULL, 0};

    /* Reject negative lengths and sums (plus the NUL) that overflow int. */
    if (a.length < 0 || b.length < 0 || a.length > INT_MAX - 1 - b.length) {
        return result;
    }

    size_t a_len = (size_t)a.length;
    size_t b_len = (size_t)b.length;

    result.data = malloc(a_len + b_len + 1);
    if (result.data == NULL) {
        return result;
    }

    /* Skip zero-length copies so a NULL data pointer is never handed to memcpy. */
    if (a_len > 0) {
        memcpy(result.data, a.data, a_len);
    }
    if (b_len > 0) {
        memcpy(result.data + a_len, b.data, b_len);
    }
    result.data[a_len + b_len] = '\0';
    result.length = a.length + b.length;
    return result;
}
/*
 * Convert each character of `s` to an int and return them as a new array.
 *
 * Each element is the plain char-to-int conversion of the corresponding
 * byte, so bytes with the high bit set come out negative on platforms
 * where char is signed.
 *
 * Returns an array whose data the caller must free(). For an empty or
 * invalid input (NULL data, length <= 0) or on allocation failure the
 * result is {NULL, 0}.
 */
array_t string_to_array(string_t s) {
    array_t result = {NULL, 0};

    if (s.data == NULL || s.length <= 0) {
        return result;
    }

    result.data = malloc((size_t)s.length * sizeof *result.data);
    if (result.data == NULL) {
        return result;
    }

    for (int i = 0; i < s.length; i++) {
        result.data[i] = (int)s.data[i];
    }
    result.length = s.length;
    return result;
}
/*
 * Map every character of `s` to its position in `vocabulary`.
 *
 * Element i of the result is the index of the first occurrence of
 * s.data[i] within the first vocabulary.length bytes of vocabulary.data,
 * or -1 when the character does not occur there.
 *
 * Returns an array of s.length codes whose data the caller must free().
 * For an empty or invalid `s` (NULL data, length <= 0) or on allocation
 * failure the result is {NULL, 0}.
 */
array_t encode_string(string_t s, string_t vocabulary) {
    array_t result = {NULL, 0};

    if (s.data == NULL || s.length <= 0) {
        return result;
    }

    result.data = malloc((size_t)s.length * sizeof *result.data);
    if (result.data == NULL) {
        return result;
    }

    /* An empty or missing vocabulary matches nothing; never pass it to memchr. */
    const int have_vocab = vocabulary.data != NULL && vocabulary.length > 0;

    for (int i = 0; i < s.length; i++) {
        const char *hit = NULL;
        if (have_vocab) {
            hit = memchr(vocabulary.data, s.data[i], (size_t)vocabulary.length);
        }
        result.data[i] = (hit != NULL) ? (int)(hit - vocabulary.data) : -1;
    }
    result.length = s.length;
    return result;
}
/*
 * Build a new array consisting of `a` repeated `repeat_factor` times
 * back to back.
 *
 * Returns an array of a.length * repeat_factor elements whose data the
 * caller must free(). If `a` is empty or invalid, `repeat_factor` is not
 * positive, the total element count would not fit in an int, or the
 * allocation fails, the result is {NULL, 0}.
 */
array_t repeat_array(array_t a, int repeat_factor) {
    array_t result = {NULL, 0};

    if (a.data == NULL || a.length <= 0 || repeat_factor <= 0) {
        return result;
    }
    /* result.length is an int, so the element count must not exceed INT_MAX. */
    if (a.length > INT_MAX / repeat_factor) {
        return result;
    }

    size_t chunk = (size_t)a.length;
    size_t total = chunk * (size_t)repeat_factor;
    if (total > SIZE_MAX / sizeof *result.data) {
        return result;
    }

    result.data = malloc(total * sizeof *result.data);
    if (result.data == NULL) {
        return result;
    }

    /* Copy one whole repetition per iteration. */
    for (size_t offset = 0; offset < total; offset += chunk) {
        memcpy(result.data + offset, a.data, chunk * sizeof *a.data);
    }
    result.length = (int)total;
    return result;
}
int main() {
string_t text = {"hello", 5};
string_t dose = {"1", 1};
string_t collective_string = string_concat(text, dose);
string_t vocabulary = {VOCABULARY, strlen(VOCABULARY)};
array_t encoded = encode_string(collective_string, vocabulary);
array_t data = encoded;
for (int i = 0; i < data.length; i++) {
data.data[i] /= 10;
}
int repeat_factor = 2097152 / data.length;
array_t repeat_tensor = repeat_array(data, repeat_factor + 1);
repeat_tensor.length -= data.length;
// Reshape the array to 5D
int dims[] = {1, 1, 128, 128, 128};
int size = 1;
for (int i = 0; i < 5; i++) {
size *= dims[i];
}
array_t result;
result.data = (int *)malloc(size * sizeof(int));
for (int i = 0; i < size; i++) {
result.data[i] = repeat_tensor.data[i % repeat_tensor.length];
}
result.length = size;
// Print result
for (int i = 0; i < result.length; i++) {
printf("%d ", result.data[i]);
}
return 0;
}
/* A very simple modification to a custom weighted version of CLIP. */