-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathttv_assembly.c
71 lines (62 loc) · 2.41 KB
/
ttv_assembly.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// Generated by the Tensor Algebra Compiler (tensor-compiler.org)
// taco "A(i,j)=B(i,j,k)*c(k)" -f=A:ds:0,1 -f=B:sss:0,1,2 -f=c:d:0 -s="assemble(A,Insert)" -s="parallelize(i,CPUThread,NoRaces)" -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
/*
 * Assemble the index structure of A for the kernel A(i,j) = B(i,j,k) * c(k).
 *
 * Only the sparsity pattern is built here; no values are computed:
 *   1. Count, for every stored row i of B, how many (i,j) fibers contain at
 *      least one stored k entry.
 *   2. Prefix-sum those counts into A's level-2 pos array.
 *   3. Allocate A's level-2 crd array and A's vals array to the total count.
 *   4. Write the j coordinates into crd, using pos[i] as a per-row cursor.
 *   5. Shift pos right by one slot to undo the cursor advancement.
 *
 * Parameters:
 *   A - output tensor. Reads dimensions[0]; on success indices[1][0],
 *       indices[1][1] and vals are overwritten with freshly allocated buffers.
 *       The previous pointers are overwritten without being freed.
 *       NOTE(review): confirm who owns the buffers A held on entry.
 *   B - input tensor. Reads dimensions[0], the pos/crd arrays of levels 0 and
 *       1, and the pos array of level 2.
 *   c - not accessed by this function.
 *
 * Returns 0 on success. Returns -1 if an allocation fails, in which case A is
 * left unmodified and every temporary buffer has been released.
 *
 * The vals buffer is allocated but left uninitialized.
 * Precondition (not checked): every row coordinate stored in B is smaller
 * than A's first dimension.
 */
int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *c) {
  (void)c;

  int A1_dimension = (int)(A->dimensions[0]);
  int B1_dimension = (int)(B->dimensions[0]);
  const int *restrict B1_pos = (int*)(B->indices[0][0]);
  const int *restrict B1_crd = (int*)(B->indices[0][1]);
  const int *restrict B2_pos = (int*)(B->indices[1][0]);
  const int *restrict B2_crd = (int*)(B->indices[1][1]);
  const int *restrict B3_pos = (int*)(B->indices[2][0]);

  /* The counts are written at B's row coordinates and read back for every
   * row of A, so the buffer must cover the larger of the two dimensions.
   * At least one element is requested so that NULL always means failure. */
  int nnz_len = A1_dimension > B1_dimension ? A1_dimension : B1_dimension;
  int32_t *restrict A2_nnz =
      calloc(nnz_len > 0 ? (size_t)nnz_len : 1, sizeof(int32_t));
  if (A2_nnz == NULL) {
    return -1;
  }

  /* Pass 1: per-row count of (i,j) fibers of B that hold at least one k. */
  #pragma omp parallel for schedule(runtime)
  for (int32_t iB = B1_pos[0]; iB < B1_pos[1]; iB++) {
    int32_t i = B1_crd[iB];
    int32_t row_nnz = 0;
    for (int32_t jB = B2_pos[iB]; jB < B2_pos[iB + 1]; jB++) {
      if (B3_pos[jB] < B3_pos[jB + 1]) {
        row_nnz++;
      }
    }
    A2_nnz[i] = row_nnz;
  }

  int32_t *restrict A2_pos =
      malloc(sizeof(int32_t) * ((size_t)A1_dimension + 1));
  if (A2_pos == NULL) {
    free(A2_nnz);
    return -1;
  }

  /* Prefix sum: A2_pos[i] becomes the first crd slot belonging to row i. */
  A2_pos[0] = 0;
  for (int32_t i = 0; i < A1_dimension; i++) {
    A2_pos[i + 1] = A2_pos[i] + A2_nnz[i];
  }

  /* malloc(0) may legitimately return NULL, so a NULL result only counts as
   * a failure when there is something to store. */
  size_t A_nnz = (size_t)A2_pos[A1_dimension];
  int32_t *restrict A2_crd = malloc(sizeof(int32_t) * A_nnz);
  double *restrict A_vals = malloc(sizeof(double) * A_nnz);
  if (A_nnz > 0 && (A2_crd == NULL || A_vals == NULL)) {
    free(A_vals);
    free(A2_crd);
    free(A2_pos);
    free(A2_nnz);
    return -1;
  }

  /* Pass 2: emit the j coordinates. A2_pos[i] serves as the insertion cursor
   * of row i and is advanced after every insert. */
  #pragma omp parallel for schedule(runtime)
  for (int32_t iB = B1_pos[0]; iB < B1_pos[1]; iB++) {
    int32_t i = B1_crd[iB];
    for (int32_t jB = B2_pos[iB]; jB < B2_pos[iB + 1]; jB++) {
      if (B3_pos[jB] < B3_pos[jB + 1]) {
        int32_t pA2 = A2_pos[i];
        A2_pos[i] = pA2 + 1;
        A2_crd[pA2] = B2_crd[jB];
      }
    }
  }

  /* After pass 2, A2_pos[i] holds the end of row i, which is the start of
   * row i + 1. Shifting right by one slot restores the start offsets. */
  for (int32_t i = A1_dimension; i > 0; i--) {
    A2_pos[i] = A2_pos[i - 1];
  }
  A2_pos[0] = 0;

  free(A2_nnz);

  /* A is only modified once every step has succeeded. */
  A->indices[1][0] = (uint8_t*)(A2_pos);
  A->indices[1][1] = (uint8_t*)(A2_crd);
  A->vals = (uint8_t*)A_vals;
  return 0;
}