#ifndef __UINT8_H__
#define __UINT8_H__
#include <stdio.h> 
#include <sys/time.h>
#include <stdbool.h>
#include <cblas.h>
#include <pthread.h>
#include <stdint.h>
#include <math.h>
#include "util.h"
#include "print.h"
#endif

/*
 * File-scope timing scratch variables.
 * NOTE(review): presumably start/end hold gettimeofday() samples and the
 * remaining variables hold the derived elapsed time -- confirm against the
 * code that uses them; nothing in this part of the file writes them.
 */
struct timeval start;
struct timeval end;
long seconds;
long microseconds;
double cpu_time_used;

/*
 * Argument bundle for a GEMM-style routine.
 *
 * The field names and their order mirror the BLAS sgemm parameter list
 * (transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc), but here
 * `a` and `b` point to uint8_t data and `c` points to uint16_t data.
 * NOTE(review): the exact meaning of each field is decided by the consumer
 * of this struct, which is not visible in this part of the file -- confirm
 * before relying on BLAS semantics.
 */
struct sgemm_args {
    char transa, transb;  /* per-operand transpose selectors (presumably 'N'/'T') */
    int m, n, k;          /* problem dimensions */
    float alpha;          /* scalar, named after sgemm's alpha */
    uint8_t *a;           /* first input operand */
    int lda;              /* leading dimension paired with a */
    uint8_t *b;           /* second input operand */
    int ldb;              /* leading dimension paired with b */
    float beta;           /* scalar, named after sgemm's beta */
    uint16_t *c;          /* output operand */
    int ldc;              /* leading dimension paired with c */
};
/*
 * Matrix dimensions used to size the file-scope buffers below.
 * All three dimensions expand to L, so every full-size buffer holds
 * 1024 * 1024 elements.
 * NOTE(review): FM/FK/FN presumably follow the GEMM M/K/N convention
 * (M x K times K x N gives M x N) -- confirm against the code that uses them.
 */
#define L 1024
#define FM L
#define FK L
#define FN L

/* uint16_t buffer with FM * FN elements. */
uint16_t test_mat[(FM) * (FN)];

/* Float matrices: ori_mat1 has FM * FK elements, ori_mat2 has FK * FN. */
float ori_mat1[(FM) * (FK)];
float ori_mat2[(FK) * (FN)];

/*
 * NOTE(review): presumably the value ranges of ori_mat1 / ori_mat2 (e.g. for
 * quantizing them to 8 bits) -- confirm against the code that sets them.
 */
float max_mat1, min_mat1;
float max_mat2, min_mat2;

/* Full-size uint8_t operands: matA has FM * FK elements, matW has FK * FN. */
uint8_t matA[(FM) * (FK)];
uint8_t matW[(FK) * (FN)];

/*
 * Half-size uint8_t buffers, two per operand.
 * NOTE(review): presumably the two halves of matA / matW -- confirm.
 */
uint8_t matA_0[(FM) * (FK) / 2], matA_1[(FM) * (FK) / 2];
uint8_t matW_0[(FK) * (FN) / 2], matW_1[(FK) * (FN) / 2];

/* Full-size uint16_t buffer with FM * FN elements. */
uint16_t matB[(FM) * (FN)];

/*
 * Four quarter-size uint16_t buffers (FM * FN / 4 elements each).
 * NOTE(review): presumably the four tiles of matB -- confirm.
 */
uint16_t matB0[(FM) * (FN) / 4], matB1[(FM) * (FN) / 4];
uint16_t matB2[(FM) * (FN) / 4], matB3[(FM) * (FN) / 4];


/* Full-size uint8_t operands: matGB has FM * FN elements, matWT has FN * FK. */
uint8_t matGB[(FM) * (FN)];
uint8_t matWT[(FN) * (FK)];

/*
 * Half-size uint8_t buffers, two per operand.
 * NOTE(review): presumably the two halves of matGB / matWT -- confirm.
 */
uint8_t matGB_0[(FM) * (FN) / 2], matGB_1[(FM) * (FN) / 2];
uint8_t matWT_0[(FN) * (FK) / 2], matWT_1[(FN) * (FK) / 2];

/* Full-size uint16_t buffer with FM * FK elements. */
uint16_t matGA[(FM) * (FK)];

/*
 * Four quarter-size uint16_t buffers (FM * FK / 4 elements each).
 * NOTE(review): presumably the four tiles of matGA -- confirm.
 */
uint16_t matGA0[(FM) * (FK) / 4], matGA1[(FM) * (FK) / 4];
uint16_t matGA2[(FM) * (FK) / 4], matGA3[(FM) * (FK) / 4];


/*
 * Full-size uint8_t operand with FK * FM elements.
 *
 * matGB, matGB_0 and matGB_1 are already defined earlier in this file with
 * the same types and sizes, so they are not repeated here. The repeated
 * definitions were only legal as C tentative definitions (they are an error
 * in C++) and added nothing.
 */
uint8_t matAT[FK * FM];

/*
 * Half-size uint8_t buffers (FK * FM / 2 elements each).
 * NOTE(review): presumably the two halves of matAT -- confirm.
 */
uint8_t matAT_0[FK * FM / 2];
uint8_t matAT_1[FK * FM / 2];

/* Full-size uint16_t buffer with FK * FN elements. */
uint16_t matGW[FK * FN];

/*
 * Four quarter-size uint16_t buffers (FK * FN / 4 elements each).
 * NOTE(review): presumably the four tiles of matGW -- confirm.
 */
uint16_t matGW0[FK * FN / 4];
uint16_t matGW1[FK * FN / 4];
uint16_t matGW2[FK * FN / 4];
uint16_t matGW3[FK * FN / 4];
