#ifndef __Multi_H__
#define __Multi_H__
#include <stdio.h> 
#include "util.h"
#include <sys/time.h>
#include <stdbool.h>
#include <cblas.h>
#include <pthread.h>
#include <math.h>
#endif
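// Disabled clock_t-based variant of the timing globals; the struct timeval
// declarations below define the same names (start, end, cpu_time_used).
// clock_t start, end;
// double cpu_time_used;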

/* Timestamp pair bracketing a measured region. */
struct timeval start;
struct timeval end;

/*
 * Scratch values for the elapsed-time computation.
 * Presumably the tv_sec / tv_usec differences between `end` and `start`
 * -- confirm against the code that fills them in.
 */
long seconds;
long microseconds;

/*
 * Elapsed time of the measured region.
 * NOTE(review): the name says "cpu time", but a struct timeval pair is
 * normally filled with wall-clock timestamps -- confirm units and meaning
 * where this is assigned.
 */
double cpu_time_used;

/*
 * Compile-time dimensions used to size the static float buffers in this file
 * (buffers are declared with M * K, K * N and M * N elements).
 * Presumably mat1 is M x K, mat2 is K x N and the output is M x N -- confirm
 * against the multiplication code.
 *
 * M's replacement list is parenthesized so that it behaves as a single value
 * inside larger expressions: without the parentheses `x / M` would expand to
 * `x / 1 * 256` and `x % M` to `x % 1 * 256`.
 */
#define L 1024          /* not referenced by the buffer declarations in this file */
#define M (1 * 256)     /* leading factor looks like a manual scale knob -- confirm */
#define K 512
#define N 1024
/* Full-size input buffers: M*K and K*N floats respectively. */
float mat1[M * K], mat2[K * N];

/*
 * Two half-size buffers per input (each holds half of the corresponding
 * full input's element count). Presumably the inputs split in two for
 * parallel workers -- confirm how they are filled.
 */
float mat1_0[(M * K) / 2], mat1_1[(M * K) / 2];
float mat2_0[(K * N) / 2], mat2_1[(K * N) / 2];

/* Full-size output buffer: M*N floats. */
float out_mat[M * N];

/*
 * Four quarter-size output buffers (each a quarter of out_mat's element
 * count). Presumably one per partial product -- confirm against the
 * code that writes them.
 */
float out_mat0[(M * N) / 4], out_mat1[(M * N) / 4];
float out_mat2[(M * N) / 4], out_mat3[(M * N) / 4];

/*
 * Second M*N buffer. Presumably holds a reference result used to check
 * out_mat -- confirm.
 */
float test_mat[M * N];