#  Copyright 2021 The PlenOctree Authors.
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  1. Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright notice,
#  this list of conditions and the following disclaimer in the documentation
#  and/or other materials provided with the distribution.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
import math
import torch
import time
import numpy as np
import torch.nn.functional as F
from utils.brdf_utils import rendering_equation
#from sphericalHarmonics import shEvaluate
# Per-band coefficients of the hardcoded real spherical-harmonic polynomials
# used by eval_sh / eval_sh_coef: C0 for band 0, C1 for band 1, and one entry
# per basis function for bands 2, 3 and 4 (5, 7 and 9 entries respectively).
#
# The constants are stored as tensors on the GPU when one is available.  On a
# machine without CUDA they are created on the CPU instead, so that merely
# importing this module no longer raises.
_SH_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

C0 = torch.tensor(0.28209479177387814, device=_SH_DEVICE)
C1 = torch.tensor(0.4886025119029199, device=_SH_DEVICE)
C2 = torch.tensor(
    [
        1.0925484305920792,
        -1.0925484305920792,
        0.31539156525252005,
        -1.0925484305920792,
        0.5462742152960396,
    ],
    device=_SH_DEVICE,
)
C3 = torch.tensor(
    [
        -0.5900435899266435,
        2.890611442640554,
        -0.4570457994644658,
        0.3731763325901154,
        -0.4570457994644658,
        1.445305721320277,
        -0.5900435899266435,
    ],
    device=_SH_DEVICE,
)
C4 = torch.tensor(
    [
        2.5033429417967046,
        -1.7701307697799304,
        0.9461746957575601,
        -0.6690465435572892,
        0.10578554691520431,
        -0.6690465435572892,
        0.47308734787878004,
        -1.7701307697799304,
        0.6258357354491761,
    ],
    device=_SH_DEVICE,
)
def eval_sh(deg, sh, dirs):
    """
    Sum a truncated real spherical-harmonic expansion along the given
    directions, using hardcoded polynomials in the direction components.

    The per-band constants C0..C4 are module-level torch tensors.
    ... stands for zero or more batch dimensions.
    Args:
        deg: int, highest SH band to include. Values 0 through 4 are accepted.
        sh: SH coefficients [..., C, K] with K >= (deg + 1) ** 2
        dirs: directions [..., 3]; they are used as given (no normalisation
            is performed here), so callers should pass unit vectors.
    Returns:
        [..., C]
    """
    assert 0 <= deg <= 4
    assert sh.shape[-1] >= (deg + 1) ** 2

    # Band 0 is direction independent.
    total = C0 * sh[..., 0]
    if deg == 0:
        return total

    # Keep a trailing singleton axis so the components broadcast against C.
    x = dirs[..., 0:1]
    y = dirs[..., 1:2]
    z = dirs[..., 2:3]
    total = total - C1 * y * sh[..., 1]
    total = total + C1 * z * sh[..., 2]
    total = total - C1 * x * sh[..., 3]
    if deg == 1:
        return total

    xx, yy, zz = x * x, y * y, z * z
    xy, yz, xz = x * y, y * z, x * z
    band2 = (
        C2[0] * xy,
        C2[1] * yz,
        C2[2] * (2.0 * zz - xx - yy),
        C2[3] * xz,
        C2[4] * (xx - yy),
    )
    for k, basis in enumerate(band2, start=4):
        total = total + basis * sh[..., k]
    if deg == 2:
        return total

    band3 = (
        C3[0] * y * (3 * xx - yy),
        C3[1] * xy * z,
        C3[2] * y * (4 * zz - xx - yy),
        C3[3] * z * (2 * zz - 3 * xx - 3 * yy),
        C3[4] * x * (4 * zz - xx - yy),
        C3[5] * z * (xx - yy),
        C3[6] * x * (xx - 3 * yy),
    )
    for k, basis in enumerate(band3, start=9):
        total = total + basis * sh[..., k]
    if deg == 3:
        return total

    band4 = (
        C4[0] * xy * (xx - yy),
        C4[1] * yz * (3 * xx - yy),
        C4[2] * xy * (7 * zz - 1),
        C4[3] * yz * (7 * zz - 3),
        C4[4] * (zz * (35 * zz - 30) + 3),
        C4[5] * xz * (7 * zz - 3),
        C4[6] * (xx - yy) * (7 * zz - 1),
        C4[7] * xz * (xx - 3 * yy),
        C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)),
    )
    for k, basis in enumerate(band4, start=16):
        total = total + basis * sh[..., k]
    return total
def eval_sh_coef(deg, dirs):
    """
    Evaluate the real spherical-harmonic basis functions themselves (no
    coefficients applied) along the given directions, using the same
    hardcoded polynomials as eval_sh.

    ... stands for zero or more batch dimensions.
    Args:
        deg: int, highest SH band to include. Values 0 through 4 are accepted.
        dirs: directions [..., 3]; used as given, so callers should pass
            unit vectors.
    Returns:
        [..., (deg + 1) ** 2] tensor of torch's default dtype, allocated on
        dirs.device
    """
    assert 0 <= deg <= 4
    n_basis = (deg + 1) ** 2
    basis = torch.zeros(dirs.shape[:-1] + (n_basis,), device=dirs.device)

    # Band 0 is a constant.
    basis[..., 0] = C0
    if deg == 0:
        return basis

    x = dirs[..., 0]
    y = dirs[..., 1]
    z = dirs[..., 2]
    band1 = (-C1 * y, C1 * z, -C1 * x)
    for k, value in enumerate(band1, start=1):
        basis[..., k] = value
    if deg == 1:
        return basis

    xx, yy, zz = x * x, y * y, z * z
    xy, yz, xz = x * y, y * z, x * z
    band2 = (
        C2[0] * xy,
        C2[1] * yz,
        C2[2] * (2.0 * zz - xx - yy),
        C2[3] * xz,
        C2[4] * (xx - yy),
    )
    for k, value in enumerate(band2, start=4):
        basis[..., k] = value
    if deg == 2:
        return basis

    band3 = (
        C3[0] * y * (3 * xx - yy),
        C3[1] * xy * z,
        C3[2] * y * (4 * zz - xx - yy),
        C3[3] * z * (2 * zz - 3 * xx - 3 * yy),
        C3[4] * x * (4 * zz - xx - yy),
        C3[5] * z * (xx - yy),
        C3[6] * x * (xx - 3 * yy),
    )
    for k, value in enumerate(band3, start=9):
        basis[..., k] = value
    if deg == 3:
        return basis

    band4 = (
        C4[0] * xy * (xx - yy),
        C4[1] * yz * (3 * xx - yy),
        C4[2] * xy * (7 * zz - 1),
        C4[3] * yz * (7 * zz - 3),
        C4[4] * (zz * (35 * zz - 30) + 3),
        C4[5] * xz * (7 * zz - 3),
        C4[6] * (xx - yy) * (7 * zz - 1),
        C4[7] * xz * (xx - 3 * yy),
        C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)),
    )
    for k, value in enumerate(band4, start=16):
        basis[..., k] = value
    return basis
def RGB2SH(rgb):
    """Convert a colour value to a band-0 SH coefficient: (rgb - 0.5) / C0."""
    centred = rgb - 0.5
    return centred / C0

def SH2RGB(sh):
    """Convert a band-0 SH coefficient back to a colour value: sh * C0 + 0.5."""
    scaled = sh * C0
    return scaled + 0.5
    
def ToVector(phi,theta):
    """
    Convert spherical angles to Cartesian unit vectors.

    Args:
        phi: azimuth tensor (angle in the x-y plane)
        theta: polar tensor (angle measured from the +z axis)
    Returns:
        (sin(theta) cos(phi), sin(theta) sin(phi), cos(theta)) concatenated
        along dim 2, so the inputs need at least three dimensions.
    """
    sin_theta = torch.sin(theta)
    x = sin_theta * torch.cos(phi)
    y = sin_theta * torch.sin(phi)
    z = torch.cos(theta)
    return torch.cat([x, y, z], dim=2)
def factorial(x):
    """
    Return x! as a float.

    Computed iteratively, so large arguments no longer hit Python's recursion
    limit (the product simply overflows to inf), and invalid arguments are
    rejected instead of recursing without end.

    Args:
        x: non-negative integer value.
    Returns:
        float: the product 1 * 2 * ... * x (1.0 for x == 0).
    Raises:
        ValueError: if x is negative or not integer-valued.
    """
    if x < 0 or x != int(x):
        raise ValueError("factorial() requires a non-negative integer, got %r" % (x,))
    product = 1.0
    for k in range(1, int(x) + 1):
        product = k * product
    return product
def P(l, m, x):
    """
    Evaluate the associated Legendre function P_l^m at x by upward recurrence.

    The sign factor (-1)^m is included in the starting value P_m^m.
    Helper tensors are allocated on x.device, so CPU tensors are accepted as
    well as CUDA ones.

    Args:
        l: int band index.
        m: int order; the recurrence assumes 0 <= m <= l.
        x: tensor of arguments (cos(theta) when called from SH).
    Returns:
        Tensor with the shape of x.  For l < m the recurrence loop never runs
        and a zero tensor is returned.
    """
    # Starting value P_m^m(x) = (-1)^m (2m-1)!! (1 - x^2)^(m/2).
    pmm = 1.0
    if m > 0:
        somx2 = torch.sqrt((1.0 - x) * (1.0 + x))
        fact = 1.0
        for _ in range(m):
            pmm = pmm * ((-fact) * somx2)
            fact += 2.0

    if l == m:
        # pmm is a plain float when m == 0; multiplying by ones makes the
        # result a tensor shaped like x in every case.
        return pmm * torch.ones(x.shape, device=x.device)

    # P_{m+1}^m(x) = x (2m + 1) P_m^m(x)
    pmmp1 = x * (2.0 * m + 1.0) * pmm
    if l == m + 1:
        return pmmp1

    # (ll - m) P_ll^m = (2 ll - 1) x P_{ll-1}^m - (ll + m - 1) P_{ll-2}^m
    pll = torch.zeros(x.shape, device=x.device).float()
    for ll in range(m + 2, l + 1):
        pll = ((2.0 * ll - 1.0) * x * pmmp1 - (ll + m - 1.0) * pmm) / (ll - m)
        pmm = pmmp1
        pmmp1 = pll

    return pll


def K(l, m):
    """
    Normalisation factor of the spherical harmonic of band l and order m:
    sqrt((2l + 1) (l - m)! / (4 pi (l + m)!)).
    """
    numerator = (2 * l + 1) * factorial(l - m)
    denominator = 4 * math.pi * factorial(l + m)
    return math.sqrt(numerator / denominator)

def SH(l, m, theta, phi):
    """
    Evaluate the real spherical harmonic of band l and order m.

        m == 0:  K(l, 0) * P(l, 0, cos(theta))
        m  > 0:  sqrt(2) * K(l, m)  * cos(m * phi)  * P(l, m, cos(theta))
        m  < 0:  sqrt(2) * K(l, -m) * sin(-m * phi) * P(l, -m, cos(theta))

    Args:
        l: int band index.
        m: int order, -l <= m <= l.
        theta: tensor of polar angles.
        phi: tensor of azimuth angles.
    Returns:
        Tensor broadcast from the shapes of theta and phi.
    """
    sqrt2 = 2.0 ** 0.5
    if m == 0:
        # The harmonic does not depend on phi here; multiplying by ones shaped
        # like phi only broadcasts the result.  The ones tensor is created on
        # phi's device rather than unconditionally on CUDA.
        broadcast = torch.ones(phi.shape, device=phi.device).float()
        return K(l, m) * P(l, m, torch.cos(theta)) * broadcast
    if m > 0:
        return sqrt2 * K(l, m) * torch.cos(m * phi) * P(l, m, torch.cos(theta)).float()
    return sqrt2 * K(l, -m) * torch.sin(-m * phi) * P(l, -m, torch.cos(theta)).float()
	
def shTerms(l):
    """Number of SH basis functions in bands 0..l, i.e. (l + 1) squared."""
    bands = l + 1
    return bands * bands

def shIndex(l, m):
    """Flat index of the (l, m) basis function: bands in order, m from -l to l."""
    band_centre = l * l + l
    return band_centre + m

def shEvaluate(theta, phi, lmax):
    """
    Evaluate every real SH basis function of bands 0..lmax at the given angles.

    Args:
        theta: polar angles; the first two dimensions define the output grid,
            so the tensor needs at least two dimensions.
        phi: azimuth angles matching theta.
        lmax: int, highest band to evaluate.
    Returns:
        Tensor [theta.shape[0], theta.shape[1], (lmax + 1) ** 2] whose last
        axis is ordered by shIndex(l, m).  It is allocated on theta's device
        rather than unconditionally on CUDA.
    """
    coeffsMatrix = torch.zeros(
        (theta.shape[0], theta.shape[1], shTerms(lmax)), device=theta.device)
    for l in range(lmax + 1):
        for m in range(-l, l + 1):
            # squeeze() drops singleton axes of the SH result before it is
            # written into its slot of the output.
            coeffsMatrix[..., shIndex(l, m)] = SH(l, m, theta, phi).squeeze()

    return coeffsMatrix
def rotation_between_z(vec):
    """
    Build, for every input vector, the matrix that maps the +z axis onto it.

    https://math.stackexchange.com/questions/180418/calculate-rotation-matrix-to-align-vector-a-to-vector-b-in-3d/476311#476311

    With v = z x vec = (-vec_y, vec_x, 0) and c = vec_z, the matrix is
    R = I + [v]_x + [v]_x^2 / (1 + c).  The third component of v is always
    zero, so the terms that contained it are omitted.

    Args:
        vec: [..., 3] target directions; the formula assumes unit length.

    Returns:
        R: [..., 3, 3] float32 tensor on vec's device (previously always CUDA).
        Where vec_z + 1 <= 0 (vec pointing along -z) the formula is singular
        and -I is returned instead.  NOTE(review): -I does send +z to -z but
        has determinant -1, so it is an inversion rather than a rotation.

    """
    v1 = -vec[..., 1]
    v2 = vec[..., 0]
    v11 = v1 * v1
    v22 = v2 * v2
    v12 = v1 * v2
    # 1 + cos(angle to z), kept away from zero to avoid dividing by it.
    cos_p_1 = (vec[..., 2] + 1).clamp_min(1e-7)

    R = torch.zeros(vec.shape[:-1] + (3, 3,), dtype=torch.float32, device=vec.device)
    R[..., 0, 0] = 1 - v22 / cos_p_1
    R[..., 0, 1] = v12 / cos_p_1
    R[..., 0, 2] = v2
    R[..., 1, 0] = v12 / cos_p_1
    R[..., 1, 1] = 1 - v11 / cos_p_1
    R[..., 1, 2] = -v1
    R[..., 2, 0] = -v2
    R[..., 2, 1] = v1
    R[..., 2, 2] = 1 - (v22 + v11) / cos_p_1

    antipodal = -torch.eye(3, dtype=torch.float32, device=vec.device).expand_as(R)
    regular = (vec[..., 2] + 1 > 0)[..., None, None]
    return torch.where(regular, R, antipodal)
def fibonacci_sphere_sampling(normals, sample_num, random_rotate=True):
    """
    Generate sample_num directions per normal from a Fibonacci spiral.

    The spiral is laid out around the +z axis with z running from 1 down to
    -1 (the whole sphere), optionally spun about z by a random angle per
    normal, and then mapped onto each normal with rotation_between_z.

    Args:
        normals: [..., 3] tensor; leading dimensions are flattened internally
            when there is more than one of them.
        sample_num: int, number of directions per normal.
        random_rotate: if true, add an independent uniform angle in
            [0, 2*pi) to the spiral of every normal.
    Returns:
        incident_dirs: [..., sample_num, 3] normalised directions.
        incident_areas: [..., sample_num, 1] tensor filled with 2*pi.
    """
    pre_shape = normals.shape[:-1]
    if len(pre_shape) > 1:
        normals = normals.reshape(-1, 3)
    device = normals.device
    delta = np.pi * (3.0 - np.sqrt(5.0))

    # fibonacci sphere sample around z axis
    idx = torch.arange(sample_num, dtype=torch.float, device=device)[None]
    # max(..., 1) avoids 0/0 for a single sample, which then sits on +z.
    z = 1 - 2 * idx / max(sample_num - 1, 1)
    rad = torch.sqrt(1 - z ** 2)
    theta = delta * idx
    if random_rotate:
        # One offset per (flattened) normal.  Using the un-flattened batch
        # shape here would not broadcast against the flattened rotation
        # matrices below when normals has several leading dimensions.
        offset = torch.rand(*normals.shape[:-1], 1, device=device) * 2 * np.pi
        theta = offset + theta
    y = torch.cos(theta) * rad
    x = torch.sin(theta) * rad
    z_samples = torch.stack([x, y, z.expand_as(y)], dim=-2)

    # rotate to normal
    rotation_matrix = rotation_between_z(normals)
    incident_dirs = rotation_matrix @ z_samples
    incident_dirs = F.normalize(incident_dirs, dim=-2).transpose(-1, -2)
    incident_areas = torch.ones_like(incident_dirs)[..., 0:1] * 2 * np.pi
    if len(pre_shape) > 1:
        incident_dirs = incident_dirs.reshape(*pre_shape, sample_num, 3)
        incident_areas = incident_areas.reshape(*pre_shape, sample_num, 1)
    return incident_dirs, incident_areas
def sample_incident_rays(normals, is_training=False, sample_num=24):
    """
    Sample incident directions around each normal with
    fibonacci_sphere_sampling; the random spin of the sample pattern is
    enabled only when is_training is truthy.
    """
    jitter = True if is_training else False
    incident_dirs, incident_areas = fibonacci_sphere_sampling(
        normals, sample_num, random_rotate=jitter)
    return incident_dirs, incident_areas  # [N, S, 3], [N, S, 1]

def ProjectFunction(order, func, sample_side, N, visibility,V,albedo,metallic,roughness):
    """
    Monte-Carlo estimate of a visibility- and cosine-weighted BRDF expressed
    in a spherical-harmonic basis on both of its direction arguments, plus a
    mean-visibility value per point.

    Steps, as implemented below:
      1. Draw sample_side ** 2 directions per normal with
         fibonacci_sphere_sampling (random rotation enabled).
      2. Evaluate the SH-encoded `visibility` along those directions and clamp
         it at 0; its mean over the samples is returned as out_occ.  The
         per-sample value is then binarised at 0.7.
      3. Compute the dot product of each direction with the normal, clamped
         at 0, and multiply it by the binarised visibility.
      4. Evaluate the SH basis up to band 3 (16 functions, hard-coded) at the
         sampled directions with shEvaluate.
      5. Call rendering_equation once per sampled direction, using it as the
         output direction and all sampled directions as incident directions.
      6. Contract the weighted result with the SH basis on both sample axes
         and scale by (4 * pi) ** 2 / sample_count ** 2.

    Everything runs under torch.no_grad().  The function requires CUDA: it
    calls torch.cuda.synchronize() and allocates with device="cuda".  It also
    prints shapes and timings to stdout.

    Args:
        order: int.  Only feeds eval_sh_coef(order - 1, ...) and a repeat by
            order ** 2 whose results are never read, so in effect it must
            merely satisfy 0 <= order - 1 <= 4.
        func: never referenced in the body.
        sample_side: int; sample_side ** 2 directions are drawn per normal.
        N: normals; presumably [n, 3] -- TODO confirm against the caller.
        visibility: 3-D tensor of SH visibility coefficients with the
            coefficient count on dim 1; presumably [n, K, 1] -- TODO confirm.
        V: only repeated along a new sample axis; the result is never used.
        albedo, metallic, roughness: 2-D per-point material tensors forwarded
            to rendering_equation after being repeated along a sample axis.
    Returns:
        (shs, out_occ).  out_occ is the per-point mean of the clamped
        visibility.  shs is presumably [n, 3, 16, 16], assuming
        rendering_equation returns [n, sample_count, 3] -- TODO confirm.
    """
    with torch.no_grad():
        # NOTE(review): the original comment here cited a reference "[1]" that
        # is not present in this file, describing a projection approach for
        # functions on the sphere that are represented analytically.
        V1=V
        # synchronize() so that the wall-clock timers below include queued GPU work.
        torch.cuda.synchronize()
        proj_start_time = time.time()
        lenth=N.shape[0]  # never read afterwards
        sample_count=sample_side**2

        # Jittered (stratified) sampling on a sample_side x sample_side grid.
        # NOTE(review): d, theta and phi computed in this section are all
        # overwritten by the Fibonacci sampling further down, so apart from
        # consuming random numbers and printing d.shape it has no effect on
        # the returned values.
        rngh = torch.rand((sample_count,1),device="cuda").float()
        rngt = torch.rand((sample_count,1),device="cuda").float()
        t = torch.arange(sample_count,device="cuda").unsqueeze(0).float().unsqueeze(2)
        t_mod=t%sample_side
        alpha = ((t -t_mod)/sample_side+rngt) / sample_side
        beta = (t_mod + rngh) / sample_side
        phi = 2.0 * 3.1415926535 * beta
        theta = (2.0 * alpha - 1.0).acos()
        torch.cuda.synchronize()
        # NOTE(review): ToVector is declared as ToVector(phi, theta) but is
        # called with (theta, phi) here, so the two angles are swapped.
        d=ToVector(theta,phi).squeeze()
        print("ds")
        print(d.shape)
        d=d.unsqueeze(0).repeat(albedo.shape[0],1,1)
        theta=theta.squeeze().unsqueeze(0).repeat(albedo.shape[0],1,1)
        phi=phi.squeeze().unsqueeze(0).repeat(albedo.shape[0],1,1)

        # Directions actually used: a randomly rotated Fibonacci set per normal.
        # NOTE(review): the call is made twice and only the second result is
        # kept; the first one only advances the random number generator.
        # `a` (the incident areas) is never used.
        d,a=fibonacci_sphere_sampling(N, sample_count, random_rotate=True)
        d,a=fibonacci_sphere_sampling(N, sample_count, random_rotate=True)
        # Spherical angles of the sampled directions.
        theta = torch.acos(d[...,2])
        phi = torch.atan2(d[...,1], d[...,0])

        # Move the coefficient axis last and add two singleton axes so it
        # broadcasts against the basis values computed per sample.
        shs_visibility = visibility.transpose(1, 2).view(albedo.shape[0], 1, 1, -1)
        torch.cuda.synchronize()
        eval_start_time = time.time()
        # NOTE(review): sh_coef is never used; only the assertion inside
        # eval_sh_coef on order - 1 has an effect.
        sh_coef=eval_sh_coef(order-1,d).unsqueeze(2)
        # SH degree implied by the number of visibility coefficients.
        deg = int(np.sqrt(visibility.shape[1]) - 1)
        sh_coef_2=eval_sh_coef(deg,d).unsqueeze(2)
        # Reconstruct visibility along each direction (sum of basis times
        # coefficient) and clamp negatives to 0.  squeeze() removes every
        # singleton axis, including the batch or sample axis if either is 1.
        visibility=torch.clamp_min((sh_coef_2[..., :shs_visibility.shape[-1]] * shs_visibility).sum(-1), 0).squeeze()
        torch.cuda.synchronize()
        print("eval_sh run time:", time.time() - eval_start_time)
        # Mean clamped visibility over the samples (taken before thresholding).
        out_occ=visibility.mean(dim=1)		
        N=N.unsqueeze(1).repeat(1,sample_count,1)
        # mean over the 3 components times 3 is the dot product d . N.
        cos=(torch.mul(d,N).mean(dim=2)*3)
        zero = torch.zeros_like(cos)
        # Keep only directions on the positive side of the normal.
        cos = torch.where(cos <= 0.0, zero, cos)
        zeros = torch.zeros_like(visibility)
        ones = torch.ones_like(visibility)
        # Binarise visibility: 1 where >= 0.7, otherwise 0.
        visibility = torch.where(visibility >= 0.7, ones, visibility) # TODO
        visibility = torch.where(visibility < 0.7, zeros, visibility)
        torch.cuda.synchronize()
        print("p2_sh run time:", time.time() - proj_start_time)
        torch.cuda.synchronize()
        sh_eval_time=time.time()                 
        # TODO (from the original): lmax is hard-coded to 3 here; an earlier
        # version passed order - 1 instead.
        coeffsMatrix=shEvaluate(theta.squeeze(), phi.squeeze(), 3)
        torch.cuda.synchronize()
        print("sheval run time:", time.time()-sh_eval_time)
        # Per-sample weight: binarised visibility times clamped cosine.
        H=(visibility*cos)
        H1=H  # never read afterwards
        V=V.unsqueeze(1).repeat(1,sample_count,1)  # never read afterwards
    

    with torch.no_grad():
        brdf=[]
        # One rendering_equation call per sampled direction i, used as the
        # output direction for all incident directions d.
        for i in range(sample_count):
            BRDF1=rendering_equation(output_dirs=d[:,i,:].unsqueeze(1).repeat(1,sample_count,1), 
                        normals=N, 
                        base_color=albedo.unsqueeze(1).repeat(1,sample_count,1), 
                        roughness=roughness.unsqueeze(1).repeat(1,sample_count,1), 
                        metallic=metallic.unsqueeze(1).repeat(1,sample_count,1), 
                        incident_dirs=d)
            brdf.append(BRDF1.unsqueeze(2))
            torch.cuda.empty_cache()
        # Stack the per-output-direction results along a new axis 2.
        BRDF1 = torch.cat(brdf, dim=2)
        # chunk_size and the initial shs=[] are leftovers of a chunked variant
        # of the computation below that used to be sketched here in comments;
        # neither value is used.
        chunk_size=10000
        shs=[]
        # Broadcast the per-sample weight over the 3 channels and over axis 2
        # so it matches BRDF1.
        H_chunk=H.unsqueeze(2).repeat(1,1,3)
        BRDF_chunk=BRDF1
        H_chunk=H_chunk.unsqueeze(2).repeat(1,1,sample_count,1)
        # Weighted BRDF with the channel axis moved to position 1; presumably
        # [n, 3, sample_count, sample_count] -- TODO confirm.
        H_chunk=(H_chunk*BRDF_chunk).transpose(2,3).transpose(1,2)
        coeffsMatrix_chunk=coeffsMatrix.unsqueeze(1).repeat(1,3,1,1)
        coeffsMatrix_chunk_T=coeffsMatrix_chunk.transpose(2,3)
        # basis^T @ (weighted BRDF) @ basis: contracts both sample axes.
        tan_i=torch.matmul(H_chunk,coeffsMatrix_chunk)
        tan_i_j=torch.matmul(coeffsMatrix_chunk_T,tan_i)
        # Average over both sample axes.
        shs=(tan_i_j/(sample_count**2 ))
        torch.cuda.empty_cache()
        # One factor of 4 * pi per sample axis.
        weight = 4.0 * torch.pi 
        shs=shs*weight*weight
        V1=V1.unsqueeze(1).repeat(1,order**2,1)  # never read; not returned
        return shs,out_occ
