"""
This file defines functions to generate data for use across runs

"""

#Import packages and variables:

from var import *  # importing all variables from var.py
import pickle  # Library that serializes and deserializes python objects
import scipy.stats as stats  # Stats module from scipy library
import numpy as np  # Numpy library

def site_def():
    """ Defines site effect sizes corresponding to an effect size distribution """ 
    
    # S is defined in var
    eff_size_params = np.array((9.76428009e-01, 5.78934275, 2.79397064e-07, 2.71518734e-01))
    # An array of four parameters used in the beta prime distribution
    # Shape parameter, a = 9.76428009e-01
    # Shape parameter, b = 5.78934275
    # Scale parameter, m = 2.79397064e-07
    # Location parameter, loc = 2.71518734e-01
    
    sites = stats.betaprime.rvs(*eff_size_params, size=S) # Generates a random sample containing S random effect sizes from the beta prime distribution defined

    #To ensure that each effect size is less than or equal to 0.5: 
    for n, i in enumerate(sites): # Iterate over each element i in the sites array
        while i > 0.5:  # while the effect size is greater than 0.5
            i = stats.betaprime.rvs(*eff_size_params, size=1) # generate a new random effect size using the same beta prime distribution parameters
            sites[n] = i # Assign the new effect size to the current element of the sites array
            
    with open('sites.pickle', 'wb') as fp:  # Creates and opens a file called sites.pickle in write binary mode
        pickle.dump(sites, fp) # Serializes the sites array and writes it to the sites.pickle file

def sums_map():
    """ Generates a set of potential additive genotypic values for individuals to allow for linear mapping between simulated genotypic values and empirical values of variance in turn bias """
    
    # SD_tb is the mapping data from Ayroles 2015; for a different map, replace the file imported
    with open('SD_tb.pickle', 'rb') as fp:  # Opens the SD_tb.pickle file in read binary mode
        SD_tb = pickle.load(fp)  
    with open('sites.pickle', 'rb') as fp:  # Opens the sites.pickle file in read binary mode
        sites = pickle.load(fp)  
    map_1 = np.sort(SD_tb)  # Sorts the SD_tb NumPy array in ascending order
 
    numAni = 10000 # Sets the number of animals to 10000
    aniPool = np.zeros((numAni, S, 2))  
    
    for i in range(numAni):  # Iterates over each animal
        
        temp = np.random.random((S, 1)) < 0.5 # Pick S numbers randomly between 0 and 1, and check if they are less than 0.5
        
        # Assign non-zero effect sizes to certain animal-site combinations based on the random boolean values generated in the previous step: 
        for j, k in enumerate(temp):  # Iterate over each element k in the temp array, containing boolean values
            if k == True:  # If number picked for a site is randomly less than 0.5
                aniPool[i, j, 0] = sites[j]  # Assign that animal to have a non-zero effect size value for that site, for the first chromosome
                
        #Repeat the steps above for the second chromosome
        temp2 = np.random.random((S, 1)) < 0.5

        for l, m in enumerate(temp2):  # Iterate over each element k in the temp2 array, containing boolean values
            if m == True:  # If number picked for a site is randomly less than 0.5
                aniPool[i, l, 1] = sites[l] # Assign that animal to have a non-zero effect size value for that site
                

    sums_sites = np.sum(np.sum(aniPool, axis=1), axis=1)  # Sums the aniPool array along the first and second axis to obtain total genoypic value for each individual

    sums_sites = np.sort(sums_sites)  # Sorts the sums_S1 array in ascending order

    idx = np.round(np.linspace(0, len(sums_sites) - 1, len(SD_tb))).astype(int)
    # Generates an array of evenly spaced integer numbers over the interval from 0 to len(sums_S1) - 1
    # The number of elements in the array is equal to the length of the SD_tb array

    sample_sums = sums_sites[idx]
    # Creates a new array containing the elements of the sums_S1 array at the indices specified by the idx array
    # This new array has the same length as the SD_tb array and contains elements from sums_S1 at evenly spaced positions

    with open('sums_map.pickle', 'wb') as fp:  # Creates and opens a file called sums_map.pickle in write binary mode
        pickle.dump(sample_sums, fp)  # Dumps the sample_sums array into the sums_map.pickle file


def epi_map(level):  # Level parameter is a string representing the level of epistasis, defined as the ratio of variance in epistatic value to the variance in total genptypic value
    """Generates a set of potential additive + epistatic genotypic values for individuals to allow for linear mapping between simulated genotypic values and empirical values of variance in turn bias"""
    
    if level == 'high':  # If the level is high
        sc = 0.0021  # Set the sc variable to 0.0021. 
        
    if level == 'mid':  # If the level is mid
        seed_run = 430  # Set the seed_run variable to 430
        sc = 0.0021 / np.sqrt(3)  # Set the sc variable to 0.0021 / the square root of 3.

    if level == 'low':  # If the level is low
        seed_run = 268  # Set the seed_run variable to 268
        sc = 0.0021 / np.sqrt(7)  # Set the sc variable to 0.0021 / the square root of 7.

    # SD_tb is the mapping data from Ayroles 2015; for a different map, replace the file imported
    with open('SD_tb.pickle', 'rb') as fp:  # Opens the SD_tb.pickle file in read binary mode
        SD_tb = pickle.load(fp)  # Loads the contents of the SD_tb.pickle file into the SD_tb variable
        
    with open('sites.pickle', 'rb') as fp:  # Opens the sites.pickle file in read binary mode
        sites = pickle.load(fp)  # Loads the contents of the sites.pickle file into the sites variable
    
    numAni = 10000  # Sets the number of animals to 10000
    aniPool = np.zeros((numAni, S, 2))  
    
    #Additive Effects (same procedure as sums_map):
    for i in range(numAni):  # Iterate over each animal
        temp = np.random.random((S, 1)) < 0.5 # Pick S number of numbers randomly between 0 and 1, and check if they are less than 0.5
        
        for j, k in enumerate(temp):  # Iterate over each element k in the temp array, containing boolean values
            if k == True:  # If number picked for a site is randomly less than 0.5
                aniPool[i, j, 0] = sites[j]  # Assign that animal to have a non-zero effect size value for that site, for the first chromosome
 
        #Repeat the steps above for the second chromosome
        temp2 = np.random.random((S, 1)) < 0.5

        for l, m in enumerate(temp2):  # Iterate over each element m in the temp2 array, containing boolean values
            if m == True:  # If number picked for a site is randomly less than 0.5
                aniPool[i, l, 1] = sites[l] # Assign that animal to have a non-zero effect size value for that site

    sums_sites = np.sum(np.sum(aniPool, axis=1), axis=1) # Sums the aniPool array along the first and second axis to obtain additive genoypic value for each individual
    # 1D array where each element represents the total effect size for a specific animal
    sums_sites = np.sort(sums_sites)  # Sorts the elements of the sums_sites array in ascending order

    # Epistatic Effects:
    
    epi_val_1 = -stats.expon.rvs(loc=0, scale=sc, size=int(((S * S) - S) / 4))
    # Generates an array of negative random samples from an exponential distribution with the specified location and scale parameters.
    # The array size is set to generate values for filling half of the non-diagonal elements of a symmetric SxS matrix of pairwise interactions

    epi_val_2 = stats.expon.rvs(loc=0, scale=sc, size=int(((S * S) - S) / 4))
    # Generates an array of positive random samples from an exponential distribution with the specified location and scale parameters.

    epi_val = np.concatenate((epi_val_1, epi_val_2)) # Concatenates the two arrays of random samples into a single array
    np.random.shuffle(epi_val)  # Randomly shuffles the elements of the epi_val array

    
    epi_mat = np.zeros((S, S))  # Creates a 2D array of zeros with dimensions S x S to hold pairwise epistatic interaction values
    
    xs, ys = np.triu_indices(S, 1)  # Returns the row & column indices for the upper-triangle of the (S x S) array
    
    epi_mat[xs, ys] = epi_val  # Assign the shuffled epi_val array values to the upper triangle of the epi_mat matrix
    epi_mat[ys, xs] = epi_val  # Assign the shuffled epi_val array values to the lower triangle of the epi_mat matrix
    # Makes epi_mat a symmetric matrix.

    pool_rs = aniPool.transpose(0, 2, 1)  # Rearrange the elements of the aniPool array

    epi_all = [] # To store epistatic effect sizes for all individuals
    for a in pool_rs:  # iterate over each animal
        epi_a = 0 # to store the epistatic value for the current individual
        for b in a:  # iterate over each chromosome for animal'a'
            
            indices = np.where(b != 0, 1, 0) # returns an array of 1s and 0s, where 1 indicates that the site has a non-zero additive effect size
            
            indices = indices.reshape((S, 1))  # reshapes the indices array into a two-dimensional array

            #Extract the epistatic value for animal 'a' for all interactions on chromosome 'b'
            epi_b = np.sum(np.multiply(np.multiply(epi_mat, indices), indices.T)) # if site has a zero additive effect size, its epistatic value is also 0
        
            epi_a += epi_b # add value for chromosome 'b' to cumulative epistatic value for animal a
            
        epi_all.append(epi_a)  # Append the total epistatic effect size for each animal to the epi_all list
    
    epi_all = np.array(epi_all)  # Converts the epi_all list to a numpy array

    #Combined genotypic effect:
    s_overall = sums_sites + epi_all  # adds the sums_sites array to the epi_all array
    #s_overall array contains the total combined effect (additive and epistasis effects) for each animal.
    
    sums_sort = np.sort(s_overall)  # sorts the s_overall array in ascending order
    
    idx_epi = np.round(np.linspace(0, len(sums_sort) - 1, len(SD_tb))).astype(int)
    # Generates an array of evenly spaced integer numbers over the interval from 0 to len(sums_sort) - 1
    # The number of elements in the array is equal to the length of the SD_tb array
    sums_overall = sums_sort[idx_epi]  # creates a new array containing the elements of the sums_sort array
    # at the indices specified by the idx_epi array

    if level == 'high':  # if the level of epistasis is high
        with open('sums_overall_h.pickle', 'wb') as fp:  # creates and opens a pickle file in write mode
            pickle.dump(sums_overall, fp)  # dumps the sums_overall array into the pickle file
        with open('epi_mat_h.pickle', 'wb') as fp:  # creates and opens a pickle file in write mode
            pickle.dump(epi_mat, fp)  # dumps the epi_mat array into the pickle file
    
    if level == 'mid':  # if the level of epistasis is medium
        with open('sums_overall_m.pickle', 'wb') as fp:  # creates and opens a pickle file in write mode
            pickle.dump(sums_overall, fp)  # dumps the sums_overall array into the pickle file
        with open('epi_mat_m.pickle', 'wb') as fp:  # creates and opens a pickle file in write mode
            pickle.dump(epi_mat, fp)  # dumps the epi_mat array into the pickle file
    
    if level == 'low':  # if the level of epistasis is low
        with open('sums_overall_l.pickle', 'wb') as fp:  # creates and opens a pickle file in write mode
            pickle.dump(sums_overall, fp)  # dumps the sums_overall array into the pickle file
        with open('epi_mat_l.pickle', 'wb') as fp:  # creates and opens a pickle file in write mode
            pickle.dump(epi_mat, fp)  # dumps the epi_mat array into the pickle file