Preliminary analysis of growth data for efflux experiment

(c) 2024 Justin Bois. This work is licensed under a Creative Commons Attribution License CC-BY 4.0. All code contained herein is licensed under an MIT license.

This document was prepared at Caltech with financial support from the Donna and Benjamin M. Rosen Bioengineering Center.


This tutorial was generated from a Jupyter notebook. You can download the notebook here.


In [1]:
import os
import io

import numpy as np
import polars as pl
import scipy.optimize

import iqplot

import bokeh.io
import bokeh.plotting
bokeh.io.output_notebook()

Here, I begin the analysis of the data for the 2024 efflux pump experiment. You need to adjust the code cell below to give the path to your data sets.

In [2]:
# This is where the data are on my machine. If you are in 
# the same directory as the data sets, then you can use
# data_path = './'
data_path = '/Users/bois/Dropbox/bi1x_2024_fall_data/efflux/'

Data wrangling

We will read in the data. First, we define the functions given in the tutorial for parsing the plate reader output.

In [3]:
def timestamp_to_seconds(t_stamp):
    """Convert H:M:S string to seconds. This is necessary
    to hand-build because the datetime utilities do not handle
    times over 24 hours."""
    h, m, s = t_stamp.split(':')
    return 3600 * int(h) + 60 * int(m) + float(s)


def read_plate_reader(fname, header_row, last_row):
    """Read in time series data from Bi 1x plate reader.

    Parameters
    ----------
    fname : str
        Path to file outputted from plate reader.
    header_row : int
        Row number of the line containing the column headers (zero-indexed)
    last_row : int
        Last row of the file containing the time series data (zero-indexed)

    Returns
    -------
    output : DataFrame
        Polars DataFrame containing time series data.
    """
    # Get relevant contents of file as string
    with open(fname, "r", encoding="ISO-8859-1") as f:
        lines = f.readlines()
    file_str = "".join(lines[header_row : last_row + 1])

    # Read in data frame
    df = pl.read_csv(
        io.StringIO(file_str),
        separator="\t",
        encoding="ISO-8859-1",
    )

    # Rename temperature column
    T_col = [col for col in df.columns if "T°" in col][0]
    df = df.rename({T_col: "temperature (deg C)"})

    # Parse the time column
    df = df.with_columns(
        pl.col("Time")
        .map_elements(timestamp_to_seconds, return_dtype=float)
        .alias("time (s)")
    ).with_columns(
        pl.col('time (s)') - pl.col('time (s)').min()
    )

    # Time in units of hours
    df = df.with_columns((pl.col('time (s)') / 3600).alias('time (hr)'))

    return df
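
As a quick check of the time parser (a small illustrative example, not part of the original notebook), a timestamp past 24 hours converts as expected:

# Hypothetical sanity check: 25 hours, 30 minutes, 15 seconds
timestamp_to_seconds('25:30:15')    # 25*3600 + 30*60 + 15 = 91815.0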

Next, we load in the data.

In [4]:
fname = os.path.join(data_path, '20241122_efflux.txt')
header_row = 53
last_row = 294

# Load in data
df = read_plate_reader(fname, header_row, last_row)

Now we'll add the metadata, first defining a dictionary that maps each well to its strain and antibiotic concentration.

In [5]:
well_metadata = dict(
    A1="MG-0.0",
    A2="WT-0.21",
    A3="MG-0.0",
    A4="WT-0.90",
    A5="MG-0.0",
    A6="WT-3.90",
    A7="MG-0.0",
    A8="WT-16.90",
    A9="MG-0.0",
    A10="WT-73.15",
    A11="MG-0.0",
    A12="WT-208.33",
    B1="3.19-0.0",
    B2="UV5-0.21",
    B3="3.19-0.0",
    B4="UV5-0.90",
    B5="3.19-0.0",
    B6="UV5-3.90",
    B7="3.19-0.0",
    B8="UV5-16.90",
    B9="3.19-0.0",
    B10="UV5-73.15",
    B11="3.19-0.0",
    B12="UV5-208.33",
    C1="IW-0.0",
    C2="MG-0.44",
    C3="IW-0.0",
    C4="MG-1.88",
    C5="IW-0.0",
    C6="MG-8.13",
    C7="IW-0.0",
    C8="MG-35.17",
    C9="IW-0.0",
    C10="MG-152.19",
    C11="IW-0.0",
    C12="MG-312.50",
    D1="WT-0.0",
    D2="3.19-0.44",
    D3="WT-0.0",
    D4="3.19-1.88",
    D5="WT-0.0",
    D6="3.19-8.13",
    D7="WT-0.0",
    D8="3.19-35.17",
    D9="WT-0.0",
    D10="3.19-152.19",
    D11="WT-0.0",
    D12="3.19-312.50",
    E1="UV5-0.0",
    E2="IW-0.44",
    E3="UV5-0.0",
    E4="IW-1.88",
    E5="UV5-0.0",
    E6="IW-8.13",
    E7="UV5-0.0",
    E8="IW-35.17",
    E9="UV5-0.0",
    E10="IW-152.19",
    E11="UV5-0.0",
    E12="IW-312.50",
    F1="MG-0.21",
    F2="WT-0.44",
    F3="MG-0.90",
    F4="WT-1.88",
    F5="MG-3.90",
    F6="WT-8.13",
    F7="MG-16.90",
    F8="WT-35.17",
    F9="MG-73.15",
    F10="WT-152.19",
    F11="MG-208.33",
    F12="WT-312.50",
    G1="3.19-0.21",
    G2="UV5-0.44",
    G3="3.19-0.90",
    G4="UV5-1.88",
    G5="3.19-3.90",
    G6="UV5-8.13",
    G7="3.19-16.90",
    G8="UV5-35.17",
    G9="3.19-73.15",
    G10="UV5-152.19",
    G11="3.19-208.33",
    G12="UV5-312.50",
    H1="IW-0.21",
    H2="blank",
    H3="IW-0.90",
    H4="blank",
    H5="IW-3.90",
    H6="blank",
    H7="IW-16.90",
    H8="blank",
    H9="IW-73.15",
    H10="blank",
    H11="IW-208.33",
    H12="blank",
)
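
A quick sanity check (an optional addition, not part of the original notebook) confirms that the dictionary covers the whole plate:

# Every well of the 96-well plate should have a metadata entry
assert len(well_metadata) == 96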

We should also store the promoter binding energy for each strain.

In [6]:
promoter_binding_energy = {
    'blank': None,
    'MG': np.inf,
    '3.19': -3.19,
    'IW': -4.28,
    'WT': -5.19,
    'UV5': -7.93
}

Now some functions to help us tidy the data set.

In [7]:
def conc(well, well_metadata):
    """Return abx concentration for a given well"""
    strain_conc = well_metadata[well]
    if strain_conc == "blank":
        return 0.0
    else:
        ind = strain_conc.find("-")
        return float(strain_conc[ind + 1 :])


def strain(well, well_metadata):
    """Return strain for a given well"""
    strain_conc = well_metadata[well]
    if strain_conc == "blank":
        return "blank"
    else:
        ind = strain_conc.find("-")
        return strain_conc[:ind]


def tidy(df, well_metadata, date):
    """Convert data frame to tidy format."""
    # Convert to tall format
    df_tidy = df.unpivot(
        index=["Time", "temperature (deg C)", "time (s)", "time (hr)"],
        variable_name="well",
        value_name="absorbance",
    )

    # Add metadata columns
    df_tidy = df_tidy.with_columns(
        pl.col('well').map_elements(lambda x: conc(x, well_metadata), return_dtype=float).alias('conc'),
        pl.col('well').map_elements(lambda x: strain(x, well_metadata), return_dtype=str).alias('strain')
    ).with_columns(
        pl.col('strain').replace_strict(promoter_binding_energy, return_dtype=pl.Float64).alias('promoter_binding_energy')
    )

    # Add the date
    df_tidy = df_tidy.with_columns(date=pl.lit(date))
    
    # Sort, starting with promoter binding energy
    df_tidy = df_tidy.sort(
        by=["promoter_binding_energy", "conc", "well", "time (hr)"]
    )

    return df_tidy
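
As a small illustration (not in the original notebook), the helper functions split the strain-concentration labels like so:

# Well A2 is labeled "WT-0.21": strain WT, 0.21 µM antibiotic
strain("A2", well_metadata)    # returns 'WT'
conc("A2", well_metadata)      # returns 0.21

# Blank wells report an antibiotic concentration of zero
conc("H2", well_metadata)      # returns 0.0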

Now, tidy the data frame.

In [8]:
df = tidy(df, well_metadata, 20241122)

We may also want to look at previous years' data

To look at last term's data, you can directly load a tidy data frame as follows.

In [9]:
df_spring = pl.read_csv(
    os.path.join(data_path, 'tidy_efflux_data_spring_2024.csv'),
    schema=df.schema
)

We can put all the data in a single data frame if we want to analyze them all together.

In [10]:
df = pl.concat((df, df_spring))

We could use all of the data, or pull out a specific experiment. To pull out a specific experiment, we can filter by date. We will use the most recent class's data.

In [11]:
# To pull out the spring data to use
# df = df.filter(pl.col('date')<20241122)

df = df.filter(pl.col('date')==20241122)

Checking the blanks

Let's check all blanks.

In [12]:
p = bokeh.plotting.figure(
    frame_width=350,
    frame_height=200,
    x_axis_label="time (hr)",
    y_axis_label="absorbance",
    tooltips=[('well', '@well')],
)

# Pull out only entries for blanks
df_blank = df.filter(pl.col("strain") == "blank")

# Groupby and iterate, adding to plot
for (well,), sub_df in df_blank.group_by("well"):
    p.line(
        source=sub_df.to_dict(),
        x="time (hr)",
        y="absorbance",
    )

bokeh.io.show(p)

The H12 blank is highly variable, so we will not use that one in our calculations.

We can now subtract the blanks to get the OD600.

In [13]:
# ##################
# You need to give an explanation of what the code in this cell is doing.
# ##################

# Compute blanks
blank = df.filter((pl.col("strain") == "blank") & (pl.col('well') != 'H12'))["absorbance"].mean()

# Subtract blanks
df = df.with_columns((pl.col('absorbance') - blank).alias('OD600'))

# Clean out negative values
df = df.filter(pl.col('OD600') > 0)

Plotting the curves

We now write a function to plot all curves (except blanks) for each concentration of antibiotic.

In [14]:
def plot_curves(df, conc):
    p = bokeh.plotting.figure(
        frame_width=400,
        frame_height=250,
        x_axis_label="time (hr)",
        y_axis_label="OD600",
        y_axis_type="log",
        toolbar_location="above",
        tooltips=[('well', '@well')],
        title=f'conc = {conc} µM',
    )
    
    # Color scheme
    colors = bokeh.palettes.Category10_10
    
    # Pull out only entries for antibiotic concentration of interest (and not blanks)
    inds = (pl.col("conc") == conc) & (pl.col("strain") != "blank")
    df_conc = df.filter(inds)
    
    # Items we will place in the legend
    legend_items = []
    
    # Groupby and iterate, adding to plot
    color_index = 0
    for (strain,), sub_df in df_conc.group_by("strain", maintain_order=True):
        # Lines for a given strain
        lines = []
    
        # Get each well for the strain and add a line to the plot
        for well, subsub_df in sub_df.group_by("well", maintain_order=True):
            lines.append(
                p.line(source=subsub_df.to_dict(), x="time (hr)", y="OD600", color=colors[color_index])
            )
            
        color_index += 1
    
        # Add the lines to the legend
        legend_items.append((strain, lines))
    
    # Create the legend from the items
    legend = bokeh.models.Legend(items=legend_items, click_policy="hide")
    
    # Add the legend to the plot
    p.add_layout(legend, "right")

    return p

We can then use this function to plot all of the curves. For convenience, we make a tabbed plot with one tab per antibiotic concentration. This is also a useful visualization simply for viewing the growth curves.

In [15]:
tabs = []
for c in df['conc'].unique().sort():
    tabs.append(bokeh.models.TabPanel(child=plot_curves(df, c), title=str(c)))

all_plots = bokeh.models.Tabs(tabs=tabs)
bokeh.io.show(all_plots)

In all curves, where there is growth, it is approximately exponential between OD600 values of 0.15 and 0.3.

Obtaining growth rate

Below are functions to help with the regressions to obtain growth rates.

In [16]:
def exp_growth(t, A0, lam):
    """Exponential growth model, A0 * exp(lam * t)."""
    return A0 * np.exp(lam * t)


def fit_growth(t, OD, ODmin=0.15, ODmax=0.3):
    """Obtain estimates for A0 and λ for a given sub-trace"""
    # Pull out linear regime
    inds = np.logical_and(OD >= ODmin, OD <= ODmax)
    t = t[inds]
    OD = OD[inds]
    
    # If the OD never enters the fitting window (no appreciable growth), return zero
    if len(t) == 0:
        return 0.0
    
    # Parameter guesses
    A0_guess = 0.01
    lam_guess = 1.0  # 1/hr

    # Perform optimization
    popt, _ = scipy.optimize.curve_fit(exp_growth, t, OD, p0=(A0_guess, lam_guess))
    
    # Just return growth rate 
    return popt[1]
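
Before fitting every well, it can help to sanity check the procedure on a single trace. The sketch below is an optional addition (well D1 is a zero-antibiotic WT well according to the metadata above); it fits one well and prints the inferred growth rate.

# Pull out a single zero-antibiotic well (D1 is WT-0.0 in the metadata)
sub_df = df.filter(pl.col("well") == "D1")

# Fit the exponential window for this trace
lam_D1 = fit_growth(
    sub_df["time (hr)"].to_numpy(),
    sub_df["OD600"].to_numpy(),
)

print(f"Growth rate for well D1: {lam_D1:.3f} 1/hr")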

As an example, I show how to compute growth rates for all strains with zero antibiotic concentration.

In [17]:
# ##################
# You need to give an explanation of what the code in this cell is doing.
# ##################

# Pull out only entries for zero antibiotic concentration (and not blanks)
df_0abx = df.filter((pl.col("conc") == 0.0) & (pl.col("strain") != "blank"))

# Compute growth rates
growth_rates_0abx = (
    df_0abx.group_by(["strain", "well"], maintain_order=True)
    .agg(
        pl.struct(["time (hr)", "OD600"]).map_elements(
            lambda s: fit_growth(
                s.struct.field("time (hr)").to_numpy(),
                s.struct.field("OD600").to_numpy(),
            ),
            return_dtype=float,
            returns_scalar=True,
        )
        .alias("growth rate (1/hr)")
    )
)
In [18]:
growth_rates_0abx
Out[18]:
shape: (30, 3)
┌────────┬───────┬────────────────────┐
│ strain ┆ well  ┆ growth rate (1/hr) │
│ ---    ┆ ---   ┆ ---                │
│ str    ┆ str   ┆ f64                │
╞════════╪═══════╪════════════════════╡
│ "UV5"  ┆ "E1"  ┆ 0.414492           │
│ "UV5"  ┆ "E11" ┆ 0.526582           │
│ "UV5"  ┆ "E3"  ┆ 0.388895           │
│ "UV5"  ┆ "E5"  ┆ 0.343108           │
│ "UV5"  ┆ "E7"  ┆ 0.2435             │
│ …      ┆ …     ┆ …                  │
│ "MG"   ┆ "A11" ┆ 0.576421           │
│ "MG"   ┆ "A3"  ┆ 0.572663           │
│ "MG"   ┆ "A5"  ┆ 0.532847           │
│ "MG"   ┆ "A7"  ┆ 0.46699            │
│ "MG"   ┆ "A9"  ┆ 0.471456           │
└────────┴───────┴────────────────────┘

Now we can make a plot of the respective growth rates.

In [19]:
bokeh.io.show(
    iqplot.strip(
        growth_rates_0abx,
        q="growth rate (1/hr)",
        cats="strain",
        spread="swarm",
        tooltips=[('well', '@well')],
    )
)

We see some variation in the growth rates from one well to the next. We will take the growth rate for each strain to be given by the median.

In [20]:
# ##################
# You need to give an explanation of what the code in this cell is doing.
# ##################

lam0 = growth_rates_0abx.group_by("strain").agg(
    pl.col("growth rate (1/hr)").median().alias("lam0")
)

# Take a look
lam0
Out[20]:
shape: (5, 2)
┌────────┬──────────┐
│ strain ┆ lam0     │
│ ---    ┆ ---      │
│ str    ┆ f64      │
╞════════╪══════════╡
│ "3.19" ┆ 0.504496 │
│ "IW"   ┆ 0.494827 │
│ "UV5"  ┆ 0.370197 │
│ "WT"   ┆ 0.408679 │
│ "MG"   ┆ 0.552755 │
└────────┴──────────┘

Perhaps unsurprisingly, the growth rates are all similar. For convenience, let's add these to the data frame.

In [21]:
df = df.join(lam0, on='strain')

Solver for theoretical growth rate

Recall from the theory that the growth rate $\lambda$ is found by solving a cubic equation. The following functions may be used to perform that solution. Be sure to read these carefully to understand how they work.
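
Concretely, restating the coefficients that appear in the code below (with all quantities dimensionless), $\lambda$ is the largest root on $[0, 1]$ of the cubic

$$a_3\lambda^3 + a_2\lambda^2 + a_1\lambda + a_0 = 0,$$

where

$$
\begin{aligned}
a_3 &= K + q_\mathrm{max} - 1,\\
a_2 &= 2 - (1 - K)\left(P_\mathrm{in}\,a_\mathrm{ex} + P_\mathrm{out}\right) - (1 - \kappa)\left(q_\mathrm{max} + K\right),\\
a_1 &= -1 - \kappa\left(q_\mathrm{max} + K\right) + \left(2 - (1 - \kappa)K\right)P_\mathrm{out} + \left(1 - (1 - 2K)\kappa\right)P_\mathrm{in}\,a_\mathrm{ex},\\
a_0 &= (1 + \kappa K)\left(\kappa\,P_\mathrm{in}\,a_\mathrm{ex} - P_\mathrm{out}\right).
\end{aligned}
$$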

In [22]:
def solve_growth_rate_dimensionless(
    aex, kappa, K, qmax, Pin, Pout, return_all_roots=False
):
    """Solve for the growth rate for a single antibiotic
    concentration in dimensionless units. All parameters
    are dimensionless. If `return_all_roots` is True,
    returns all physically viable steady state growth rates.
    Otherwise, only the single stable growth rate is returned.
    """
    # No antibiotic: max growth rate
    if aex == 0.0:
        lam = np.array([1.0])
    else:
        # Coefficients for cubic polynomial
        a3 = K + qmax - 1
        a2 = 2 - (1 - K) * (Pin * aex + Pout) - (1 - kappa) * (qmax + K)
        a1 = (
            -1
            - kappa * (qmax + K)
            + (2 - (1 - kappa) * K) * Pout
            + (1 - (1 - 2 * K) * kappa) * Pin * aex
        )
        a0 = (1 + kappa * K) * (kappa * Pin * aex - Pout)

        # Set up the cubic polynomial
        cubic = np.polynomial.Polynomial([a0, a1, a2, a3])

        # Solve the cubic
        lam = np.sort([r for r in cubic.roots() if np.isreal(r) and 0 <= r <= 1])

    # No roots, return 0
    if len(lam) == 0:
        return 0.0

    if return_all_roots:
        return lam if len(lam) > 1 else lam[0]
    else:
        return lam[-1]


def growth_rate(
    aex,            # µM
    qmax,           # µM/hr
    lam0,           # 1/hr
    rmin=19.3,      # µM
    kappa_t=0.061,  # 1/µM
    Pin=2.85,       # 1/hr
    Pout=2.85,      # 1/hr
    Kd=1.0,         # µM
    KM=10.0,        # µM
):
    """Solve for the growth rate for antibiotic concentration, or
    array of antibiotic concentrations, aex. Assumes all
    concentration units are µM and all time units are hours.
    """
    # Nondimensionalize
    kappa = kappa_t * rmin / lam0
    K = KM / Kd
    qmax = qmax / Kd / lam0
    Pin = Pin / lam0
    Pout = Pout / lam0
    aex = aex / Kd

    if np.isscalar(aex):
        result = solve_growth_rate_dimensionless(
            aex, kappa, K, qmax, Pin, Pout, return_all_roots=False
        )
    else:
        result = [
            solve_growth_rate_dimensionless(
                aex_val, kappa, K, qmax, Pin, Pout, return_all_roots=False
            )
            for aex_val in aex
        ]
        result = np.array(result)

    # Return dimensional growth rate
    return result * lam0
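
As a quick sketch of how the solver might be used (the parameter values below are arbitrary placeholders for illustration, not fitted values), one can plot the theoretical growth rate over a range of antibiotic concentrations:

# Antibiotic concentrations at which to evaluate the theory (µM)
aex = np.logspace(-1, 3, 200)

# Placeholder parameters for illustration only
qmax_guess = 100.0  # µM/hr, arbitrary
lam0_guess = 0.5    # 1/hr, comparable to the measured values above

# Theoretical growth rate at each concentration
lam_theor = growth_rate(aex, qmax_guess, lam0_guess)

p = bokeh.plotting.figure(
    frame_width=400,
    frame_height=250,
    x_axis_type="log",
    x_axis_label="antibiotic concentration (µM)",
    y_axis_label="growth rate (1/hr)",
)
p.line(aex, lam_theor, line_width=2)

bokeh.io.show(p)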

You can now proceed with the analysis!