import matplotlib.pyplot as plt
import numpy as np
import ramanspy as rp
import pint
import scipy

# Unit registry from pint.
# NOTE(review): `u` is not referenced anywhere in the visible part of this file.
u = pint.UnitRegistry()

# Generate synthetic spectrum
# One spectrum with 1000 bands is requested; seed=10 presumably makes the
# result repeatable between runs -- confirm against the ramanspy documentation.
spectra = rp.synth.generate_spectra(num_spectra=1, n_bands=1000, realistic=True, seed=10)

# Draw the generated spectrum and display the figure.
rp.plot.spectra(spectra)
rp.plot.show()

def peak(pos: float, width: float) -> np.ndarray:
    """Return a unit-amplitude Gaussian band sampled on a fixed axis.

    The axis is deliberately hard-wired to 1000 evenly spaced points that
    cover 0..100 (both ends included), so every returned array has the same
    length and arrays from different calls can be added sample by sample.

    Args:
        pos: Centre of the band, expressed on the 0..100 axis.
        width: Standard deviation of the Gaussian on the same axis.

    Returns:
        Array of 1000 values ``exp(-(x - pos)**2 / (2 * width**2))``.
    """
    amplitude = 1
    axis = np.linspace(0, 100, 1000)
    offset = axis - pos
    denominator = 2 * width**2
    return amplitude * np.exp(-offset**2 / denominator)

# simplified spectrum s1
# Two Gaussian bands: height 1 at position 50 (width 1) plus height 1/5 at
# position 75 (width 3).
s1 = peak(50,1) + 1/5*peak(75,3)

# plt.plot() receives only the intensities, so the horizontal axis shows the
# sample index (0..999) rather than the 0..100 axis used inside peak().
plt.figure()
plt.plot(s1)
plt.title("spectrum s1")
plt.xlabel("Raman shift")
plt.ylabel("Intensity")

Text(0, 0.5, 'Intensity')

# simplified spectrum s2
# Two Gaussian bands: height 0.3 at position 20 (width 2) plus height 1.3 at
# position 60 (width 1).
s2 = 0.3*peak(20,2) + 1.3*peak(60,1)

# Drawn in red on a new figure; as no x values are passed, the horizontal
# axis is the sample index.
plt.figure()
plt.plot(s2, 'r')
plt.title("spectrum s2")
plt.xlabel("Raman shift")
plt.ylabel("Intensity")

Text(0, 0.5, 'Intensity')

def mix(spec: list[np.ndarray], coefficients: list[float]) -> np.ndarray:
    """Return the linear mixture ``sum(coefficients[i] * spec[i])``.

    Args:
        spec: Component spectra. At least one is required, and all of them
            must be addable to each other.
        coefficients: Mixing weights, exactly one per entry of ``spec``.

    Returns:
        The weighted sum of the spectra.

    Raises:
        ValueError: If ``spec`` is empty, or if the number of coefficients
            differs from the number of spectra.
    """
    spectra = list(spec)
    if not spectra:
        raise ValueError("expecting at least 1 spectrum")
    # A length mismatch is almost certainly a caller error: surplus weights
    # would otherwise be dropped silently and missing ones would surface as
    # an unrelated IndexError.
    if len(coefficients) != len(spectra):
        raise ValueError(
            "expecting one coefficient per spectrum, got "
            f"{len(coefficients)} coefficients for {len(spectra)} spectra"
        )
    return sum(weight * spectrum for weight, spectrum in zip(coefficients, spectra))

# Linear mixture of s1 and s2 with weights 0.2 and 0.5.
mixed = mix([s1, s2], [0.2, 0.5])

# Overlay the two unscaled components (dashed blue / dashed red) and the
# mixture (solid black) on one figure.
plt.figure()
plt.plot(s1, 'b--')
plt.plot(s2, 'r--')
plt.plot(mixed, 'k')
plt.title("spectrum mixed")
plt.xlabel("Raman shift")
plt.ylabel("Intensity")

Text(0, 0.5, 'Intensity')

def demix(components: list[np.ndarray], mixed: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Estimate the mixing weights of ``components`` in ``mixed`` by least squares.

    The components are stacked as the columns of a matrix and the system
    ``matrix @ weights = mixed`` is solved with ``numpy.linalg.lstsq``.

    Args:
        components: Reference spectra, all of the same length as ``mixed``.
        mixed: The measured mixture.

    Returns:
        A pair ``(weights, residuals)``: the fitted weight per component and
        the residual value exactly as reported by ``numpy.linalg.lstsq``.
    """
    design_matrix = np.vstack(components).T
    solution = np.linalg.lstsq(design_matrix, mixed, rcond=None)
    weights, squared_residuals = solution[0], solution[1]
    return weights, squared_residuals

# Recover the weights of s1 and s2 from the mixture by least squares and
# report them together with the residual returned by the fit.
c, residuals = demix(components=[s1, s2], mixed=mixed)
print(f"The coefficients are {c} and the sums of squared residuals is {residuals}.")

The coefficients are [0.2 0.5] and the sums of squared residuals is [3.816829e-30].

# Four synthetic reference spectra, each a weighted sum of Gaussian bands.
s3 = 0.3 * peak(20, 2) + 1.3 * peak(60, 1) + 1.2 * peak(67, 1) + 0.5 * peak(70, 4)
s4 = 0.2 * peak(15, 3) + 0.2 * peak(62, 1) + 1.0 * peak(64, 1) + 0.1 * peak(80, 1)
s5 = 1.0 * peak(10, 1) + 1.2 * peak(40, 1) + 0.1 * peak(44, 1)
s6 = 0.5 * peak(16, 1) + 1.0 * peak(22, 4) + 0.7 * peak(80, 2)

# Only s3 and s4 contribute to the mixture; s5 and s6 get weight zero.
_references = [s3, s4, s5, s6]
mixed = mix(_references, [0.2, 0.5, 0.0, 0.0])

# Dashed lines: the four references; solid black: the mixture.
plt.figure()
for _reference, _line_format in zip(_references, ("b--", "r--", "g--", "y--")):
    plt.plot(_reference, _line_format)
plt.plot(mixed, "k")
plt.title("spectrum mixed")
plt.xlabel("Raman shift")
plt.ylabel("Intensity")

# Least-squares estimate of all four weights from the mixture.
c, residuals = demix(components=_references, mixed=mixed)
print(f"The coefficients are {c} and the sums of squared residuals is {residuals}.")

The coefficients are [ 2.00000000e-01  5.00000000e-01 -4.28245569e-17 -9.07151373e-17] and the sums of squared residuals is [2.00698394e-30].

def make_noisy(spec: list[np.ndarray], factor=0.1) -> list[np.ndarray]:
    """Return noisy copies of the spectra using multiplicative Gaussian noise.

    Every sample of every spectrum is multiplied by an independent draw from
    a normal distribution with mean 1.0 and standard deviation ``factor``.
    The draws come from ``np.random.normal``, i.e. NumPy's global random
    state, one batch per spectrum in input order.

    Args:
        spec: Spectra to perturb; they may have different shapes.
        factor: Standard deviation of the multiplicative noise.

    Returns:
        A new list with one noisy array per input spectrum. The inputs are
        not modified.
    """
    # The noise is sized from each spectrum's own shape, so spectra of
    # different lengths are handled independently.
    return [
        s * np.random.normal(loc=1.0, scale=factor, size=np.shape(s))
        for s in spec
    ]

# The same four noise-free reference spectra as before.
s3 = 0.3 * peak(20, 2) + 1.3 * peak(60, 1) + 1.2 * peak(67, 1) + 0.5 * peak(70, 4)
s4 = 0.2 * peak(15, 3) + 0.2 * peak(62, 1) + 1.0 * peak(64, 1) + 0.1 * peak(80, 1)
s5 = 1.0 * peak(10, 1) + 1.2 * peak(40, 1) + 0.1 * peak(44, 1)
s6 = 0.5 * peak(16, 1) + 1.0 * peak(22, 4) + 0.7 * peak(80, 2)
_clean_references = [s3, s4, s5, s6]

# Noisy versions of the references (drawn first, before the mixture's noise).
_noisy_references = make_noisy(_clean_references)
s3_noisy, s4_noisy, s5_noisy, s6_noisy = _noisy_references

# The mixture is built from the clean references and only then made noisy,
# so its noise is independent of the noise on the references.
mixed = mix(_clean_references, [0.2, 0.5, 0.0, 0.0])
[mixed_noisy] = make_noisy([mixed])

# Dashed lines: noisy references; solid black: noisy mixture.
plt.figure()
for _noisy_reference, _line_format in zip(_noisy_references, ("b--", "r--", "g--", "y--")):
    plt.plot(_noisy_reference, _line_format)
plt.plot(mixed_noisy, "k")
plt.title("spectrum mixed, with noise")
plt.xlabel("Raman shift")
plt.ylabel("Intensity")

# Fit the noisy mixture against the noisy references.
c, residuals = demix(components=_noisy_references, mixed=mixed_noisy)
print(f"The coefficients are {c} and the sums of squared residuals is {residuals}.")

The coefficients are [ 2.03043721e-01  4.78771439e-01  5.85396227e-05 -1.27394654e-04] and the sums of squared residuals is [0.20819475].

# change this to the downloaded data dir
data_dir = r"data"

# get the training data
# NOTE(review): the call loads the "val" split although the variables are
# named *_train -- confirm that this is the intended split.
X_train, y_train = rp.datasets.bacteria("val", folder=data_dir)
# Only the first element of the "labels" result is kept.
y_labels, _ = rp.datasets.bacteria("labels")

# One group per distinct class id found in y_train; each group is wrapped in
# a one-element list.
spectra = [[X_train[y_train == species_id]] for species_id in list(np.unique(y_train))]
# Min-max normalisation applied to the grouped spectra.
spectra_norm = rp.preprocessing.normalise.MinMax().apply(spectra)

# plot all spectra
plt.figure(figsize=(6.5, 9))
rp.plot.mean_spectra(spectra_norm, label=y_labels, plot_type="single stacked", title="bacterial spectra");

plt.figure()
# NOTE(review): index 3 is assumed to be the E. coli class -- verify against y_labels.
idx_ecoli = 3
rp.plot.mean_spectra(spectra_norm[idx_ecoli], label=y_labels[idx_ecoli], plot_type="single stacked", title="bacterial spectra");
# Bare expression: it only has an effect as the displayed value of a notebook cell.
spectra_norm[idx_ecoli]

[<ramanspy.core.SpectralContainer at 0x15de8c470>]

# the two components
# Both are taken from the un-normalised `spectra`, not from `spectra_norm`.
plt.figure()
# First spectrum of the group selected by idx_ecoli.
ecoli_spectrum = spectra[idx_ecoli][0][0]
rp.plot.spectra(ecoli_spectrum)
plt.title("E. coli spectrum")

plt.figure()
# First spectrum of group 0.
# NOTE(review): group 0 is assumed to be C. albicans -- verify against y_labels.
calbicans_spectrum = spectra[0][0][0]
rp.plot.spectra(calbicans_spectrum)
plt.title("C. albicans spectrum")

# creating a mixed one
# Weights 0.2 (E. coli) and 0.5 (C. albicans); linear mixing with the noise
# and baseline options switched off.
to_mix = [ecoli_spectrum, calbicans_spectrum]
cs = np.array([0.2, 0.5])
mixed_bac = rp.synth.mix(to_mix, cs, mixture_mode='linear', noise=False, baseline=False, seed=42)

plt.figure()
rp.plot.spectra(mixed_bac)
plt.title("mixed bacterial spectrum")

Text(0.5, 1.0, 'mixed bacterial spectrum')

# Unmix the synthetic bacterial mixture using the two pure spectra as
# components. The fit's second return value is bound to `residuals`, the name
# the print statement reads, so the reported residual belongs to this fit.
c, residuals = demix(components=[ecoli_spectrum.spectral_data, calbicans_spectrum.spectral_data], mixed=mixed_bac.spectral_data)
print(f"The coefficients are {c} and the sums of squared residuals is {residuals}.")

The coefficients are [0.2 0.5] and the sums of squared residuals is [0.20819475].

# Sampling axis: 1000 points on 0..100, the same grid that peak() uses internally.
x = np.linspace(0,100, 1000)

# make a noisy background
# Smooth curve x**2 / (1 + x**2/1000) / 100, starting at 0 and flattening out
# for large x, perturbed by multiplicative noise with standard deviation 0.005.
# make_noisy() returns a list, hence the [0] below.
background_signal = make_noisy(
    [x**2 / (1 + x**2/1000) * 1/100],
    factor=0.005
)
# No plt.figure() call here: the curve is drawn on whatever figure is current.
plt.plot(background_signal[0])

# and add it to our signal
s3_nosiy_bck = background_signal[0] + s3_noisy
plt.plot(s3_nosiy_bck)

[<matplotlib.lines.Line2D at 0x15f01d760>]

# Same background added to the noisy s4 spectrum; drawn on the current figure.
s4_nosiy_bck = background_signal[0] + s4_noisy
plt.plot(s4_nosiy_bck)

[<matplotlib.lines.Line2D at 0x15dfb2270>]

# Savitzky-Golay smoothing of the noisy s3-plus-background signal with a
# 9-sample window and a cubic polynomial.
# NOTE(review): the file only does `import scipy`; this line relies on
# `scipy.signal` being reachable as an attribute -- confirm for the installed
# SciPy version.
s3_denoise_est = scipy.signal.savgol_filter(x=s3_nosiy_bck, window_length=9, polyorder=3)

# Raw signal and smoothed estimate overlaid on a new figure.
plt.figure()
plt.plot(s3_nosiy_bck)
plt.plot(s3_denoise_est)

[<matplotlib.lines.Line2D at 0x15d2e4d40>]

# setup the preprocessing pipeline
# Only the IModPoly baseline step (poly_order=10) is active. The SavGol step
# is commented out; the pipeline input below (s3_denoise_est) has already been
# smoothed with scipy's savgol_filter.
pipe = rp.preprocessing.protocols.Pipeline([
    # rp.preprocessing.denoise.SavGol(window_length=9, polyorder=3),
    rp.preprocessing.baseline.IModPoly(poly_order=10),
])

# apply pipeline
# The smoothed intensities are wrapped in a Spectrum together with the axis x.
s3_est = pipe.apply(rp.Spectrum(s3_denoise_est, x))

# background estimate: what the pipeline removed from the smoothed signal
bkg_est = s3_denoise_est - s3_est.spectral_data

# plot the results
# _ = rp.plot.spectra(s3_est, plot_type='separate')

# True (noisy) background versus the estimated background.
plt.figure()
plt.plot(background_signal[0])
plt.plot(bkg_est)
plt.title("Estimated background and background")

# Noise-free s3 versus the baseline-corrected estimate.
plt.figure()
plt.plot(s3)
plt.plot(s3_est.spectral_data)
plt.title("Estimated signal and signal")

Text(0.5, 1.0, 'Estimated signal and signal')

Measurements

Composition of a medium

Raman spectroscopy

Quick overview on how it works

A first synthetic spectrum

More involved spectra

Bacterial spectra

Removing noise and a background signal

Removing noise: Savitzky–Golay filter

Removing the background

Application: Determining the growth state of a bacterium

Application: Tracking the kinetics in a bioreactor

Optical density and Spectrophotometry

Quick overview on how it works

Application: Tracking the cell density in a microplate reader

Flow cytometry