This notebook is part of the deepcell-spots documentation: https://deepcell-spots.readthedocs.io/
Singleplex FISH Analysis¶
[1]:
import glob
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from deepcell_toolbox.processing import histogram_normalization,normalize
from deepcell_toolbox.deep_watershed import deep_watershed
from deepcell.applications import CytoplasmSegmentation,NuclearSegmentation
from deepcell_spots.applications import Polaris,SpotDetection
from deepcell_spots.singleplex import *
class OverrideCytoplasmSegmentation(CytoplasmSegmentation):
    """``CytoplasmSegmentation`` variant whose pre- and post-processing functions are replaced.

    After initialization, ``preprocessing_fn`` is ``histogram_normalization`` and
    ``postprocessing_fn`` is ``deep_watershed``.

    Args:
        model: Forwarded unchanged to the base initializer. Defaults to ``None``.
    """

    def __init__(self, model=None):
        # NOTE(review): ``super(CytoplasmSegmentation, self)`` starts the method lookup *after*
        # ``CytoplasmSegmentation`` in the MRO, so that class's own ``__init__`` is bypassed and
        # the next class up is initialized directly with a (512, 512, 1) model image shape.
        # Presumably deliberate -- confirm before simplifying this to ``super().__init__``.
        super(CytoplasmSegmentation, self).__init__(model, model_image_shape=(512, 512, 1))
        # Override preprocessing input
        self.preprocessing_fn = histogram_normalization
        self.postprocessing_fn = deep_watershed
Import images¶
[2]:
from tensorflow.keras.utils import get_file
from deepcell.datasets import Dataset
def load_data(self, path=None, test_size=0.2, seed=0):
    """Fetch this dataset's file through Keras ``get_file`` and open it with PIL.

    Args:
        path: File name (optionally prefixed with sub-directories) relative to
            ``~/.keras/datasets``. Falls back to ``self.path`` when falsy.
        test_size: Not used by this loader; kept in the signature.
        seed: Not used by this loader; kept in the signature.

    Returns:
        The object returned by ``PIL.Image.open`` for the fetched file.

    Raises:
        IOError: If the cache location already exists but is not a directory.
    """
    relative_path = path or self.path
    cache_root = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    subdirs = relative_path.split(os.path.sep)[:-1]
    data_dir = os.path.join(cache_root, *subdirs) if subdirs else cache_root

    # Make sure the cache directory is usable before handing over to get_file
    if os.path.exists(data_dir):
        if not os.path.isdir(data_dir):
            raise IOError('{} exists but is not a directory'.format(data_dir))
    else:
        os.makedirs(data_dir)

    local_file = get_file(relative_path,
                          origin=self.url,
                          file_hash=self.file_hash)
    return Image.open(local_file)


# Patch the loader onto the class so that every Dataset instance uses it
Dataset.load_data = load_data
[3]:
from deepcell.datasets import Dataset
# Describe the example TIFF: local file name, download URL, and the hash that the
# patched load_data forwards to get_file
datafile = Dataset(
    path='scGFP_Maximum intensity projection.tif',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/singleplex/scGFP_Maximum+intensity+projection.tif',
    file_hash='a0e5c38ef3b26e521d3837f1098393c4',
    metadata={})
# load_data was replaced above, so this returns the opened PIL image rather than arrays
image_stack = datafile.load_data()
[4]:
def _frame_as_batch(stack, frame_index):
    """Return frame ``frame_index`` of ``stack`` as an array with singleton first and last axes."""
    stack.seek(frame_index)
    return np.expand_dims(np.array(stack), axis=[0, -1])


# One frame per channel; the frame-to-channel mapping is the one implied by the variable names
fluor_image = _frame_as_batch(image_stack, 0)
cyto_image = _frame_as_batch(image_stack, 1)
nuc_image = _frame_as_batch(image_stack, 2)
spots_image = _frame_as_batch(image_stack, 3)
[5]:
fig, ax = plt.subplots(2, 2, figsize=(10, 10))

# (axes, image, title) for each panel of the 2x2 overview
overview_panels = [
    (ax[0, 0], nuc_image, 'Nuclear label'),
    (ax[0, 1], cyto_image, 'Cytoplasmic label'),
    (ax[1, 0], fluor_image, 'Fluorescent channel'),
    (ax[1, 1], spots_image, 'FISH channel'),
]
for panel, image, title in overview_panels:
    # Show the first batch element / first channel of each image
    panel.imshow(image[0, ..., 0], cmap='gray')
    panel.set_title(title)
    # Hide the tick marks on both axes
    panel.set_xticks([])
    panel.set_yticks([])
plt.show()

Cytoplasmic segmentation¶
[6]:
# Polaris app performs cytoplasmic segmentation by default
app = Polaris()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py:1059: UserWarning: deepcell_spots.dotnet is not loaded, but a Lambda layer uses it. It may cause errors.
, UserWarning)
Downloading data from https://deepcell-data.s3-us-west-1.amazonaws.com/saved-models/CytoplasmSegmentation-2.tar.gz
95117312/95115934 [==============================] - 3s 0us/step
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
[7]:
# image_mpp is estimate for pixel size in microns
cyto_pred = app.predict(spots_image=spots_image, segmentation_image=fluor_image, image_mpp=0.2)
[8]:
# app returns a list of dictionaries
# dictionary keys are 'spots_assignment', 'cell_segmentation', 'spot_locations'
cyto_result = cyto_pred[0]
spot_dict_cyto = cyto_result['spots_assignment']
labeled_im_cyto = cyto_result['cell_segmentation']
coords_new, cmap_list = process_spot_dict(spot_dict_cyto)

fig, ax = plt.subplots(2, 2, figsize=(15, 15))

# (axes, image, title, imshow keyword arguments) for each panel
cyto_panels = [
    (ax[0, 0], fluor_image, 'Cytoplasmic label', {'cmap': 'gray', 'vmax': 200}),
    (ax[0, 1], labeled_im_cyto, 'Cytoplasmic segmentation', {'cmap': 'jet'}),
    (ax[1, 0], spots_image, 'SeqFISH', {'cmap': 'gray', 'vmax': 100}),
    (ax[1, 1], spots_image, 'Spot assignment to cells', {'cmap': 'gray', 'vmax': 100}),
]
for panel, image, title, imshow_kwargs in cyto_panels:
    panel.imshow(image[0, ..., 0], **imshow_kwargs)
    panel.set_title(title)
    panel.set_xticks([])
    panel.set_yticks([])

# Overlay the spots on the last panel: column 1 goes on the x axis, column 0 on the y axis,
# and each point is colored by the value process_spot_dict returned for it
ax[1, 1].scatter(coords_new[:, 1], coords_new[:, 0], c=cmap_list, cmap='jet', s=8)

plt.tight_layout()
plt.show()

Nuclear segmentation¶
[9]:
# `segmentation_type` can be set to 'cytoplasm' (the default), 'nucleus', or 'no segmentation'
app = Polaris(segmentation_type='nucleus')
Downloading data from https://deepcell-data.s3-us-west-1.amazonaws.com/saved-models/NuclearSegmentation-3.tar.gz
95150080/95148111 [==============================] - 2s 0us/step
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
[10]:
# NOTE(review): image_mpp is 0.15 here but 0.2 for the cytoplasm prediction on the same
# image stack -- confirm that the two different pixel-size estimates are intentional.
nuc_pred = app.predict(spots_image=spots_image, segmentation_image=nuc_image, image_mpp=0.15)
[11]:
# First (and only) batch element of the nuclear prediction
nuc_result = nuc_pred[0]
spot_dict_nuc = nuc_result['spots_assignment']
labeled_im_nuc = nuc_result['cell_segmentation']
coords_new, cmap_list = process_spot_dict(spot_dict_nuc)

fig, ax = plt.subplots(2, 2, figsize=(15, 15))

# (axes, image, title, imshow keyword arguments) for each panel
nuc_panels = [
    (ax[0, 0], nuc_image, 'Nuclear label', {'cmap': 'gray', 'vmax': 100}),
    (ax[0, 1], labeled_im_nuc, 'Nuclear segmentation', {'cmap': 'jet'}),
    (ax[1, 0], spots_image, 'SeqFISH', {'cmap': 'gray', 'vmax': 100}),
    (ax[1, 1], spots_image, 'Spot assignment to cells', {'cmap': 'gray', 'vmax': 100}),
]
for panel, image, title, imshow_kwargs in nuc_panels:
    panel.imshow(image[0, ..., 0], **imshow_kwargs)
    panel.set_title(title)
    panel.set_xticks([])
    panel.set_yticks([])

# Overlay the spots on the last panel: column 1 goes on the x axis, column 0 on the y axis
ax[1, 1].scatter(coords_new[:, 1], coords_new[:, 0], c=cmap_list, cmap='jet', s=8)

plt.tight_layout()
plt.show()

No segmentation¶
[13]:
app = Polaris(segmentation_type='no segmentation')
/usr/local/lib/python3.6/dist-packages/deepcell_spots/applications/polaris.py:97: UserWarning: No segmentation application instantiated.
warnings.warn('No segmentation application instantiated.')
[14]:
# if `segmentation_type` is 'no segmentation', app returns a list of lists
spots_pred = app.predict(spots_image=spots_image)
[15]:
fig, ax = plt.subplots(1, 2, figsize=(15, 15))
raw_panel, detection_panel = ax

# Both panels show the same spots image; only the right one gets the detections on top
for panel, title in ((raw_panel, 'SeqFISH'), (detection_panel, 'Detected spots')):
    panel.imshow(spots_image[0, ..., 0], cmap='gray', vmax=100)
    panel.set_title(title)
    panel.set_xticks([])
    panel.set_yticks([])

# Detections for the first batch element: column 1 on the x axis, column 0 on the y axis
detections = spots_pred[0]
detection_panel.scatter(detections[:, 1], detections[:, 0], c='m', s=8)

plt.tight_layout()
plt.show()

This notebook is part of the deepcell-spots documentation: https://deepcell-spots.readthedocs.io/
Multiplex FISH Analysis¶
[1]:
import glob
import numpy as np
import pandas as pd
import scipy.io
import os
import time
import cv2
import sys
import random
import collections
import matplotlib.pyplot as plt
from scipy.spatial import distance
from skimage import transform
from skimage.feature import register_translation
from sklearn.cluster import DBSCAN
from deepcell_spots.applications import SpotDetection
from deepcell_spots.spot_em import define_edges
from deepcell_spots.point_metrics import *
from deepcell_spots.singleplex import *
from deepcell_spots.multiplex import *
from deepcell_spots.image_alignment import *
from deepcell.applications import CytoplasmSegmentation
import tensorflow as tf
from deepcell_toolbox.processing import histogram_normalization,normalize
from deepcell_toolbox.deep_watershed import deep_watershed
[2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to a single logical device on the first GPU, capped at
    # memory_limit=5000 (TensorFlow documents this limit in MB, i.e. roughly 5GB)
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=5000)])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)
1 Physical GPUs, 1 Logical GPUs
Import data¶
We can define the directory that contains all of the data and information for the experiment, including the image set, the codebook, and a .csv file describing the organization of the data.
[3]:
from tensorflow.keras.utils import get_file
from deepcell.datasets import Dataset
def load_csv(self, path=None, test_size=0.2, seed=0):
    """Fetch this dataset's CSV file through Keras ``get_file`` and parse it with pandas.

    Args:
        path: File name (optionally prefixed with sub-directories) relative to
            ``~/.keras/datasets``. Falls back to ``self.path`` when falsy.
        test_size: Not used by this loader; kept in the signature.
        seed: Not used by this loader; kept in the signature.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` on the fetched file.

    Raises:
        IOError: If the cache location already exists but is not a directory.
    """
    target = path or self.path
    keras_cache = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    parent_parts = target.split(os.path.sep)[:-1]
    data_dir = os.path.join(keras_cache, *parent_parts) if parent_parts else keras_cache

    # Make sure the cache directory is usable before handing over to get_file
    if os.path.exists(data_dir):
        if not os.path.isdir(data_dir):
            raise IOError('{} exists but is not a directory'.format(data_dir))
    else:
        os.makedirs(data_dir)

    csv_file = get_file(target,
                        origin=self.url,
                        file_hash=self.file_hash)
    return pd.read_csv(csv_file)
[4]:
# Patch the loader at class level: from here on, load_data() on any Dataset instance
# returns the DataFrame that load_csv parses from the fetched file
Dataset.load_data = load_csv
# Codebook CSV for the example experiment
codebook_files = Dataset(
    path='codebook-example.csv',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/codebook-example.csv',
    file_hash='81eacbc9558b7e1b5dfbeb9cb34b1a59',
    metadata={})
# CSV describing the organization of the image data
dataorg_files = Dataset(
    path='dataorg-example.csv',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/dataorg-example.csv',
    file_hash='691ec7efcd31c37aedf1253b5e8d7cdc',
    metadata={})
codebook = codebook_files.load_data()
dataorg = dataorg_files.load_data()
[5]:
codebook.head()
[5]:
name | Spots 1 | Spots 2 | Spots 3 | Spots 4 | Spots 5 | Spots 6 | Spots 7 | Spots 8 | Spots 10 | Spots 11 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Gabbr1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
1 | Gabbr2 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 |
2 | Tmem119 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
3 | Gpr34 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
4 | Fcrls | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 0 |
[6]:
dataorg
[6]:
readoutName | fileName | imagingRound | color | frame | zPos | |
---|---|---|---|---|---|---|
0 | Spots 1 | hal-config-749z7-638z7-546z7-477z9-405z7_00.npy | -1 | 546 | [14, 15, 16, 17, 18, 19, 20] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
1 | Spots 2 | hal-config-749z7-638z7-546z7-477z9-405z7_00.npy | -1 | 638 | [7, 8, 9, 10, 11, 12, 13] | [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5] |
2 | Spots 3 | hal-config-749z7-638z7-546z7-477z9-405z7_00.npy | -1 | 749 | [0, 1, 2, 3, 4, 5, 6] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
3 | Spots 4 | hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy | 0 | 546 | [14, 15, 16, 17, 18, 19, 20] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
4 | Spots 5 | hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy | 0 | 638 | [7, 8, 9, 10, 11, 12, 13] | [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5] |
5 | Spots 6 | hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy | 0 | 749 | [0, 1, 2, 3, 4, 5, 6] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
6 | Spots 7 | hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy | 1 | 546 | [14, 15, 16, 17, 18, 19, 20] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
7 | Spots 8 | hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy | 1 | 638 | [7, 8, 9, 10, 11, 12, 13] | [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5] |
8 | Spots 9 | hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy | 1 | 749 | [0, 1, 2, 3, 4, 5, 6] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
9 | Spots 10 | hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy | 2 | 546 | [14, 15, 16, 17, 18, 19, 20] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
10 | Spots 11 | hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy | 2 | 638 | [7, 8, 9, 10, 11, 12, 13] | [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5] |
11 | Spots 12 | hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy | 2 | 749 | [0, 1, 2, 3, 4, 5, 6] | [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5] |
12 | Cytoplasm | NaN | -1 | 477 | [21, 22, 23, 24, 25, 26, 27] | [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5] |
13 | Reference | NaN | -1 | 477 | [29] | [6.0] |
[7]:
def load_data(self, path=None, test_size=0.2, seed=0):
    """Fetch this dataset's file through Keras ``get_file`` and return where it was stored.

    Unlike ``load_csv``, the file is not parsed; only its local path is returned.

    Args:
        path: File name (optionally prefixed with sub-directories) relative to
            ``~/.keras/datasets``. Falls back to ``self.path`` when falsy.
        test_size: Not used by this loader; kept in the signature.
        seed: Not used by this loader; kept in the signature.

    Returns:
        The value returned by ``get_file`` for the requested file.

    Raises:
        IOError: If the cache location already exists but is not a directory.
    """
    requested = path or self.path
    datasets_root = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    leading_dirs = requested.split(os.path.sep)[:-1]
    data_dir = os.path.join(datasets_root, *leading_dirs) if leading_dirs else datasets_root

    # Make sure the cache directory is usable before handing over to get_file
    if os.path.exists(data_dir):
        if not os.path.isdir(data_dir):
            raise IOError('{} exists but is not a directory'.format(data_dir))
    else:
        os.makedirs(data_dir)

    return get_file(requested,
                    origin=self.url,
                    file_hash=self.file_hash)
[8]:
# All example image stacks live under the same S3 prefix and are cached under their own file name
_MULTIPLEX_BASE_URL = 'https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/'


def _image_dataset(file_name, file_hash):
    """Build the Dataset entry for one image stack stored under the shared S3 prefix."""
    return Dataset(
        path=file_name,
        url=_MULTIPLEX_BASE_URL + file_name,
        file_hash=file_hash,
        metadata={})


image_files0 = _image_dataset(
    'hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy',
    '65e67e1012ee6cfcb29f4a574473969c')
image_files1 = _image_dataset(
    'hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy',
    '0822b3af6f45a6187beab1286e212e81')
image_files2 = _image_dataset(
    'hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy',
    'a4aa06ba6c0b92931ea013e655bea290')
image_files = _image_dataset(
    'hal-config-749z7-638z7-546z7-477z9-405z7_00.npy',
    '17e00336b3a4bec123de1f6df6c39ae5')
[9]:
# Re-patch the class-level loader with the variant that returns the local file path
Dataset.load_data = load_data
for round_files in (image_files0, image_files1, image_files2):
    round_files.load_data()
# Last expression of the cell: its return value (the cached path) is displayed
image_files.load_data()
[9]:
'/root/.keras/datasets/hal-config-749z7-638z7-546z7-477z9-405z7_00.npy'
Image alignment¶
[10]:
# Read in the images according to the .csv file defining the organization of the data
# Performs a maximum projection of spot images
# Takes middle z-slice for reference images and cytoplasm images
# Resolve the Keras cache directory from the current user's home (the same location the
# loader functions build with expanduser) instead of hard-coding /root, which only exists
# when the notebook runs as root. The empty last component keeps the trailing separator
# that the original literal had.
datadir = os.path.join(os.path.expanduser('~'), '.keras', 'datasets', '')
max_im_dict, reference_dict, cytoplasm_dict = read_images(datadir, dataorg)
Working on: Spots 1
Working on: Spots 2
Working on: Spots 3
Working on: Spots 4
Working on: Spots 5
Working on: Spots 6
Working on: Spots 7
Working on: Spots 8
Working on: Spots 9
Working on: Spots 10
Working on: Spots 11
Working on: Spots 12
[11]:
# Align the spot and cytoplasm images according to reference images
aligned = align_images(max_im_dict, reference_dict)
aligned_cytoplasm = align_images(cytoplasm_dict, reference_dict)
[12]:
# Visualize a subset of images from different rounds
name_list = ['Spots 1', 'Spots 2', 'Spots 7', 'Spots 8']
fig, ax = plt.subplots(1, 4, figsize=(20, 20))
# One panel per selected readout, showing its first batch element / first channel
for panel, readout_name in zip(ax, name_list):
    panel.imshow(aligned[readout_name][0, :, :, 0], vmax=10000)

[13]:
# Crop zero-value pixels after alignment
crop_dict = crop_images(aligned)
crop_cytoplasm_dict = crop_images(aligned_cytoplasm)

# Visualize a subset of images from different rounds
fig, ax = plt.subplots(1, 4, figsize=(20, 20))
# Same readouts as in the previous figure (name_list is defined in that cell)
for panel, readout_name in zip(ax, name_list):
    panel.imshow(crop_dict[readout_name][0, :, :, 0])

Spot detection¶
[14]:
# Instantiate the spot detection application
spots_app = SpotDetection()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py:1059: UserWarning: deepcell_spots.dotnet is not loaded, but a Lambda layer uses it. It may cause errors.
, UserWarning)
[15]:
# Predict spot locations
# if prediction is slow (over 5 min) try downgrading to TF 2.4
tic = time.perf_counter()
# Turn off the application's post-processing step for these predictions
# (presumably so the unprocessed model output reaches barcode decoding -- confirm)
spots_app.postprocessing_fn = None
cp_dict = {}
for item in crop_dict:
    image = crop_dict[item]
    if item in codebook.columns:
        cp_dict[item] = spots_app.predict(image)
    else:
        # Readouts without a codebook column are not run through the model; they get a
        # random placeholder in [0, 0.01) of shape (2, 1, height, width, 2) instead
        height, width = np.shape(image)[1], np.shape(image)[2]
        cp_dict[item] = np.random.random((2, 1, height, width, 2)) * 0.01
toc = time.perf_counter()
# Report the number of processed entries. The previous message printed the last zero-based
# loop index, which under-counted by one and was undefined for an empty crop_dict.
print(f"Analyzed {len(cp_dict)} rounds in {toc - tic:0.4f} seconds")
Analyzed 11 rounds in 49.7890 seconds
Cell segmentation¶
[16]:
# Instantiate cytoplasm segmentation model
cyto_app = CytoplasmSegmentation()
# Replace the pre- and post-processing functions on this instance only
cyto_app.preprocessing_fn = histogram_normalization
cyto_app.postprocessing_fn = deep_watershed
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
[17]:
# Predict cytoplasm segmentation
# Uses the cropped cytoplasm image stored under the 'Spots 1' readout key
im = crop_cytoplasm_dict['Spots 1']
labeled_im_cyto = cyto_app.predict(im, image_mpp=0.25)
# Visualize
# Left: input image (first batch element / channel); right: predicted label image
fig,ax = plt.subplots(1, 2, figsize=(12,6))
ax[0].imshow(im[0,:,:,0], vmax=10000, cmap='gray')
ax[1].imshow(labeled_im_cyto[0,:,:,0], cmap='plasma')
[17]:
<matplotlib.image.AxesImage at 0x7f508a1406a0>

Assign gene barcodes¶
[18]:
decoded_spots_df = assign_gene_identities(cp_dict, dataorg, threshold=0.95, codebook=codebook)
decoded_spots_df.head()
100%|██████████| 60/60 [00:07<00:00, 7.93it/s]
100%|██████████| 106/106 [00:00<00:00, 611.60it/s]
[18]:
Name | Code | Probability | X | Y | |
---|---|---|---|---|---|
0 | Laptm4a | 18 | 0.563712 | 1944 | 1602 |
1 | Blank-9 | 29 | 0.852931 | 1944 | 1414 |
2 | background | 31 | 0.627804 | 1944 | 382 |
3 | Blank-4 | 24 | 0.961169 | 1944 | 368 |
4 | background | 31 | 0.523489 | 1944 | 339 |
[19]:
# Probability threshold separating confidently decoded spots from the rest
thr = 0.7
above_thr = decoded_spots_df.Probability > thr

# Per-name counts of the spots above the threshold ...
confident_counts = decoded_spots_df.Name[above_thr].value_counts()
# ... plus a single pooled 'thr' entry for every spot at or below the threshold
# (each of their names is replaced by 'thr' before counting)
rejected_counts = (
    decoded_spots_df.Name[decoded_spots_df.Probability <= thr]
    .replace(np.unique(decoded_spots_df.Name), 'thr')
    .value_counts()
)
barcode_counts = pd.concat([confident_counts, rejected_counts]).sort_index(axis=0)

fig, ax = plt.subplots(1, 1, figsize=(14, 3), dpi=100, facecolor='w', edgecolor='k')
barcode_counts.plot(kind='bar', width=0.7, rot=90, logy=True, fontsize=6, ax=ax)

# A spot counts as decoded when it is above the threshold and its name is none of the
# non-gene labels 'background', 'infeasible' or 'NaN'
is_decoded = (
    (decoded_spots_df.Name != 'background')
    & (decoded_spots_df.Name != 'infeasible')
    & (decoded_spots_df.Name != 'NaN')
    & above_thr
)
num_decoded_barcodes = int(is_decoded.sum())
num_spots = decoded_spots_df.shape[0]

# Write each bar's height just above it
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005), size=6)

plt.title(
    'Histogram of decoded barcodes after thresholding with {}: \n in total {} spots detected while {} spots decoded ({:.02f}%)'.format(
        thr, num_spots, num_decoded_barcodes, 100 * num_decoded_barcodes / num_spots),
    fontsize=10)
plt.show()

Assign spots to cells¶
[20]:
output_df = assign_spots_to_cells(decoded_spots_df, labeled_im_cyto)
output_df.head()
[20]:
Name | Code | Probability | X | Y | Cell | |
---|---|---|---|---|---|---|
0 | Laptm4a | 18 | 0.563712 | 1944 | 1602 | 0 |
1 | Blank-9 | 29 | 0.852931 | 1944 | 1414 | 148 |
2 | background | 31 | 0.627804 | 1944 | 382 | 0 |
3 | Blank-4 | 24 | 0.961169 | 1944 | 368 | 0 |
4 | background | 31 | 0.523489 | 1944 | 339 | 137 |
[21]:
# Visualize
plot_threshold = 0.7
# Select the rows above the plotting threshold once and reuse them for x, y and color
confident_spots = output_df.loc[output_df['Probability'] > plot_threshold]

fig, ax = plt.subplots(1, 3, figsize=(24, 9))
segmentation_panel, image_panel, overlay_panel = ax

segmentation_panel.imshow(labeled_im_cyto[0, :, :, 0], cmap='plasma')
segmentation_panel.set_title('Cell Segmentation')

# The same spots image is the background of the middle and right panels
for panel in (image_panel, overlay_panel):
    panel.imshow(crop_dict['Spots 2'][0, :, :, 0], cmap='gray')
image_panel.set_title('Spots Image')

# 'Y' is plotted on the horizontal axis and 'X' on the vertical one; color encodes 'Cell'
overlay_panel.scatter(confident_spots['Y'],
                      confident_spots['X'],
                      c=confident_spots['Cell'], cmap='plasma', s=20)
overlay_panel.set_title('Assigned Spots to Cells')

# Zoom every panel to the same window (y limits are given high-to-low) and hide ticks
for panel in ax:
    panel.set_xlim([0, 500])
    panel.set_ylim([1500, 1000])
    panel.set_xticks([])
    panel.set_yticks([])
plt.tight_layout()
plt.show()

deepcell_spots package¶
Contents
Applications¶
deepcell_spots.applications package¶
deepcell_spots.applications.polaris¶
Singleplex FISH analysis application
-
class
deepcell_spots.applications.polaris.
Polaris
(segmentation_model=None, segmentation_type='cytoplasm', spots_model=None)[source]¶ Bases:
object
Loads spot detection and cell segmentation applications from deepcell_spots and deepcell_tf, respectively.
The
predict
method calls the predict method of each application.Example:
from skimage.io import imread from deepcell_spots.applications import Polaris # Load the images spots_im = imread('spots_image.png') cyto_im = imread('cyto_image.png') # Expand image dimensions to rank 4 spots_im = np.expand_dims(spots_im, axis=[0,-1]) cyto_im = np.expand_dims(cyto_im, axis=[0,-1]) # Create the application app = Polaris() # Find the spot locations result = app.predict(spots_image=spots_im, segmentation_image=cyto_im) spots_dict = result[0]['spots_assignment'] labeled_im = result[0]['cell_segmentation'] coords = result[0]['spot_locations']
- Parameters
segmentation_model (tf.keras.Model) – The model to load. If
None
, a pre-trained model will be downloaded. segmentation_type (str) – The cellular compartment for generating segmentation predictions. Valid values are ‘cytoplasm’, ‘nucleus’, ‘no segmentation’. Defaults to ‘cytoplasm’.
spots_model (tf.keras.Model) – The model to load. If
None
, a pre-trained model will be downloaded.
-
predict
(spots_image, segmentation_image=None, image_mpp=None, spots_threshold=0.95, spots_clip=False)[source]¶ Generates prediction output consisting of a labeled cell segmentation image, detected spot locations, and a dictionary of spot locations assigned to labeled cells of the input.
Input images are required to have 4 dimensions
[batch, x, y, channel]
. Channel dimension should be 1. Additional empty dimensions can be added using
np.expand_dims
.- Parameters
spots_image (numpy.array) – Input image for spot detection with shape
[batch, x, y, channel]
.segmentation_image (numpy.array) – Input image for cell segmentation with shape
[batch, x, y, channel]
. Defaults to None.image_mpp (float) – Microns per pixel for
image
.spots_threshold (float) – Probability threshold for a pixel to be considered as a spot.
spots_clip (bool) – Determines if pixel values will be clipped by percentile. Defaults to false.
- Raises
ValueError – Threshold value must be between 0 and 1.
ValueError – Segmentation application must be instantiated if segmentation image is defined.
- Returns
List of dictionaries, length equal to batch dimension.
- Return type
deepcell_spots.applications.spot_detection¶
Spot detection application
-
class
deepcell_spots.applications.spot_detection.
SpotDetection
(model=None)[source]¶ Bases:
deepcell.applications.Application
Loads a
deepcell.model_zoo.featurenet.FeatureNet
model for fluorescent spot detection with pretrained weights.The
predict
method handles prep and post processing steps to return a list of spot locations.Example:
from skimage.io import imread from deepcell_spots.applications import SpotDetection # Load the image im = imread('spots_image.png') # Expand image dimensions to rank 4 im = np.expand_dims(im, axis=-1) im = np.expand_dims(im, axis=0) # Create the application app = SpotDetection() # Find spot locations coords = app.predict(im)
- Parameters
model (tf.keras.Model) – The model to load. If
None
, a pre-trained model will be downloaded.
-
dataset_metadata
= {'name': 'general_train', 'other': 'Pooled FISH data including MERFISH data\n and SunTag viral RNA data'}¶ Metadata for the dataset used to train the model
-
model_metadata
= {'batch_size': 1, 'lr': 0.01, 'lr_decay': 0.99, 'n_epochs': 10, 'training_seed': 0, 'training_steps_per_epoch': 552}¶ Metadata for the model and training process
-
predict
(image, batch_size=4, pad_mode='reflect', preprocess_kwargs=None, postprocess_kwargs=None, threshold=0.95, clip=False)[source]¶ Generates a list of coordinate spot locations of the input running prediction with appropriate pre and post processing functions.
Input images are required to have 4 dimensions
[batch, x, y, channel]
.Additional empty dimensions can be added using
np.expand_dims
.- Parameters
image (numpy.array) – Input image with shape
[batch, x, y, channel]
.batch_size (int) – Number of images to predict on per batch.
pad_mode (str) – The padding mode, one of “constant” or “reflect”.
preprocess_kwargs (dict) – Keyword arguments to pass to the pre-processing function.
postprocess_kwargs (dict) – Keyword arguments to pass to the post-processing function.
threshold (float) – Probability threshold for a pixel to be considered as a spot.
clip (bool) – Determines if pixel values will be clipped by percentile.
- Raises
ValueError – Input data must match required rank of the application, calculated as one dimension more (batch dimension) than expected by the model.
ValueError – Input data must match required number of channels.
ValueError – Threshold value must be between 0 and 1.
- Returns
Coordinate locations of detected spots.
- Return type
numpy.array
deepcell_spots.cluster_vis¶
Visualization tools for spot expectation maximization
-
deepcell_spots.cluster_vis.
jitter
(coords, size)[source]¶ Add Gaussian noise to a list of coordinates for plotting when coordinates overlap.
- Parameters
coords (matrix) – Matrix with dimensions (number of detections) x 2
size (int) – Standard deviation of the Gaussian noise distribution in pixels.
- Returns
Coords with noise added to locations
- Return type
array
-
deepcell_spots.cluster_vis.
label_graph_ann
(G, coords_df, exclude_last=False)[source]¶ Labels the annotator associated with each node in the graph
- Parameters
G (networkx.Graph) – Graph with edges indicating clusters of points assumed to be derived from the same ground truth detection
coords_df (DataFrame) – Data frame with columns ‘x’ and ‘y’ which encode the spot locations and ‘Algorithm’ which encodes the algorithm that corresponds with that spot
exclude_last (bool) – Only set as True to exclude a point that has been included for the purpose of normalization
- Returns
Labeled graph
- Return type
networkx.Graph
-
deepcell_spots.cluster_vis.
label_graph_gt
(G, detection_data, gt)[source]¶ Labels the ground truth identity of each node in the graph.
Intended for simulated data.
- Parameters
G (networkx.Graph) – Graph with edges indicating clusters of points assumed to be derived from the same ground truth detection
detection_data (numpy.array) – Matrix with dimensions (number of clusters) x (number of algorithms) with value of 1 if an algorithm detected the cluster and 0 if it did not.
gt (numpy.array) – Array with length (number of clusters) with value of 1 if cluster is a true positive detection and 0 if it is a false positive.
- Returns
Labeled graph
- Return type
networkx.Graph
-
deepcell_spots.cluster_vis.
label_graph_prob
(G, detection_data, p_matrix)[source]¶ Labels the EM output probability of being a ground truth true detection for each cluster in the graph.
- Parameters
G (networkx.Graph) – Graph with edges indicating clusters of points assumed to be derived from the same ground truth detection
detection_data (numpy.array) – Matrix with dimensions (number of clusters) x (number of algorithms) with value of 1 if an algorithm detected the cluster and 0 if it did not.
p_matrix (matrix) – Matrix with dimensions (number of clusters) x 2 where first column is the probability that a cluster is a true positive and second column is the probability that it is a false positive.
- Returns
Labeled graph
- Return type
networkx.Graph
deepcell_spots.data_utils¶
Functions for making training data sets
-
deepcell_spots.data_utils.
get_data
(file_name, test_size=0.2, seed=0, allow_pickle=False)[source]¶ Load data from .npz file and split into train and test sets.
This is a copy of
deepcell.utils.data_utils.get_data
, with allow_pickle
added and mode
removed.
- Parameters
- Returns
Dictionary of training data and a dictionary of testing data.
- Return type
-
deepcell_spots.data_utils.
slice_annotated_image
(X, y, reshape_size, overlap=0)[source]¶ Slice images in X into smaller parts.
Similar to
deepcell.utils.data_utils.reshape_matrix
.- Parameters
X (np.array) – array containing images with size
(img_number, y, x, channel)
.reshape_size (list) – Shape of reshaped image
[y_size, x_size]
.overlap (int) – Number of pixels overlapping in each row/column with the pixels from the same row/column in the neighboring slice.
y – List or array containing coordinate annotations. Has length (img_number), each element of the list is a (N, 2) np.array where N=the number of points in the image.
- Returns
Two outputs (1) Stack of reshaped images in order of small to large y position, then small to large x position in the original image
np.array
of size(n*img_number, y_size, x_size, channel)
wheren
is number of images each image inX
was sliced into. If the original image lengths aren’t divisible byy_size
,x_size
, the last image in each row / column overlaps with the one before and (2) list of lengthn*img_number
.- Return type
(array, list)
-
deepcell_spots.data_utils.
slice_image
(X, reshape_size, overlap=0)[source]¶ Slice images in X into smaller parts.
Similar to
deepcell.utils.data_utils.reshape_matrix
.- Parameters
- Returns
Stack of reshaped images in order of small to large y, then small to large x position in the original image np.array of size (n*img_number, y_size, x_size, channel) where n = number of images each image in X was sliced into if the original image lengths aren’t divisible by y_size, x_size, the last image in each row / column overlaps with the one before.
- Return type
np.array
deepcell_spots.dotnet¶
CNN architecture with classification and regression outputs for dot center detection
-
deepcell_spots.dotnet.
classification_head
(input_shape, n_features=2, n_dense_filters=128, reg=1e-05, init='he_normal', name='classification_head')[source]¶ Creates a classification head.
- Parameters
- Returns
tensorflow.keras.Model for classification (softmax output).
-
deepcell_spots.dotnet.
default_heads
(input_shape, num_classes)[source]¶ Create a list of the default heads for spot detection.
-
deepcell_spots.dotnet.
dot_net_2D
(receptive_field=13, input_shape=(256, 256, 1), inputs=None, n_skips=3, norm_method='std', padding_mode='reflect', **kwargs)[source]¶ Creates a 2D featurenet with prediction heads for spot detection.
Model architecture based on
deepcell.model_zoo.bn_feature_net_skip_2D
.- Parameters
receptive_field (int) – the receptive field of the neural network.
input_shape (tuple) – Shape of input image.
inputs (tensor) – optional input tensor
n_skips (int) – The number of skip-connections.
norm_method (str) – Normalization method to use with the :mod:
deepcell.layers.normalization.ImageNormalization2D
layer.padding_mode (str) – Type of padding, one of
('reflect' or 'zero')
.kwargs (dict) – Other model options defined in
~bn_feature_net_2D
.
- Returns
2D FeatureNet with prediction heads for spot detection.
- Return type
tensorflow.keras.Model
deepcell_spots.dotnet_losses¶
Custom loss functions for DeepCell spots
-
class
deepcell_spots.dotnet_losses.
DotNetLosses
(alpha=0.25, gamma=2.0, sigma=3.0, n_classes=2, focal=False, d_pixels=1, mu=0, beta=0)[source]¶ Bases:
object
-
classification_loss
(y_true, y_pred)[source]¶ - Parameters
y_true – numpy array of size
(batch, Ly, Lx, 2)
one hot encoded pixel classification.y_pred – numpy array of size
(batch, Ly, Lx, 2)
one hot encoded pixel classification.
- Returns
focal / weighted categorical cross entropy loss
- Return type
-
classification_loss_regularized
(y_true, y_pred)[source]¶ Regularized classification loss.
- Parameters
y_true – numpy array of size
(batch, Ly, Lx, 2)
one hot encoded pixel classification.y_pred – numpy array of size
(batch, Ly, Lx, 2)
one hot encoded pixel classification.mu (float) – weight of regularization term.
- Returns
focal / weighted categorical cross entropy loss
- Return type
-
regression_loss
(y_true, y_pred)[source]¶ Calculates the regression loss of the shift from pixel center, only for pixels containing a dot (true regression shifts smaller in absolute value than 0.5).
- Parameters
y_true – tensor of shape
(batch, Ly, Lx, 2)
.y_pred – tensor of shape
(batch, Ly, Lx, 2)
.Ly
,Lx
are the dimensions of a single image. Dimension 3 contains delta_y
and delta_x
. d_pixels (int) – the number of pixels on each side of a point containing pixels over which to calculate the regression loss for the offset image (0 = calculate for point containing pixels only, 1 = calculate for 8-nearest neighbors, …).
- Returns
the normalized smooth L1 loss over all the input pixels with regressed point within the same pixel, i.e.
delta_y = y(...,0)
anddelta_x = y(...,1) <= 0.5
in absolute value.- Return type
-
-
deepcell_spots.dotnet_losses.
smooth_l1
(y_true, y_pred, sigma=3.0)[source]¶ Compute the smooth L1 loss of
y_pred
w.r.t.y_true
.Similar to
deepcell.losses.smooth_l1
without summation over channel axis.- Parameters
y_true – Tensor from the generator of shape
(B, ?, ?)
. The last value for each box is the state of the anchor (ignore, negative, positive).y_pred – Tensor from the network of shape
(B, ?, ?)
. Same shape asy_true
.sigma – The point where the loss changes from L2 to L1.
- Returns
The pixelwise smooth L1 loss of
y_pred
w.r.t.y_true
. Has same shape as each of the inputs:(B, ?, ?)
.
deepcell_spots.image_alignment¶
-
deepcell_spots.image_alignment.
align_images
(image_dict, reference_dict)[source]¶ Aligns input images with alignment transformation learned from reference images.
- Parameters
image_dict (dict) – Dictionary where keys are image IDs (
'readoutName'
) and values are images to be aligned for each readout name.reference_dict (dict) – Dictionary where keys are image IDs (
'readoutName'
) and values are fiducial channel (image used for alignment) for each readout name (multiple readout names may have the same reference image).
- Returns
- Dictionary where keys are image IDs (
'readoutName'
) and values are images from
image_dict
that have been aligned by transformations learned from images fromreference_dict
.
- Dictionary where keys are image IDs (
- Return type
aligned_dict (dict)
-
deepcell_spots.image_alignment.
crop_images
(aligned_dict)[source]¶ Crops images to remove zero-value pixels resulting from image alignment.
- Parameters
aligned_dict (dict) – Dictionary where keys are image IDs (
'readoutName'
) and values are images fromimage_dict
that have been aligned withalign_images
.- Returns
- Dictionary where keys are image IDs (
'readoutName'
) and values are images from
image_dict
that have been aligned withalign_images
with zero-value pixels cropped out.
- Dictionary where keys are image IDs (
- Return type
crop_dict (dict)
-
deepcell_spots.image_alignment.
read_images
(root_dir, dataorg, verbose=True)[source]¶ Reads in image files from given directories and parses them into dictionaries of different types.
- Parameters
root_dir (str) – Directory containing all image files
image_files (list) – List of image names (str) in root directory. Images must be saved in .npy format.
dataorg (pandas.DataFrame) – Data frame with required columns
'fileName'
(item in image_files),'readoutName'
(unique ID name given to each channel in each image),'fiducialFrame'
(frame number for image to be used for alignment),'cytoplasmFrame'
(frame number for image to be used for cell segmentation).verbose (bool, optional) – Boolean determining if file names are printed as they are processed. Defaults to
True
.
- Returns
max_im_dict
is a dictionary where keys are image IDs ('readoutName'
) and values are maximum intensity projections of frames associated with that readout name.fiducial_dict
is a dictionary where keys are image IDs ('readoutName'
) and values are fiducial channel (image used for alignment) for each readout name (multiple readout names may have the same).cytoplasm_dict
is a dictionary where keys are image IDs ('readoutName'
) and values are cytoplasm label image for each readout name (multiple readout names may have the same).- Return type
deepcell_spots.image_generators¶
Spot detection image generators
-
class
deepcell_spots.image_generators.
ImageFullyConvDotDataGenerator
(*args, **kwargs)[source]¶ Bases:
tensorflow.keras.preprocessing.image.ImageDataGenerator
Generates batches of tensor image data with real-time data augmentation.
The data will be looped over in batches.
- Parameters
featurewise_center (bool) – Whether to set input mean to 0 over the dataset, feature-wise.
samplewise_center (bool) – Whether to set each sample mean to 0.
featurewise_std_normalization (bool) – Whether to divide inputs by std of the dataset, feature-wise.
samplewise_std_normalization (bool) – Whether to divide each input by its std.
zca_epsilon (float) – Epsilon for ZCA whitening. Default is 1e-6.
zca_whitening (bool) – Whether to apply ZCA whitening.
rotation_range (int) – Degree range for random rotations.
width_shift_range –
float, 1-D array-like or int. Values for the following formats:
float: fraction of total width, if < 1, or pixels if >= 1.
1-D array-like: random elements from the array.
int: integer number of pixels from interval
(-width_shift_range, +width_shift_range)
withwidth_shift_range=2
possible values are ints [-1, 0, +1], same as withwidth_shift_range=[-1, 0, +1]
, while withwidth_shift_range=1.0
possible values are floats in the interval [-1.0, +1.0).
shear_range (float) – Shear angle in counter-clockwise direction in degrees
zoom_range – float or [lower, upper], Range for random zoom. If a float,
[lower, upper] = [1-zoom_range, 1+zoom_range]
.channel_shift_range (float) – Range for random channel shifts.
fill_mode (str) –
One of
("constant", "nearest", "reflect" or "wrap")
. Default is"nearest"
. Points outside the boundaries of the input are filled according to the given mode:'constant'
: kkkkkkkk|abcd|kkkkkkkk (cval=k)'nearest'
: aaaaaaaa|abcd|dddddddd'reflect'
: abcddcba|abcd|dcbaabcd'wrap'
: abcdabcd|abcd|abcdabcd
cval – float or int, value used for points outside the boundaries when
fill_mode = "constant"
.horizontal_flip (bool) – Whether to randomly flip inputs horizontally.
vertical_flip (bool) – Whether to randomly flip inputs vertically.
rescale – rescaling factor. Defaults to None. If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation).
preprocessing_function – function that will be applied to each input. The function will run after the image is resized and augmented. The function should take one image (Numpy tensor with rank 3) as argument and should output a Numpy tensor with the same shape.
data_format – One of
("channels_first", "channels_last")
."channels_last"
means that the images should have shape(samples, height, width, channels)
,"channels_first"
means that the images should have shape(samples, channels, height, width)
. It defaults to theimage_data_format
value found in your Keras config file at ~/.keras/keras.json
. If you never set it, then it will be"channels_last"
.validation_split (float) – Fraction of images reserved for validation (strictly between 0 and 1).
-
apply_points_transform
(y, transform_parameters, image_shape)[source]¶ Applies an affine transformation to a list of point coordinates according to given parameters.
- Parameters
y (np.array) – Array of shape (N, 2) which contains points in the format [y, x] or list of such arrays. (y Cartesian coordinate before the x, as in matrix/image indexing convention. Not to be confused with the variables X,y as in data and labels)
transform_parameters (dict) –
Dictionary with string - parameter pairs describing the transformation.
'theta'
: Float. Rotation angle in degrees.'tx'
: Float. Shift in the x direction.'ty'
: Float. Shift in the y direction.'shear'
: Float. Shear angle in degrees.'zx'
: Float. Zoom in the x direction.'zy'
: Float. Zoom in the y direction.'flip_horizontal'
: Boolean. Horizontal flip.'flip_vertical'
: Boolean. Vertical flip.'channel_shift_intensity'
: Float. Channel shift intensity.'brightness'
: Float. Brightness shift intensity. (taken from: keras ImageDataGenerator documentation)
image_shape (tuple) – shape of 2-dimensional image.
-
flow
(train_dict, batch_size=1, skip=None, transform=None, transform_kwargs={}, shuffle=True, seed=None, save_to_dir=None, save_prefix='', save_format='png')[source]¶ Generates batches of augmented/normalized data with given arrays.
- Parameters
train_dict (dict) – dictionary of X and y tensors. Both should be rank 4.
batch_size (int) – Size of a batch.
shuffle (bool) – Whether to shuffle the data between epochs.
seed (int) – Random seed for data shuffling.
save_to_dir (str) – Optional directory where to save the pictures being yielded, in a viewable format. This is useful for visualizing the random transformations being applied, for debugging purposes.
save_prefix (str) – Prefix to use for saving sample images (if
save_to_dir
is set).save_format (str) – Format to use for saving sample images (if
save_to_dir
is set).
- Returns
An Iterator yielding tuples of
(x, y)
wherex
is a numpy array of image data andy
is a numpy array of labels of the same shape.
-
random_transform
(x, y=None, seed=None)[source]¶ Applies a random transformation to an image
- Parameters
x – 3D tensor or list of 3D tensors.
y (np.array) – Array of shape (N, 2) which contains points in the format [y, x] or list of such arrays referring to coordinates in the image
x
, optional.seed – Random seed.
fill_mode (str) –
One of
("constant", "nearest", "reflect" or "wrap")
for type of padding used for points outside of the input image which correspond to points inside the output image. Points outside the boundaries of the input are filled according to the given mode:’constant’: kkkkkkkk|abcd|kkkkkkkk (cval=k)
’nearest’: aaaaaaaa|abcd|dddddddd
’reflect’: abcddcba|abcd|dcbaabcd
’wrap’: abcdabcd|abcd|abcdabcd
- Returns
A randomly transformed version of the input (same shape). If
y
is passed, it is transformed if necessary and returned. The transformed y contains input and padding (for fill_mode=’reflect’ or ‘wrap’) points mapped to output image space, which are inside the output image (transformed points mapped to outside of the output image boundaries are deleted)
-
class
deepcell_spots.image_generators.
ImageFullyConvDotIterator
(train_dict, image_data_generator, batch_size=1, skip=None, shuffle=False, transform=None, transform_kwargs={}, seed=None, data_format='channels_last', save_to_dir=None, save_prefix='', save_format='png')[source]¶ Bases:
tensorflow.keras.preprocessing.image.Iterator
Iterator yielding data from Numpy arrays (
X
andy
).- Parameters
train_dict (dict) – dictionary consisting of numpy arrays for
X
andy
. X has dimensions (batch, Ly, Lx, channel) and y has length batch containing np.arrays of shape (N, 2) where N is the number of points in the image.image_data_generator – Instance of
ImageDataGenerator
to use for random transformations and normalization.batch_size (int) – Size of a batch.
shuffle (bool) – Whether to shuffle the data between epochs.
seed (int) – Random seed for data shuffling.
data_format (str) – One of (
channels_first
,channels_last
).save_to_dir (str) – Optional directory where to save the pictures being yielded, in a viewable format. This is useful for visualizing the random transformations being applied, for debugging purposes.
save_prefix (str) – Prefix to use for saving sample images (if
save_to_dir
is set).save_format (str) – Format to use for saving sample images (if
save_to_dir
is set).
- Raises
ValueError – Training batches and labels should have the same length.
ValueError – Input data in
ImageFullyConvIterator
should have rank 4.
-
point_list_to_annotations
(points, image_shape, dy=1, dx=1)[source]¶ Generate label images used in loss calculation from point labels.
- Parameters
points (np.array) – array of size (N, 2) which contains points in the format [y, x].
image_shape (tuple) – shape of 2-dimensional image.
dy – pixel y width.
dx – pixel x width.
- Returns
- Dictionary with two keys,
detections
andoffset
. detections
is array of shape (image_shape,2) with pixels one hot encoding spot locations.offset
is array of shape (image_shape,2) with pixel values equal to signed distance to nearest spot in x- and y-directions.
- Dictionary with two keys,
- Return type
annotations (dict)
deepcell_spots.multiplex¶
-
deepcell_spots.multiplex.
assign_gene_identities
(cp_dict, dataorg, threshold, codebook)[source]¶ Assigns gene identity to barcoded spots.
- Parameters
cp_dict (dict) – Dictionary where keys are image IDs (
'readoutName'
) and values are classification prediction output from the spot detection model.dataorg (pandas.DataFrame) –
Dataframe
containing information about organization of image files.threshold (float) – value for the probability threshold a spot must exceed to be considered a spot.
codebook (pandas.DataFrame) –
Dataframe
with columns for each imaging round; rows are barcodes for genes, and values in the data frame are 0 if that barcode includes that imaging round and 1 if the barcode does not.
- Returns
Dataframe
with the spot locations, gene identity, and probability of assignment.
- Return type
pandas.DataFrame
-
deepcell_spots.multiplex.
assign_spots_to_cells
(decoded_spots_df, cytoplasm_pred)[source]¶ Adds column to spots DataFrame with identity of cell for each spot
- Parameters
decoded_spots_df (pandas.DataFrame) –
Dataframe
with the spot locations, gene identity, and probability of assignmentcytoplasm_pred (array) – Image where pixel values are labels for segmented cell cytoplasms.
- Returns
Dataframe
with the spot locations, gene identity, probability of assignment, and cell identity.
- Return type
pandas.DataFrame
-
deepcell_spots.multiplex.
cluster_points
(spots_to_cells_dict, cell_id, threshold=1.5, match_method='min_dist')[source]¶ Clusters points between rounds with one of two methods:
'min_dist'
or'mutual_nearest_neighbor'
.- Parameters
spots_to_cells_dict (dict) – Dict of dicts, keys are image IDs (
'readoutName'
), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label, there is one item in list for each image incoords_dict
.cell_id (int) – Integer key in
spots_to_cells_dict
.threshold (float, optional) – Distance threshold in pixels for matching points between rounds. Defaults to 1.5.
match_method (str, optional) – Method for matching spots between rounds. Options are
'min_dist'
and'mutual_nearest_neighbor'
. Defaults to'min_dist'
.
-
deepcell_spots.multiplex.
error_correction
(barcode, codebook_dict)[source]¶ Corrects barcodes that have no match in codebook.
To be assigned, a barcode may have a maximum of one bit flipped (Hamming distance of one) from input barcode.
- Parameters
-
deepcell_spots.multiplex.
gene_counts
(spots_to_cells_dict, codebook, threshold=1.5, match_method='min_dist', error_corr=True)[source]¶ Assigns combinatorial barcodes corresponding to gene identities.
Matches spots between rounds with one of two methods:
'min_dist'
or'mutual_nearest_neighbor'
.- Parameters
spots_to_cells_dict (dict) – Dict of dicts, keys are image IDs (
'readoutName'
), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label, there is one item in list for each image incoords_dict
.codebook (Pandas DataFrame) –
DataFrame
with columns for each imaging round; rows are barcodes for genes, and values in the data frame are 0 if that barcode includes that imaging round and 1 if the barcode does not.threshold (float, optional) – Distance threshold in pixels for matching points between rounds.
match_method (str, optional) – Method for matching spots between rounds. Options are
'min_dist'
and'mutual_nearest_neighbor'
. Defaults to'min_dist'
.error_corr (bool, optional) – Boolean that determines whether error correction is performed on barcodes that don’t have an exact match. Defaults to
True
.
- Returns
DataFrame
containing gene counts for each cell.- Return type
pandas.DataFrame
-
deepcell_spots.multiplex.
gene_counts_DBSCAN
(spots_to_cells_dict, codebook, threshold, error_corr=True)[source]¶ Assigns combinatorial barcodes corresponding to gene identities. Matches spots between rounds with DBSCAN clustering.
- Parameters
spots_to_cells_dict (dict) – Dictionary of dictionaries, keys are image IDs (
'readoutName'
), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label, there is one item in list for each image incoords_dict
.codebook (pandas.DataFrame) –
DataFrame
with columns for each imaging round; rows are barcodes for genes, and values in the data frame are 0 if that barcode includes that imaging round and 1 if the barcode does not.threshold (float) – Distance threshold in pixels for matching points between rounds.
error_corr (bool, optional) – Boolean that determines whether error correction is performed on barcodes that don’t have an exact match. Defaults to
True
.
- Returns
DataFrame
containing gene counts for each cell.- Return type
pandas.DataFrame
-
deepcell_spots.multiplex.
multiplex_match_spots_to_cells
(coords_dict, cytoplasm_pred)[source]¶ Matches detected spots to labeled cell cytoplasms.
- Parameters
coords_dict (dict) – Dictionary where keys are image IDs (
'readoutName'
) and values are coordinates of detected spotscytoplasm_pred (matrix) – Image where pixel values are labels for segmented cell cytoplasms.
- Returns
- dict of dicts, keys are image IDs (
'readoutName'
), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label, there is one item in list for each image in coords_dict.
- dict of dicts, keys are image IDs (
- Return type
deepcell_spots.point_metrics¶
Custom metrics for comparison of sets of points. A set of points is an unordered collection of points defined here by a list of the point coordinates. The metrics defined here quantify the similarity between two sets of points, taking into account their spatial structure. Specifically, the distance between points is taken into account (as opposed to the Jaccard distance, for example).
-
deepcell_spots.point_metrics.
get_mean_stats
(y_test, y_pred, threshold=0.98, d_thresh=1)[source]¶ Calculates the precision, recall, F1 score, and sum of min distances for stack of predictions.
- Parameters
y_test (array) – Array of shape
(N1,d),
set ofN1
points ind
dimensions.y_pred (array) – A batch of predictions, of the format:
y_pred[annot_type][ind]
is an annotation for image ind in the batch where annot_type = 0 or 1: 0 -classification
(from classification head), 1 -offset_regression
(from regression head).threshold (float) – Probability threshold for determining spot locations.
d_thresh (float) – A distance threshold used in the definition of
tp
andfp
.
-
deepcell_spots.point_metrics.
match_points_min_dist
(pts1, pts2, threshold=None)[source]¶ Find a pairing between two sets of points that minimizes the sum of the Euclidean distances between matched points from each set.
- Parameters
pts1 (array) – Array of shape
(N1,d)
, set ofN1
points ind
dimensions.pts2 (array) – Array of shape
(N2,d)
, set ofN2
points ind
dimensions. Each row ofpts1
andpts2
should be the coordinates of a single d-dimensional point.threshold (float) – A distance threshold for matching two points. Points that are more than the threshold distance apart, cannot be matched.
- Returns
- An array of row indices and one of
corresponding column indices giving the optimal assignment, as described in: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html
- Return type
(numpy.array, numpy.array)
-
deepcell_spots.point_metrics.
match_points_mutual_nearest_neighbor
(pts1, pts2, threshold=None)[source]¶ Find a pairing between two sets of points that ensures that each pair of points are mutual nearest neighbors.
- Parameters
pts1 (array) – Array of shape
(N1,d)
, set ofN1
points ind
dimensions.pts2 (array) – Array of shape
(N2,d)
, set ofN2
points ind
dimensions. Each row ofpts1
andpts2
should be the coordinates of a single d-dimensional point.threshold (float) – A distance threshold for matching two points. Points that are more than the threshold distance apart, cannot be matched.
- Returns
- An array of row indices and one of
corresponding column indices giving the optimal assignment, as described in: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html
- Return type
(numpy.array, numpy.array)
-
deepcell_spots.point_metrics.
model_benchmarking
(pred, coords, threshold, min_distance)[source]¶ Calculates the precision, recall, F1 score, Jaccard index, root mean square error, and sum of min distances for stack of predictions.
- Parameters
pred (array) – A batch of predictions, of the format:
y_pred[annot_type][ind]
is an annotation for image ind in the batch where annot_type = 0 or 1: 0 -classification
(from classification head), 1 -offset_regression
(from regression head).coords (list) – Nested list of coordinate locations for ground truth spots from a single annotator.
threshold (float) – A number in
[0, 1]
. Pixels with classification score >threshold
are considered containing a spot center, and their corresponding regression values will be used to create a final spot position prediction which will be added to the output spot center coordinates list.min_distance (float) – The minimum distance between detected spots in pixels
- Returns
Six lists, in order: values for the precision of the predicted spot numbers; values for the recall of the predicted spot numbers; values for the F1 score of the predicted spot numbers; values for the Jaccard index of the predicted spot numbers; values for the root mean square error of the spot locations; values for the sum of min distance of the spot locations.
- Return type
-
deepcell_spots.point_metrics.
point_F1_score
(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]¶ Calculates the F1 score of dot detection using the following definitions:
The F1 score is equal to:
F1 score = 2*p*r / (p+r)
where the precision is equal to:(the number of true positives) / (total number of predicted points)
and the recall is equal to:(the number of true positives) / (total number of true points)
and true positive (tp
) = a predicted dot p with a matching true dot t, where the matching between predicted and true points is such that the total distance between matched points is minimized, and points can be matched only if the distance between them is smaller than the threshold. Otherwise, the predicted dot is a false positive (fp
) and the true dot is a false negative (fn
).- Parameters
pts1 (array) – Array of shape
(N1,d)
, set ofN1
points ind
dimensions.pts2 (array) – Array of shape
(N2,d)
, set ofN2
points ind
dimensions. Each row ofpts1
andpts2
should be the coordinates of a single d-dimensional point.threshold (float) – A distance threshold used in the definition of
tp
andfp
.
- Returns
the F1 score as defined above (a number between 0 and 1)
- Return type
-
deepcell_spots.point_metrics.
point_precision
(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]¶ Calculates the precision,
tp/(tp + fp)
, of point detection using the following definitions:true positive (
tp
) = a predicted dot p with a matching true dot t, where the matching between predicted and true points is such that the total distance between matched points is minimized, and points can be matched only if the distance between them is smaller than the threshold. Otherwise, the predicted dot is a false positive (fp
).The precision is equal to:
(the number of true positives) / (total number of predicted points)
- Parameters
pts1 (array) – Array of shape
(N1,d)
, set ofN1
points ind
dimensions.pts2 (array) – Array of shape
(N2,d)
, set ofN2
points ind
dimensions. Each row ofpts1
andpts2
should be the coordinates of a single d-dimensional point.threshold (float) – A distance threshold used in the definition of
tp
andfp
.match_points_function – A function that matches points in two sets, and has three parameters:
pts1
,pts2
,threshold
- two sets of points, and a threshold distance for allowing a match supported matching functions arematch_points_min_dist
,match_points_mutual_nearest_neighbor
.
- Returns
the precision as defined above (a number between 0 and 1).
- Return type
-
deepcell_spots.point_metrics.
point_recall
(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]¶ Calculates the recall, tp/(tp + fn), of point detection using the following definitions:
true positive (
tp
) = a predicted dot p with a matching true dot t, where the matching between predicted and true points is such that the total distance between matched points is minimized, and points can be matched only if the distance between them is smaller than the threshold. Otherwise, the true dot is a false negative (fn
).The recall is equal to:
(the number of true positives) / (total number of true points)
- Parameters
pts1 (array) – Array of shape
(N1,d)
, set ofN1
points ind
dimensions.pts2 (array) – Array of shape
(N2,d)
, set ofN2
points ind
dimensions. Each row ofpts1
andpts2
should be the coordinates of a single d-dimensional point.threshold (float) – A distance threshold used in the definition of
tp
andfn
.match_points_function – A function that matches points in two sets, and has three parameters:
pts1
,pts2
,threshold
- two sets of points, and a threshold distance for allowing a match supported matching functions arematch_points_min_dist
,match_points_mutual_nearest_neighbor
.
- Returns
the recall as defined above (a number between 0 and 1).
- Return type
-
deepcell_spots.point_metrics.
stats_points
(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]¶ Calculates point-based statistics (precision, recall, F1, JAC, RMSE, d_md).
- Parameters
pts1 (array) – Array of shape
(N1,d)
, set ofN1
points ind
dimensions.pts2 (array) – Array of shape
(N2,d)
, set ofN2
points ind
dimensions. Each row ofpts1
andpts2
should be the coordinates of a single d-dimensional point.threshold (float) – A distance threshold used in the definition of
tp
andfp
.
- Returns
containing the calculated statistics
- Return type
dictionary
-
deepcell_spots.point_metrics.
sum_of_min_distance
(pts1, pts2, normalized=False)[source]¶ Calculates the sum of minimal distance measure between two sets of d-dimensional points as suggested by Eiter and Mannila in: https://link.springer.com/article/10.1007/s002360050075
- Parameters
pts1 (array) – Array of shape
(N1,d)
, set ofN1
points ind
dimensions.pts2 (array) – Array of shape
(N2,d)
, set ofN2
points ind
dimensions. Each row ofpts1
andpts2
should be the coordinates of a single d-dimensional point.normalized (bool) – If
True
, each sum will be normalized by the number of elements in it, resulting in an intensive distance measure which doesn’t scale like the number of points.
- Returns
the sum of minimal distance between point sets X and Y, defined as: d(X,Y) = 1/2 * (sum over x in X of min over y in Y of d(x,y) + sum over y in Y of min over x in X of d(x,y)) = 1/2 * (sum over x in X of d(x,Y) + sum over y in Y of d(X,y)), where d(x,y) is the Euclidean distance. Note that this isn’t a metric in the mathematical sense (it doesn’t satisfy the triangle inequality).
- Return type
deepcell_spots.postprocessing_utils¶
Functions that convert deep learning model output to list of detected spots
-
deepcell_spots.postprocessing_utils.
y_annotations_to_point_list
(y_pred, threshold=0.95)[source]¶ Convert raw prediction to a predicted point list: classification of pixel as containing dot >
threshold
, and their corresponding regression values will be used to create a final spot position prediction which will be added to the output spot center coordinates list.- Parameters
y_pred (array) – a dictionary of predictions with keys
'classification'
and'offset_regression'
corresponding to the named outputs of thedot_net_2D model
.ind (int) – the index of the image in the batch for which to convert the annotations.
threshold (float) – a number in
[0, 1]
. Pixels with classification score >threshold
are considered containing a spot center.
- Returns
spot center coordinates of the format
[[y0, x0], [y1, x1],...]
- Return type
array
-
deepcell_spots.postprocessing_utils.
y_annotations_to_point_list_cc
(y_pred, threshold=0.95)[source]¶ Convert raw prediction to a predicted point list: classification of connected component as containing dot >
threshold
, and their corresponding regression values will be used to create a final spot position prediction which will be added to the output spot center coordinates list.- Parameters
y_pred (array) – a dictionary of predictions with keys
'classification'
and'offset_regression'
corresponding to the named outputs of thedot_net_2D model
.threshold (float) – a number in
[0, 1]
. Pixels with classification score >threshold
are considered containing a spot center.
- Returns
spot center coordinates of the format
[[y0, x0], [y1, x1],...]
- Return type
array
-
deepcell_spots.postprocessing_utils.
y_annotations_to_point_list_max
(y_pred, threshold=0.95, min_distance=2)[source]¶ Convert raw prediction to a predicted point list using
skimage.feature.peak_local_max
to determine local maxima in classification prediction image, and their corresponding regression values will be used to create a final spot position prediction which will be added to the output spot center coordinates list.- Parameters
y_pred (array) – a dictionary of predictions with keys
'classification'
and'offset_regression'
corresponding to the named outputs of thedot_net_2D model
.threshold (float) – a number in
[0, 1]
. Pixels with classification score >threshold
are considered as containing a spot center.min_distance (float) – the minimum distance between detected spots in pixels.
- Returns
spot center coordinates of the format [[y0, x0], [y1, x1],…]
- Return type
array
-
deepcell_spots.postprocessing_utils.
y_annotations_to_point_list_restrictive
(y_pred, threshold=0.95)[source]¶ Convert raw prediction to a predicted point list: classification of pixel as containing dot >
threshold
AND center regression is contained in the pixel. The corresponding regression values will be used to create a final spot position prediction which will be added to the output spot center coordinates list.- Parameters
y_pred (array) – a dictionary of predictions with keys
'classification'
and'offset_regression'
corresponding to the named outputs of thedot_net_2D model
.ind (int) – the index of the image in the batch for which to convert the annotations.
threshold (float) – a number in
[0, 1]
. Pixels with classification score >threshold
are considered containing a spot center.
- Returns
spot center coordinates of the format
[[y0, x0], [y1, x1],...]
.- Return type
array
deepcell_spots.preprocessing_utils¶
Image normalization methods
-
deepcell_spots.preprocessing_utils.
mean_std_normalize
(image, epsilon=1e-07)[source]¶ Normalize image data by subtracting the mean pixel value and dividing by the standard deviation of the pixel values.
- Parameters
image (numpy.array) – 4D numpy array of image data.
epsilon (float) – fuzz factor used in numeric expressions.
- Returns
normalized image data.
- Return type
numpy.array
-
deepcell_spots.preprocessing_utils.
min_max_normalize
(image, clip=False)[source]¶ - Normalize image data by subtracting minimum pixel value and
dividing by the maximum pixel value.
- Parameters
image (numpy.array) – 4D numpy array of image data.
clip (boolean) – Defaults to false. Determines if pixel values are clipped by percentile.
- Returns
normalized image data.
- Return type
numpy.array
deepcell_spots.simulate_data¶
Data simulators for spot images for benchmarking deep learning model and annotator detections for benchmarking EM algorithm
-
deepcell_spots.simulate_data.
add_gaussian_noise
(image, m, s)[source]¶ Adds gaussian random noise with mean m and standard deviation s to the input image.
- Parameters
image (numpy.array) – 2D image to add noise.
m – mean of gaussian random noise to be added to each pixel of image.
s – standard deviation of gaussian random noise to be added to each pixel of image.
- Returns
The noisy image.
- Return type
numpy.array
-
deepcell_spots.simulate_data.
gaussian_spot_image_generator
(L, N_min, N_max, sigma_mean, sigma_std, A_mean=1, A_std=0, noise_mean=0, noise_std=0, segmask=False, yield_pos=False)[source]¶ Generates random images of Gaussian spots with random uniformly distributed center positions in the image area, i.e. in
[0,L-1]*[0,L-1].
The number of spots in an image is uniformly distributed in [N_min, N_max]
. Each spot is a gaussian with standard deviation normally distributed with sigma_mean
,sigma_std
, and cutoff value of 0.5 (it is redrawn if a smaller value is drawn). The intensity of each spot is normally distributed.- Parameters
L – generated image side length - the generated images have shape
(L,L)
N_max (N_min,) – the number of spots plotted in each image is uniformly distributed in
[N_min, N_max]
.sigma_std (sigma_mean,) – the mean and standard deviation of the normally distributed spot width sigma (i.e. each spot is a Gaussian with standard deviation sigma).
A_std (A_mean,) – the intensity of each spot is normally distributed with mean
A_mean
, and standard deviationA_std
.yield_pos – if
True
, will yield lists of x and y positions and bounding boxes in addition to image and label image.noise_std (noise_mean,) – mean and std of white noise to be added to every pixel of the image
- Returns
(L, L)
numpy array simulated image label:(L, L)
numpy array of - 0 background, 1 for pixel of (rounded) spot center if segmask isFalse
segmentation mask ifsegmask
isTrue
(pixel values are0
in background,1,...,N
for pixels belonging to theN
spots in the image)- Return type
img
-
deepcell_spots.simulate_data.
is_in_image
(x, y, a, L)[source]¶ Determines if a square with defined vertices is contained in an image with larger dimensions
- Parameters
- Returns
Whether the square is contained in image dimensions
- Return type
-
deepcell_spots.simulate_data.
is_overlapping
(x_list, y_list, a_list, x, y, a)[source]¶ Determines if a square overlaps with a list of other squares.
Returns boolean,
True
if square overlaps with any of squares in list,False
if it doesn’t overlap with any of squares in list- Parameters
x_list (list) – List of x coordinates for top left corners of squares to be compared with square of interest.
y_list (list) – List of y coordinates for top left corners of squares to be compared with square of interest.
a_list (list) – List of side lengths of squares to be compared with square of interest.
x (int) – Value for the x coordinate of the top left corner of the square of interest
y (int) – Value for the y coordinate of the top left corner of the square of interest
a (int) – Value for the side length of the square of interest
- Returns
Whether the square overlaps with any of squares in list.
- Return type
-
deepcell_spots.simulate_data.
percent_correct
(gt, data_array)[source]¶ Calculates the percent of detections correctly labeled.
Returns a value from 0 to 1 indicating the fraction of detections correctly labeled.
- Parameters
gt (array) – Array of ground truth cluster labels. 1 indicates a true detection and 0 indicates a false detection.
data_array (array) – Array of simulated detections with length equal to the number of detections. A value of 1 indicates a detected cluster and a value of 0 indicates an undetected cluster.
- Returns
Value for fraction of detections correctly labeled compared to ground truth.
- Return type
percent_corr (float)
-
deepcell_spots.simulate_data.
sim_annotators
(gt, tpr_list, fpr_list)[source]¶ Simulate the detections of multiple annotators with different TPRs and FPRs on the same ground truth data.
Returns a matrix of simulated detection data with dimensions clusters x annotators.
- Parameters
gt (array) – Array of ground truth cluster labels. 1 indicates a true detection and 0 indicates a false detection.
tpr_list (array) – Array of TPR values for each annotator. For a ground truth value of 1, the TPR is the probability that the function will output 1, indicating that the simulated annotator detected the true cluster.
fpr_list (array) – Array of FPR values for each annotator. For a ground truth value of 0, the FPR is the probability that the function will output 1, indicating that the simulated annotator falsely detected the cluster.
- Returns
Matrix of simulated detection data with dimensions clusters x annotators. A value of 1 indicates a detected cluster and a value of 0 indicates an undetected cluster.
- Return type
numpy.array
-
deepcell_spots.simulate_data.
sim_detections
(gt, tpr, fpr)[source]¶ Simulates detection data for a set of ground truth cluster labels and an annotator with a specified TPR and FPR.
Returns an array with the same length as input
gt
, where 1 indicates the simulated annotator detected a cluster and 0 indicates an undetected cluster.- Parameters
gt (array) – Array of ground truth cluster labels. 1 indicates a true detection and 0 indicates a false detection.
tpr (float) – The true positive rate of the annotator. For a ground truth value of 1, it is the probability that the function will output 1, indicating that the simulated annotator detected the true cluster.
fpr (float) – The false positive rate of the annotator. For a ground truth value of 0, it is the probability that the function will output 1, indicating that the simulated annotator falsely detected the cluster.
- Returns
Array of detected cluster labels. A value of 1 indicates that a cluster was detected by the annotator, and 0 indicates that the cluster was not detected by the annotator.
- Return type
array
-
deepcell_spots.simulate_data.
sim_gt_clusters
(num_clusters, tp_ratio)[source]¶ Generate random simulated labels (true detection or false detection) for clusters, with a specified rate of true detections and false detections,
tp_ratio
.Returns a list of length
num_clusters
of cluster labels with value 1 for a true detection and 0 for a false detection.
deepcell_spots.singleplex¶
Tools for analysis of singleplex FISH images
-
deepcell_spots.singleplex.
match_spots_to_cells
(labeled_im, coords)[source]¶ Assigns detected spots to regions of a labeled image.
Returns a dictionary where keys are labeled regions of input image and values are spot coordinates corresponding with that labeled region.
- Parameters
labeled_im (array) – Image output from segmentation algorithm with dimensions
(1,x,y,1)
where pixels values label regions of the image corresponding with objects of interest (nuclei, cytoplasm, etc.).coords (array) – Array of coordinates for spot location with dimensions
(number of spots,2)
.
- Returns
Dictionary where keys are labeled regions of input image and values are spot coordinates corresponding with that labeled region.
- Return type
-
deepcell_spots.singleplex.
process_spot_dict
(spot_dict)[source]¶ Processes spot dictionary into an array of coordinates and list of region labels for spots.
- Parameters
spot_dict (dict) – Dictionary where keys are labeled regions of input image and values are spot coordinates corresponding with that labeled region.
- Returns
(1) Array of coordinates for spot location with dimensions
(number of spots,2)
. Re-ordered to correspond with list of region labels. (2) List of region labels corresponding with coordinates. Intended to be used to color a cmap
when visualizing spots.- Return type
(array, list)
-
deepcell_spots.singleplex.
remove_nuc_spots_from_cyto
(labeled_im_nuc, labeled_im_cyto, coords)[source]¶ Removes spots in nuclear regions from spots assigned to cytoplasmic regions.
Returns a dictionary where keys are labeled cytoplasmic regions of input image and values are spot coordinates corresponding with that labeled cytoplasm region.
- Parameters
labeled_im_nuc (array) – Image output from segmentation algorithm with dimensions
(1,x,y,1)
where pixels values label nuclear regions.labeled_im_cyto (array) – Image output from segmentation algorithm with dimensions
(1,x,y,1)
where pixels values label cytoplasmic regions.coords (array) – Array of coordinates for spot location with dimensions
(number of spots,2)
.
- Returns
- Dictionary where keys are labeled regions of input image and
values are spot coordinates corresponding with that labeled region (cytoplasm excluding nucleus).
- Return type
deepcell_spots.spot_em¶
Expectation maximization functions for spot detection
-
deepcell_spots.spot_em.
calc_tpr_fpr
(gt, data)[source]¶ Calculate the true positive rate and false positive rate for a pair of ground truth labels and detection data.
- Parameters
gt (array) – Array of ground truth cluster labels. A value of 1 indicates a true detection and a value of 0 indicates a false detection.
- data (array): Array of detection data with same length.
A value of 1 indicates a detected cluster and a value of 0 indicates an undetected cluster.
- Returns
(1) Value for the true positive rate of an annotator. This is the probability that an annotator will detect a spot that is labeled as a ground truth true detection. (2) Value for the false positive rate of an annotator. This is the probability that an annotator will detect a spot that is labeled as a ground truth false detection.
- Return type
-
deepcell_spots.spot_em.
cluster_coords
(coords_df, threshold=1.5)[source]¶ Clusters coordinates in each image by proximity. If clusters contain more than one detection from a single algorithm, the detection closest to the centroid of the cluster is retained and all others are separated into new clusters.
- Parameters
coords_df (DataFrame) –
Dataframe
containing algorithm, image, and location information about each cluster.threshold (float) – Distance in pixels below which detections will be grouped into clusters.
- Returns
Dataframe
containing algorithm, image, location, and cluster information about each cluster.
- Return type
coords_df (DataFrame)
-
deepcell_spots.spot_em.
define_edges
(coords_df, threshold)[source]¶ Defines the adjacency matrix for the multiple annotators, connecting points that are sufficiently close to one another. It is assumed that these spots are derived from the same ground truth spot in the original image.
- Parameters
coords_df (DataFrame) –
Dataframe
with columns'x'
and'y'
which encode the spot locations and'Algorithm'
which encodes the algorithm that corresponds with that spotthreshold (float) – The distance in pixels. Detections closer than the threshold distance will be grouped into a cluster of detections, assumed to be derived from the same ground truth detection.
- Returns
Matrix of dimensions
(number of detections) x (number of detections)
defining edges of a graph clustering detections by detections from different annotators derived from the same ground truth detection. A value of 1 denotes two connected nodes in the eventual graph and a value of 0 denotes disconnected nodes.- Return type
numpy.array
-
deepcell_spots.spot_em.
det_likelihood
(cluster_data, pr_list)[source]¶ Calculate the likelihood that a cluster is a true positive or false positive. To calculate the likelihood of a true positive,
pr_list
should be a list of TPRs for all annotators. To calculate the likelihood of a cluster being a false positive, pr_list should be a list of FPRs for all annotators.Returns a value for the likelihood that a cluster is either a true positive or a false positive.
- Parameters
cluster_data (array) – Array of detection labels for each annotator. Entry has value 1 if annotator detected the cluster, and entry has value 0 if annotator did not detect the cluster.
pr_list (array) – Array of true positive rates for each annotator if one wants to calculate the likelihood that the cluster is a true positive, or array of false positive rates for each annotator if one wants to calculate the likelihood that the cluster is a false positive.
- Returns
Value for the likelihood that a cluster is either a true positive or a false positive detection.
- Return type
-
deepcell_spots.spot_em.
em_spot
(cluster_matrix, tp_list, fp_list, prior=0.9, max_iter=10)[source]¶ Estimate the TPR/FPR and probability of true detection for various spot annotators using expectation maximization.
Returns the true positive rate and false positive rate for each annotator, and returns the probability that each spot is a true detection or false detection.
- Parameters
cluster_matrix (matrix) – Matrix of detection labels for each spot for each annotator. Dimensions spots x annotators. A value of 1 indicates that the spot was detected by that annotator and a value of 0 indicates that the spot was not detected by that annotator.
tp_list (array) – Array of initial guesses for the true positive rates for each annotator.
fp_list (array) – Array of initial guesses for the false positive rates for each annotator.
prior (float) – Value for the prior probability that a spot is a true positive.
max_iter (int) – Value for the number of times the expectation maximization algorithm will iteratively calculate the MLE for the TPR and FPR of the annotators.
- Returns
(1) Array of final estimates for the true positive rates for each annotator. (2) Array of final estimates for the false positive rates for each annotator. (3) Matrix of probabilities that each cluster is a true detection (column 0) or false detection (column 1). Dimensions
(spots x 2)
.- Return type
(array, array, matrix)
-
deepcell_spots.spot_em.
load_coords
(coords_dict)[source]¶ Loads a dictionary of coordinate spot locations into a
DataFrame
.- Parameters
coords_dict (dictionary) – Dictionary in which keys are names of spot detection algorithms and values are coordinate locations of spots detected with each algorithm. Coordinates are nested list (length is number of images) of lists of shape
(number spots, 2)
.- Returns
Dataframe
containing algorithm, image, and location information about each cluster.
- Return type
coords_df (DataFrame)
-
deepcell_spots.spot_em.
norm_marg_likelihood
(cluster_data, tp_list, fp_list, prior)[source]¶ Calculates the normalized marginal likelihood that each cluster is a true positive or a false positive.
- Parameters
cluster_data (array) – Array of detection labels for each annotator. Entry has value 1 if annotator detected the cluster, and entry has value 0 if annotator did not detect the cluster.
tp_list (array) – Array of true positive rates for each annotator.
fp_list (array) – Array of false positive rates for each annotator.
- Returns
(1) Value for the normalized marginal likelihood that a cluster is a true positive detection. (2) Value for the normalized marginal likelihood that a cluster is a false positive detection.
- Return type
-
deepcell_spots.spot_em.
predict_cluster_probabilities
(coords_df, tpr_dict, fpr_dict, prior=0.9, max_iter=10)[source]¶ Predicts the probability that each cluster of detections corresponds with a true positive detection.
- Parameters
coords_df (DataFrame) –
Dataframe
containing algorithm, image, location, and cluster information about each cluster.tpr_dict (dictionary) – Dictionary in which keys are algorithm names and values are estimates for TPR of each algorithm.
fpr_dict (dictionary) – Dictionary in which keys are algorithm names and values are estimates for FPR of each algorithm.
prior (float) – Prior probability that a cluster will correspond with a true positive detection. Value must be between 0 and 1.
max_iter (int) – Number of iterations performed by EM algorithm.
- Returns
Dataframe
containing algorithm, image, location, cluster, spot probability, and centroid information about each cluster.
- Return type
coords_df (DataFrame)
deepcell_spots.training¶
Functions for training convolutional neural networks
-
deepcell_spots.training.
train_model_dot
(model, dataset, expt='', test_size=0.2, seed=0, n_epoch=10, batch_size=1, num_gpus=None, frames_per_batch=5, optimizer=tensorflow.keras.optimizers.SGD, log_dir='/data/tensorboard_logs', model_dir='/data/models', model_name=None, focal=False, sigma=3.0, alpha=0.25, gamma=0.5, lr_sched=deepcell.utils.train_utils.rate_scheduler, rotation_range=0, flip=True, shear=0, zoom_range=0, fill_mode='nearest', cval=0.0, **kwargs)[source]¶ Train a dot center detection model using fully convolutional mode.
- Parameters
model (tensorflow.keras.Model) – The model to train.
dataset (str) – Path to a dataset to train the model with.
expt (str) – Experiment, substring to include in model name.
test_size (float) – Percent of data to leave as test data.
seed (int) – Random seed used for train-test split.
n_epoch (int) – Number of training epochs.
batch_size (int) – Number of samples per batch in each training step.
num_gpus (int) – The number of GPUs to train on.
frames_per_batch (int) – Number of training frames if training 3D data.
log_dir (str) – Filepath to save tensorboard logs. If None, disables the tensorboard callback.
model_dir (str) – Directory to save the model file.
model_name (str) – Name of the model (and name of output file).
focal (bool) – If true, uses focal loss.
sigma (float) – The point where the loss changes from L2 to L1.
alpha (float) – Scale the focal weight with alpha.
gamma (float) – Parameter for focal loss (Take the power of the focal weight with gamma.).
optimizer (object) – Pre-initialized optimizer object (SGD, Adam, etc.).
lr_sched (function) – Learning rate scheduler function.
rotation_range (int) – Maximum rotation range for image augmentation.
flip (bool) – Enables horizontal and vertical flipping for augmentation.
shear (int) – Maximum shear range for image augmentation.
zoom_range (tuple) – Minimum and maximum zoom values
(0.8, 1.2)
.fill_mode (str) – padding style for data augmentation (input parameter of
tf.keras.preprocessing.image.ImageDataGenerator
).cval (float or int) – used for pixels outside the boundaries of the input image when
fill_mode='constant'
.kwargs (dict) – Other parameters to pass to
_transform_masks
.
- Returns
The trained model.
- Return type
tensorflow.keras.Model
deepcell_spots.utils¶
Functions for image augmentation
-
deepcell_spots.utils.
affine_transform_points
(points, transform_parameters, image_shape, img_row_axis=0, img_col_axis=1, fill_mode='nearest')[source]¶ Perform an affine transform mapping input coordinates referring to the input image of the
apply_transform
function of the classImageDataGenerator
to the output space of that function. Returned points are (original and padding points) contained in the output image.- Parameters
transform_parameters – dictionary of affine transformation parameters such as the output of
ImageDataGenerator
methodget_random_transform
.points – (N, 2) numpy array which contains points in the format
[y, x]
(NOTE: as in image/matrix notation, not Cartesian notation) points are labels for the input image - they should be-0.5 <= x <= Lx-0.5, -0.5 <= y <= Ly-0.5
whereLx = image_shape[img_col_axis]
andLy = image_shape[img_row_axis]
.image_shape (tuple) – the shape of the image which contains the points (for 2D image, has length 2).
img_row_axis – the index of the axis
(0 or 1)
to be flipped whenflip_vertical
isTrue
.img_col_axis – the index of the axis
(0 or 1)
to be flipped whenflip_horizontal
isTrue
.fill_mode –
One of
("constant", "nearest", "reflect" or "wrap")
. Default is'nearest'
. Points outside the boundaries of the input are filled according to the given mode:'constant'
: kkkkkkkk|abcd|kkkkkkkk (cval=k)'nearest'
: aaaaaaaa|abcd|dddddddd'reflect'
: abcddcba|abcd|dcbaabcd'wrap'
: abcdabcd|abcd|abcdabcd
- Returns
- list of points / or numpy array of shape
(N',2)
which contains points in the format
[y, x]
. NOTEN' != N
because points in the original image may fall outside of the transformed output image. Also, if fill_mode is'reflect'
or'wrap'
, point images in the padding of the input image can be inside the output image.
- list of points / or numpy array of shape
- Return type
transformed_points
-
deepcell_spots.utils.
generate_transformation_matrix
(transform_parameters, image_shape, img_row_axis, img_col_axis)[source]¶ Given a dictionary of affine transformation parameters (such as the one generated by the
ImageDataGenerator
functionget_random_transform
), generate the transformation matrix and offset whichapply_affine_transform
generates and passes toscipy.ndimage.interpolation.affine_transform
:ndimage.interpolation.affine_transform( x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval)
this function performs the calculations performed by
tf.keras.preprocessing.image.apply_affine_transform
to obtainfinal_affine_matrix
andfinal_offset
, and returns them.A point
p
in the output image ofaffine_transform
corresponds to the pointpT+s
in the input image- Parameters
transform_parameters –
dictionary of affine transformation parameters such as the output of
ImageDataGenerator
methodget_random_transform
. (as used in input toapply_transform
called on image) Fromkeras-preprocessing/keras_preprocessing/image/image_data_generator.py
methodapply_transform
documentation: Dictionary with string - parameter pairs describing the transformation. Currently, the following parameters from the dictionary are used:'theta'
: Float. Rotation angle in degrees.'tx'
: Float. Shift in the x direction.'ty'
: Float. Shift in the y direction.'shear'
: Float. Shear angle in degrees.'zx'
: Float. Zoom in the x direction.'zy'
: Float. Zoom in the y direction.'flip_horizontal'
: Boolean. Horizontal flip. - NOT USED HERE'flip_vertical'
: Boolean. Vertical flip. - NOT USED HERE'channel_shift_intensity'
: Float. Channel shift intensity. - NOT USED HERE'brightness'
: Float. Brightness shift intensity. - NOT USED HERE
- Returns
final_affine_matrix (2*2 matrix ,denote below: T), final_offset (length 2 vector, denote below: s)
- Return type
(array, array)
-
deepcell_spots.utils.
subpixel_distance_transform
(point_list, image_shape, dy=1, dx=1)[source]¶ For each pixel in image, return the vectorial distance to a point in
point_list
that is in the pixel nearest to it.- Parameters
point_list – Array of size
(N,2)
of point coordinates[y, x]
(y before x as in image/matrix indexing)image_shape –
(Ly,Lx)
specifies the shape of an image that contains the coordinates. The coordinates should be in dy*[-0.5, Ly-0.5] x dx*[-0.5, Lx-0.5]
dy – pixel width in y axis
dx – pixel width in x axis
- Returns
(Ly, Lx)
, nearest_point[i,j] is the index in point_list of a point in a point-containing pixel which is closest to pixel
[i,j]
. Note no uniqueness of the point or the pixel, since there could be several point-containing pixels with minimal distance to pixel[i,j]
and there could be several points contained in the pixel[i,j]
but only one is chosendelta_x[i,j]
,delta_y[i,j]
are elements of the vectorial distance between the chosen point whichnearest_point[i,j]
refers to, and the center of the pixel[i,j]
, which is atx =j * dx, y = i * dy
.- numpy.array:
(Ly, Lx)
numpy array of signed y distance between a point from
point_list
that is near pixel[i,j]
and the center of the pixel.- numpy.array: (Ly, Lx) numpy array of signed x distance between a point
from
point_list
that is near pixel[i,j]
and the center of the pixel.
- Return type
numpy.array
DeepCell Spots¶
deepcell-spots
is a deep learning library for fluorescent spot detection image analysis. It allows you to apply pre-existing models and train new deep learning models for spot detection. It is written in Python and built using TensorFlow, Keras and DeepCell. More detailed documentation is available here.

DeepCell Spots Application¶
deepcell-spots
contains applications that greatly simplify the implementation of deep learning models for spot detection. deepcell_spots.applications.SpotDetection
contains a pre-trained model for fluorescent spot detection on images derived from assays such as RNA FISH and in-situ sequencing. This model returns a list of coordinate locations for fluorescent spots detected in the input image. deepcell_spots.applications.Polaris
pairs this spot detection model with DeepCell models for nuclear and cytoplasmic segmentation.
How to Use¶
from deepcell_spots.applications import SpotDetection
app = SpotDetection()
# image is an np array with dimensions (batch,x,y,channel)
# threshold is the probability threshold that a pixel must exceed to be considered a spot
coords = app.predict(image,threshold=0.9)
DeepCell-Spots for Developers¶
Build and run a local docker container, similarly to the instructions for deepcell-tf. The relevant parts are copied here with modifications to work for deepcell-spots. For more elaborate instructions, see the deepcell-tf README.
Build a local docker container, specifying the deepcell version with DEEPCELL_VERSION¶
git clone https://github.com/vanvalenlab/deepcell-spots.git
cd deepcell-spots
docker build --build-arg DEEPCELL_VERSION=0.12.0-gpu -t $USER/deepcell-spots .
Run the new docker image¶
# '"device=0"' refers to the specific GPU(s) to run DeepCell-Spots on, and is not required
docker run --gpus '"device=0"' -it \
-p 8888:8888 \
$USER/deepcell-spots
It can also be helpful to mount the local copy of the repository and the notebooks to speed up local development.
# you can now start the docker image with the code mounted for easy editing
docker run --gpus '"device=0"' -it \
-p 8888:8888 \
-v $PWD/deepcell-spots/deepcell_spots:/usr/local/lib/python3.6/dist-packages/deepcell_spots \
-v $PWD/notebooks:/notebooks \
-v /$PWD:/data \
$USER/deepcell-spots
Copyright¶
Copyright © 2019-2022 The Van Valen Lab at the California Institute of Technology (Caltech), with support from the Shurl and Kay Curci Foundation, Google Research Cloud, the Paul Allen Family Foundation, & National Institutes of Health (NIH) under Grant U24CA224309-01. All rights reserved.
Trademarks¶
All other trademarks referenced herein are the property of their respective owners.