Knight
Index
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
Instantiate model class
[2]:
def print_entire_class(cls):
    """Print the full source code of ``cls`` to standard output.

    The text is retrieved with ``inspect.getsource``, so ``cls`` must be
    defined in a source file that the interpreter can locate.

    Args:
        cls: The class whose definition should be displayed.

    Raises:
        OSError: If the source code cannot be retrieved.
        TypeError: If ``cls`` is a built-in class.
    """
    print(inspect.getsource(cls))
print_entire_class(pya.models.Knight)
class Knight(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[3]:
# Create the Knight model object that the following cells populate with
# metadata, features, weights and reference values.
model = pya.models.Knight()
Define clock metadata
[4]:
# Descriptive metadata stored on the model object.
# clock_name is also used later to build the file name of the saved weights.
model.metadata["clock_name"] = 'knight'
model.metadata["data_type"] = 'methylation'
model.metadata["species"] = 'Homo sapiens'
model.metadata["year"] = 2016
model.metadata["approved_by_author"] = '✅'
# Bibliographic reference and DOI of the publication describing the clock.
model.metadata["citation"] = "Knight, Anna K., et al. \"An epigenetic clock for gestational age at birth based on blood methylation data.\" Genome biology 17.1 (2016): 1-11."
model.metadata["doi"] = "https://doi.org/10.1186/s13059-016-1068-z"
# These two fields are deliberately left unset for this clock.
model.metadata["research_only"] = None
model.metadata["notes"] = None
Download clock dependencies
Download directly with curl
[5]:
# Download a supplementary CSV of the article identified by the DOI above
# (10.1186/s13059-016-1068-z) and store it locally as coefficients.csv.
supplementary_url = "https://static-content.springer.com/esm/art%3A10.1186%2Fs13059-016-1068-z/MediaObjects/13059_2016_1068_MOESM3_ESM.csv"
supplementary_file_name = "coefficients.csv"
# os.system returns the exit status of the shell command; as the last
# expression of the cell it is what the notebook displays as output.
# NOTE(review): curl without --fail exits 0 even when the server answers with
# an HTTP error page, so a 0 here does not by itself guarantee a valid CSV.
os.system(f"curl -o {supplementary_file_name} {supplementary_url}")
[5]:
0
[6]:
# Download the table of reference feature values and store it locally as
# reference_feature_values.csv. The URL points at the supplementary material
# of a different article (gb-2013-14-10-r115) than the coefficient file.
# This cell rebinds supplementary_url and supplementary_file_name.
supplementary_url = "https://static-content.springer.com/esm/art%3A10.1186%2Fgb-2013-14-10-r115/MediaObjects/13059_2013_3156_MOESM22_ESM.csv"
supplementary_file_name = "reference_feature_values.csv"
# The displayed value is the exit status returned by os.system.
os.system(f"curl -o {supplementary_file_name} {supplementary_url}")
[6]:
0
Load features
From CSV file
[7]:
# Load the downloaded coefficient table.
df = pd.read_csv('coefficients.csv')
# Copy the source columns to the generic 'feature' / 'coefficient' names.
df['feature'] = df['CpGmarker']
df['coefficient'] = df['CoefficientTraining']
# Row 0 is skipped: its coefficient is used as the intercept when the weights
# are loaded, so only rows 1.. are treated as model features.
model.features = features = df['feature'][1:].tolist()
Load weights into base model
[8]:
# Coefficients from row 1 onward, with a leading dimension added by
# unsqueeze(0) so the tensor has shape (1, number_of_features).
weights = torch.tensor(df['coefficient'][1:].tolist()).unsqueeze(0)
# The coefficient in row 0 is kept separately as a one-element tensor; it is
# assigned to the linear layer's bias below.
intercept = torch.tensor([df['coefficient'][0]])
Linear model
[9]:
# Build a linear model with one input per feature.
base_model = pya.models.LinearModel(input_dim=len(model.features))
# Overwrite the layer's parameters with the published coefficients,
# cast to 32-bit floats.
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()
# Attach the configured linear model to the clock object.
model.base_model = base_model
Load reference values
From CSV file
[10]:
# Read the reference table, using its first column as the row index.
reference_feature_values_df = pd.read_csv('reference_feature_values.csv', index_col=0)
# Keep only the rows for the clock's features, in the same order as
# model.features, so reference values line up with the weights.
reference_feature_values_df = reference_feature_values_df.loc[model.features]
# Store the 'goldstandard2' column as the per-feature reference values.
# NOTE(review): presumably used by pyaging as fallback values for features
# missing from the input data; confirm against the pyaging documentation.
model.reference_values = reference_feature_values_df['goldstandard2'].tolist()
Load preprocess and postprocess objects
[11]:
# No preprocessing step is registered for this clock.
model.preprocess_name = None
model.preprocess_dependencies = None
[12]:
# No postprocessing step is registered for this clock.
model.postprocess_name = None
model.postprocess_dependencies = None
Check all clock parameters
[13]:
# Print the assembled model's attributes, structure and parameters so they
# can be checked by eye before saving.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '✅',
'citation': 'Knight, Anna K., et al. "An epigenetic clock for gestational age '
'at birth based on blood methylation data." Genome biology 17.1 '
'(2016): 1-11.',
'clock_name': 'knight',
'data_type': 'methylation',
'doi': 'https://doi.org/10.1186/s13059-016-1068-z',
'notes': None,
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2016}
reference_values: [0.470988652, 0.4574588, 0.471526503, 0.060478447, 0.576057497, 0.578012018, 0.73858511, 0.032705227, 0.129677634, 0.847421632, 0.040442058, 0.123711132, 0.042129823, 0.058492584, 0.47490999, 0.567201154, 0.028050524, 0.693295627, 0.362458936, 0.357454375, 0.751423923, 0.076149441, 0.084830058, 0.047438755, 0.123293314, 0.851965226, 0.354742659, 0.83293121, 0.056902106, 0.464347542]... [Total elements: 148]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg00022866', 'cg00466249', 'cg00546897', 'cg00575744', 'cg00689340', 'cg01056568', 'cg01184449', 'cg01348086', 'cg02100629', 'cg02813863', 'cg02941816', 'cg03086857', 'cg03427564', 'cg03506489', 'cg03923277', 'cg04001333', 'cg04323187', 'cg05294455', 'cg05365729', 'cg05512756', 'cg05564251', 'cg05898102', 'cg06049972', 'cg06311778', 'cg06471905', 'cg07017706', 'cg07141002', 'cg07197059', 'cg07664183', 'cg07679836']... [Total elements: 148]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=148, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [0.6935521960258484, -0.8255749344825745, -1.3585155010223389, -3.8292856216430664, 0.9603426456451416, 0.20516617596149445, 0.782781720161438, -1.3157227039337158, 0.5592088103294373, -1.0659143924713135, 1.355500340461731, 1.0993326902389526, -7.938111782073975, 6.338893413543701, -0.33696240186691284, -0.09361063688993454, 1.9930349588394165, 2.1887292861938477, 0.7005508542060852, 0.26436084508895874, -0.8554026484489441, -1.3309569358825684, 1.6402506828308105, -4.172684192657471, 0.1557571291923523, -4.798856258392334, -0.166761115193367, 0.09205283224582672, -3.1910228729248047, 0.048825453966856]... [Tensor of shape torch.Size([1, 148])]
base_model.linear.bias: tensor([41.7258])
%==================================== Model Details ====================================%
Basic test
[14]:
# Smoke test: run the assembled model on a seeded random batch to confirm the
# forward pass executes. The inputs are random normal values, not real
# methylation data, so the predictions are not meaningful ages.
torch.manual_seed(42)  # fixed seed so the displayed predictions are reproducible
# Named ``x`` rather than ``input`` so the Python builtin is not shadowed.
x = torch.randn(10, len(model.features), dtype=float)
model.eval()
model.to(float)  # dtype=float is float64 in torch; cast the model to match
pred = model(x)
pred
[14]:
tensor([[ 44.4538],
[102.4336],
[ 14.7963],
[137.9892],
[102.6453],
[ 56.0923],
[ 73.7889],
[ 26.2043],
[-12.2354],
[ 91.9445]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model
[15]:
# Serialize the whole model object to ../weights/<clock_name>.pt
# (here: ../weights/knight.pt).
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory
[16]:
# Function to remove a folder and all its contents
def remove_folder(path):
    """Recursively delete the directory at ``path``, reporting the outcome.

    Deletion is best effort: a filesystem error is printed instead of being
    raised, so that one failing folder does not abort the surrounding cleanup.

    Args:
        path: Path of the directory to delete.
    """
    try:
        shutil.rmtree(path)
        print(f"Deleted folder: {path}")
    except OSError as e:
        # shutil.rmtree signals filesystem problems (missing path, permission
        # denied, not a directory, ...) with OSError and its subclasses.
        print(f"Error deleting folder {path}: {e}")
# WARNING: this cleanup deletes everything in the current working directory
# except files ending in .ipynb. Run it only from the notebook's own folder.
# Get a list of all files and folders in the current directory
all_items = os.listdir('.')

# Loop through the items
for item in all_items:
    # Check if it's a file and does not end with .ipynb
    if os.path.isfile(item) and not item.endswith('.ipynb'):
        os.remove(item)
        print(f"Deleted file: {item}")
    # Check if it's a folder
    elif os.path.isdir(item):
        remove_folder(item)
Deleted file: coefficients.csv
Deleted file: reference_feature_values.csv