CpGPTGrimAge3#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
import numpy as np
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the full source code of ``cls`` as returned by ``inspect.getsource``."""
    print(inspect.getsource(cls))
# Show the source of the clock class provided by pyaging before configuring it.
print_entire_class(pya.models.CpGPTGrimAge3)
class CpGPTGrimAge3(pyagingModel):
    """Clock whose hooks standardise the inputs and map the output to years."""

    def __init__(self):
        super().__init__()

    def preprocess(self, x):
        """
        Scales an array based on the median and standard deviation.

        ``preprocess_dependencies[0]`` is subtracted from ``x`` and the result
        is divided by ``preprocess_dependencies[1]``.
        """
        deps = self.preprocess_dependencies
        # Build both tensors on the same device and with the same dtype as x.
        like_x = {"device": x.device, "dtype": x.dtype}
        center = torch.tensor(deps[0], **like_x)
        spread = torch.tensor(deps[1], **like_x)
        return (x - center) / spread

    def postprocess(self, x):
        """
        Converts from a Cox parameter to age in units of years.

        ``postprocess_dependencies`` is read as
        ``[cox_mean, cox_std, age_mean, age_std]``.
        """
        deps = self.postprocess_dependencies
        cox_mean, cox_std = deps[0], deps[1]
        age_mean, age_std = deps[2], deps[3]
        # Standardise with the Cox statistics, then rescale with the age ones.
        standardized = (x - cox_mean) / cox_std
        return (standardized * age_std) + age_mean
[3]:
# Create an empty clock instance; the following cells attach its metadata,
# features, weights and pre/post-processing settings.
model = pya.models.CpGPTGrimAge3()
Define clock metadata#
[4]:
# Descriptive metadata for the clock, written entry by entry into model.metadata.
clock_metadata = {
    "clock_name": 'cpgptgrimage3',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2025,
    "approved_by_author": '✅',
    "citation": "de Lima Camillo, Lucas Paulo, et al. \"CpGPT: a foundation model for DNA methylation.\" bioRxiv (2024): 2024-10.",
    "doi": "https://doi.org/10.1101/2024.10.24.619766",
    "research_only": True,
    "notes": None,
}
for field, value in clock_metadata.items():
    model.metadata[field] = value
Download clock dependencies#
[5]:
# Logger handed to the pyaging download helper.
logger = pya.logger.Logger()

# Supporting files for the clock: the coefficient table and the two
# input-scaler arrays.
urls = [
    "https://pyaging.s3.us-east-1.amazonaws.com/supporting_files/cpgpt_grimage3_dependencies/reliable/cpgpt_grimage3_weights_all_datasets_reliable.csv",
    "https://pyaging.s3.us-east-1.amazonaws.com/supporting_files/cpgpt_grimage3_dependencies/reliable/input_scaler_mean_all_datasets_reliable.npy",
    "https://pyaging.s3.us-east-1.amazonaws.com/supporting_files/cpgpt_grimage3_dependencies/reliable/input_scaler_scale_all_datasets_reliable.npy",
]

# Download into the current directory. The variable is called ``download_dir``
# rather than ``dir`` so the builtin ``dir()`` is not shadowed for later cells.
download_dir = "."
for url in urls:
    pya.utils.download(url, download_dir, logger, indent_level=1)
|-----------> Data found in ./cpgpt_grimage3_weights_all_datasets_reliable.csv
|-----------> Data found in ./input_scaler_mean_all_datasets_reliable.npy
|-----------> Data found in ./input_scaler_scale_all_datasets_reliable.npy
Load features#
From CSV#
[6]:
# Read the coefficient table; its 'feature' column defines the model's
# input features, in file order.
weights_csv = 'cpgpt_grimage3_weights_all_datasets_reliable.csv'
df = pd.read_csv(weights_csv)
feature_column = df['feature']
model.features = feature_column.tolist()
[7]:
# Preview the first rows of the coefficient table.
df.head()
[7]:
| | feature | coefficient |
|---|---|---|
| 0 | age | 0.845167 |
| 1 | grimage2timp1 | 0.318954 |
| 2 | grimage2packyrs | 0.385882 |
| 3 | grimage2logcrp | 0.404675 |
| 4 | grimage2adm | 0.180551 |
Load weights into base model#
Linear model#
[8]:
# One row of coefficients (shape 1 x n_features) for the linear layer.
coefficients = df['coefficient'].tolist()
weights = torch.tensor([coefficients])
# The bias of the linear layer is set to zero.
intercept = torch.zeros(1)
Linear model#
[9]:
# Build a linear model with one input per feature and load the coefficients.
n_features = len(model.features)
base_model = pya.models.LinearModel(input_dim=n_features)
linear_layer = base_model.linear
linear_layer.weight.data = weights.float()
linear_layer.bias.data = intercept.float()
# Attach the configured linear model to the clock.
model.base_model = base_model
Load reference values#
[11]:
# Load the two input-scaler arrays downloaded earlier; they are later used as
# the preprocess dependencies.
mean_path = 'input_scaler_mean_all_datasets_reliable.npy'
scale_path = 'input_scaler_scale_all_datasets_reliable.npy'
scale_mean = np.load(mean_path)
scale_std = np.load(scale_path)
# This clock stores no reference values.
model.reference_values = None
Load preprocess and postprocess objects#
[12]:
# Register the preprocessing step. The class's preprocess() subtracts the first
# dependency from the input and divides by the second, so the order matters.
model.preprocess_name = 'scale'
model.preprocess_dependencies = [scale_mean, scale_std]
[13]:
# Constants read by the class's postprocess(), in the order it indexes them:
# Cox mean, Cox std, age mean, age std.
cox_mean, cox_std = 0.54372919, 1.52036698
age_mean, age_std = 64.94560376271838, 11.920838151170104

model.postprocess_name = 'cox_to_years'
model.postprocess_dependencies = [cox_mean, cox_std, age_mean, age_std]
Check all clock parameters#
[14]:
# Print the assembled model's attributes, structure and weights for a visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '✅',
'citation': 'de Lima Camillo, Lucas Paulo, et al. "CpGPT: a foundation model '
'for DNA methylation." bioRxiv (2024): 2024-10.',
'clock_name': 'cpgptgrimage3',
'data_type': 'methylation',
'doi': 'https://doi.org/10.1101/2024.10.24.619766',
'notes': None,
'research_only': True,
'species': 'Homo sapiens',
'version': None,
'year': 2025}
reference_values: None
preprocess_name: 'scale'
preprocess_dependencies: [array([ 6.50000000e+01, 3.49212152e+04, 1.21734902e+01, 2.73993813e-01,
3.51222301e+02, 8.51761217e+03, 8.85501049e+02, -5.21484375e-01,
-2.49755859e-01, -2.58056641e-01, -7.65991211e-02, -1.37939453e-01,
2.53173828e-01, 1.33399963e-02, -4.01245117e-01, 1.90368652e-01,
-3.27301025e-02, 1.29127502e-02, -2.78564453e-01, 1.92277772e+04,
2.87399292e-02, -3.61083984e-01, -1.25961304e-02, -2.21801758e-01]),
array([1.52000000e+01, 2.39220372e+03, 1.43614564e+01, 7.58010775e-01,
3.24518463e+01, 5.54305012e+03, 2.81677880e+02, 1.54296875e-01,
1.58935547e-01, 3.54949951e-01, 1.87866211e-01, 4.43069458e-01,
2.97714233e-01, 3.31024170e-01, 3.81805420e-01, 1.43981934e-01,
1.65519714e-01, 2.10388184e-01, 1.30737305e-01, 3.97488350e+03,
3.68286133e-01, 7.37304688e-02, 3.28125000e-01, 4.43408966e-01])]
postprocess_name: 'cox_to_years'
postprocess_dependencies: [0.54372919, 1.52036698, 64.94560376271838, 11.920838151170104]
features: ['age',
'grimage2timp1',
'grimage2packyrs',
'grimage2logcrp',
'grimage2adm',
'grimage2leptin',
'grimage2gdf15',
'cpgpt_s100a9',
'cpgpt_tnfrsf13c',
'cpgpt_tgfb1',
'cpgpt_tek',
'cpgpt_ccl14',
'cpgpt_tnfsf15',
'cpgpt_lilrb2',
'cpgpt_tnf',
'cpgpt_chit1',
'cpgpt_postn',
'cpgpt_il34',
'cpgpt_pdcd1',
'grimage2pai1',
'cpgpt_cst3',
'cpgpt_cxcl2',
'cpgpt_gzma',
'cpgpt_il5']
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=24, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: tensor([[ 0.8452, 0.3190, 0.3859, 0.4047, 0.1806, -0.2435, 0.0367, -0.0855,
2.0569, -3.9567, 1.8897, -2.3948, -3.8697, 4.5260, 0.0498, 1.4570,
-1.7014, -1.5117, -1.5438, 0.1255, 5.3232, -0.4491, 0.6656, 0.7276]])
base_model.linear.bias: tensor([0.])
%==================================== Model Details ====================================%
Basic test#
[15]:
# Smoke test: run the assembled clock on seeded random data to confirm that
# the forward pass executes end to end.
torch.manual_seed(42)
# ``dtype=float`` already produces float64, so no extra ``.double()`` call is
# needed. The tensor is named ``x`` to avoid shadowing the builtin ``input``.
x = torch.randn(10, len(model.features), dtype=float)
model.eval()
# Cast the model to float64 so it matches the input dtype.
model.to(float)
pred = model(x)
pred
[15]:
tensor([[-425.9024],
[-312.5518],
[-563.8393],
[-260.0897],
[ -69.1534],
[ 30.5343],
[-252.0608],
[-445.2048],
[ -64.2164],
[ 103.0451]], dtype=torch.float64, grad_fn=<AddBackward0>)
Save torch model#
[16]:
# Serialize the whole model object to ../weights/, using the clock name from
# the metadata as the file name.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[17]:
def remove_folder(path):
    """Recursively delete the folder at ``path``, reporting the outcome.

    Any exception is caught and printed instead of being raised, so a failed
    deletion does not interrupt the caller.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")
# Sweep the current directory: every sub-folder is removed, and every regular
# file is deleted unless its name ends with .ipynb.
for entry in os.listdir('.'):
    if os.path.isdir(entry):
        remove_folder(entry)
        continue
    # Keep notebooks and anything that is not a regular file.
    if entry.endswith('.ipynb') or not os.path.isfile(entry):
        continue
    os.remove(entry)
    print(f"Deleted file: {entry}")
Deleted file: cpgpt_grimage3_weights_all_datasets_reliable.csv
Deleted file: input_scaler_mean_all_datasets_reliable.npy
Deleted file: input_scaler_scale_all_datasets_reliable.npy