CpGPTPCGrimAge3#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
import numpy as np
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the complete source code of ``cls`` as reported by ``inspect.getsource``."""
    print(inspect.getsource(cls))
# Show the library's implementation of the clock class for reference.
print_entire_class(pya.models.CpGPTPCGrimAge3)
class CpGPTPCGrimAge3(pyagingModel):
    """
    Clock that standardizes its input, projects every column except the first
    through a fixed rotation matrix, standardizes the projected columns,
    applies ``base_model`` and rescales the result in ``postprocess``.
    """

    def __init__(self):
        super().__init__()
        # Non-trainable 30 x 29 projection matrix. ``torch.empty`` leaves it
        # uninitialized, so real values must be assigned before ``forward`` is used.
        self.rotation = nn.Parameter(torch.empty((30, 29)), requires_grad=False)

    def preprocess(self, x):
        """
        Scales an array based on the mean and standard deviation.

        Uses ``preprocess_dependencies[0]`` as the mean and
        ``preprocess_dependencies[1]`` as the standard deviation, converted to
        tensors on the same device and with the same dtype as ``x``.
        """
        mean = torch.tensor(self.preprocess_dependencies[0], device=x.device, dtype=x.dtype)
        std = torch.tensor(self.preprocess_dependencies[1], device=x.device, dtype=x.dtype)
        x = (x - mean) / std
        return x

    def forward(self, x):
        """
        Runs the full prediction pipeline on a 2-D input ``x`` and returns the
        output of ``postprocess``.
        """
        x = self.preprocess(x)
        # Column 0 bypasses the rotation; unsqueeze keeps it 2-D for the concat below.
        age = x[:, 0].unsqueeze(1)
        # All remaining columns are projected through the rotation matrix.
        proxies = x[:, 1:]
        PCs = torch.mm(proxies, self.rotation)  # Apply PCA rotation
        x = torch.concat([age, PCs], dim=1)
        # Scale
        # Only the projected columns are standardized here (in place), using
        # preprocess_dependencies[2] and [3]; column 0 was already scaled in preprocess().
        mean = torch.tensor(self.preprocess_dependencies[2], device=x.device, dtype=x.dtype)
        std = torch.tensor(self.preprocess_dependencies[3], device=x.device, dtype=x.dtype)
        x[:, 1:] = (x[:, 1:] - mean) / std
        x = self.base_model(x)
        x = self.postprocess(x)
        return x

    def postprocess(self, x):
        """
        Converts from a Cox parameter to age in units of years.

        Reads four values from ``postprocess_dependencies`` in this order:
        cox_mean, cox_std, age_mean, age_std.
        """
        cox_mean = self.postprocess_dependencies[0]
        cox_std = self.postprocess_dependencies[1]
        age_mean = self.postprocess_dependencies[2]
        age_std = self.postprocess_dependencies[3]
        # Normalize
        x = (x - cox_mean) / cox_std
        # Scale
        x = (x * age_std) + age_mean
        return x
[3]:
# Create the clock instance; its metadata, features, and weights are filled in by the cells below.
model = pya.models.CpGPTPCGrimAge3()
Define clock metadata#
[4]:
# Descriptive metadata stored with the clock, set in a single update call.
model.metadata.update(
    {
        "clock_name": "cpgptpcgrimage3",
        "data_type": "methylation",
        "species": "Homo sapiens",
        "year": 2025,
        "approved_by_author": "✅",
        "citation": (
            'de Lima Camillo, Lucas Paulo, et al. "CpGPT: a foundation model '
            'for DNA methylation." bioRxiv (2024): 2024-10.'
        ),
        "doi": "https://doi.org/10.1101/2024.10.24.619766",
        "research_only": True,
        "notes": None,
    }
)
Download clock dependencies#
[5]:
logger = pya.logger.Logger()

# Every supporting file lives under the same S3 prefix, so build the URLs from
# the prefix and the file names instead of repeating it ten times.
base_url = "https://pyaging.s3.amazonaws.com/supporting_files/cpgpt_grimage3_dependencies"
file_names = [
    "cpgpt_grimage3_weights.csv",
    "cpgpt_grimage3_weights_all_datasets.csv",
    "cpgpt_pcgrimage3_weights.csv",
    "input_scaler_mean.npy",
    "input_scaler_scale.npy",
    "input_scaler_mean_all_datasets.npy",
    "input_scaler_scale_all_datasets.npy",
    "pca_scaler_mean.npy",
    "pca_scaler_scale.npy",
    "cpgpt_pcgrimage3_pca_components.npy",
]
# NOTE(review): the three *_all_datasets files are not read by any cell visible
# in this notebook; they are still downloaded to keep the original behaviour.
urls = [f"{base_url}/{file_name}" for file_name in file_names]

# Download into the notebook's working directory. The variable is deliberately
# not called `dir`, which would shadow the builtin of the same name.
download_dir = "."
for url in urls:
    pya.utils.download(url, download_dir, logger, indent_level=1)
|-----------> Downloading data to ./cpgpt_grimage3_weights.csv
|-----------> in progress: 100.0000%
|-----------> Downloading data to ./cpgpt_grimage3_weights_all_datasets.csv
|-----------> in progress: 100.0000%
|-----------> Downloading data to ./cpgpt_pcgrimage3_weights.csv
|-----------> in progress: 100.0000%
|-----------> Downloading data to ./input_scaler_mean.npy
|-----------> in progress: 100.0000%%
|-----------> Downloading data to ./input_scaler_scale.npy
|-----------> in progress: 100.0000%%
|-----------> Downloading data to ./input_scaler_mean_all_datasets.npy
|-----------> in progress: 100.0000%%
|-----------> Downloading data to ./input_scaler_scale_all_datasets.npy
|-----------> in progress: 100.0000%%
|-----------> Downloading data to ./pca_scaler_mean.npy
|-----------> in progress: 100.0000%%
|-----------> Downloading data to ./pca_scaler_scale.npy
|-----------> in progress: 100.0000%%
|-----------> Downloading data to ./cpgpt_pcgrimage3_pca_components.npy
|-----------> in progress: 100.0000%
Load features#
From CSV#
[6]:
# Coefficient table of the final linear model (one row for age, then one per PCA component).
df = pd.read_csv('cpgpt_pcgrimage3_weights.csv')
# The model's input feature names are taken from a different file: the weights
# table of the non-PC clock, whose 'feature' column lists age plus the proxy inputs.
model.features = pd.read_csv('cpgpt_grimage3_weights.csv')['feature'].tolist()
[7]:
# Preview the first rows of the coefficient table.
df.head()
[7]:
| feature | coefficient | |
|---|---|---|
| 0 | age | 0.672689 |
| 1 | pca_component_1 | 0.759243 |
| 2 | pca_component_2 | -0.686806 |
| 3 | pca_component_3 | 0.233157 |
| 4 | pca_component_4 | -0.138548 |
Load weights into base model#
[8]:
# Load the PCA components and transpose them so they can be used as the
# right-hand operand of the rotation in the model's forward pass
# (the model details printed further down report a 30 x 29 rotation tensor).
pc_matrix = np.load('cpgpt_pcgrimage3_pca_components.npy').T
PCA components#
[9]:
# Replace the uninitialized rotation parameter with the PCA components (cast to float32).
model.rotation.data = torch.tensor(pc_matrix).float()
Linear model#
[10]:
# Coefficients as a single-row weight matrix of shape (1, number of coefficients).
weights = torch.tensor(df['coefficient'].tolist()).unsqueeze(0)
# The bias of the linear layer is set to zero.
intercept = torch.tensor([0.0])
Base model#
[11]:
# Size the linear layer from the coefficient vector, not from model.features.
# model.features lists the raw inputs (age + proxies), but the tensor that
# reaches base_model holds age + PCA components, which has one column fewer.
# Using len(model.features) left the layer reporting a wrong in_features value.
base_model = pya.models.LinearModel(input_dim=weights.shape[1])
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()
model.base_model = base_model
Load reference values#
[12]:
# Load the four standardization arrays in one pass: mean/std for the raw
# inputs, then mean/std for the PCA-projected columns.
scale_mean, scale_std, pca_scale_mean, pca_scale_std = map(
    np.load,
    (
        'input_scaler_mean.npy',
        'input_scaler_scale.npy',
        'pca_scaler_mean.npy',
        'pca_scaler_scale.npy',
    ),
)
# No reference values are stored for this clock.
model.reference_values = None
Load preprocess and postprocess objects#
[13]:
# Label for the preprocessing step.
model.preprocess_name = 'scale'
# Order matters: the model class reads indices 0 and 1 (input mean/std) in
# preprocess() and indices 2 and 3 (PCA mean/std) in forward().
model.preprocess_dependencies = [scale_mean, scale_std, pca_scale_mean, pca_scale_std]
[14]:
# Label for the postprocessing step.
model.postprocess_name = 'cox_to_years'
# Order matters: postprocess() reads these positionally as
# cox_mean, cox_std, age_mean, age_std.
model.postprocess_dependencies = [
4.66184408e-17,
1.70884158624939,
58.8234007654456,
13.091231557630831
]
Check all clock parameters#
[15]:
# Print the assembled attributes, structure, and weights for a visual check before saving.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '✅',
'citation': 'de Lima Camillo, Lucas Paulo, et al. "CpGPT: a foundation model '
'for DNA methylation." bioRxiv (2024): 2024-10.',
'clock_name': 'cpgptpcgrimage3',
'data_type': 'methylation',
'doi': 'https://doi.org/10.1101/2024.10.24.619766',
'notes': None,
'research_only': True,
'species': 'Homo sapiens',
'version': None,
'year': 2025}
reference_values: None
preprocess_name: 'scale'
preprocess_dependencies: [array([ 5.88234008e+01, 3.37892988e+04, 2.13030677e+01, 6.29543290e-02,
1.71182803e+06, 3.48032879e+02, 8.07863072e+03, 8.40183045e+02,
-4.62783972e-01, -2.81553566e-01, 2.08716775e-02, -2.03755490e-01,
-3.31462601e-01, -1.99447326e-01, -2.86641682e-01, 9.69636186e-02,
9.23084436e-02, -1.77190600e-01, -5.19144939e-01, 2.77395512e-01,
-5.05034895e-02, -1.69724841e-01, -2.09474468e-01, -1.81162437e-01,
2.91470602e-01, -2.46817951e-01, -1.38063048e-01, -2.31378318e-01,
-1.60070027e-01, -4.51811847e-01, -2.54749226e-01]),
array([1.30894420e+01, 1.96256186e+03, 1.18901350e+01, 4.18591628e-01,
1.73166548e+05, 2.79560288e+01, 3.95321057e+03, 1.97280471e+02,
7.76691367e-02, 1.88703875e-01, 8.32203637e-02, 9.29975592e-02,
9.75873526e-02, 2.24521829e-01, 2.24054312e-01, 2.35572764e-01,
1.57862263e-01, 9.37500577e-02, 1.60579409e-01, 6.03601420e-02,
1.31061777e-01, 2.68979974e-01, 2.70291051e-01, 3.07832239e-01,
1.89457709e-01, 1.12294499e-01, 1.04148286e-01, 1.73197185e-01,
2.55819219e-01, 2.33352986e-01, 2.00723138e-01]),
array([ 3.84844941e-17, 1.28382816e-17, -1.76943691e-17, -2.32485193e-17,
-2.96524848e-17, -1.89690921e-18, -3.21715803e-18, -1.40376024e-17,
1.26106525e-17, -1.44544482e-18, -2.46598198e-19, -1.61237283e-18,
-7.10961573e-18, 4.55637593e-18, 3.66672551e-18, -1.15948576e-18,
7.37660570e-18, 5.32557262e-19, -7.01856409e-20, 1.61711510e-19,
-3.30726121e-18, -5.22432509e-18, -5.37341030e-18, 4.27515914e-18,
-9.05537036e-19, 6.74588339e-19, -4.72330394e-19, 1.02907325e-18,
-7.48804912e-19]),
array([4.21748493, 2.11495255, 1.91135404, 1.29849805, 0.89811783,
0.68972216, 0.68116282, 0.44284398, 0.40801214, 0.35556359,
0.30007137, 0.17650593, 0.1483157 , 0.08898671, 0.06836764,
0.06232975, 0.0343495 , 0.03288555, 0.02752429, 0.02667721,
0.02145187, 0.01956034, 0.01423029, 0.01370304, 0.01111563,
0.01072517, 0.00881539, 0.0079158 , 0.00748125])]
postprocess_name: 'cox_to_years'
postprocess_dependencies: [4.66184408e-17, 1.70884158624939, 58.8234007654456, 13.091231557630831]
features: ['age', 'grimage2timp1', 'grimage2packyrs', 'grimage2logcrp', 'grimage2b2m', 'grimage2adm', 'grimage2leptin', 'grimage2gdf15', 'cpgpt_s100a9', 'cpgpt_il17ra', 'cpgpt_nampt', 'cpgpt_tnfrsf13c', 'cpgpt_faslg', 'cpgpt_tgfb1', 'cpgpt_ccl19', 'cpgpt_cst3', 'cpgpt_il6r', 'cpgpt_snap25', 'cpgpt_sdc1', 'cpgpt_cd200', 'cpgpt_tek', 'cpgpt_ccl14', 'cpgpt_il5', 'cpgpt_timp1', 'cpgpt_tnfsf15', 'cpgpt_ctf1', 'cpgpt_il20', 'cpgpt_pdgfa', 'cpgpt_calb2', 'cpgpt_il1rn']... [Total elements: 31]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=31, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
rotation: [0.14989051222801208, -0.2575840353965759, 0.2105623334646225, -0.11764489859342575, -0.07999813556671143, 0.2998059093952179, 0.03677944839000702, 0.06442702561616898, -0.21868155896663666, -0.0400826632976532, 0.8137347102165222, 0.1822977364063263, 0.06282850354909897, 0.007936620153486729, 0.02314574271440506, -0.030949102714657784, 0.019456684589385986, -0.012623480521142483, 0.0012334324419498444, -0.017306583002209663, 0.002008328679949045, -0.0023207056801766157, 0.0028594015166163445, -0.004331233445554972, -0.001271342160180211, 0.0009904390899464488, -0.0020116211380809546, -0.0018152670236304402, -7.565721170976758e-05, 0.08682457357645035]... [Tensor of shape torch.Size([30, 29])]
base_model.linear.weight: tensor([[ 0.6727, 0.7592, -0.6868, 0.2332, -0.1385, 0.3705, 0.0353, 0.0611,
-0.0175, -0.0420, -0.0281, 0.0249, 0.0433, 0.0515, 0.0116, -0.0187,
0.0360, -0.0367, -0.0120, -0.0274, 0.0219, -0.0447, 0.0598, -0.0433,
-0.0298, -0.0062, -0.0176, -0.0209, 0.0035, 0.0329]])
base_model.linear.bias: tensor([0.])
%==================================== Model Details ====================================%
Basic test#
[16]:
# Smoke test: push seeded random data through the full pipeline to confirm the
# forward pass runs end to end. The inputs are random noise, so the predicted
# values are not meaningful ages.
torch.manual_seed(42)
# Named `x` rather than `input` to avoid shadowing the builtin. dtype=float
# already produces float64, so no extra .double() cast is needed.
x = torch.randn(10, len(model.features), dtype=float)
model.eval()
model.to(float)
pred = model(x)
pred
[16]:
tensor([[-365.0296],
[-418.9304],
[-114.4988],
[ -79.3617],
[ 576.5590],
[ 419.1323],
[-391.7414],
[ 237.9895],
[-340.7663],
[-172.7751]], dtype=torch.float64, grad_fn=<AddBackward0>)
Save torch model#
[17]:
# Serialize the whole model object (not just a state dict) under the clock's name.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[18]:
def remove_folder(path):
    """Delete the folder at ``path`` together with all of its contents.

    The outcome is reported on stdout. Filesystem errors (missing folder,
    permission problems, ...) are printed rather than raised so that a cleanup
    loop calling this function can carry on with the remaining entries.
    """
    try:
        shutil.rmtree(path)
    except OSError as e:
        # Only filesystem failures are expected here; anything else is a
        # programming error and should surface instead of being swallowed.
        print(f"Error deleting folder {path}: {e}")
    else:
        print(f"Deleted folder: {path}")
# Walk every entry of the current directory and clean up the downloads:
# folders are removed recursively, and regular files are deleted unless they
# are notebooks (.ipynb), which are kept.
for entry in os.listdir('.'):
    if os.path.isdir(entry):
        remove_folder(entry)
    elif os.path.isfile(entry) and not entry.endswith('.ipynb'):
        os.remove(entry)
        print(f"Deleted file: {entry}")
Deleted file: input_scaler_mean_all_datasets.npy
Deleted file: cpgpt_pcgrimage3_pca_components.npy
Deleted file: cpgpt_pcgrimage3_weights.csv
Deleted file: input_scaler_scale_all_datasets.npy
Deleted file: input_scaler_scale.npy
Deleted file: pca_scaler_mean.npy
Deleted file: cpgpt_grimage3_weights_all_datasets.csv
Deleted file: pca_scaler_scale.npy
Deleted file: cpgpt_grimage3_weights.csv
Deleted file: input_scaler_mean.npy