GrimAge#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
import numpy as np
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the complete source code of ``cls``.

    The text is obtained with ``inspect.getsource`` and written to standard
    output with a single ``print`` call.
    """
    print(inspect.getsource(cls))
# Display the source of the GrimAge model class shipped with pyaging.
print_entire_class(pya.models.GrimAge)
class GrimAge(pyagingModel):
    """GrimAge clock built from eight sub-models plus age and sex columns.

    For every sub-model attribute (``PACKYRS``, ``ADM``, ``B2M``,
    ``CystatinC``, ``GDF15``, ``Leptin``, ``PAI1``, ``TIMP1``) there is a
    matching ``features_<name>`` attribute holding the column selector that
    is applied to the input before the sub-model is called. All of them are
    initialised to ``None`` and have to be assigned before ``forward`` is used.
    """

    def __init__(self):
        super().__init__()
        names = (
            "PACKYRS",
            "ADM",
            "B2M",
            "CystatinC",
            "GDF15",
            "Leptin",
            "PAI1",
            "TIMP1",
        )
        # Sub-model slots first, then their column selectors, all empty.
        for name in names:
            setattr(self, name, None)
        for name in names:
            setattr(self, f"features_{name}", None)

    def forward(self, x):
        """Compute the clock output for ``x``.

        ``x`` is indexed as (rows, columns); the second-to-last column is
        read as the female indicator and the last column as age. Each
        sub-model is evaluated on its own column subset, the ten resulting
        columns are concatenated, passed through ``self.base_model`` and then
        through ``postprocess``.

        NOTE(review): ``self.base_model`` is not assigned in this class; it is
        presumably provided by ``pyagingModel`` or set by the caller.
        """
        female = x[:, -2].unsqueeze(1)
        age = x[:, -1].unsqueeze(1)

        # Evaluate the sub-models, each on the columns selected for it.
        evaluation_order = (
            "PACKYRS",
            "ADM",
            "B2M",
            "CystatinC",
            "GDF15",
            "Leptin",
            "PAI1",
            "TIMP1",
        )
        outputs = {}
        for name in evaluation_order:
            submodel = getattr(self, name)
            columns = getattr(self, f"features_{name}")
            outputs[name] = submodel(x[:, columns])

        # Column order expected by the base model.
        base_order = (
            "GDF15",
            "B2M",
            "CystatinC",
            "TIMP1",
            "ADM",
            "PAI1",
            "Leptin",
            "PACKYRS",
        )
        stacked = torch.concat(
            [outputs[name] for name in base_order] + [age, female],
            dim=1,
        )

        return self.postprocess(self.base_model(stacked))

    def preprocess(self, x):
        """Return ``x`` unchanged."""
        return x

    def postprocess(self, x):
        """
        Converts from a Cox parameter to age in units of years.

        The input is standardised with the Cox mean and standard deviation,
        then rescaled with the age standard deviation and mean.
        """
        cox_mean, cox_std = 13.20127, 1.086805
        age_mean, age_std = 59.63951, 9.049608

        standardized = (x - cox_mean) / cox_std
        return (standardized * age_std) + age_mean
[3]:
# Create the GrimAge model; its sub-models and column selectors start as None
# and are assigned in the cells that follow.
model = pya.models.GrimAge()
Define clock metadata#
[4]:
# Descriptive metadata for the clock, written entry by entry into
# model.metadata in the order listed here.
clock_metadata = {
    "clock_name": 'grimage',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2019,
    "approved_by_author": '⌛',
    "citation": "Lu, Ake T., et al. \"DNA methylation GrimAge strongly predicts lifespan and healthspan.\" Aging (albany NY) 11.2 (2019): 303.",
    "doi": "https://doi.org/10.18632/aging.101684",
    "research_only": True,
    "notes": None,
}
for field, value in clock_metadata.items():
    model.metadata[field] = value
Download clock dependencies#
[5]:
# Download the two supporting CSV files read by the later cells into the
# current directory: the coefficient table
# (ElasticNet_DNAmProtein_Vars_model4.csv) and the reference annotation
# (datMiniAnnotation3_Gold.csv).
# NOTE(review): a previous comment here referenced a PCClock Rdata file
# (https://yale.app.box.com/s/kq0b0a7lxckxjvaz7x5n4keaug7tewry); nothing in
# this cell downloads it, so it looks like a copy-paste leftover — confirm.
logger = pya.logger.Logger()
urls = [
    "https://pyaging.s3.amazonaws.com/supporting_files/ElasticNet_DNAmProtein_Vars_model4.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/datMiniAnnotation3_Gold.csv",
]
# Named `download_dir` rather than `dir` so the builtin dir() is not shadowed
# for the rest of the notebook.
download_dir = "."
for url in urls:
    pya.utils.download(url, download_dir, logger, indent_level=1)
|-----------> Downloading data to ./ElasticNet_DNAmProtein_Vars_model4.csv
|-----------> in progress: 100.0000%
|-----------> Downloading data to ./datMiniAnnotation3_Gold.csv
|-----------> in progress: 100.0000%
Load features#
From CSV#
[6]:
# Coefficient table; its 'var', 'Y.pred' and 'beta' columns are used below.
df = pd.read_csv('ElasticNet_DNAmProtein_Vars_model4.csv')

# np.unique returns the distinct 'var' entries in sorted order. The first two
# are skipped (presumably non-probe entries — TODO confirm against the CSV),
# and the 'female' and 'age' columns are appended at the end.
sorted_vars = np.unique(df['var']).tolist()
model.features = [*sorted_vars[2:], 'female', 'age']
Load weights into base model#
Linear model#
[7]:
# Names used to resolve the input column of every coefficient. Same layout as
# model.features, but ending in 'Female'/'Age' (capitalised), presumably to
# match the spelling used in df['var'].
all_features = np.unique(df['var']).tolist()[2:] + ['Female'] + ['Age']

# First-occurrence lookup: same result as all_features.index(name), without
# rescanning the list for every coefficient.
feature_position = {}
for position, name in enumerate(all_features):
    feature_position.setdefault(name, position)

# model attribute name -> label of that sub-model in df['Y.pred'].
surrogate_labels = {
    'PACKYRS': 'DNAmPACKYRS',
    'ADM': 'DNAmadm',
    'B2M': 'DNAmB2M',
    'CystatinC': 'DNAmCystatin_C',
    'GDF15': 'DNAmGDF_15',
    'Leptin': 'DNAmleptin',
    'PAI1': 'DNAmpai_1',
    'TIMP1': 'DNAmTIMP_1',
}

for attr, label in surrogate_labels.items():
    rows = df.loc[df['Y.pred'] == label]
    betas = rows['beta'].to_numpy()
    names = rows['var'].tolist()

    # Row 0 supplies the bias; every following row is one input weight.
    bias, weights = betas[0], betas[1:]
    weight_names = names[1:]

    # Fail loudly instead of silently dropping a name, which would leave the
    # weights and the selected columns misaligned.
    unknown = [name for name in weight_names if name not in feature_position]
    if unknown:
        raise ValueError(
            f"{label}: variables missing from all_features: {unknown}"
        )

    # Size the layer by the number of weights (bias row excluded) so that the
    # reported in_features matches the weight tensor that is loaded.
    submodel = pya.models.LinearModel(input_dim=len(weights))
    submodel.linear.weight.data = torch.tensor(weights).unsqueeze(0).float()
    # Kept as a 0-d tensor, exactly as before.
    submodel.linear.bias.data = torch.tensor(bias).float()

    setattr(model, attr, submodel)
    setattr(
        model,
        f'features_{attr}',
        torch.tensor([feature_position[name] for name in weight_names]).long(),
    )
Base linear model#
[8]:
# Weights of the final (base) linear model. Each entry multiplies one column
# of the tensor concatenated in GrimAge.forward, in that same order.
grimage_weights = [
0.000348777412272004,  # GDF15
4.59105969389204e-07,  # B2M
3.49816671441537e-06,  # CystatinC
0.000143661105491888,  # TIMP1
0.00790270975255529,  # ADM
2.55560382039825e-05,  # PAI1
-7.32066983502079e-06,  # Leptin
0.0303981613409142,  # PACKYRS
0.0300823182194075,  # Age
-0.228468475622039  # Female
]
[9]:
# Wrap the ten weights in a single-output linear layer with a zero bias and
# attach it to the clock as its base model.
base_model = pya.models.LinearModel(input_dim=len(grimage_weights))
weight_row = torch.tensor(grimage_weights).unsqueeze(0).float()
zero_bias = torch.tensor([0]).float()
base_model.linear.weight.data = weight_row
base_model.linear.bias.data = zero_bias
model.base_model = base_model
Load reference values#
[10]:
# One reference value per entry of model.features: the 'gold' column for every
# feature except the last two, followed by 1 and 65 for the trailing 'female'
# and 'age' entries.
reference_df = pd.read_csv('datMiniAnnotation3_Gold.csv', index_col=0)
leading_features = model.features[:-2]
gold_values = reference_df.loc[leading_features]['gold'].tolist()
model.reference_values = gold_values + [1, 65]  # 65 yo F
Load preprocess and postprocess objects#
[11]:
# No named preprocessing step and no preprocessing dependencies are recorded
# for this clock.
model.preprocess_name = None
model.preprocess_dependencies = None
[12]:
# Record the postprocessing step by name; it has no dependencies.
model.postprocess_name = 'cox_to_years'
model.postprocess_dependencies = None
Check all clock parameters#
[13]:
# Print the model's attributes, structure and weights for a visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'Lu, Ake T., et al. "DNA methylation GrimAge strongly predicts '
'lifespan and healthspan." Aging (albany NY) 11.2 (2019): 303.',
'clock_name': 'grimage',
'data_type': 'methylation',
'doi': 'https://doi.org/10.18632/aging.101684',
'notes': None,
'research_only': True,
'species': 'Homo sapiens',
'version': None,
'year': 2019}
reference_values: [0.422480272528644, 0.935109546405548, 0.0162959729801047, 0.502691053893618, 0.910839576323153, 0.710155040209873, 0.479121329208521, 0.905888314944049, 0.279992670790348, 0.117900358329507, 0.940987438881091, 0.761621096809391, 0.0721244934513398, 0.0851830172952001, 0.222068390557704, 0.103705423432714, 0.91516014793103, 0.748331163695382, 0.903928589429489, 0.524090323888757, 0.894685558616447, 0.647988638853782, 0.0581747999131966, 0.830024180811995, 0.209808614636345, 0.324296328128978, 0.118979846374564, 0.545425926051344, 0.92324324492159, 0.328288208993484]... [Total elements: 1032]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: 'cox_to_years'
postprocess_dependencies: None
features: ['cg00036119', 'cg00102512', 'cg00126959', 'cg00161556', 'cg00252095', 'cg00277397', 'cg00332048', 'cg00356999', 'cg00398048', 'cg00412842', 'cg00417288', 'cg00417823', 'cg00456299', 'cg00480331', 'cg00481951', 'cg00497251', 'cg00500789', 'cg00534468', 'cg00543335', 'cg00554421', 'cg00558975', 'cg00564555', 'cg00574958', 'cg00684178', 'cg00684824', 'cg00695391', 'cg00695799', 'cg00706683', 'cg00744433', 'cg00844308']... [Total elements: 1032]
base_model_features: None
features_PACKYRS: [1031, 799, 782, 584, 894, 609, 225, 268, 16, 907, 388, 202, 941, 665, 497, 405, 700, 61, 110, 392, 1001, 598, 200, 252, 297, 1, 287, 680, 27, 298]... [Tensor of shape torch.Size([173])]
features_ADM: [1031, 581, 823, 168, 152, 248, 649, 437, 922, 910, 594, 803, 449, 275, 163, 770, 790, 364, 908, 811, 474, 359, 420, 438, 215, 585, 327, 978, 133, 801]... [Tensor of shape torch.Size([187])]
features_B2M: [1031, 581, 866, 424, 1025, 764, 157, 712, 803, 977, 449, 635, 879, 787, 716, 810, 87, 648, 519, 48, 456, 768, 540, 888, 363, 35, 804, 434, 1015, 450]... [Tensor of shape torch.Size([92])]
features_CystatinC: [1031, 25, 660, 36, 225, 311, 59, 449, 982, 451, 89, 306, 475, 420, 914, 574, 358, 644, 916, 456, 14, 218, 868, 880, 432, 647, 1028, 931, 652, 98]... [Tensor of shape torch.Size([88])]
features_GDF15: [1031, 846, 885, 728, 974, 452, 449, 708, 544, 511, 539, 829, 729, 276, 831, 90, 362, 23, 1023, 186, 648, 286, 951, 962, 626, 189, 804, 532, 480, 67]... [Tensor of shape torch.Size([138])]
features_Leptin: [486, 581, 919, 775, 625, 444, 661, 213, 391, 603, 790, 908, 272, 334, 58, 420, 530, 786, 224, 381, 608, 91, 15, 1013, 683, 309, 1021, 455, 722, 549]... [Tensor of shape torch.Size([187])]
features_PAI1: [429, 330, 714, 421, 33, 636, 582, 12, 226, 558, 953, 509, 629, 766, 607, 824, 594, 774, 670, 789, 56, 792, 958, 571, 122, 991, 965, 191, 926, 970]... [Tensor of shape torch.Size([211])]
features_TIMP1: [1031, 764, 947, 883, 456, 912, 338, 434, 258, 476, 940, 739, 795, 473, 930, 956, 943, 534, 299, 702, 166, 800, 487, 326, 376, 514, 898, 936, 980, 423]... [Tensor of shape torch.Size([43])]
%==================================== Model Details ====================================%
Model Structure:
PACKYRS: LinearModel(
(linear): Linear(in_features=174, out_features=1, bias=True)
)
ADM: LinearModel(
(linear): Linear(in_features=188, out_features=1, bias=True)
)
B2M: LinearModel(
(linear): Linear(in_features=93, out_features=1, bias=True)
)
CystatinC: LinearModel(
(linear): Linear(in_features=89, out_features=1, bias=True)
)
GDF15: LinearModel(
(linear): Linear(in_features=139, out_features=1, bias=True)
)
Leptin: LinearModel(
(linear): Linear(in_features=188, out_features=1, bias=True)
)
PAI1: LinearModel(
(linear): Linear(in_features=212, out_features=1, bias=True)
)
TIMP1: LinearModel(
(linear): Linear(in_features=44, out_features=1, bias=True)
)
base_model: LinearModel(
(linear): Linear(in_features=10, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
PACKYRS.linear.weight: [0.14214389026165009, 14.697949409484863, 0.4599894881248474, 0.3822956085205078, 7.98643684387207, 1.6803100109100342, 1.0967497825622559, 16.303823471069336, 2.4014580249786377, 0.6859070062637329, 1.6773189306259155, 21.501564025878906, -2.096100330352783, 2.2927305698394775, 0.12879624962806702, 0.5189002752304077, 9.517245292663574, 1.3636956214904785, 1.7754020690917969, 2.1244921684265137, 3.7083091735839844, 3.0460753440856934, 1.3274203538894653, -0.6062915921211243, -1.1171971559524536, -13.956497192382812, 0.36579036712646484, -0.6485168933868408, 4.881432056427002, -24.69486427307129]... [Tensor of shape torch.Size([1, 173])]
PACKYRS.linear.bias: tensor(-31.9970)
ADM.linear.weight: [0.9436950087547302, 4.995108127593994, 5.08618688583374, 28.64090347290039, 6.462732315063477, -118.2184066772461, -2.752854585647583, -55.56800079345703, -0.6833848357200623, 0.8265380263328552, -8.586676597595215, 9.290790557861328, 281.4186706542969, 9.880138397216797, -1.110060691833496, -0.036802105605602264, 202.86256408691406, -114.29457092285156, 248.89732360839844, 5.330321311950684, 4.495867729187012, -4.7390031814575195, 133.71437072753906, -2.2405805587768555, -3.3119983673095703, 19.081783294677734, 2.63143253326416, -24.076101303100586, -8.62603759765625, -32.408607482910156]... [Tensor of shape torch.Size([1, 187])]
ADM.linear.bias: tensor(290.1693)
B2M.linear.weight: [10486.9150390625, 316962.65625, 33927.10546875, -160612.375, 75457.171875, 87985.7421875, 292882.90625, -23280.169921875, 43791.1796875, 302011.96875, 1916187.0, -58500.59375, -126869.8828125, 1506.065185546875, 1417.4544677734375, 44895.46875, 267379.5625, -924930.5625, 69711.0390625, 102607.9921875, 49483.265625, 65359.765625, -13569.71875, -13531.2998046875, -84787.703125, -129131.7265625, 412413.875, -67296.7265625, 28426.35546875, 89744.1875]... [Tensor of shape torch.Size([1, 92])]
B2M.linear.bias: tensor(1412953.3750)
CystatinC.linear.weight: [2589.667724609375, -15088.66015625, 36553.1171875, -14194.40234375, 177517.65625, 2264.057861328125, -3639.993896484375, 146477.484375, 11819.2109375, 438.5729064941406, -35147.8515625, -146515.59375, 3482.841796875, 46536.5078125, -19400.65625, 5430.861328125, -2332.132080078125, 28774.947265625, 37130.91796875, 4642.5302734375, 2118.04541015625, 8312.26953125, 1070.0323486328125, 53286.4609375, 6551.3515625, 1233.9503173828125, -2420.194580078125, 3439.35546875, 13203.8330078125, 35212.96875]... [Tensor of shape torch.Size([1, 88])]
CystatinC.linear.bias: tensor(1091528.5000)
GDF15.linear.weight: [9.351357460021973, 84.36457824707031, 143.69606018066406, 81.37864685058594, 29.040103912353516, -112.49447631835938, 11.24372673034668, -118.38355255126953, -1980.7607421875, 12.090482711791992, 381.68292236328125, 20.09428596496582, 79.332275390625, 93.16657257080078, -42.04895782470703, -56.21128463745117, 50.33696746826172, -4.690526962280273, 23.865774154663086, 110.17974090576172, -504.0067138671875, 454.8924255371094, 3.939922571182251, 10.757635116577148, -21.75938606262207, 191.6622314453125, 342.773193359375, -183.74392700195312, 297.9110107421875, 50.03202438354492]... [Tensor of shape torch.Size([1, 138])]
GDF15.linear.bias: tensor(1975.7983)
Leptin.linear.weight: [399.8157043457031, 3861.580810546875, 4281.87353515625, 1714.1119384765625, -25588.5390625, 25643.771484375, -3710.89697265625, 1028.21484375, 4559.81591796875, -9729.9609375, 28351.38671875, 60980.60546875, 894.3289184570312, 2256.300537109375, -997.83447265625, 16759.64453125, 637.7293090820312, -14579.912109375, -7351.828125, 37.74680709838867, 5151.525390625, -38035.12890625, -3955.989990234375, -2736.428466796875, 154.31683349609375, -5049.8408203125, 4.860612869262695, -59299.390625, -594.4842529296875, 3549.596435546875]... [Tensor of shape torch.Size([1, 187])]
Leptin.linear.bias: tensor(7210.0625)
PAI1.linear.weight: [62.57840347290039, 321.195556640625, -476.72576904296875, 6221.58544921875, 7843.1796875, -313.1407775878906, -3855.91650390625, 3294.65234375, 752.2315673828125, -47.931236267089844, 301.0967712402344, -321.5203552246094, 24.402000427246094, -568.0665893554688, -3272.8876953125, 1760.7930908203125, -6259.56103515625, -10119.8154296875, 2037.0191650390625, 2472.403564453125, -3049.89794921875, 4225.28271484375, 982.7288208007812, -152.87660217285156, -356.9750061035156, -4542.0302734375, 433.330810546875, -169.24246215820312, -2095.550537109375, 311.5205078125]... [Tensor of shape torch.Size([1, 211])]
PAI1.linear.bias: tensor(-1129.6017)
TIMP1.linear.weight: [127.23798370361328, 576.6142578125, -161.49070739746094, -186.5166778564453, 571.6375732421875, 174.81607055664062, 23.66378402709961, -228.55433654785156, 58.980308532714844, 469.25677490234375, 723.093994140625, 1335.6502685546875, 542.5457153320312, 2160.827880859375, 922.79736328125, 7743.75146484375, -1151.7979736328125, -43.27967834472656, 407.7511901855469, -5735.69287109375, -11.83304500579834, -665.969970703125, 340.971923828125, 207.72994995117188, -32.84348678588867, -1965.6759033203125, 253.16822814941406, 23.78565788269043, 3192.898681640625, 67.02117156982422]... [Tensor of shape torch.Size([1, 43])]
TIMP1.linear.bias: tensor(15844.5957)
base_model.linear.weight: tensor([[ 3.4878e-04, 4.5911e-07, 3.4982e-06, 1.4366e-04, 7.9027e-03,
2.5556e-05, -7.3207e-06, 3.0398e-02, 3.0082e-02, -2.2847e-01]])
base_model.linear.bias: tensor([0.])
%==================================== Model Details ====================================%
Basic test#
[14]:
# Smoke test: run the assembled model on seeded random input, ten rows wide
# enough to cover every entry of model.features.
torch.manual_seed(42)
# dtype=float already produced float64, so the extra .double() call is gone;
# the tensor is no longer named `input`, which shadowed the builtin.
random_input = torch.randn(10, len(model.features), dtype=torch.float64)
model.eval()
# Match the model's parameters to the float64 input.
model.to(torch.float64)
pred = model(random_input)
pred
[14]:
tensor([[ -8.6124],
[ 66.5366],
[ 49.7812],
[-51.8447],
[ 89.6747],
[-65.7946],
[ 89.6295],
[-60.4899],
[ 69.0053],
[-58.3423]], dtype=torch.float64, grad_fn=<AddBackward0>)
Save torch model#
[15]:
# Serialize the whole model object (not just a state_dict) to
# ../weights/<clock_name>.pt; assumes the ../weights directory already exists.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[16]:
def remove_folder(path):
    """Delete the directory tree at ``path`` and report the outcome.

    Any exception raised while deleting is caught and printed rather than
    propagated, so the caller can continue with the next item.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")
# Clean the working directory: every sub-folder is removed, and so is every
# regular file whose name does not end in .ipynb. The listing is taken once,
# before anything is deleted.
all_items = os.listdir('.')

for entry in all_items:
    if os.path.isdir(entry):
        # Folders are deleted recursively; failures are printed, not raised.
        remove_folder(entry)
    elif os.path.isfile(entry) and not entry.endswith('.ipynb'):
        os.remove(entry)
        print(f"Deleted file: {entry}")
Deleted file: coefficients.xlsx
Deleted file: datMiniAnnotation3_Gold.csv
Deleted file: ElasticNet_DNAmProtein_Vars_model4.csv