GrimAge2B2M#
Index#
Let’s first import some packages:
[17]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
import numpy as np
Instantiate model class#
[18]:
def print_entire_class(cls):
    """Print the complete source code of ``cls`` to standard output.

    The text is retrieved with ``inspect.getsource``, so the source file of
    ``cls`` must be available to ``inspect``. Nothing is returned.
    """
    print(inspect.getsource(cls))
# Display the source of the GrimAge2B2M model class before instantiating it.
print_entire_class(pya.models.GrimAge2B2M)
class GrimAge2B2M(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[19]:
# Create the clock object; its metadata, features, weights and reference
# values are filled in by the cells that follow.
model = pya.models.GrimAge2B2M()
Define clock metadata#
[20]:
# Descriptive metadata for the clock, written in a single batch.
# NOTE(review): relies on model.metadata being a dict-like object that
# supports .update() — confirm against the pyagingModel base class.
model.metadata.update(
    {
        "clock_name": 'grimage2b2m',
        "data_type": 'methylation',
        "species": 'Homo sapiens',
        "year": 2022,
        "approved_by_author": '⌛',
        "citation": "Lu, Ake T., et al. \"DNA methylation GrimAge version 2.\" Aging (Albany NY) 14.23 (2022): 9484.",
        "doi": "https://doi.org/10.18632/aging.204434",
        "research_only": True,
        "notes": None,
    }
)
Download clock dependencies#
[21]:
# Download the supporting CSV files this notebook reads into the current
# working directory.
logger = pya.logger.Logger()
urls = [
    "https://pyaging.s3.amazonaws.com/supporting_files/grimage2_subcomponents.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/grimage2.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/datMiniAnnotation3_Gold.csv",
]
# Named ``output_dir`` rather than ``dir`` so the built-in ``dir()`` is not
# shadowed for the rest of the notebook session.
output_dir = "."
for url in urls:
    pya.utils.download(url, output_dir, logger, indent_level=1)
|-----------> Downloading data to ./grimage2_subcomponents.csv
|-----------> in progress: 100.0000%
|-----------> Downloading data to ./grimage2.csv
|-----------> in progress: 100.0000%
|-----------> Downloading data to ./datMiniAnnotation3_Gold.csv
|-----------> in progress: 100.0000%
Load features#
From CSV#
[22]:
# Load the sub-component coefficient table; the first CSV column is used as
# the row index.
df = pd.read_csv('grimage2_subcomponents.csv', index_col=0)
# List the distinct sub-component labels stored in the 'Y.pred' column.
df['Y.pred'].unique()
[22]:
array(['DNAmGDF_15', 'DNAmB2M', 'DNAmCystatin_C', 'DNAmTIMP_1', 'DNAmadm',
'DNAmpai_1', 'DNAmleptin', 'DNAmPACKYRS', 'DNAmlog.CRP',
'DNAmlog.A1C'], dtype=object)
[23]:
# Keep only the rows of the DNAmB2M sub-component. The explicit .copy() makes
# the result an independent frame, so the column assignments below do not
# write into a slice of the original table (avoids SettingWithCopyWarning).
df = df[df['Y.pred'] == 'DNAmB2M'].copy()
df['feature'] = df['var']
df['coefficient'] = df['beta']
# The first two rows are the intercept and the age term (see the df.head()
# preview); the remaining rows are CpG probes. .iloc makes it explicit that
# the slice is positional, since the index holds integer labels.
model.features = ['age'] + df['feature'].iloc[2:].tolist()
[24]:
# Preview the first rows of the filtered table.
df.head()
[24]:
| | Y.pred | var | beta | feature | coefficient |
|---|---|---|---|---|---|
| 140 | DNAmB2M | Intercept | 1.412953e+06 | Intercept | 1.412953e+06 |
| 141 | DNAmB2M | Age | 1.048692e+04 | Age | 1.048692e+04 |
| 142 | DNAmB2M | cg13947317 | 3.169626e+05 | cg13947317 | 3.169626e+05 |
| 143 | DNAmB2M | cg22510139 | 3.392710e+04 | cg22510139 | 3.392710e+04 |
| 144 | DNAmB2M | cg10116490 | -1.606124e+05 | cg10116490 | -1.606124e+05 |
Load weights into base model#
Linear model#
[25]:
# Row 0 of the table is the intercept; every later row (the age term followed
# by the CpG probes) is a weight, in the same order as model.features.
# Both lookups use .iloc so they are explicitly positional: the index holds
# integer labels (140, 141, ...), not positions.
weights = torch.tensor(df['coefficient'].iloc[1:].tolist()).unsqueeze(0)
intercept = torch.tensor([df['coefficient'].iloc[0]])
Linear model#
[26]:
# Build a linear model with one input per feature and overwrite its
# parameters with the coefficients extracted from the table.
base_model = pya.models.LinearModel(input_dim=len(model.features))
# Both tensors are cast to float32 before being stored.
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()
# Attach the populated linear model to the clock.
model.base_model = base_model
Load reference values#
[27]:
# Reference values, one per feature and in the same order as model.features:
# 65 is paired with the leading 'age' entry, and each CpG probe gets the
# 'gold' column value from the annotation table.
reference_df = pd.read_csv('datMiniAnnotation3_Gold.csv', index_col=0)
cpg_reference_values = reference_df.loc[model.features[1:], 'gold'].tolist()
model.reference_values = [65] + cpg_reference_values
Load preprocess and postprocess objects#
[28]:
# No preprocessing step is registered for this clock.
model.preprocess_name = None
model.preprocess_dependencies = None
[29]:
# No postprocessing step is registered for this clock.
# NOTE(review): the recorded "Model Details" output further down shows
# postprocess_name: 'cox_to_years', which does not match the None set here —
# the output may be stale; confirm which value is intended.
model.postprocess_name = None
model.postprocess_dependencies = None
Check all clock parameters#
[30]:
# Print the model's attributes, structure and parameters for a visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'Lu, Ake T., et al. "DNA methylation GrimAge version 2." Aging '
'(Albany NY) 14.23 (2022): 9484.',
'clock_name': 'grimage2b2m',
'data_type': 'methylation',
'doi': 'https://doi.org/10.18632/aging.204434',
'notes': None,
'research_only': True,
'species': 'Homo sapiens',
'version': None,
'year': 2022}
reference_values: [65, 0.945983138574711, 0.481093763685811, 0.27748709868544, 0.951672124097172, 0.468094302501278, 0.563678993728847, 0.948817639234683, 0.90429743695085, 0.0370947881546007, 0.0264846427378418, 0.933625606274069, 0.806897977714167, 0.616012873042032, 0.92495320469147, 0.905370920309603, 0.927267360866437, 0.949349810160961, 0.747531904904077, 0.949456556741532, 0.53813155435689, 0.71825377235117, 0.653576107061525, 0.747645234935016, 0.818024480938858, 0.622775695948335, 0.949645872518435, 0.572473769572986, 0.0359141899481796, 0.953339533069151]... [Total elements: 92]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: 'cox_to_years'
postprocess_dependencies: None
features: ['age', 'cg13947317', 'cg22510139', 'cg10116490', 'cg27396830', 'cg19414383', 'cg03345668', 'cg17719473', 'cg20569940', 'cg26156167', 'cg10578779', 'cg15631106', 'cg22704788', 'cg20240347', 'cg17803430', 'cg20800892', 'cg01909777', 'cg15849439', 'cg12605080', 'cg01163330', 'cg10753966', 'cg19501902', 'cg13287247', 'cg22930808', 'cg08122652', 'cg00959259', 'cg20576510', 'cg10202557', 'cg27134386', 'cg10584300']... [Total elements: 92]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=92, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [10486.9150390625, 316962.65625, 33927.10546875, -160612.375, 75457.171875, 87985.7421875, 292882.90625, -23280.169921875, 43791.1796875, 302011.96875, 1916187.0, -58500.59375, -126869.8828125, 1506.065185546875, 1417.4544677734375, 44895.46875, 267379.5625, -924930.5625, 69711.0390625, 102607.9921875, 49483.265625, 65359.765625, -13569.71875, -13531.2998046875, -84787.703125, -129131.7265625, 412413.875, -67296.7265625, 28426.35546875, 89744.1875]... [Tensor of shape torch.Size([1, 92])]
base_model.linear.bias: tensor([1412953.3750])
%==================================== Model Details ====================================%
Basic test#
[31]:
# Smoke test: run the assembled clock on a random batch of 10 samples.
# The seed is fixed so the printed predictions are reproducible.
torch.manual_seed(42)
# ``dtype=float`` already yields float64, so no extra ``.double()`` cast is
# needed. The tensor is named ``dummy_input`` rather than ``input`` so the
# built-in ``input()`` is not shadowed.
dummy_input = torch.randn(10, len(model.features), dtype=float)
model.eval()
# Cast the model parameters to float64 to match the input dtype.
model.to(float)
pred = model(dummy_input)
pred
[31]:
tensor([[ 4694786.7644],
[-1597495.9568],
[ 3826313.5179],
[ 5135406.7161],
[-1016183.9231],
[ 4471339.9569],
[-1899905.5050],
[ 7716734.0988],
[ 5404589.6838],
[ 2730438.6117]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[32]:
# Serialize the whole model object to ../weights/<clock_name>.pt.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[33]:
def remove_folder(path):
    """Recursively delete the folder at ``path`` and report the result.

    This is best-effort: any exception raised while deleting is caught and
    printed instead of propagated, so the caller can carry on with the
    remaining items. Nothing is returned.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")
# Snapshot the working directory up front, so the deletions below do not
# change what is being iterated over.
all_items = os.listdir('.')
for entry in all_items:
    if os.path.isdir(entry):
        # Folders are removed recursively, whatever they contain.
        remove_folder(entry)
        continue
    # Notebooks are kept; every other regular file is deleted.
    if os.path.isfile(entry) and not entry.endswith('.ipynb'):
        os.remove(entry)
        print(f"Deleted file: {entry}")
Deleted file: grimage2_subcomponents.csv
Deleted file: datMiniAnnotation3_Gold.csv
Deleted file: grimage2.csv