DNAmFitAgeGripF#
Index#
Let’s first import some packages:
[18]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import numpy as np
import pyaging as pya
Instantiate model class#
[19]:
def print_entire_class(cls):
    """Print the full source code of ``cls``.

    Args:
        cls: Class (or any other object accepted by ``inspect.getsource``)
            whose source should be displayed.

    Raises:
        OSError: If the source code cannot be retrieved.
        TypeError: If ``cls`` is a built-in module, class, or function.
    """
    source = inspect.getsource(cls)
    print(source)
# Show the source of the clock class provided by pyaging before instantiating it.
print_entire_class(pya.models.DNAmFitAgeGripF)
class DNAmFitAgeGripF(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[20]:
# Create the clock model instance; the cells below fill in its attributes.
model = pya.models.DNAmFitAgeGripF()
Define clock metadata#
[21]:
# Descriptive metadata for the clock: identifiers, publication details, and usage flags.
model.metadata.update(
    {
        "clock_name": 'dnamfitagegripf',
        "data_type": 'methylation',
        "species": 'Homo sapiens',
        "year": 2023,
        "approved_by_author": '⌛',
        "citation": "McGreevy, Kristen M., et al. \"DNAmFitAge: biological age indicator incorporating physical fitness.\" Aging (Albany NY) 15.10 (2023): 3904.",
        "doi": 'https://doi.org/10.18632/aging.204538',
        "research_only": None,
        "notes": None,
    }
)
Download clock dependencies#
Download GitHub repository#
[22]:
# Clone the upstream DNAmFitAge repository into the current working directory.
github_url = "https://github.com/kristenmcgreevy/DNAmFitAge.git"
# Folder name of the clone: last URL segment with everything after the first "." dropped.
repo_basename = github_url.split('/')[-1]
github_folder_name = repo_basename.split('.')[0]
clone_command = f"git clone {github_url}"
# The exit status is the cell's displayed value (0 on success).
os.system(clone_command)
[22]:
0
Download from R package#
[23]:
%%writefile download.r
# Set the default CRAN repository.
options(repos = c(CRAN = "https://cloud.r-project.org/"))
# jsonlite provides write_json(), used below for the CpG list.
library(jsonlite)
# Load the model bundle shipped in the cloned DNAmFitAge repository.
DNAmFitnessModels <- readRDS("DNAmFitAge/DNAmFitnessModelsandFitAge_Oct2022.rds")
# Export the AllCpGs element of the bundle as JSON.
AllCpGs <- DNAmFitnessModels$AllCpGs
write_json(AllCpGs, "AllCpGs.json")
# Export the male and female median tables as CSV.
MaleMedians <- DNAmFitnessModels$Male_Medians_All
write.csv(MaleMedians, "MaleMedians.csv")
FemaleMedians <- DNAmFitnessModels$Female_Medians_All
write.csv(FemaleMedians, "FemaleMedians.csv")
# Pull out each model table from the bundle ...
Gait_noAge_Females <- DNAmFitnessModels$Gait_noAge_Females
Gait_noAge_Males <- DNAmFitnessModels$Gait_noAge_Males
Grip_noAge_Females <- DNAmFitnessModels$Grip_noAge_Females
Grip_noAge_Males <- DNAmFitnessModels$Grip_noAge_Males
VO2maxModel <- DNAmFitnessModels$VO2maxModel
# ... and write each one to its own CSV file in the working directory.
write.csv(Gait_noAge_Females, "Gait_noAge_Females.csv")
write.csv(Gait_noAge_Males, "Gait_noAge_Males.csv")
write.csv(Grip_noAge_Females, "Grip_noAge_Females.csv")
write.csv(Grip_noAge_Males, "Grip_noAge_Males.csv")
write.csv(VO2maxModel, "VO2maxModel.csv")
Writing download.r
[24]:
# Run the R script written by the previous cell; the displayed value is os.system's return status.
os.system("Rscript download.r")
[24]:
0
Load features#
From CSV file#
[25]:
# Read the female grip-strength coefficient table exported by the R script.
df = pd.read_csv('Grip_noAge_Females.csv', index_col=0)
# Mirror the term/estimate columns under the names used by the following cells.
df['feature'], df['coefficient'] = df['term'], df['estimate']
# The first row is the intercept, so the feature list starts at the second row.
model.features = df['feature'].iloc[1:].tolist()
df.head()
[25]:
| term | step | estimate | lambda | dev.ratio | feature | coefficient | |
|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | 1 | 53.820647 | 0.3 | 0.395663 | (Intercept) | 53.820647 |
| 2 | cg00094518 | 1 | -2.345767 | 0.3 | 0.395663 | cg00094518 | -2.345767 |
| 3 | cg01854776 | 1 | -2.941467 | 0.3 | 0.395663 | cg01854776 | -2.941467 |
| 4 | cg02001410 | 1 | 2.813212 | 0.3 | 0.395663 | cg02001410 | 2.813212 |
| 5 | cg02036261 | 1 | -0.942776 | 0.3 | 0.395663 | cg02036261 | -0.942776 |
Load weights into base model#
[27]:
# Split the coefficient column into the intercept (first row) and the per-feature weights.
coefficients = df['coefficient']
intercept = torch.tensor([coefficients.iloc[0]])
# unsqueeze(0) adds a leading dimension so the weights form a single row.
weights = torch.tensor(coefficients.iloc[1:].tolist()).unsqueeze(0)
Linear model#
[28]:
# Build a linear model sized to the feature list and load the fitted parameters into it.
n_features = len(model.features)
base_model = pya.models.LinearModel(input_dim=n_features)
linear_layer = base_model.linear
linear_layer.weight.data = weights.float()
linear_layer.bias.data = intercept.float()
model.base_model = base_model
Load reference values#
From CSV file#
[29]:
# Read the female median table exported by the R script.
reference_df = pd.read_csv('FemaleMedians.csv', index_col=0)
# Take the row labelled 1, restricted to (and ordered by) the model's features.
median_row = reference_df.loc[1, model.features]
model.reference_values = list(median_row)
Load preprocess and postprocess objects#
[30]:
# No named preprocessing step or preprocessing dependencies are set for this clock.
model.preprocess_name = None
model.preprocess_dependencies = None
[31]:
# No named postprocessing step or postprocessing dependencies are set for this clock.
model.postprocess_name = None
model.postprocess_dependencies = None
Check all clock parameters#
[32]:
# Print the model's attributes, structure, and weights for a final review before saving.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'McGreevy, Kristen M., et al. "DNAmFitAge: biological age '
'indicator incorporating physical fitness." Aging (Albany NY) '
'15.10 (2023): 3904.',
'clock_name': 'dnamfitagegripf',
'data_type': 'methylation',
'doi': 'https://doi.org/10.18632/aging.204538',
'notes': 'Reference values is mean between male and female training medians',
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2023}
reference_values: [0.115253497078786, 0.968105961834816, 0.0760619502414265, 0.0812081375780079, 0.674801040131638, 0.935404314255487, 0.868111085215553, 0.102187464459387, 0.875784927493447, 0.0721027662094031, 0.0794203726419054, 0.823996698486295, 0.417058282950929, 0.121432191141366, 0.59993557322113, 0.26732361189343, 0.631057715233057, 0.857492364335729, 0.920994690876469, 0.582338569224998, 0.20134054797388, 0.165165742573594, 0.257681226127591, 0.519308399081033, 0.914445786977054, 0.257597673788159, 0.255236501437818, 0.902457177412228, 0.0336179175661541, 0.692395670285827]... [Total elements: 91]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg00094518', 'cg01854776', 'cg02001410', 'cg02036261', 'cg02047661', 'cg02382878', 'cg02581163', 'cg02683759', 'cg03068431', 'cg03607117', 'cg04194073', 'cg04640528', 'cg04651240', 'cg05376617', 'cg05719140', 'cg05967403', 'cg06043048', 'cg06082596', 'cg06133771', 'cg06185146', 'cg06291867', 'cg06434732', 'cg06447424', 'cg06557644', 'cg06559864', 'cg06821999', 'cg06966811', 'cg07404352', 'cg08047907', 'cg08619515']... [Total elements: 91]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=91, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [-2.3457672595977783, -2.94146728515625, 2.8132119178771973, -0.9427763223648071, 3.2280826568603516, -0.5796002745628357, -5.075088977813721, -1.3516250848770142, 3.539742946624756, -6.468724727630615, -3.5424692630767822, -6.332897663116455, 4.4002580642700195, 10.170988082885742, -0.5222252011299133, -2.993544101715088, -0.7089398503303528, -3.3968186378479004, 0.9145923852920532, 1.0081183910369873, -2.5558736324310303, -1.6970638036727905, 2.0081098079681396, 0.2233070731163025, -3.5272421836853027, -4.740792274475098, -2.4629898071289062, 0.7111413478851318, -11.599475860595703, 3.976231575012207]... [Tensor of shape torch.Size([1, 91])]
base_model.linear.bias: tensor([53.8206])
%==================================== Model Details ====================================%
Basic test#
[33]:
# Smoke test: run the assembled model on random inputs and display the predictions.
torch.manual_seed(42)  # fixed seed so the displayed predictions are reproducible
# Named `sample_input` rather than `input` so the built-in input() is not shadowed
# for the rest of the notebook session.
sample_input = torch.randn(10, len(model.features), dtype=float)
model.eval()
model.to(float)  # cast the model to the same dtype as the input
pred = model(sample_input)
pred
[33]:
tensor([[ 43.5148],
[ 46.6872],
[114.4924],
[-12.2853],
[ 45.3849],
[ 9.3484],
[ 25.7228],
[ 44.3546],
[ 36.3137],
[ -6.2013]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[34]:
# Save the whole model object under ../weights/, using the clock name as the file name.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[35]:
# Function to remove a folder and all its contents
def remove_folder(path):
    """Delete the folder at ``path`` together with all of its contents.

    Deletion is best-effort: a filesystem failure is reported on stdout
    instead of being raised, so the surrounding clean-up can continue.

    Args:
        path: Path of the directory to remove.
    """
    try:
        shutil.rmtree(path)
    except OSError as e:
        # Filesystem errors from rmtree (missing path, permissions, ...) are OSError subclasses.
        print(f"Error deleting folder {path}: {e}")
    else:
        print(f"Deleted folder: {path}")
# Clean the working directory: delete every file except notebooks, and every folder.
# Get a list of all files and folders in the current directory
all_items = os.listdir('.')

# Loop through the items
for item in all_items:
    # Check if it's a file and does not end with .ipynb
    if os.path.isfile(item) and not item.endswith('.ipynb'):
        os.remove(item)
        print(f"Deleted file: {item}")
    # Check if it's a folder
    elif os.path.isdir(item):
        remove_folder(item)
Deleted file: Grip_noAge_Females.csv
Deleted file: Grip_noAge_Males.csv
Deleted file: Gait_noAge_Females.csv
Deleted file: VO2maxModel.csv
Deleted file: AllCpGs.json
Deleted file: Gait_noAge_Males.csv
Deleted folder: DNAmFitAge
Deleted file: download.r
Deleted file: FemaleMedians.csv
Deleted file: MaleMedians.csv