DNAmFitAgeVO2Max#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import numpy as np
import pyaging as pya
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the full source code of ``cls`` as returned by ``inspect.getsource``."""
    print(inspect.getsource(cls))
# Show the source of the packaged clock class so its preprocess/postprocess steps can be inspected.
print_entire_class(pya.models.DNAmFitAgeVO2Max)
class DNAmFitAgeVO2Max(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[3]:
# Create the clock instance; metadata, features, weights and reference values are attached to it below.
model = pya.models.DNAmFitAgeVO2Max()
Define clock metadata#
[4]:
# Descriptive metadata for this clock, written into the model's metadata
# mapping one key at a time.
clock_metadata = {
    "clock_name": 'dnamfitagevo2max',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2023,
    "approved_by_author": '⌛',
    "citation": "McGreevy, Kristen M., et al. \"DNAmFitAge: biological age indicator incorporating physical fitness.\" Aging (Albany NY) 15.10 (2023): 3904.",
    "doi": 'https://doi.org/10.18632/aging.204538',
    "research_only": None,
    "notes": 'Reference values is mean between male and female training medians',
}
for field, value in clock_metadata.items():
    model.metadata[field] = value
Download clock dependencies#
Download GitHub repository#
[5]:
# Fetch the DNAmFitAge repository, which ships the .rds model bundle read by
# the R export script.
github_url = "https://github.com/kristenmcgreevy/DNAmFitAge.git"
github_folder_name = github_url.split('/')[-1].split('.')[0]
# `git clone` fails when the destination folder already exists (for example
# when this cell is re-run), so only clone when the checkout is missing.
if not os.path.isdir(github_folder_name):
    clone_status = os.system(f"git clone {github_url}")
    # os.system reports failure only through its return value; raise here so a
    # failed download stops the notebook instead of surfacing later as a
    # missing-file error.
    if clone_status != 0:
        raise RuntimeError(
            f"git clone of {github_url} failed (os.system status {clone_status})"
        )
[5]:
32768
Download from R package#
[6]:
%%writefile download.r
# Export the contents of the DNAmFitAge .rds bundle to plain JSON/CSV files.
# Of these, the notebook's Python cells read VO2maxModel.csv, FemaleMedians.csv
# and MaleMedians.csv; the remaining exports are not read by the cells shown here.
options(repos = c(CRAN = "https://cloud.r-project.org/"))
library(jsonlite)
# The bundle is read from the DNAmFitAge/ folder (the cloned GitHub repository).
DNAmFitnessModels <- readRDS("DNAmFitAge/DNAmFitnessModelsandFitAge_Oct2022.rds")
# CpG list stored in the bundle.
AllCpGs <- DNAmFitnessModels$AllCpGs
write_json(AllCpGs, "AllCpGs.json")
# Male and female median tables (used later to build the reference values).
MaleMedians <- DNAmFitnessModels$Male_Medians_All
write.csv(MaleMedians, "MaleMedians.csv")
FemaleMedians <- DNAmFitnessModels$Female_Medians_All
write.csv(FemaleMedians, "FemaleMedians.csv")
# Individual model tables stored in the bundle.
Gait_noAge_Females <- DNAmFitnessModels$Gait_noAge_Females
Gait_noAge_Males <- DNAmFitnessModels$Gait_noAge_Males
Grip_noAge_Females <- DNAmFitnessModels$Grip_noAge_Females
Grip_noAge_Males <- DNAmFitnessModels$Grip_noAge_Males
VO2maxModel <- DNAmFitnessModels$VO2maxModel
# One CSV per model table; VO2maxModel.csv holds the coefficients for this clock.
write.csv(Gait_noAge_Females, "Gait_noAge_Females.csv")
write.csv(Gait_noAge_Males, "Gait_noAge_Males.csv")
write.csv(Grip_noAge_Females, "Grip_noAge_Females.csv")
write.csv(Grip_noAge_Males, "Grip_noAge_Males.csv")
write.csv(VO2maxModel, "VO2maxModel.csv")
Overwriting download.r
[7]:
# Run the R export script written above. os.system reports failure only via
# its return value, so check it: otherwise a failed export would only show up
# later as a missing-CSV error when the files are loaded.
rscript_status = os.system("Rscript download.r")
if rscript_status != 0:
    raise RuntimeError(
        f"Rscript download.r failed (os.system status {rscript_status})"
    )
[7]:
0
Load features#
From CSV file#
[8]:
# Load the exported VO2max model table and expose its term/estimate columns
# under the names 'feature' and 'coefficient'.
df = pd.read_csv('VO2maxModel.csv', index_col=0)
df = df.assign(feature=df['term'], coefficient=df['estimate'])
# Row 0 is the intercept and is skipped. The final row's own term name is
# dropped and exposed as 'age' instead (presumably the model's age term --
# its coefficient is still kept when the weights are built).
model.features = [*df['feature'].iloc[1:-1], 'age']
df.head()
[8]:
| term | step | estimate | lambda | dev.ratio | feature | coefficient | |
|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | 1 | 69.652317 | 1.8 | 0.368651 | (Intercept) | 69.652317 |
| 2 | cg08721112 | 1 | 5.249130 | 1.8 | 0.368651 | cg08721112 | 5.249130 |
| 3 | cg06540747 | 1 | 3.090176 | 1.8 | 0.368651 | cg06540747 | 3.090176 |
| 4 | cg22359606 | 1 | -7.551167 | 1.8 | 0.368651 | cg22359606 | -7.551167 |
| 5 | cg03041029 | 1 | -5.796545 | 1.8 | 0.368651 | cg03041029 | -5.796545 |
Load weights into base model#
[9]:
# Split the coefficient column: position 0 is the intercept, everything after
# it becomes a single row of weights, i.e. shape (1, n_features).
coefficients = df['coefficient']
weights = torch.tensor([coefficients.iloc[1:].tolist()])
intercept = torch.tensor([coefficients.iloc[0]])
Linear model#
[10]:
# Build a linear layer with one input per feature and overwrite its parameters
# with the coefficients loaded from the model table.
n_inputs = len(model.features)
base_model = pya.models.LinearModel(input_dim=n_inputs)
linear_layer = base_model.linear
linear_layer.weight.data = weights.float()
linear_layer.bias.data = intercept.float()
model.base_model = base_model
Load reference values#
From CSV file#
[11]:
# Reference values: for every feature except the trailing 'age' entry, average
# the female and male values found in the row labelled 1 of each medians table.
cpg_features = model.features[:-1]
reference_df_f = pd.read_csv('FemaleMedians.csv', index_col=0)
reference_df_m = pd.read_csv('MaleMedians.csv', index_col=0)
reference_f = reference_df_f.loc[1, cpg_features]
reference_m = reference_df_m.loc[1, cpg_features]
reference = (reference_f + reference_m) / 2
# 65 is the reference value paired with the final 'age' feature.
model.reference_values = [*reference, 65]
Load preprocess and postprocess objects#
[12]:
# No preprocessing step is registered for this clock.
model.preprocess_name = model.preprocess_dependencies = None
[13]:
# No postprocessing step is registered for this clock.
model.postprocess_name = model.postprocess_dependencies = None
Check all clock parameters#
[14]:
# Print the assembled model (attributes, structure and parameters) for a visual check before saving.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'McGreevy, Kristen M., et al. "DNAmFitAge: biological age '
'indicator incorporating physical fitness." Aging (Albany NY) '
'15.10 (2023): 3904.',
'clock_name': 'dnamfitagevo2max',
'data_type': 'methylation',
'doi': 'https://doi.org/10.18632/aging.204538',
'notes': 'Reference values is mean between male and female training medians',
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2023}
reference_values: [0.588137182005736, 0.091356441802012, 0.0707204363181266, 0.6412617944885715, 0.535138543094586, 0.5437546990565185, 0.930382573680189, 0.0242279670663961, 0.09365587082155075, 0.6242147821751816, 0.35000302571538, 0.869293193371122, 0.1280519183158855, 0.0294186147410821, 0.09983154938858019, 0.9755683687837085, 0.600750498775525, 0.14779655808084802, 0.1563590903974515, 0.537387884063212, 0.923580030601587, 0.9329177575485086, 0.07460485276649155, 0.6081440126190596, 0.7695271383829341, 0.9139682130144675, 0.1163057238787645, 0.0245550999871068, 0.247020660722574, 0.8178191327449524]... [Total elements: 41]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg08721112', 'cg06540747', 'cg22359606', 'cg03041029', 'cg25693769', 'cg08140055', 'cg02401614', 'cg24960960', 'cg09184832', 'cg11998932', 'cg22103219', 'cg16037896', 'cg18788741', 'cg12054428', 'cg14470803', 'cg13557031', 'cg03804621', 'cg14868747', 'cg08533783', 'cg04835841', 'cg11846355', 'cg09910000', 'cg26494929', 'cg06808011', 'cg05708497', 'cg23441888', 'cg27615578', 'cg08648367', 'cg17609057', 'cg04605816']... [Total elements: 41]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=41, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [5.249129772186279, 3.0901758670806885, -7.551166534423828, -5.796545028686523, -1.094834804534912, -2.3806116580963135, -0.0022889631800353527, 1.0938740968704224, -1.4775551557540894, 1.4427802562713623, 1.268430471420288, 5.4764933586120605, -8.934550285339355, -1.9918478727340698, -5.6620774269104, -6.2174201011657715, -0.6082701086997986, -7.513339996337891, -1.4299590587615967, -3.6723220348358154, 14.669830322265625, 0.5884844660758972, -0.9597266912460327, -1.0253041982650757, -1.802089810371399, -4.9922356605529785, -0.6746888160705566, -10.973499298095703, -0.6614307761192322, 2.365175247192383]... [Tensor of shape torch.Size([1, 41])]
base_model.linear.bias: tensor([69.6523])
%==================================== Model Details ====================================%
Basic test#
[15]:
# Smoke test: push a seeded random batch of 10 samples through the assembled
# model to confirm that the forward pass runs end to end.
torch.manual_seed(42)
# Named `test_input` rather than `input` so the builtin input() is not shadowed
# for the rest of the notebook session.
test_input = torch.randn(10, len(model.features), dtype=float)
model.eval()
# Cast the model to float64 so its parameters match the dtype of the test batch.
model.to(float)
pred = model(test_input)
pred
[15]:
tensor([[ 40.8178],
[ 99.1144],
[ 95.0538],
[ 67.0306],
[105.7898],
[ 33.1438],
[ 64.7387],
[ 74.8257],
[129.5815],
[ 17.3352]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[16]:
# Save the whole model object under ../weights, named after the clock.
weights_path = f"../weights/{model.metadata['clock_name']}.pt"
torch.save(model, weights_path)
Clear directory#
[17]:
def remove_folder(path):
    """Recursively delete the folder at ``path`` and report the outcome.

    Failures are not raised: any exception from ``shutil.rmtree`` is caught
    and printed so that the surrounding cleanup can carry on.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")
# Clean the working directory: delete every folder and every file that is not
# a notebook, i.e. everything this notebook downloaded or generated.
all_items = os.listdir('.')
for item in all_items:
    if os.path.isdir(item):
        # Folders (such as the cloned repository) are removed recursively.
        remove_folder(item)
        continue
    # Keep notebooks, and ignore anything that is not a regular file.
    if item.endswith('.ipynb') or not os.path.isfile(item):
        continue
    os.remove(item)
    print(f"Deleted file: {item}")
Deleted file: Grip_noAge_Females.csv
Deleted file: Grip_noAge_Males.csv
Deleted file: Gait_noAge_Females.csv
Deleted file: VO2maxModel.csv
Deleted file: AllCpGs.json
Deleted file: Gait_noAge_Males.csv
Deleted folder: DNAmFitAge
Deleted file: download.r
Deleted file: FemaleMedians.csv
Deleted file: MaleMedians.csv