DNAmFitAgeGaitM#
Index#
Let’s first import some packages:
[18]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import numpy as np
import pyaging as pya
Instantiate model class#
[19]:
def print_entire_class(cls):
    """Print the full source code of ``cls`` as reported by ``inspect``."""
    print(inspect.getsource(cls))
# Show the source of the clock class that is instantiated below.
print_entire_class(pya.models.DNAmFitAgeGaitM)
class DNAmFitAgeGaitM(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[20]:
# Create the clock object; metadata, features, weights and reference values
# are attached to it in the cells that follow.
model = pya.models.DNAmFitAgeGaitM()
Define clock metadata#
[21]:
# Descriptive metadata for the clock, written key by key into model.metadata
# so that any keys already present (and not listed here) are left untouched.
clock_metadata = {
    "clock_name": 'dnamfitagegaitm',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2023,
    "approved_by_author": '⌛',
    "citation": "McGreevy, Kristen M., et al. \"DNAmFitAge: biological age indicator incorporating physical fitness.\" Aging (Albany NY) 15.10 (2023): 3904.",
    "doi": 'https://doi.org/10.18632/aging.204538',
    "research_only": None,
    "notes": None,
}
for metadata_key, metadata_value in clock_metadata.items():
    model.metadata[metadata_key] = metadata_value
Download clock dependencies#
Download GitHub repository#
[22]:
# Clone the DNAmFitAge repository, which ships the .rds file read by download.r.
github_url = "https://github.com/kristenmcgreevy/DNAmFitAge.git"
# Folder that `git clone` creates: last URL component without the ".git" suffix.
github_folder_name = github_url.split('/')[-1].split('.')[0]
if os.path.isdir(github_folder_name):
    # Re-running the cell: `git clone` would fail on the existing folder,
    # so reuse the checkout that is already there.
    clone_status = 0
else:
    clone_status = os.system(f"git clone {github_url}")
    if clone_status != 0:
        # Fail here instead of later, when the R script cannot find the .rds file.
        raise RuntimeError(
            f"git clone of {github_url} failed with exit status {clone_status}"
        )
# Keep the exit status as the cell's displayed value (0 on success).
clone_status
[22]:
0
Download from R package#
[23]:
%%writefile download.r
# Export the objects stored in the DNAmFitAge .rds bundle to JSON/CSV files
# in the working directory so they can be read from Python.

# Use the cloud CRAN mirror as the package repository.
options(repos = c(CRAN = "https://cloud.r-project.org/"))
# jsonlite provides write_json(), used below for the CpG list.
library(jsonlite)
# The .rds file lives inside the cloned DNAmFitAge repository folder.
DNAmFitnessModels <- readRDS("DNAmFitAge/DNAmFitnessModelsandFitAge_Oct2022.rds")
AllCpGs <- DNAmFitnessModels$AllCpGs
write_json(AllCpGs, "AllCpGs.json")
# Sex-specific medians; MaleMedians.csv is the file this notebook later reads
# for the model's reference values.
MaleMedians <- DNAmFitnessModels$Male_Medians_All
write.csv(MaleMedians, "MaleMedians.csv")
FemaleMedians <- DNAmFitnessModels$Female_Medians_All
write.csv(FemaleMedians, "FemaleMedians.csv")
# Per-model tables; of these, only Gait_noAge_Males.csv is read by the Python
# cells of this notebook.
Gait_noAge_Females <- DNAmFitnessModels$Gait_noAge_Females
Gait_noAge_Males <- DNAmFitnessModels$Gait_noAge_Males
Grip_noAge_Females <- DNAmFitnessModels$Grip_noAge_Females
Grip_noAge_Males <- DNAmFitnessModels$Grip_noAge_Males
VO2maxModel <- DNAmFitnessModels$VO2maxModel
write.csv(Gait_noAge_Females, "Gait_noAge_Females.csv")
write.csv(Gait_noAge_Males, "Gait_noAge_Males.csv")
write.csv(Grip_noAge_Females, "Grip_noAge_Females.csv")
write.csv(Grip_noAge_Males, "Grip_noAge_Males.csv")
write.csv(VO2maxModel, "VO2maxModel.csv")
Writing download.r
[24]:
# Run the R script written above; it exports the .rds contents to CSV/JSON.
rscript_status = os.system("Rscript download.r")
if rscript_status != 0:
    # Stop here rather than failing later with a missing-CSV error.
    raise RuntimeError(
        f"Rscript download.r failed with exit status {rscript_status}"
    )
# Keep the exit status as the cell's displayed value (0 on success).
rscript_status
[24]:
0
Load features#
From CSV file#
[25]:
# Read the male gait-speed coefficient table exported by download.r.
df = pd.read_csv('Gait_noAge_Males.csv', index_col=0)
# Mirror the R column names under the names used in the rest of the notebook.
df = df.assign(feature=df['term'], coefficient=df['estimate'])
# The first row is the "(Intercept)" term, so it is not an input feature.
model.features = df['feature'].iloc[1:].tolist()
df.head()
[25]:
| term | step | estimate | lambda | dev.ratio | feature | coefficient | |
|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | 1 | 3.182528 | 0.05 | 0.291034 | (Intercept) | 3.182528 |
| 2 | cg04885881 | 1 | 0.102360 | 0.05 | 0.291034 | cg04885881 | 0.102360 |
| 3 | cg03132729 | 1 | 0.087535 | 0.05 | 0.291034 | cg03132729 | 0.087535 |
| 4 | cg22026616 | 1 | 0.312490 | 0.05 | 0.291034 | cg22026616 | 0.312490 |
| 5 | cg18815943 | 1 | -0.281200 | 0.05 | 0.291034 | cg18815943 | -0.281200 |
Load weights into base model#
[27]:
# Split the coefficient column into the intercept (first row) and the
# per-feature weights (remaining rows, as a single-row 2-D tensor).
coefficient_column = df['coefficient']
feature_coefficients = coefficient_column.iloc[1:].tolist()
weights = torch.tensor([feature_coefficients])
intercept = torch.tensor([coefficient_column.iloc[0]])
Linear model#
[28]:
# Build a linear model with one input per feature, then overwrite its
# parameters with the clock's coefficients and intercept as float32.
n_inputs = len(model.features)
base_model = pya.models.LinearModel(input_dim=n_inputs)
linear_layer = base_model.linear
linear_layer.weight.data = weights.float()
linear_layer.bias.data = intercept.float()
model.base_model = base_model
Load reference values#
From CSV file#
[29]:
# Male medians exported by download.r: take the row labelled 1, restricted to
# (and ordered like) the model's features.
reference_df = pd.read_csv('MaleMedians.csv', index_col=0)
feature_medians = reference_df[model.features].loc[1]
model.reference_values = list(feature_medians)
Load preprocess and postprocess objects#
[30]:
# No preprocessing step is registered for this clock.
model.preprocess_name = model.preprocess_dependencies = None
[31]:
# No postprocessing step is registered for this clock.
model.postprocess_name = model.postprocess_dependencies = None
Check all clock parameters#
[32]:
# Print the model's attributes, structure and parameters for a final visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'McGreevy, Kristen M., et al. "DNAmFitAge: biological age '
'indicator incorporating physical fitness." Aging (Albany NY) '
'15.10 (2023): 3904.',
'clock_name': 'dnamfitagegaitm',
'data_type': 'methylation',
'doi': 'https://doi.org/10.18632/aging.204538',
'notes': 'Reference values is mean between male and female training medians',
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2023}
reference_values: [0.42897495438465, 0.427655899021578, 0.129543495979192, 0.0316529198839327, 0.891633191357651, 0.0510565986500869, 0.396680844434307, 0.0754576359559242, 0.0777325149718022, 0.57735864652422, 0.461950512034599, 0.640052611552163, 0.0357628260832642, 0.24380932807525, 0.347401519318085, 0.0242175139352825, 0.752622910167749, 0.106453050021963, 0.184161373839444, 0.351499719560921, 0.0852183694652447, 0.135559493498925, 0.199226615813068, 0.923895962305445, 0.0153839328933658, 0.497078652018113, 0.267884547076273, 0.610592457564437, 0.935811367138778, 0.784755049673597]... [Total elements: 59]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg04885881', 'cg03132729', 'cg22026616', 'cg18815943', 'cg21912203', 'cg14508615', 'cg18933331', 'cg06975196', 'cg14514751', 'cg24361098', 'cg25705148', 'cg22454769', 'ch.2.105901354F', 'cg00481951', 'cg02699218', 'cg03135515', 'cg16867657', 'cg04424621', 'cg12753631', 'cg25325512', 'cg17722319', 'cg18568843', 'cg18691434', 'cg18537063', 'cg21186299', 'cg00716277', 'cg13337658', 'cg14837598', 'cg15276922', 'cg18148156']... [Total elements: 59]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=59, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [0.10235995799303055, 0.08753516525030136, 0.3124901354312897, -0.28120002150535583, -0.3208324611186981, 0.24479524791240692, 0.05682919919490814, 0.21363066136837006, -0.3853186368942261, -0.038501303642988205, -0.0023554968647658825, -0.17415688931941986, 0.05159717798233032, -0.5185700058937073, -0.04655730724334717, -0.19074112176895142, -0.21096128225326538, 0.011959427036345005, 0.1078566312789917, 0.0770212784409523, 0.18820391595363617, 0.43347951769828796, -0.13240143656730652, 0.021351546049118042, -0.12319610267877579, -0.010150707326829433, -0.007736711762845516, 0.13240836560726166, -1.1829639673233032, -0.10984379798173904]... [Tensor of shape torch.Size([1, 59])]
base_model.linear.bias: tensor([3.1825])
%==================================== Model Details ====================================%
Basic test#
[33]:
# Smoke test: run the assembled clock on seeded random double-precision input.
torch.manual_seed(42)
n_samples = 10
n_features = len(model.features)
# NOTE: `input` shadows the Python builtin; the name is kept as in the original cell.
input = torch.randn(n_samples, n_features, dtype=torch.float64)
model.eval()
model.to(torch.float64)
pred = model(input)
pred
[33]:
tensor([[4.2823],
[2.2886],
[5.0413],
[6.9544],
[4.7673],
[3.0174],
[2.6717],
[2.2410],
[3.8598],
[2.9635]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[34]:
# Serialise the whole model object under ../weights/, named after the clock.
weights_path = f"../weights/{model.metadata['clock_name']}.pt"
torch.save(model, weights_path)
Clear directory#
[35]:
def remove_folder(path):
    """Recursively delete the folder at ``path``.

    Prints a confirmation on success. Any exception is caught and reported
    with a printed message instead of being raised.
    """
    try:
        shutil.rmtree(path)
        print(f"Deleted folder: {path}")
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
# Clean the working directory: every folder is removed, and every regular file
# is removed except notebooks (*.ipynb).
for entry in os.listdir('.'):
    if os.path.isdir(entry):
        remove_folder(entry)
    elif os.path.isfile(entry) and not entry.endswith('.ipynb'):
        os.remove(entry)
        print(f"Deleted file: {entry}")
Deleted file: Grip_noAge_Females.csv
Deleted file: Grip_noAge_Males.csv
Deleted file: Gait_noAge_Females.csv
Deleted file: VO2maxModel.csv
Deleted file: AllCpGs.json
Deleted file: Gait_noAge_Males.csv
Deleted folder: DNAmFitAge
Deleted file: download.r
Deleted file: FemaleMedians.csv
Deleted file: MaleMedians.csv