DNAmFitAgeGaitF#
Index#
Let’s first import some packages:
[18]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import numpy as np
import pyaging as pya
Instantiate model class#
[19]:
def print_entire_class(cls):
    """Print the full source code of ``cls`` to standard output."""
    print(inspect.getsource(cls))
# Display the source of the pyaging class that implements this clock.
print_entire_class(pya.models.DNAmFitAgeGaitF)
class DNAmFitAgeGaitF(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[20]:
# Instantiate the clock model; its attributes are populated in the cells that follow.
model = pya.models.DNAmFitAgeGaitF()
Define clock metadata#
[21]:
# Descriptive metadata stored with the clock.
# NOTE(review): a recorded run of the model-details cell shows a non-empty
# 'notes' string about reference values, while this cell sets it to None;
# confirm which one is intended.
model.metadata.update({
    "clock_name": 'dnamfitagegaitf',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2023,
    "approved_by_author": '⌛',
    "citation": "McGreevy, Kristen M., et al. \"DNAmFitAge: biological age indicator incorporating physical fitness.\" Aging (Albany NY) 15.10 (2023): 3904.",
    "doi": 'https://doi.org/10.18632/aging.204538',
    "research_only": None,
    "notes": None,
})
Download clock dependencies#
Download GitHub repository#
[22]:
# Repository that hosts the DNAmFitAge model files.
github_url = "https://github.com/kristenmcgreevy/DNAmFitAge.git"
# Last URL segment with everything from the first "." onwards dropped.
github_folder_name = github_url.rsplit('/', 1)[-1].partition('.')[0]
# Clone into the current directory; the exit status is the cell's output.
os.system(f"git clone {github_url}")
[22]:
0
Download from R package#
[23]:
%%writefile download.r
# Exports the contents of the DNAmFitAge model bundle to JSON/CSV files in the
# working directory so they can be read from Python.
# Point the session at the CRAN cloud mirror.
options(repos = c(CRAN = "https://cloud.r-project.org/"))
library(jsonlite)
# Load the serialized bundle from the cloned DNAmFitAge folder.
DNAmFitnessModels <- readRDS("DNAmFitAge/DNAmFitnessModelsandFitAge_Oct2022.rds")
# CpG list -> JSON.
AllCpGs <- DNAmFitnessModels$AllCpGs
write_json(AllCpGs, "AllCpGs.json")
# Sex-specific median tables -> CSV.
MaleMedians <- DNAmFitnessModels$Male_Medians_All
write.csv(MaleMedians, "MaleMedians.csv")
FemaleMedians <- DNAmFitnessModels$Female_Medians_All
write.csv(FemaleMedians, "FemaleMedians.csv")
# Individual model tables from the bundle.
Gait_noAge_Females <- DNAmFitnessModels$Gait_noAge_Females
Gait_noAge_Males <- DNAmFitnessModels$Gait_noAge_Males
Grip_noAge_Females <- DNAmFitnessModels$Grip_noAge_Females
Grip_noAge_Males <- DNAmFitnessModels$Grip_noAge_Males
VO2maxModel <- DNAmFitnessModels$VO2maxModel
# Write every model table to CSV. The Python cells of this notebook read
# Gait_noAge_Females.csv and FemaleMedians.csv.
write.csv(Gait_noAge_Females, "Gait_noAge_Females.csv")
write.csv(Gait_noAge_Males, "Gait_noAge_Males.csv")
write.csv(Grip_noAge_Females, "Grip_noAge_Females.csv")
write.csv(Grip_noAge_Males, "Grip_noAge_Males.csv")
write.csv(VO2maxModel, "VO2maxModel.csv")
Writing download.r
[24]:
# Execute the R script written by the previous cell; the value returned by
# os.system is displayed as the cell output.
os.system("Rscript download.r")
[24]:
0
Load features#
From CSV file#
[25]:
# Coefficient table exported by download.r; the first CSV column becomes the index.
df = pd.read_csv('Gait_noAge_Females.csv', index_col=0)
# Expose the term/estimate columns under the names used by the following cells.
df = df.assign(feature=df['term'], coefficient=df['estimate'])
# Drop the first row (the intercept term) so only predictor names are kept.
model.features = df['feature'].iloc[1:].tolist()
df.head()
[25]:
| term | step | estimate | lambda | dev.ratio | feature | coefficient | |
|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | 1 | 3.970134 | 0.024 | 0.32532 | (Intercept) | 3.970134 |
| 2 | cg00094518 | 1 | -0.058678 | 0.024 | 0.32532 | cg00094518 | -0.058678 |
| 3 | cg00149716 | 1 | -0.056379 | 0.024 | 0.32532 | cg00149716 | -0.056379 |
| 4 | cg00457495 | 1 | -0.103711 | 0.024 | 0.32532 | cg00457495 | -0.103711 |
| 5 | cg00620464 | 1 | 0.016243 | 0.024 | 0.32532 | cg00620464 | 0.016243 |
Load weights into base model#
[27]:
# One-row tensor holding every coefficient after the first entry.
weights = torch.tensor([df['coefficient'].iloc[1:].tolist()])
# The first entry is kept separately and used as the bias term.
intercept = torch.tensor([df['coefficient'].iloc[0]])
Linear model#
[28]:
# Build a linear model with one input per feature.
base_model = pya.models.LinearModel(input_dim=len(model.features))
# Replace the layer's weight and bias data with the exported coefficients
# (converted with .float()).
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()
# Attach the configured linear model to the clock.
model.base_model = base_model
Load reference values#
From CSV file#
[29]:
# Median table exported by download.r; the first CSV column becomes the index.
reference_df = pd.read_csv('FemaleMedians.csv', index_col=0)
# Take the row labelled 1, ordered like model.features. .tolist() stores
# native Python floats (list() would keep numpy scalar objects), matching how
# model.features is built.
model.reference_values = reference_df.loc[1, model.features].tolist()
Load preprocess and postprocess objects#
[30]:
# No preprocessing step is registered for this clock.
model.preprocess_name = model.preprocess_dependencies = None
[31]:
# No postprocessing step is registered for this clock.
model.postprocess_name = model.postprocess_dependencies = None
Check all clock parameters#
[32]:
# Print the model's attributes, structure, and parameters for a visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'McGreevy, Kristen M., et al. "DNAmFitAge: biological age '
'indicator incorporating physical fitness." Aging (Albany NY) '
'15.10 (2023): 3904.',
'clock_name': 'dnamfitagegaitf',
'data_type': 'methylation',
'doi': 'https://doi.org/10.18632/aging.204538',
'notes': 'Reference values is mean between male and female training medians',
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2023}
reference_values: [0.115253497078786, 0.3634639598614, 0.875245484444968, 0.821546854188647, 0.275107342941108, 0.894889702144531, 0.875694533037502, 0.655817981502772, 0.688023755694219, 0.723811974825201, 0.0669232295556058, 0.0721027662094031, 0.101769898910285, 0.606080928108706, 0.660846514723889, 0.692684806440334, 0.169813568922838, 0.692395670285827, 0.946415994929112, 0.76865976935573, 0.900577681408405, 0.782582056125966, 0.875597498851315, 0.52799050796051, 0.0574926577245481, 0.0857054078140349, 0.854883089202875, 0.220147987860978, 0.932733864902247, 0.598833175052241]... [Total elements: 53]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg00094518', 'cg00149716', 'cg00457495', 'cg00620464', 'cg01032119', 'cg01476885', 'cg02046532', 'cg02537108', 'cg02703627', 'cg02725269', 'cg03559454', 'cg03607117', 'cg04714041', 'cg04873577', 'cg05456652', 'cg06546183', 'cg07502389', 'cg08619515', 'cg08879886', 'cg09018483', 'cg09521872', 'cg10433390', 'cg10694507', 'cg10833685', 'cg10964367', 'cg11650763', 'cg12597309', 'cg13649056', 'cg14170201', 'cg14310198']... [Total elements: 53]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=53, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [-0.05867812782526016, -0.05637867748737335, -0.10371068120002747, 0.01624305173754692, -0.053210534155368805, -0.07633326947689056, -0.01514248363673687, -0.049918416887521744, -0.013779371976852417, 0.14702405035495758, 0.22061608731746674, -0.6326411366462708, -0.40431228280067444, 0.06633924692869186, -0.2228449434041977, -0.03177845478057861, -0.35903501510620117, 0.4153103232383728, -0.837234616279602, 0.056484829634428024, -0.13299566507339478, -0.058516617864370346, 0.04777200520038605, 0.13982263207435608, -0.1280703842639923, -0.03444225341081619, -0.05433110147714615, -0.4258767366409302, 0.0011224570916965604, 0.01846371404826641]... [Tensor of shape torch.Size([1, 53])]
base_model.linear.bias: tensor([3.9701])
%==================================== Model Details ====================================%
Basic test#
[33]:
# Smoke test: run the model on seeded random data.
torch.manual_seed(42)
# Named `x` rather than `input` so the built-in input() is not shadowed in the
# notebook namespace.
x = torch.randn(10, len(model.features), dtype=torch.float64)
model.eval()
# Match the model's parameters to the double-precision input.
model.to(torch.float64)
pred = model(x)
pred
[33]:
tensor([[1.9288],
[2.7637],
[5.7855],
[3.2988],
[2.2712],
[6.3697],
[4.3048],
[2.6162],
[1.6352],
[1.3601]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[34]:
# Serialize the whole model object to ../weights/<clock_name>.pt.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[35]:
# Function to remove a folder and all its contents
def remove_folder(path):
    """Delete the directory tree at ``path`` and report the outcome.

    Failures are reported on standard output instead of being raised, so a
    cleanup loop can continue with the remaining entries.
    """
    try:
        shutil.rmtree(path)
    except OSError as e:
        # Filesystem errors from rmtree (missing path, permissions, ...) are OSError.
        print(f"Error deleting folder {path}: {e}")
    else:
        print(f"Deleted folder: {path}")
# Snapshot the working directory's entries before anything is removed
all_items = os.listdir('.')
for item in all_items:
    if os.path.isdir(item):
        # Folders are removed recursively through the helper
        remove_folder(item)
    elif os.path.isfile(item) and not item.endswith('.ipynb'):
        # Notebooks are kept; every other regular file is deleted
        os.remove(item)
        print(f"Deleted file: {item}")
Deleted file: Grip_noAge_Females.csv
Deleted file: Grip_noAge_Males.csv
Deleted file: Gait_noAge_Females.csv
Deleted file: VO2maxModel.csv
Deleted file: AllCpGs.json
Deleted file: Gait_noAge_Males.csv
Deleted folder: DNAmFitAge
Deleted file: download.r
Deleted file: FemaleMedians.csv
Deleted file: MaleMedians.csv