DNAmFitAgeGripM#
Index#
Let’s first import some packages:
[18]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import numpy as np
import pyaging as pya
Instantiate model class#
[19]:
def print_entire_class(cls):
    """Print the full source code of ``cls`` to standard output.

    Args:
        cls: Class (or any other object accepted by ``inspect.getsource``)
            whose source should be displayed.

    Raises:
        OSError: If the source code cannot be retrieved.
        TypeError: If ``cls`` is a built-in object without Python source.
    """
    print(inspect.getsource(cls))
# Show the class definition that pyaging ships for this clock.
print_entire_class(pya.models.DNAmFitAgeGripM)
class DNAmFitAgeGripM(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[20]:
# Create the clock object; metadata, features and weights are attached to it
# in the cells that follow.
model = pya.models.DNAmFitAgeGripM()
Define clock metadata#
[21]:
# Descriptive metadata stored alongside the clock. Keys that are not listed
# here keep whatever value the model object already holds for them.
model.metadata.update({
    "clock_name": 'dnamfitagegripm',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2023,
    "approved_by_author": '⌛',
    "citation": "McGreevy, Kristen M., et al. \"DNAmFitAge: biological age indicator incorporating physical fitness.\" Aging (Albany NY) 15.10 (2023): 3904.",
    "doi": 'https://doi.org/10.18632/aging.204538',
    "research_only": None,
    "notes": None,
})
Download clock dependencies#
Download GitHub repository#
[22]:
# Clone the DNAmFitAge repository; the R script in this notebook reads an
# .rds file from the resulting "DNAmFitAge" directory.
github_url = "https://github.com/kristenmcgreevy/DNAmFitAge.git"
# Last URL segment with everything after the first "." dropped.
github_folder_name = github_url.rsplit('/', 1)[-1].split('.')[0]
# The cell displays os.system's return status.
os.system(f"git clone {github_url}")
[22]:
0
Download from R package#
[23]:
%%writefile download.r
# Export the objects stored in the DNAmFitAge .rds bundle to JSON/CSV files
# in the working directory so they can be read from Python.

# Select the CRAN mirror (no package is installed by this script itself).
options(repos = c(CRAN = "https://cloud.r-project.org/"))
library(jsonlite)
# Path is relative to the working directory; expects the cloned repository.
DNAmFitnessModels <- readRDS("DNAmFitAge/DNAmFitnessModelsandFitAge_Oct2022.rds")
# CpG list -> JSON.
AllCpGs <- DNAmFitnessModels$AllCpGs
write_json(AllCpGs, "AllCpGs.json")
# Sex-specific median tables -> CSV.
MaleMedians <- DNAmFitnessModels$Male_Medians_All
write.csv(MaleMedians, "MaleMedians.csv")
FemaleMedians <- DNAmFitnessModels$Female_Medians_All
write.csv(FemaleMedians, "FemaleMedians.csv")
# Model tables: gait and grip (one per sex) plus VO2max.
Gait_noAge_Females <- DNAmFitnessModels$Gait_noAge_Females
Gait_noAge_Males <- DNAmFitnessModels$Gait_noAge_Males
Grip_noAge_Females <- DNAmFitnessModels$Grip_noAge_Females
Grip_noAge_Males <- DNAmFitnessModels$Grip_noAge_Males
VO2maxModel <- DNAmFitnessModels$VO2maxModel
# Each table is written as-is; write.csv also emits the row names as the
# first column.
write.csv(Gait_noAge_Females, "Gait_noAge_Females.csv")
write.csv(Gait_noAge_Males, "Gait_noAge_Males.csv")
write.csv(Grip_noAge_Females, "Grip_noAge_Females.csv")
write.csv(Grip_noAge_Males, "Grip_noAge_Males.csv")
write.csv(VO2maxModel, "VO2maxModel.csv")
Writing download.r
[24]:
# Run the R export script; the cell displays os.system's return status.
os.system("Rscript download.r")
[24]:
0
Load features#
From CSV file#
[25]:
# Coefficient table for the male grip model; the first CSV column is the index.
df = pd.read_csv('Grip_noAge_Males.csv', index_col=0)
# Mirror the R column names under the names used in the rest of the notebook.
df = df.assign(feature=df['term'], coefficient=df['estimate'])
# The first row is the "(Intercept)" term, so it is not an input feature.
model.features = df['feature'].iloc[1:].tolist()
df.head()
[25]:
| term | step | estimate | lambda | dev.ratio | feature | coefficient | |
|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | 1 | 43.019754 | 1.089044 | 0.479362 | (Intercept) | 43.019754 |
| 2 | cg16736630 | 1 | -0.892844 | 1.089044 | 0.479362 | cg16736630 | -0.892844 |
| 3 | cg26224305 | 1 | 1.637954 | 1.089044 | 0.479362 | cg26224305 | 1.637954 |
| 4 | cg20822990 | 1 | 1.526546 | 1.089044 | 0.479362 | cg20822990 | 1.526546 |
| 5 | cg03772253 | 1 | -0.334703 | 1.089044 | 0.479362 | cg03772253 | -0.334703 |
Load weights into base model#
[27]:
# Row vector (1 x n_features) of CpG coefficients; the first table row is the
# intercept and is kept separately as a one-element tensor.
weights = torch.tensor([df['coefficient'].iloc[1:].tolist()])
intercept = torch.tensor([df['coefficient'].iat[0]])
Linear model#
[28]:
# Linear layer with one input per feature and a single output.
base_model = pya.models.LinearModel(input_dim=len(model.features))
# Replace the layer's initial parameters with the loaded coefficients and
# intercept, cast to float32.
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()
# Attach the configured layer to the clock object.
model.base_model = base_model
Load reference values#
From CSV file#
[29]:
# Table exported from the R bundle; the first CSV column is used as the index.
reference_df = pd.read_csv('MaleMedians.csv', index_col=0)
# Take the row labelled 1 and pick its columns in model.features order, so the
# values line up with the model weights.
# NOTE(review): presumably per-CpG medians for males (per the file name) - confirm.
model.reference_values = list(reference_df.loc[1, model.features])
Load preprocess and postprocess objects#
[30]:
# No preprocessing step is registered for this clock.
model.preprocess_name = None
model.preprocess_dependencies = None
[31]:
# No postprocessing step is registered for this clock.
model.postprocess_name = None
model.postprocess_dependencies = None
Check all clock parameters#
[32]:
# Print the model's attributes, structure and weights for a visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'McGreevy, Kristen M., et al. "DNAmFitAge: biological age '
'indicator incorporating physical fitness." Aging (Albany NY) '
'15.10 (2023): 3904.',
'clock_name': 'dnamfitagegripm',
'data_type': 'methylation',
'doi': 'https://doi.org/10.18632/aging.204538',
'notes': 'Reference values is mean between male and female training medians',
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2023}
reference_values: [0.510108256486195, 0.0174923251251838, 0.282280857535263, 0.0528901870575477, 0.597656383314558, 0.248233103255157, 0.852384268050395, 0.941476851346533, 0.734221050058483, 0.190121263555547, 0.31940105422856, 0.814431141757476, 0.19176405807589, 0.0479152480674275, 0.953607748943679, 0.492922877265799, 0.185486398346001, 0.0841236306449339, 0.621103094526097, 0.87518246696653, 0.663858799750913, 0.556449656236505, 0.623096676798486, 0.345448583792196, 0.527682477978777, 0.212507683830467, 0.485466327170399, 0.846662732645798, 0.851402366282247, 0.839862295544553]... [Total elements: 93]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg16736630', 'cg26224305', 'cg20822990', 'cg03772253', 'cg25410668', 'cg06335143', 'cg24152080', 'cg13783177', 'cg01233392', 'cg09789768', 'cg22431262', 'cg01791778', 'cg19516340', 'cg26248645', 'cg17082856', 'cg06639320', 'cg26158023', 'cg14507891', 'cg13287247', 'cg01552919', 'cg17315281', 'cg21397124', 'cg14069287', 'cg03614721', 'cg12655768', 'cg08206318', 'cg23500537', 'cg26960988', 'cg15971074', 'cg23069046']... [Total elements: 93]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=93, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [-0.892844021320343, 1.6379542350769043, 1.5265462398529053, -0.3347032964229584, -1.9029316902160645, -0.2647155225276947, -6.30814266204834, 14.954833984375, 1.178484320640564, 3.5211784839630127, -0.1861504316329956, -1.6255935430526733, 4.550158977508545, -1.587499976158142, -0.449662446975708, -8.599822998046875, 25.895660400390625, 4.368823051452637, 3.992393970489502, 1.3252184391021729, -2.2360410690307617, -0.6896253228187561, 1.5932470560073853, 1.5443568229675293, -0.7052236795425415, -3.0787854194641113, -0.2242996096611023, -0.23673297464847565, 2.1442930698394775, -0.3954241871833801]... [Tensor of shape torch.Size([1, 93])]
base_model.linear.bias: tensor([43.0198])
%==================================== Model Details ====================================%
Basic test#
[33]:
# Smoke test: run the assembled clock on reproducible random data.
torch.manual_seed(42)
# Named `random_input` rather than `input` so the builtin input() is not
# shadowed for the remainder of the session.
random_input = torch.randn(10, len(model.features), dtype=float)
model.eval()
# Cast the model to float64 to match the dtype of the random input.
model.to(float)
pred = model(random_input)
pred
[33]:
tensor([[ 41.6172],
[ 21.7346],
[ 54.7012],
[ 53.8505],
[ 71.7095],
[203.5236],
[ 58.8656],
[ 38.5258],
[ 34.2558],
[109.6429]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[34]:
# Serialize the whole model object to ../weights/<clock_name>.pt (path is
# relative to the working directory).
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[35]:
def remove_folder(path):
    """Delete the folder at ``path`` together with everything inside it.

    Deletion is best-effort: filesystem failures (missing path, permission
    problems, ...) are reported on standard output instead of being raised.

    Args:
        path: Path of the folder to delete.
    """
    try:
        shutil.rmtree(path)
    except OSError as e:
        # Only filesystem errors are expected here; anything else indicates a
        # programming mistake and should surface.
        print(f"Error deleting folder {path}: {e}")
    else:
        print(f"Deleted folder: {path}")
# Snapshot the working directory before anything is deleted.
all_items = os.listdir('.')
# Remove every file except notebooks, and every folder.
for entry in all_items:
    if os.path.isfile(entry):
        # Notebooks are kept.
        if entry.endswith('.ipynb'):
            continue
        os.remove(entry)
        print(f"Deleted file: {entry}")
    elif os.path.isdir(entry):
        remove_folder(entry)
Deleted file: Grip_noAge_Females.csv
Deleted file: Grip_noAge_Males.csv
Deleted file: Gait_noAge_Females.csv
Deleted file: VO2maxModel.csv
Deleted file: AllCpGs.json
Deleted file: Gait_noAge_Males.csv
Deleted folder: DNAmFitAge
Deleted file: download.r
Deleted file: FemaleMedians.csv
Deleted file: MaleMedians.csv