DunedinPACE#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the source code of ``cls`` to standard output.

    Args:
        cls: Any object accepted by ``inspect.getsource`` (used here with a
            model class).

    Returns:
        None. The source text is emitted with ``print``.
    """
    source = inspect.getsource(cls)
    print(source)
# Display the packaged implementation of the clock class.
print_entire_class(pya.models.DunedinPACE)
class DunedinPACE(pyagingModel):
    """DunedinPACE clock.

    ``preprocess`` reads ``self.reference_values`` and
    ``self.preprocess_dependencies``; this class does not set either of
    them itself, so both must be assigned on the instance before use.
    """

    def __init__(self):
        super().__init__()

    def preprocess(self, x):
        """
        Apply quantile normalization on x using gold standard means.

        Every row of ``x`` is normalized independently: the entry with the
        k-th smallest value in a row is replaced by the k-th of
        ``x.size(1)`` values picked at evenly spaced positions from the
        sorted reference values.

        Args:
            x: Tensor indexed as ``x[i, :]`` (presumably samples by
                features; confirm against the caller).

        Returns:
            The normalized tensor restricted to the columns listed in
            ``self.preprocess_dependencies[0]``.
        """
        # Sort the reference values on the same device and dtype as x
        sorted_gold_standard = torch.sort(torch.tensor(self.reference_values, device=x.device, dtype=x.dtype))[0]

        # Pre-compute the quantile indices: evenly spaced positions in the
        # sorted reference, truncated to integers, one per column of x
        quantile_indices = torch.linspace(0, len(sorted_gold_standard) - 1, steps=x.size(1)).long()

        # Prepare a tensor to hold normalized data
        normalized_data = torch.empty_like(x, device=x.device, dtype=x.dtype)

        for i in range(x.size(0)):
            # Ascending order of row i; writing through it gives each entry
            # the reference value that matches its rank
            sorted_indices = torch.argsort(x[i, :])
            normalized_data[i, sorted_indices] = sorted_gold_standard[quantile_indices]

        # Return only the subset from x that is used in the base model
        return normalized_data[:, self.preprocess_dependencies[0]]

    def postprocess(self, x):
        """Return ``x`` unchanged; no postprocessing is applied."""
        return x
[3]:
# Start from an empty clock object; the cells below fill in its attributes.
model = pya.models.DunedinPACE()
Define clock metadata#
[4]:
# Descriptive metadata for the clock, written in one pass.
# Keys already present in model.metadata but not listed here are left untouched.
model.metadata.update({
    "clock_name": 'dunedinpace',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2022,
    "approved_by_author": '✅',
    "citation": "Belsky, Daniel W., et al. \"DunedinPACE, a DNA methylation biomarker of the pace of aging.\" Elife 11 (2022): e73420.",
    "doi": "https://doi.org/10.7554/eLife.73420",
    "research_only": True,
    "notes": "This model is for research purposes only. Commercial users should contact exclusive DunedinPACE licensee TruDiagnosticTM. The automatic failure if fewer than 80% of the CpG probes are available is not implemented and left to the user's discretion.",
})
Download clock dependencies#
Download from R package#
[5]:
%%writefile download.r
# Export the DunedinPACE model tables from the R package to DunedinPACE.json.
options(repos = c(CRAN = "https://cloud.r-project.org/"))
library(jsonlite)
# devtools is used only to install the DunedinPACE package from GitHub.
install.packages("devtools")
devtools::install_github("danbelsky/DunedinPACE", build_vignettes = FALSE)
library(DunedinPACE)
# Concatenate the model components and wrap them in a one-element list.
# The Python side reads element [0] and looks entries up by the keys
# 'DunedinPACE', 'DunedinPACE.1', 'DunedinPACE.2', 'DunedinPACE.4' and
# 'DunedinPACE.5', so the order of the components below matters.
PACE_list = list(c(
mPACE_Models$model_names,
mPACE_Models$gold_standard_probes,
mPACE_Models$model_weights,
mPACE_Models$model_intercept,
mPACE_Models$model_means,
mPACE_Models$model_probes,
mPACE_Models$gold_standard_means
))
# digits = 12 is passed to write_json to control how numbers are written.
write_json(PACE_list, "DunedinPACE.json", digits = 12)
Writing download.r
[6]:
# Run the R export script; the value echoed below is os.system's return value.
os.system("Rscript download.r")
[6]:
0
Load features#
From JSON file#
[7]:
# The R script wraps everything in a one-element list, hence the [0].
# JSON is UTF-8 by specification, so do not rely on the locale's default encoding.
with open('DunedinPACE.json', 'r', encoding='utf-8') as f:
    PACE_list = json.load(f)[0]

# Full probe list consumed by preprocess, and the subset the base model uses.
model.features = PACE_list['DunedinPACE']
model.base_model_features = PACE_list['DunedinPACE.4']
Load weights into base model#
[8]:
# Coefficients arrive as a flat list; add a leading axis so they form a
# single-row weight matrix.
coefficient_values = PACE_list['DunedinPACE.1']
weights = torch.tensor(coefficient_values).float().unsqueeze(0)

# Only the first entry of the intercept record is used.
intercept_value = PACE_list['DunedinPACE.2'][0]
intercept = torch.tensor([intercept_value]).float()
Linear model#
[9]:
# Size the layer from the base-model probes (the ones the weights refer to),
# not from the full feature list: preprocess already reduces the input to
# that subset, and sizing from model.features left the layer reporting an
# in_features that disagreed with the weight actually installed.
base_model = pya.models.LinearModel(input_dim=len(model.base_model_features))

# Overwrite the randomly initialised parameters with the published ones.
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()

model.base_model = base_model
Load reference values#
[10]:
# Values that preprocess sorts and uses as the quantile-normalization target.
model.reference_values = PACE_list['DunedinPACE.5']
Load preprocess and postprocess objects#
[11]:
# Position, within the full feature list, of every probe the base model uses.
indices = list(map(model.features.index, model.base_model_features))

model.preprocess_dependencies = [indices]
model.preprocess_name = 'quantile_normalization_with_gold_standard'
[12]:
# This clock has no postprocessing step, so both fields stay empty.
model.postprocess_name = model.postprocess_dependencies = None
Check all clock parameters#
[13]:
# Print the assembled model's attributes, structure and weights for a visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '✅',
'citation': 'Belsky, Daniel W., et al. "DunedinPACE, a DNA methylation '
'biomarker of the pace of aging." Elife 11 (2022): e73420.',
'clock_name': 'dunedinpace',
'data_type': 'methylation',
'doi': 'https://doi.org/10.7554/eLife.73420',
'notes': 'This model is for research purposes only. Commercial users should '
'contact exclusive DunedinPACE licensee TruDiagnosticTM. The '
'automatic failure if fewer than 80% of the CpG probes are available '
"is not implemented and left to the user's discretion.",
'research_only': True,
'species': 'Homo sapiens',
'version': None,
'year': 2022}
reference_values: [0.8499678448731, 0.7897610893879, 0.9717597609504, 0.8480138104804, 0.2475593905826, 0.1097207575569, 0.189885225724, 0.3909313843346, 0.9168688056148, 0.2708397202163, 0.8736696325841, 0.5147248840362, 0.9619479966112, 0.2575877641187, 0.9432588276732, 0.8989191043041, 0.580889869872, 0.1242693996089, 0.8932207211244, 0.6638228904263, 0.09830894494804, 0.8640316497494, 0.08535902315207, 0.07942500721274, 0.7473024928443, 0.8737263989611, 0.7534216033511, 0.09034449086512, 0.109486844368, 0.6611683137784]... [Total elements: 20000]
preprocess_name: 'quantile_normalization_with_gold_standard'
preprocess_dependencies: [[0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
101,
102,
103,
104,
105,
106,
107,
108,
109,
110,
111,
112,
113,
114,
115,
116,
117,
118,
119,
120,
121,
122,
123,
124,
125,
126,
127,
128,
129,
130,
131,
132,
133,
134,
135,
136,
137,
138,
139,
140,
141,
142,
143,
144,
145,
146,
147,
148,
149,
150,
151,
152,
153,
154,
155,
156,
157,
158,
159,
160,
161,
162,
163,
164,
165,
166,
167,
168,
169,
170,
171,
172]]
postprocess_name: None
postprocess_dependencies: None
features: ['cg00112187', 'cg00151250', 'cg00359421', 'cg00513564', 'cg00532802', 'cg00574958', 'cg00668559', 'cg00782811', 'cg00835193', 'cg01055871', 'cg01101459', 'cg01360413', 'cg01554316', 'cg01936220', 'cg02004723', 'cg02079413', 'cg02229095', 'cg02300147', 'cg02307277', 'cg02571857', 'cg02650017', 'cg02949067', 'cg02997983', 'cg03604011', 'cg03776935', 'cg03810769', 'cg03868770', 'cg04051458', 'cg04105250', 'cg04305539']... [Total elements: 20000]
base_model_features: ['cg00112187', 'cg00151250', 'cg00359421', 'cg00513564', 'cg00532802', 'cg00574958', 'cg00668559', 'cg00782811', 'cg00835193', 'cg01055871', 'cg01101459', 'cg01360413', 'cg01554316', 'cg01936220', 'cg02004723', 'cg02079413', 'cg02229095', 'cg02300147', 'cg02307277', 'cg02571857', 'cg02650017', 'cg02949067', 'cg02997983', 'cg03604011', 'cg03776935', 'cg03810769', 'cg03868770', 'cg04051458', 'cg04105250', 'cg04305539']... [Total elements: 173]
%==================================== Model Details ====================================%
Model Structure:
base_model: LinearModel(
(linear): Linear(in_features=20000, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.linear.weight: [-0.13975931704044342, 0.1523076742887497, 0.1058167889714241, -0.12104399502277374, 0.04729907587170601, -0.2645362913608551, -0.09107177704572678, 0.07626617699861526, -0.05811680853366852, -0.23175522685050964, 0.06270736455917358, 0.015027794055640697, 0.5146545171737671, -0.03559967875480652, 0.009041309356689453, 0.20905275642871857, 0.02438066340982914, 0.14796297252178192, 0.03653242066502571, 0.09093873202800751, -0.5942692160606384, -0.14602923393249512, 0.028370223939418793, 0.12909314036369324, 0.15818408131599426, 0.026989631354808807, -0.038831036537885666, 0.16809432208538055, 0.11093547195196152, -0.012085522525012493]... [Tensor of shape torch.Size([1, 173])]
base_model.linear.bias: tensor([-1.9499])
%==================================== Model Details ====================================%
Basic test#
[14]:
# Smoke test on random data with a fixed seed so the output is reproducible.
torch.manual_seed(42)
# Named `x` rather than `input` so the builtin input() is not shadowed for
# the rest of the session.
x = torch.randn(10, len(model.features), dtype=float)
model.eval()
model.to(float)
pred = model(x)
pred
[14]:
tensor([[ 0.2938],
[ 0.8014],
[-0.0384],
[-0.2034],
[-0.1848],
[ 0.5177],
[-0.1668],
[ 0.3322],
[-0.8489],
[ 0.7232]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[15]:
# Save the whole model object (not just a state dict), named after the clock.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[16]:
# Function to remove a folder and all its contents
def remove_folder(path):
    """Delete the directory tree at ``path``, reporting the outcome on stdout.

    Failures are reported rather than raised so that one undeletable folder
    does not stop the rest of the clean-up.

    Args:
        path: Directory to remove.

    Returns:
        None.
    """
    try:
        shutil.rmtree(path)
        print(f"Deleted folder: {path}")
    except OSError as e:
        # Filesystem failures from rmtree are OSError subclasses; anything
        # else is a programming error and should surface
        print(f"Error deleting folder {path}: {e}")
# Snapshot the working directory once, before anything is removed
all_items = os.listdir('.')

for item in all_items:
    # Folders are removed wholesale, whatever they contain
    if os.path.isdir(item):
        remove_folder(item)
        continue

    # Notebooks are the only files that survive the clean-up
    if os.path.isfile(item) and not item.endswith('.ipynb'):
        os.remove(item)
        print(f"Deleted file: {item}")
Deleted file: coefficients.csv
Deleted file: coefficients.xlsx
Deleted file: download.r
Deleted file: DunedinPACE.json