PCDNAmTL#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the source code of ``cls`` as returned by ``inspect.getsource``."""
    print(inspect.getsource(cls))


# Show the implementation of the clock class used in this notebook.
print_entire_class(pya.models.PCDNAmTL)
class PCDNAmTL(pyagingModel):
    # Clock class whose preprocess/postprocess hooks pass their input through unchanged.
    def __init__(self):
        super().__init__()

    def preprocess(self, x):
        # No preprocessing: the input is returned as-is.
        return x

    def postprocess(self, x):
        # No postprocessing: the input is returned as-is.
        return x
[3]:
# Create the clock object that the following cells fill with metadata, features and weights.
model = pya.models.PCDNAmTL()
Define clock metadata#
[4]:
# Descriptive metadata for the clock, copied entry by entry into model.metadata.
clock_metadata = {
    "clock_name": 'pcdnamtl',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2022,
    "approved_by_author": '⌛',
    "citation": "Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661.",
    "doi": "https://doi.org/10.1038/s43587-022-00248-2",
    "research_only": None,
    "notes": None,
}
for field, value in clock_metadata.items():
    model.metadata[field] = value
Download clock dependencies#
[5]:
# Source of the PC clocks RData file, as noted by the notebook author:
# https://yale.app.box.com/s/kq0b0a7lxckxjvaz7x5n4keaug7tewry
# The download below fetches CalcAllPCClocks.RData from the pyaging S3 URL.
logger = pya.logger.Logger()
url = "https://pyaging.s3.amazonaws.com/supporting_files/CalcAllPCClocks.RData"
# Named `download_dir` rather than `dir` so the builtin dir() is not shadowed
# for the rest of the notebook session.
download_dir = "."
pya.utils.download(url, download_dir, logger, indent_level=1)
|-----------> Downloading data to ./CalcAllPCClocks.RData
|-----------> in progress: 100.0000%
Download from R package#
[6]:
%%writefile download.r
# Export the PCDNAmTL objects stored in CalcAllPCClocks.RData as JSON files.
library(dplyr)
library(tibble)
library(tidyr)
library(jsonlite)  # provides write_json
# Restore the saved R objects into the session and print their names.
load(file = "CalcAllPCClocks.RData")
print(ls(all.names = TRUE))
# digits = 10 requests up to ten decimal digits for the numeric exports.
write_json(CalcPCDNAmTL, "CalcPCDNAmTL.json", digits = 10)
write_json(CpGs, "PCDNAmTLCpGs.json")
write_json(imputeMissingCpGs, "PCDNAmTLReferenceCpGBetas.json", digits = 10)
Writing download.r
[7]:
import subprocess

# Run the R export script. check=True raises CalledProcessError when Rscript
# exits with a non-zero status, so a failed export stops the notebook here
# instead of surfacing later as a missing or stale JSON file. The argument list
# avoids going through a shell. The exit code is kept as the cell's value.
subprocess.run(["Rscript", "download.r"], check=True).returncode
[7]:
0
Load features#
From JSON file#
[8]:
# Read the CpG identifiers exported by the R script and use them as model features.
with open('PCDNAmTLCpGs.json') as handle:
    cpg_names = json.loads(handle.read())
model.features = cpg_names
Load weights into base model#
From JSON file#
[9]:
# Load the exported clock definition and turn each component into a float tensor.
with open('CalcPCDNAmTL.json') as handle:
    weights_dict = json.loads(handle.read())

intercept, center, rotation = (
    torch.tensor(weights_dict[key]).float()
    for key in ('intercept', 'center', 'rotation')
)
# The coefficient vector gets a leading axis, as in the original unsqueeze(0).
weights = torch.tensor(weights_dict['model']).float().unsqueeze(0)
PC linear model#
[10]:
# Build the PC linear model and copy the loaded tensors into its parameters.
base_model = pya.models.PCLinearModel(input_dim=len(model.features), pc_dim=rotation.shape[1])
for parameter, values in (
    (base_model.center, center),
    (base_model.rotation, rotation),
    (base_model.linear.weight, weights),
    (base_model.linear.bias, intercept),
):
    parameter.data = values.float()

model.base_model = base_model
Load reference values#
From JSON file#
[11]:
# Read the reference CpG beta values exported by the R script.
with open('PCDNAmTLReferenceCpGBetas.json') as handle:
    reference_feature_values = json.loads(handle.read())
model.reference_values = reference_feature_values
Load preprocess and postprocess objects#
[12]:
# No preprocessing is configured for this clock: both fields are set to None.
model.preprocess_name = model.preprocess_dependencies = None
[13]:
# No postprocessing is configured for this clock: both fields are set to None.
model.postprocess_name = model.postprocess_dependencies = None
Check all clock parameters#
[14]:
# Print the model's attributes, structure and parameters for a visual sanity check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'Higgins-Chen, Albert T., et al. "A computational solution for '
'bolstering reliability of epigenetic clocks: Implications for '
'clinical trials and longitudinal tracking." Nature aging 2.7 '
'(2022): 644-661.',
'clock_name': 'pcdnamtl',
'data_type': 'methylation',
'doi': 'https://doi.org/10.1038/s43587-022-00248-2',
'notes': None,
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2022}
reference_values: [0.82635363384, 0.18898814441, 0.72938889209, 0.8680421375, 0.090353927561, 0.0066895021761, 0.48924643338, 0.87262052546, 0.87955373232, 0.04847264273, 0.0093070979947, 0.16393676218, 0.058440936082, 0.18857484916, 0.58239394253, 0.86564960457, 0.58457176982, 0.82903550669, 0.065646928047, 0.8500055061, 0.79155429878, 0.83499889314, 0.7754384128, 0.0039641831799, 0.50570339787, 0.60547040884, 0.29093154314, 0.88154845595, 0.46844171936, 0.79205361021]... [Total elements: 78464]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg00000292', 'cg00000714', 'cg00001099', 'cg00001446', 'cg00001747', 'cg00002116', 'cg00002224', 'cg00002426', 'cg00002646', 'cg00002660', 'cg00002719', 'cg00002810', 'cg00003091', 'cg00003287', 'cg00003345', 'cg00003529', 'cg00003578', 'cg00003625', 'cg00003994', 'cg00004429', 'cg00004608', 'cg00004806', 'cg00005072', 'cg00005306', 'cg00005619', 'cg00005849', 'cg00006081', 'cg00006459', 'cg00007076', 'cg00007221']... [Total elements: 78464]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: PCLinearModel(
(linear): Linear(in_features=598, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.center: [0.8052302598953247, 0.18017764389514923, 0.7316451072692871, 0.8424648642539978, 0.09816480427980423, 0.02946575917303562, 0.4719793498516083, 0.8522981405258179, 0.8586591482162476, 0.06355271488428116, 0.05588531866669655, 0.1419440656900406, 0.055927883833646774, 0.19770769774913788, 0.5807257890701294, 0.8332509398460388, 0.5806705355644226, 0.8277810215950012, 0.0776563510298729, 0.8331976532936096, 0.7652954459190369, 0.807980477809906, 0.7726719379425049, 0.03210648521780968, 0.49826666712760925, 0.5388586521148682, 0.26504385471343994, 0.8373278379440308, 0.44017356634140015, 0.7763169407844543]... [Tensor of shape torch.Size([78464])]
base_model.rotation: [0.002477406756952405, -0.0010729391360655427, 0.0022349627688527107, 0.004578224383294582, -0.002992750611156225, 0.0022738976404070854, -0.0005298839532770216, -0.00026715119020082057, -0.0002368289278820157, 0.0018220609053969383, 0.0006074461853131652, 0.0004887818940915167, -0.0017534642247483134, -0.0005582351004704833, 0.0015488245990127325, 0.0020646878983825445, 0.0037664822302758694, -0.00010059976921183988, -0.0015801334520801902, -0.002538461936637759, 0.0008639764855615795, 0.00029158469988033175, 0.00023785523080732673, -0.0018999595195055008, -0.0008729812107048929, 0.002466981764882803, -0.002185217570513487, 0.001142821041867137, 0.0006497130962088704, 0.001074211555533111]... [Tensor of shape torch.Size([78464, 598])]
base_model.linear.weight: [0.029300235211849213, -0.04609266296029091, 0.02450183965265751, 0.01583261601626873, 0.04848414659500122, -0.056315574795007706, -0.0009003547020256519, -0.024040857329964638, 0.019012007862329483, 0.0014710112009197474, -0.01780042052268982, 0.1034836396574974, 0.015561497770249844, 0.027088770642876625, -0.08989907801151276, 0.02160358428955078, -0.017954019829630852, 0.05003052577376366, -0.038420092314481735, 0.02055496722459793, 0.006391878705471754, -0.007852035574615002, -0.001051983330398798, 0.002894297242164612, 0.03238001838326454, -0.013252204284071922, -0.003943934570997953, -0.004391353111714125, -0.030485106632113457, 0.0013454875443130732]... [Tensor of shape torch.Size([1, 598])]
base_model.linear.bias: tensor([6.9730])
%==================================== Model Details ====================================%
Basic test#
[15]:
# Smoke test: run the assembled model on random input with a fixed seed so the
# printed predictions are reproducible.
torch.manual_seed(42)
# `sample_input` instead of `input`, which would shadow the builtin input()
# for the rest of the notebook session. torch.float64 is the explicit form of
# the Python `float` dtype used before.
sample_input = torch.randn(10, len(model.features), dtype=torch.float64)
model.eval()
model.to(torch.float64)
pred = model(sample_input)
pred
[15]:
tensor([[7.9391],
[7.8708],
[7.6138],
[7.8926],
[7.6140],
[7.8358],
[7.8324],
[7.7156],
[7.9497],
[7.9776]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[16]:
# Save the assembled model object to ../weights/<clock_name>.pt (clock_name is 'pcdnamtl' here).
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[17]:
def remove_folder(path):
    """Delete the folder at ``path`` together with everything inside it.

    The outcome is reported on stdout. Filesystem errors (for example a
    missing path or insufficient permissions) are caught and printed instead
    of raised, so a cleanup loop can continue with the remaining items.

    Args:
        path: Path of the folder to remove.
    """
    try:
        shutil.rmtree(path)
    except OSError as e:
        # rmtree reports filesystem failures as OSError subclasses; anything
        # else indicates a programming error and is allowed to propagate.
        print(f"Error deleting folder {path}: {e}")
    else:
        print(f"Deleted folder: {path}")
# Walk over everything in the current directory: folders are removed entirely,
# and regular files are deleted unless they are notebooks (*.ipynb).
for entry in os.listdir('.'):
    if os.path.isdir(entry):
        remove_folder(entry)
        continue
    if os.path.isfile(entry) and not entry.endswith('.ipynb'):
        os.remove(entry)
        print(f"Deleted file: {entry}")
Deleted file: PCDNAmTLReferenceCpGBetas.json
Deleted file: CalcPCDNAmTL.json
Deleted file: CalcAllPCClocks.RData
Deleted file: download.r
Deleted file: PCDNAmTLCpGs.json