PCHorvath2013#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the complete source code of ``cls`` to standard output.

    The text is retrieved with ``inspect.getsource`` and printed as-is.
    """
    print(inspect.getsource(cls))
# Show the source of the clock class so its preprocess/postprocess steps can be inspected.
print_entire_class(pya.models.PCHorvath2013)
class PCHorvath2013(pyagingModel):
    """PCHorvath2013 clock: identity preprocessing, anti-log linear postprocessing."""

    def __init__(self):
        super().__init__()

    def preprocess(self, x):
        """Return ``x`` unchanged; no preprocessing is applied."""
        return x

    def postprocess(self, x):
        """
        Apply the anti-logarithmic linear transformation element-wise.

        With ``adult_age = 20``:
        - negative entries become ``(1 + adult_age) * exp(x) - 1``
        - non-negative entries become ``(1 + adult_age) * x + adult_age``
        """
        adult_age = 20
        scale = 1 + adult_age

        # Split the entries by sign; each group uses its own formula
        is_negative = x < 0
        is_non_negative = ~is_negative

        # Output buffer with the same shape and dtype as the input
        ages = torch.empty_like(x)

        # Exponential branch (x < 0)
        ages[is_negative] = scale * torch.exp(x[is_negative]) - 1

        # Linear branch (x >= 0)
        ages[is_non_negative] = scale * x[is_non_negative] + adult_age

        return ages
[3]:
# Create the clock object that the following cells fill with metadata, features and weights.
model = pya.models.PCHorvath2013()
Define clock metadata#
[4]:
# Record the descriptive metadata for this clock in a single update.
model.metadata.update({
    "clock_name": 'pchorvath2013',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2022,
    "approved_by_author": '⌛',
    "citation": (
        'Higgins-Chen, Albert T., et al. "A computational solution for '
        'bolstering reliability of epigenetic clocks: Implications for '
        'clinical trials and longitudinal tracking." Nature aging 2.7 '
        '(2022): 644-661.'
    ),
    "doi": "https://doi.org/10.1038/s43587-022-00248-2",
    "research_only": None,
    "notes": None,
})
Download clock dependencies#
[5]:
# PCClock RData file; source listed as
# https://yale.app.box.com/s/kq0b0a7lxckxjvaz7x5n4keaug7tewry
# The copy used by this notebook is downloaded from the S3 URL below.
logger = pya.logger.Logger()
url = "https://pyaging.s3.amazonaws.com/supporting_files/CalcAllPCClocks.RData"
# Named download_dir (not dir) so the builtin dir() stays usable in this session.
download_dir = "."
pya.utils.download(url, download_dir, logger, indent_level=1)
|-----------> Downloading data to ./CalcAllPCClocks.RData
|-----------> in progress: 100.0000%
Download from R package#
[6]:
%%writefile download.r
# Export the PCHorvath1 objects stored in CalcAllPCClocks.RData as JSON files
# so they can be read from Python.
library(dplyr)
library(tibble)
library(tidyr)
library(jsonlite)
# Load the objects saved in the RData file into the R session.
load(file = "CalcAllPCClocks.RData")
# Print the names of all loaded objects (hidden ones included) for inspection.
print(ls(all.names = TRUE))
# Clock object; the Python side reads its model, intercept, center and rotation entries.
write_json(CalcPCHorvath1, "CalcPCHorvath1.json", digits = 10)
# CpG identifiers, later used as the model's feature list.
write_json(CpGs, "PCHorvath1CpGs.json")
# Per-CpG values, later stored as the model's reference values.
write_json(imputeMissingCpGs, "PCHorvath1ReferenceCpGBetas.json", digits = 10)
Writing download.r
[7]:
# Run the R script written above. os.system returns the command's exit status,
# which is only displayed as the cell result and is not checked.
os.system("Rscript download.r")
[7]:
0
Load features#
From JSON file#
[8]:
# The CpG identifiers exported by the R script become the clock's feature list.
with open('PCHorvath1CpGs.json', 'r') as cpg_file:
    model.features = json.load(cpg_file)
Load weights into base model#
From JSON file#
[9]:
# Read the exported clock object and convert each component to a float32 tensor.
with open('CalcPCHorvath1.json', 'r') as weights_file:
    weights_dict = json.load(weights_file)

intercept = torch.tensor(weights_dict['intercept']).float()
center = torch.tensor(weights_dict['center']).float()
rotation = torch.tensor(weights_dict['rotation']).float()
# unsqueeze(0) adds a leading dimension so the coefficients form a one-row weight matrix.
weights = torch.tensor(weights_dict['model']).float().unsqueeze(0)
PC linear model#
[10]:
# Size the PC linear model from the feature count and the number of rotation columns.
base_model = pya.models.PCLinearModel(
    input_dim=len(model.features),
    pc_dim=rotation.shape[1],
)

# Replace the model's parameter data with the tensors loaded from JSON.
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()
base_model.center.data = center.float()
base_model.rotation.data = rotation.float()

# Attach the configured network to the clock.
model.base_model = base_model
Load reference values#
From JSON file#
[11]:
# Read the per-CpG reference values exported by the R script and store them on the clock.
with open('PCHorvath1ReferenceCpGBetas.json', 'r') as reference_file:
    reference_feature_values = json.load(reference_file)

model.reference_values = reference_feature_values
Load preprocess and postprocess objects#
[12]:
# No preprocessing step is registered for this clock, so both fields are None.
model.preprocess_name = model.preprocess_dependencies = None
[13]:
# Register the anti-log linear postprocessing step by name; it has no dependencies.
model.postprocess_name = 'anti_log_linear'
model.postprocess_dependencies = None
Check all clock parameters#
[14]:
# Print the clock's attributes, structure and parameters for a final visual check.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'Higgins-Chen, Albert T., et al. "A computational solution for '
'bolstering reliability of epigenetic clocks: Implications for '
'clinical trials and longitudinal tracking." Nature aging 2.7 '
'(2022): 644-661.',
'clock_name': 'pchorvath2013',
'data_type': 'methylation',
'doi': 'https://doi.org/10.1038/s43587-022-00248-2',
'notes': None,
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2022}
reference_values: [0.82635363384, 0.18898814441, 0.72938889209, 0.8680421375, 0.090353927561, 0.0066895021761, 0.48924643338, 0.87262052546, 0.87955373232, 0.04847264273, 0.0093070979947, 0.16393676218, 0.058440936082, 0.18857484916, 0.58239394253, 0.86564960457, 0.58457176982, 0.82903550669, 0.065646928047, 0.8500055061, 0.79155429878, 0.83499889314, 0.7754384128, 0.0039641831799, 0.50570339787, 0.60547040884, 0.29093154314, 0.88154845595, 0.46844171936, 0.79205361021]... [Total elements: 78464]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: 'anti_log_linear'
postprocess_dependencies: None
features: ['cg00000292', 'cg00000714', 'cg00001099', 'cg00001446', 'cg00001747', 'cg00002116', 'cg00002224', 'cg00002426', 'cg00002646', 'cg00002660', 'cg00002719', 'cg00002810', 'cg00003091', 'cg00003287', 'cg00003345', 'cg00003529', 'cg00003578', 'cg00003625', 'cg00003994', 'cg00004429', 'cg00004608', 'cg00004806', 'cg00005072', 'cg00005306', 'cg00005619', 'cg00005849', 'cg00006081', 'cg00006459', 'cg00007076', 'cg00007221']... [Total elements: 78464]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: PCLinearModel(
(linear): Linear(in_features=120, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.center: [0.7290785312652588, 0.16658970713615417, 0.7259728312492371, 0.8467999696731567, 0.07713332772254944, 0.020072568207979202, 0.35924986004829407, 0.6348602175712585, 0.8571182489395142, 0.07178357243537903, 0.046470556408166885, 0.13662011921405792, 0.07353769242763519, 0.21539008617401123, 0.5204617977142334, 0.8018048405647278, 0.5394126176834106, 0.629653811454773, 0.08743196725845337, 0.8038381934165955, 0.638761579990387, 0.823479950428009, 0.7942575216293335, 0.02160545252263546, 0.6240652799606323, 0.6269707083702087, 0.2557828724384308, 0.810107409954071, 0.42385315895080566, 0.6233302354812622]... [Tensor of shape torch.Size([78464])]
base_model.rotation: [-0.0048322658985853195, 0.0016888284590095282, 0.0043016355484724045, 0.0006329840398393571, -0.0018616552697494626, 0.00015872654330451041, 0.0047627720050513744, -0.0007954642060212791, 0.001977873034775257, -0.0038264873437583447, 0.002861293265596032, -0.0020775371231138706, 0.0016555585898458958, 0.006291448138654232, 0.002375122159719467, 0.013304566964507103, -0.00033986676135100424, -0.0005006726132705808, 0.002878241939470172, -0.004465107340365648, -0.0033801733516156673, 0.00140372384339571, 0.0010448938701301813, -0.006161581724882126, -0.0026484185364097357, -0.0017679710872471333, -0.0001990400196518749, 0.0012213967274874449, 0.0035699992440640926, -0.0028738761320710182]... [Tensor of shape torch.Size([78464, 120])]
base_model.linear.weight: [0.0033615094143897295, 0.005445790942758322, -0.0690847635269165, 0.04485338553786278, -0.03673980012536049, 0.02644198387861252, -0.21757060289382935, 0.12505319714546204, -0.007363998331129551, -0.0007567511056549847, 0.0277174673974514, 0.01490762084722519, 0.05097680911421776, 0.02121218666434288, 0.030744116753339767, -0.000863946508616209, 0.0261214692145586, 0.008246997371315956, -0.01691156066954136, 0.12098846584558487, -0.16675393283367157, -0.032332953065633774, 0.04423817619681358, -0.10392844676971436, 0.024131860584020615, 0.04625745117664337, -0.019460106268525124, 0.02763879857957363, 0.027718640863895416, -0.08220915496349335]... [Tensor of shape torch.Size([1, 120])]
base_model.linear.bias: tensor([1.1583])
%==================================== Model Details ====================================%
Basic test#
[15]:
# Smoke test: run the assembled clock on seeded random inputs.
torch.manual_seed(42)  # fixed seed so the random inputs are reproducible
# Named test_input (not input) so the builtin input() is not shadowed.
# torch.float64 is the dtype that a Python `float` maps to in torch.
test_input = torch.randn(10, len(model.features), dtype=torch.float64)
model.eval()
model.to(torch.float64)
pred = model(test_input)
pred
[15]:
tensor([[59.4351],
[58.9732],
[36.1313],
[11.8124],
[37.2571],
[42.6807],
[35.9014],
[45.7016],
[61.6516],
[59.3143]], dtype=torch.float64, grad_fn=<IndexPutBackward0>)
Save torch model#
[16]:
# Serialize the whole model object to ../weights/<clock_name>.pt.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[17]:
def remove_folder(path):
    """Delete the folder at ``path`` together with everything inside it.

    The removal is best-effort: a filesystem failure (``OSError``, e.g. a
    missing path or a permission problem) is reported on standard output
    instead of being raised. Other exceptions, such as a ``TypeError`` caused
    by passing a non-path argument, are allowed to propagate so that
    programming mistakes are not hidden.
    """
    try:
        shutil.rmtree(path)
        print(f"Deleted folder: {path}")
    except OSError as e:
        print(f"Error deleting folder {path}: {e}")
# Snapshot the contents of the working directory before deleting anything
all_items = os.listdir('.')

# Folders are removed recursively; regular files are removed unless they are notebooks
for entry in all_items:
    if os.path.isdir(entry):
        remove_folder(entry)
    elif os.path.isfile(entry) and not entry.endswith('.ipynb'):
        os.remove(entry)
        print(f"Deleted file: {entry}")
Deleted file: PCHorvath1CpGs.json
Deleted file: CalcPCHorvath1.json
Deleted file: CalcAllPCClocks.RData
Deleted file: download.r
Deleted file: PCHorvath1ReferenceCpGBetas.json