PCHannum#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the complete source code of ``cls``.

    The text is obtained with ``inspect.getsource`` and written to standard
    output; nothing is returned.
    """
    print(inspect.getsource(cls))
# Show the source of the clock class that is instantiated in the next cell.
print_entire_class(pya.models.PCHannum)
class PCHannum(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[3]:
# Create the PCHannum clock object; the following cells fill in its metadata,
# features, base model and reference values.
model = pya.models.PCHannum()
Define clock metadata#
[4]:
# Descriptive metadata for the clock. Only the keys listed here are written;
# any other entry already present in ``model.metadata`` is left untouched.
model.metadata.update(
    {
        "clock_name": 'pchannum',
        "data_type": 'methylation',
        "species": 'Homo sapiens',
        "year": 2022,
        "approved_by_author": '⌛',
        "citation": "Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661.",
        "doi": "https://doi.org/10.1038/s43587-022-00248-2",
        "research_only": None,
        "notes": None,
    }
)
Download clock dependencies#
[5]:
# Source of the PCClock RData file:
# https://yale.app.box.com/s/kq0b0a7lxckxjvaz7x5n4keaug7tewry
# The download below fetches the copy hosted at the pyaging S3 URL instead.
logger = pya.logger.Logger()
url = "https://pyaging.s3.amazonaws.com/supporting_files/CalcAllPCClocks.RData"
# Target directory for the download; named so that the built-in dir() is not
# shadowed for the rest of the notebook session.
download_dir = "."
pya.utils.download(url, download_dir, logger, indent_level=1)
|-----------> Downloading data to ./CalcAllPCClocks.RData
|-----------> in progress: 100.0000%
Download from R package#
[6]:
%%writefile download.r
# Export the PCHannum-related objects stored in CalcAllPCClocks.RData as JSON
# files so that the Python cells of this notebook can read them.
library(dplyr)
library(tibble)
library(tidyr)
# jsonlite provides write_json(), used for all three exports below.
library(jsonlite)
# Restore the objects saved in the .RData file into the workspace.
load(file = "CalcAllPCClocks.RData")
# List everything that is now in the workspace (including dot-prefixed names).
print(ls(all.names = TRUE))
# Clock parameters; read back in Python through the keys 'model', 'intercept',
# 'center' and 'rotation'. digits = 10 is passed to control numeric precision.
write_json(CalcPCHannum, "CalcPCHannum.json", digits = 10)
# CpG identifiers; loaded in Python as the model's feature list.
write_json(CpGs, "PCHannumCpGs.json")
# Values loaded in Python as the model's reference values.
write_json(imputeMissingCpGs, "PCHannumReferenceCpGBetas.json", digits = 10)
Writing download.r
[7]:
# Run the R export script. check=True raises CalledProcessError when Rscript
# exits with a non-zero status, so a failed export cannot go unnoticed and
# leave the following cells reading missing or stale JSON files. The return
# code is the cell's value, so a successful run still displays 0.
import subprocess

subprocess.run(["Rscript", "download.r"], check=True).returncode
[7]:
0
Load features#
From JSON file#
[8]:
# Read the exported CpG identifiers and use them as the model's feature list.
with open('PCHannumCpGs.json', 'r') as cpg_file:
    cpg_names = json.load(cpg_file)
model.features = cpg_names
Load weights into base model#
From JSON file#
[9]:
# Read the exported PCHannum parameters.
with open('CalcPCHannum.json', 'r') as params_file:
    weights_dict = json.load(params_file)

# Convert every entry to a float32 tensor. The coefficient vector additionally
# gets a leading dimension so that it has the (1, n) layout of a linear
# layer's weight.
weights = torch.tensor(weights_dict['model'], dtype=torch.float32).unsqueeze(0)
intercept = torch.tensor(weights_dict['intercept'], dtype=torch.float32)
center = torch.tensor(weights_dict['center'], dtype=torch.float32)
rotation = torch.tensor(weights_dict['rotation'], dtype=torch.float32)
PC linear model#
[10]:
# Dimensions of the PC linear model: one input per feature, and as many
# principal components as the second dimension of the rotation matrix.
n_features = len(model.features)
n_components = rotation.shape[1]
base_model = pya.models.PCLinearModel(input_dim=n_features, pc_dim=n_components)

# Replace the data of each model tensor with the matching tensor loaded from
# the JSON export.
for target, loaded in (
    (base_model.center, center),
    (base_model.rotation, rotation),
    (base_model.linear.weight, weights),
    (base_model.linear.bias, intercept),
):
    target.data = loaded.float()

model.base_model = base_model
Load reference values#
From JSON file#
[11]:
# Read the exported reference values and attach them to the model.
reference_path = 'PCHannumReferenceCpGBetas.json'
with open(reference_path, 'r') as betas_file:
    reference_feature_values = json.load(betas_file)
model.reference_values = reference_feature_values
Load preprocess and postprocess objects#
[12]:
# No preprocessing step is configured: both attributes are set to None.
model.preprocess_name = model.preprocess_dependencies = None
[13]:
# No postprocessing step is configured: both attributes are set to None.
model.postprocess_name = model.postprocess_dependencies = None
Check all clock parameters#
[14]:
# Print the model's attributes, structure and parameters for a visual check
# before it is saved.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'Higgins-Chen, Albert T., et al. "A computational solution for '
'bolstering reliability of epigenetic clocks: Implications for '
'clinical trials and longitudinal tracking." Nature aging 2.7 '
'(2022): 644-661.',
'clock_name': 'pchannum',
'data_type': 'methylation',
'doi': 'https://doi.org/10.1038/s43587-022-00248-2',
'notes': None,
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2022}
reference_values: [0.82635363384, 0.18898814441, 0.72938889209, 0.8680421375, 0.090353927561, 0.0066895021761, 0.48924643338, 0.87262052546, 0.87955373232, 0.04847264273, 0.0093070979947, 0.16393676218, 0.058440936082, 0.18857484916, 0.58239394253, 0.86564960457, 0.58457176982, 0.82903550669, 0.065646928047, 0.8500055061, 0.79155429878, 0.83499889314, 0.7754384128, 0.0039641831799, 0.50570339787, 0.60547040884, 0.29093154314, 0.88154845595, 0.46844171936, 0.79205361021]... [Total elements: 78464]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg00000292', 'cg00000714', 'cg00001099', 'cg00001446', 'cg00001747', 'cg00002116', 'cg00002224', 'cg00002426', 'cg00002646', 'cg00002660', 'cg00002719', 'cg00002810', 'cg00003091', 'cg00003287', 'cg00003345', 'cg00003529', 'cg00003578', 'cg00003625', 'cg00003994', 'cg00004429', 'cg00004608', 'cg00004806', 'cg00005072', 'cg00005306', 'cg00005619', 'cg00005849', 'cg00006081', 'cg00006459', 'cg00007076', 'cg00007221']... [Total elements: 78464]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: PCLinearModel(
(linear): Linear(in_features=389, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.center: [0.8263536095619202, 0.18898814916610718, 0.7293888926506042, 0.8680421113967896, 0.09035392850637436, 0.006689502391964197, 0.4892464280128479, 0.8726205229759216, 0.8795537352561951, 0.04847264289855957, 0.009307097643613815, 0.1639367640018463, 0.058440934866666794, 0.18857485055923462, 0.5823939442634583, 0.8656495809555054, 0.5845717787742615, 0.8290355205535889, 0.0656469315290451, 0.8500055074691772, 0.7915542721748352, 0.8349989056587219, 0.7754384279251099, 0.003964183386415243, 0.5057033896446228, 0.6054704189300537, 0.2909315526485443, 0.8815484642982483, 0.4684417247772217, 0.7920536398887634]... [Tensor of shape torch.Size([78464])]
base_model.rotation: [0.0013195723295211792, 0.00362178822979331, 0.0006210950668901205, 2.146821134374477e-05, -0.0026210916694253683, 0.0028544235974550247, 0.001789670903235674, -0.0011461229296401143, -0.003994912840425968, 0.004412394482642412, -0.0026953844353556633, 0.0014265442732721567, 0.001315636676736176, 0.0006052751559764147, -0.0011475264327600598, 0.003465775866061449, 0.00010673332872102037, -0.0034611125010997057, 0.0010321630397811532, 0.0017559151165187359, -0.0002594847755972296, 0.0030858758836984634, 0.00018000522686634213, -0.0009758774540387094, -0.0028044437058269978, -0.00242756400257349, -0.002741057425737381, -0.0020431955344974995, 0.001444243360310793, 0.0007104799733497202]... [Tensor of shape torch.Size([78464, 389])]
base_model.linear.weight: [-0.4314156770706177, -2.0623953342437744, 2.2526729106903076, 1.896068811416626, -4.193276882171631, -0.8024684190750122, 0.28980740904808044, 0.25557079911231995, -0.9447597861289978, 0.44644805788993835, 0.20270460844039917, 1.5658382177352905, 0.21148376166820526, -2.692491292953491, -0.9573062658309937, 2.2857649326324463, 0.2877136170864105, 2.2357680797576904, -3.9486842155456543, -1.3017735481262207, 0.15476754307746887, 0.004006503149867058, -1.643073558807373, 0.12215111404657364, 1.951690435409546, -0.49129924178123474, -1.257272720336914, -0.20982636511325836, 1.2917195558547974, -0.34574949741363525]... [Tensor of shape torch.Size([1, 389])]
base_model.linear.bias: tensor([68.7833])
%==================================== Model Details ====================================%
Basic test#
[15]:
# Smoke test: run the assembled clock on seeded random input in double
# precision and display the predictions.
torch.manual_seed(42)
# Named test_input so that the built-in input() is not shadowed; the dtype is
# spelled out as torch.float64, which is what dtype=float resolved to.
test_input = torch.randn(10, len(model.features), dtype=torch.float64)
model.eval()
# Cast the model to the same precision as the test input.
model.to(torch.float64)
pred = model(test_input)
pred
[15]:
tensor([[ 8.3248],
[ 14.5854],
[ 12.5163],
[-22.3059],
[ 17.3593],
[ -7.7755],
[ 2.7895],
[ 7.9108],
[ 9.5717],
[ 1.5685]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[16]:
# Serialize the whole model object (not only a state dict) to
# ../weights/<clock_name>.pt.
# NOTE(review): presumably the ../weights directory must already exist — confirm.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[17]:
def remove_folder(path):
    """Delete the folder at ``path`` together with everything inside it.

    The outcome is reported on standard output: a confirmation on success,
    or the error text if the deletion raised. Errors are reported, not
    re-raised, so the caller can carry on with its remaining items.
    """
    try:
        shutil.rmtree(path)
        outcome = f"Deleted folder: {path}"
    except Exception as err:
        outcome = f"Error deleting folder {path}: {err}"
    print(outcome)
# Clean the working directory: every sub-folder is removed, and every regular
# file is deleted unless its name ends with .ipynb.
for entry in os.listdir('.'):
    if os.path.isdir(entry):
        remove_folder(entry)
        continue
    # Keep notebooks and skip anything that is not a regular file.
    if not os.path.isfile(entry) or entry.endswith('.ipynb'):
        continue
    os.remove(entry)
    print(f"Deleted file: {entry}")
Deleted file: PCHannumReferenceCpGBetas.json
Deleted file: CalcAllPCClocks.RData
Deleted file: PCHannumCpGs.json
Deleted file: download.r
Deleted file: CalcPCHannum.json