PCPhenoAge#
Index#
Let’s first import some packages:
[1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
Instantiate model class#
[2]:
def print_entire_class(cls):
    """Print the complete source code of ``cls`` to standard output.

    The text is obtained from ``inspect.getsource``; nothing is returned.
    """
    print(inspect.getsource(cls))
# Display the source of the PCPhenoAge model class provided by pyaging.
print_entire_class(pya.models.PCPhenoAge)
class PCPhenoAge(pyagingModel):
def __init__(self):
super().__init__()
def preprocess(self, x):
return x
def postprocess(self, x):
return x
[3]:
# Create the clock model object that the following cells populate.
model = pya.models.PCPhenoAge()
Define clock metadata#
[4]:
# Record the descriptive metadata for this clock in a single update call.
model.metadata.update(
    {
        "clock_name": 'pcphenoage',
        "data_type": 'methylation',
        "species": 'Homo sapiens',
        "year": 2022,
        "approved_by_author": '⌛',
        "citation": "Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661.",
        "doi": "https://doi.org/10.1038/s43587-022-00248-2",
        "research_only": None,
        "notes": None,
    }
)
Download clock dependencies#
[5]:
# Original distribution link for the PCClock RData file:
# https://yale.app.box.com/s/kq0b0a7lxckxjvaz7x5n4keaug7tewry
# This cell fetches the file from the URL assigned below.
logger = pya.logger.Logger()
url = "https://pyaging.s3.amazonaws.com/supporting_files/CalcAllPCClocks.RData"
# Named `download_dir` rather than `dir` so the builtin dir() is not shadowed
# for the remainder of the session.
download_dir = "."
pya.utils.download(url, download_dir, logger, indent_level=1)
|-----------> Downloading data to ./CalcAllPCClocks.RData
|-----------> in progress: 100.0000%
Download from R package#
[6]:
%%writefile download.r
# Attach the packages used by this script; write_json below comes from jsonlite.
library(dplyr)
library(tibble)
library(tidyr)
library(jsonlite)
# Restore the objects saved in the downloaded RData file.
load(file = "CalcAllPCClocks.RData")
# List every object now available, including dot-prefixed names.
print(ls(all.names = TRUE))
# Export the three objects needed for this clock as JSON files.
# digits = 10 controls the numeric precision written for the numeric objects.
write_json(CalcPCPhenoAge, "CalcPCPhenoAge.json", digits = 10)
write_json(CpGs, "PCPhenoAgeCpGs.json")
write_json(imputeMissingCpGs, "PCPhenoAgeReferenceCpGBetas.json", digits = 10)
Writing download.r
[7]:
# Run the R export script. check=True raises CalledProcessError when Rscript
# exits with a non-zero status, so a failed export stops here instead of
# surfacing later as a missing JSON file. The argument list avoids going
# through a shell. The trailing .returncode keeps the cell's displayed value
# an integer exit status, as os.system produced.
import subprocess

subprocess.run(["Rscript", "download.r"], check=True).returncode
[7]:
0
Load features#
From JSON file#
[8]:
# Read the exported CpG list and store it as the model's feature names.
with open('PCPhenoAgeCpGs.json') as cpg_file:
    model.features = json.load(cpg_file)
Load weights into base model#
From JSON file#
[9]:
# Parse the exported clock definition.
with open('CalcPCPhenoAge.json') as weights_file:
    weights_dict = json.load(weights_file)

# Convert each component to a float tensor; the coefficient vector gains a
# leading dimension of size 1.
rotation = torch.tensor(weights_dict['rotation']).float()
center = torch.tensor(weights_dict['center']).float()
intercept = torch.tensor(weights_dict['intercept']).float()
weights = torch.tensor(weights_dict['model']).float().unsqueeze(0)
PC linear model#
[10]:
# Size the PC linear model from the feature list and the rotation matrix's
# second dimension.
base_model = pya.models.PCLinearModel(
    input_dim=len(model.features),
    pc_dim=rotation.shape[1],
)

# Overwrite each parameter's data with the corresponding loaded tensor.
for param, value in (
    (base_model.center, center),
    (base_model.rotation, rotation),
    (base_model.linear.weight, weights),
    (base_model.linear.bias, intercept),
):
    param.data = value.float()

model.base_model = base_model
Load reference values#
From JSON file#
[11]:
# Read the exported reference CpG values and attach them to the model.
with open('PCPhenoAgeReferenceCpGBetas.json') as reference_file:
    reference_feature_values = json.load(reference_file)
    model.reference_values = reference_feature_values
Load preprocess and postprocess objects#
[12]:
# No preprocessing step is configured: both fields are set to None.
model.preprocess_name = model.preprocess_dependencies = None
[13]:
# No postprocessing step is configured: both fields are set to None.
model.postprocess_name = model.postprocess_dependencies = None
Check all clock parameters#
[14]:
# Print the assembled model's attributes, structure and parameters for review.
pya.utils.print_model_details(model)
%==================================== Model Details ====================================%
Model Attributes:
training: True
metadata: {'approved_by_author': '⌛',
'citation': 'Higgins-Chen, Albert T., et al. "A computational solution for '
'bolstering reliability of epigenetic clocks: Implications for '
'clinical trials and longitudinal tracking." Nature aging 2.7 '
'(2022): 644-661.',
'clock_name': 'pcphenoage',
'data_type': 'methylation',
'doi': 'https://doi.org/10.1038/s43587-022-00248-2',
'notes': None,
'research_only': None,
'species': 'Homo sapiens',
'version': None,
'year': 2022}
reference_values: [0.82635363384, 0.18898814441, 0.72938889209, 0.8680421375, 0.090353927561, 0.0066895021761, 0.48924643338, 0.87262052546, 0.87955373232, 0.04847264273, 0.0093070979947, 0.16393676218, 0.058440936082, 0.18857484916, 0.58239394253, 0.86564960457, 0.58457176982, 0.82903550669, 0.065646928047, 0.8500055061, 0.79155429878, 0.83499889314, 0.7754384128, 0.0039641831799, 0.50570339787, 0.60547040884, 0.29093154314, 0.88154845595, 0.46844171936, 0.79205361021]... [Total elements: 78464]
preprocess_name: None
preprocess_dependencies: None
postprocess_name: None
postprocess_dependencies: None
features: ['cg00000292', 'cg00000714', 'cg00001099', 'cg00001446', 'cg00001747', 'cg00002116', 'cg00002224', 'cg00002426', 'cg00002646', 'cg00002660', 'cg00002719', 'cg00002810', 'cg00003091', 'cg00003287', 'cg00003345', 'cg00003529', 'cg00003578', 'cg00003625', 'cg00003994', 'cg00004429', 'cg00004608', 'cg00004806', 'cg00005072', 'cg00005306', 'cg00005619', 'cg00005849', 'cg00006081', 'cg00006459', 'cg00007076', 'cg00007221']... [Total elements: 78464]
base_model_features: None
%==================================== Model Details ====================================%
Model Structure:
base_model: PCLinearModel(
(linear): Linear(in_features=651, out_features=1, bias=True)
)
%==================================== Model Details ====================================%
Model Parameters and Weights:
base_model.center: [0.8411704301834106, 0.1685701608657837, 0.7557920217514038, 0.9095144271850586, 0.057498179376125336, 0.01194298267364502, 0.4872075021266937, 0.909848690032959, 0.8732143640518188, 0.03602154180407524, 0.061301641166210175, 0.12556762993335724, 0.03800154849886894, 0.16473431885242462, 0.5713522434234619, 0.8393409848213196, 0.5687119960784912, 0.859745442867279, 0.06355176866054535, 0.9047518372535706, 0.8930169343948364, 0.8788580298423767, 0.8006214499473572, 0.014365476556122303, 0.4905247688293457, 0.5528188347816467, 0.27434754371643066, 0.867400050163269, 0.4297264516353607, 0.8199979066848755]... [Tensor of shape torch.Size([78464])]
base_model.rotation: [-0.00031331178615801036, 0.0009534272830933332, 0.00047261983854696155, -0.003930294886231422, 0.0005094414227642119, 0.0006895905244164169, -0.002100678626447916, -0.0012287608115002513, -0.00193792674690485, -0.006593621335923672, 0.00211642705835402, 0.0008730596164241433, 0.005285773426294327, 0.004182749427855015, -0.0009547793306410313, -0.002365315333008766, 0.0030447160825133324, -0.0016476826276630163, 0.0010397256119176745, 0.00157217460218817, -0.0012130382237955928, 4.213314241496846e-05, -0.001991128083318472, 0.0008436249918304384, 0.0022273382637649775, 4.255296153132804e-05, 0.002220186637714505, 0.0004465262754820287, -0.0006061694002710283, -0.0029561042319983244]... [Tensor of shape torch.Size([78464, 651])]
base_model.linear.weight: [0.5824476480484009, -0.5351578593254089, -0.5155732035636902, 2.113274335861206, 2.877065658569336, 0.25946056842803955, -0.9349619150161743, -3.642695188522339, -0.8174840807914734, 0.7983757853507996, 4.807836055755615, -1.8483978509902954, 0.1970488429069519, -0.08622869849205017, -0.17043375968933105, -1.4405218362808228, -0.4617669880390167, 0.22850117087364197, 2.2766926288604736, 2.9017574787139893, 1.6008095741271973, -0.8295918107032776, -0.13391417264938354, -2.9964559078216553, -2.1537444591522217, -0.04686202108860016, 0.5735985040664673, -2.7465732097625732, -1.1616591215133667, 0.6878449320793152]... [Tensor of shape torch.Size([1, 651])]
base_model.linear.bias: tensor([68.1700])
%==================================== Model Details ====================================%
Basic test#
[15]:
# Smoke test: run the assembled model on seeded random data.
torch.manual_seed(42)
# Named `sample_input` rather than `input` so the builtin input() is not
# shadowed. torch.float64 is the dtype the Python `float` type maps to.
sample_input = torch.randn(10, len(model.features), dtype=torch.float64)
model.eval()
model.to(torch.float64)
pred = model(sample_input)
pred
[15]:
tensor([[ 18.8880],
[ 55.6389],
[ 29.9992],
[-19.9164],
[ 26.4750],
[ 42.4600],
[ 11.2694],
[ 45.2477],
[ 77.4414],
[ 47.4068]], dtype=torch.float64, grad_fn=<AddmmBackward0>)
Save torch model#
[16]:
# Serialize the whole model object to ../weights/<clock_name>.pt, using the
# clock name stored in the metadata.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")
Clear directory#
[17]:
def remove_folder(path):
    """Delete the folder at ``path`` together with everything inside it.

    Deletion is best-effort: a filesystem failure (``OSError``, e.g. the path
    does not exist or permission is denied) is reported on standard output
    and not raised. Other exception types indicate a programming error and
    are allowed to propagate.

    Args:
        path: Location of the folder to remove.
    """
    try:
        shutil.rmtree(path)
    except OSError as e:
        print(f"Error deleting folder {path}: {e}")
    else:
        print(f"Deleted folder: {path}")
# Clean the working directory: every sub-folder is removed, and every regular
# file is removed unless its name ends with .ipynb.
for entry in os.listdir('.'):
    if os.path.isdir(entry):
        # Folders are deleted recursively by the helper.
        remove_folder(entry)
        continue
    if not os.path.isfile(entry) or entry.endswith('.ipynb'):
        # Keep notebooks; ignore anything that is not a regular file.
        continue
    os.remove(entry)
    print(f"Deleted file: {entry}")
Deleted file: CalcPCPhenoAge.json
Deleted file: CalcAllPCClocks.RData
Deleted file: PCPhenoAgeReferenceCpGBetas.json
Deleted file: download.r
Deleted file: PCPhenoAgeCpGs.json