Polygenic scoring for each local ancestry background

[1]:
import pandas as pd
import numpy as np
import admix
import os
[2]:
# Read the example dataset located at example_data/CEU-YRI.
example_path = "example_data/CEU-YRI"
dset = admix.io.read_dataset(example_path)
[3]:
# construct an example SNP weights dataframe (WEIGHT column contains the per-allele effect sizes)
# The weights are simulated draws from a standard normal, for illustration only.
# A seeded generator is used so that the table (and every score computed from it)
# is identical on each Restart & Run All instead of depending on global RNG state.
rng = np.random.default_rng(1234)
df_weights = dset.snp[["CHROM", "POS", "REF", "ALT"]].copy()
df_weights["WEIGHT"] = rng.normal(size=len(df_weights))
df_weights
[3]:
CHROM POS REF ALT WEIGHT
snp
22:16406147:A:G 22 16406147 A G -0.395390
22:16551808:T:C 22 16551808 T C 1.332264
22:16573830:T:C 22 16573830 T C 0.062149
22:16575525:T:C 22 16575525 T C 0.428249
22:16576248:G:T 22 16576248 G T 0.263856
... ... ... ... ... ...
22:50739662:G:A 22 50739662 G A -0.963074
22:50743331:A:G 22 50743331 A G -0.816075
22:50772964:T:C 22 50772964 T C -0.379005
22:50774447:A:C 22 50774447 A C 0.983688
22:50780578:G:A 22 50780578 G A 0.721975

15357 rows × 5 columns

[4]:
# Compute the partial PGS for each local ancestry background,
# using only the first 100 entries along the dataset's second axis.
dset_subset = dset[:, 0:100]
pgs_df = admix.data.calc_partial_pgs(
    dset=dset_subset,
    df_weights=df_weights,
)
admix.data.calc_partial_pgs: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1268.91it/s]
[5]:
# Partial PGS: per-allele effects x genotype dosage within each local ancestry background.
# The table has one row per individual and one column per ancestry (ANC1, ANC2).
pgs_df
[5]:
ANC1 ANC2
indiv
Sample_1 35.638558 118.706099
Sample_2 -30.856414 60.780757
Sample_3 61.098364 -46.071822
Sample_4 -27.939297 28.420301
Sample_5 -2.792197 213.910173
... ... ...
Sample_96 0.000000 45.962131
Sample_97 -58.187541 24.903466
Sample_98 -30.823394 39.505350
Sample_99 0.000000 84.651254
Sample_100 -46.359911 90.501891

100 rows × 2 columns

Alternatively, the same calculation can be run from the command line:

# replace the <...> placeholders with your own file paths
admix calc-partial-pgs \
    --plink-path <plink2_prefix>.pgen \
    --weights-path <weight_tsv_path> \
    --out out