Polygenic scoring for each local ancestry background
[1]:
import pandas as pd
import numpy as np
import admix
import os
[2]:
# Load the example dataset stored under the "example_data/CEU-YRI" prefix
# (path is relative to the notebook's working directory).
# NOTE(review): presumably a simulated CEU-YRI admixed cohort with local ancestry — confirm.
dset = admix.io.read_dataset("example_data/CEU-YRI")
[3]:
# Construct an example SNP weights dataframe: one row per SNP in the dataset,
# keeping the identifying columns and adding a WEIGHT column that holds the
# per-allele effect sizes.
#
# The weights are simulated from a standard normal. A dedicated, seeded
# generator is used (instead of the global np.random state) so that the table
# below and every downstream PGS value are reproducible on Restart & Run All.
rng = np.random.default_rng(1234)

df_weights = dset.snp[["CHROM", "POS", "REF", "ALT"]].copy()
df_weights["WEIGHT"] = rng.normal(size=len(df_weights))
df_weights
[3]:
CHROM | POS | REF | ALT | WEIGHT | |
---|---|---|---|---|---|
snp | |||||
22:16406147:A:G | 22 | 16406147 | A | G | -0.395390 |
22:16551808:T:C | 22 | 16551808 | T | C | 1.332264 |
22:16573830:T:C | 22 | 16573830 | T | C | 0.062149 |
22:16575525:T:C | 22 | 16575525 | T | C | 0.428249 |
22:16576248:G:T | 22 | 16576248 | G | T | 0.263856 |
... | ... | ... | ... | ... | ... |
22:50739662:G:A | 22 | 50739662 | G | A | -0.963074 |
22:50743331:A:G | 22 | 50743331 | A | G | -0.816075 |
22:50772964:T:C | 22 | 50772964 | T | C | -0.379005 |
22:50774447:A:C | 22 | 50774447 | A | C | 0.983688 |
22:50780578:G:A | 22 | 50780578 | G | A | 0.721975 |
15357 rows × 5 columns
[4]:
# Compute the partial PGS separately for each local ancestry background.
# Only the first 100 entries along the dataset's second axis are scored
# (the result below has one row per individual, 100 rows in total).
pgs_df = admix.data.calc_partial_pgs(
    dset=dset[:, 0:100],
    df_weights=df_weights,
)
admix.data.calc_partial_pgs: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1268.91it/s]
[5]:
# Each entry is the sum of (per-allele effect x genotype dosage) restricted to
# one local ancestry background: one row per individual, one column per
# ancestry (ANC1, ANC2).
pgs_df
[5]:
ANC1 | ANC2 | |
---|---|---|
indiv | ||
Sample_1 | 35.638558 | 118.706099 |
Sample_2 | -30.856414 | 60.780757 |
Sample_3 | 61.098364 | -46.071822 |
Sample_4 | -27.939297 | 28.420301 |
Sample_5 | -2.792197 | 213.910173 |
... | ... | ... |
Sample_96 | 0.000000 | 45.962131 |
Sample_97 | -58.187541 | 24.903466 |
Sample_98 | -30.823394 | 39.505350 |
Sample_99 | 0.000000 | 84.651254 |
Sample_100 | -46.359911 | 90.501891 |
100 rows × 2 columns
Alternatively, the same partial PGS can be computed from the command line:
# Replace the <...> placeholders with your own paths before running.
# NOTE(review): presumably --plink-path is the PLINK2 .pgen file, --weights-path a
# TSV of SNP weights, and --out the output prefix — confirm with `admix calc-partial-pgs --help`.
admix calc-partial-pgs \
--plink-path <plink2_prefix>.pgen \
--weights-path <weight_tsv_path> \
--out out