admix.Dataset#

class admix.Dataset(geno: Array | None = None, lanc: Array | None = None, snp: DataFrame | None = None, indiv: DataFrame | None = None, n_anc: int | None = None, dset_ref=None, snp_idx: slice | int | ndarray | None = None, indiv_idx: slice | int | ndarray | None = None, enforce_order: bool = True)[source]#

Data structure to contain genotype and local ancestry.

__init__(geno: Array | None = None, lanc: Array | None = None, snp: DataFrame | None = None, indiv: DataFrame | None = None, n_anc: int | None = None, dset_ref=None, snp_idx: slice | int | ndarray | None = None, indiv_idx: slice | int | ndarray | None = None, enforce_order: bool = True)[source]#

Methods

__init__([geno, lanc, snp, indiv, n_anc, ...])

af_per_anc([force])

Return the allele frequency per ancestry (n_snp, n_anc)

allele_per_anc()

Return the allele-per-ancestry raw count matrix

append_indiv_info(df_info[, force_update])

Append individual info to the dataset; individuals are matched using self.indiv.index and df_info.index.

append_snp_info(df_info)

Append SNP info to the dataset; SNPs are matched using self.snp.index and df_info.index.

nhaplo_per_anc([force])

Return the number of haplotypes per ancestry (n_snp, n_anc)

persist()

Persist the lazy data to memory.

Attributes

data

The underlying data of the dataset.

geno

Genotype matrix

indiv

One-dimensional annotation of individuals (pd.DataFrame).

lanc

Local ancestry matrix

n_anc

Number of ancestries.

n_indiv

Number of individuals.

n_snp

Number of SNPs.

snp

One-dimensional annotation of SNPs (pd.DataFrame).

uns

Unstructured annotation (ordered dictionary).

xr

Return the xr.Dataset used internally