77from xarray import Dataset
88
99from sgkit import variables
10- from sgkit .accelerate import numba_guvectorize , numba_jit
1110from sgkit .typing import ArrayLike
1211from sgkit .utils import (
1312 conditional_merge_datasets ,
1413 create_dataset ,
1514 define_variable_if_absent ,
1615)
1716
18- from .pedigree import (
19- _compress_hamilton_kerr_parameters ,
20- parent_indices ,
21- topological_argsort ,
22- )
17+ from .pedigree import parent_indices
2318
2419EST_DIPLOID = "diploid"
2520EST_HAMILTON_KERR = "Hamilton-Kerr"
2621
2722
28- @numba_guvectorize ( # type: ignore
29- [
30- "void(int8[:,:], int64[:,:], uint32, int8[:,:])" ,
31- "void(int16[:,:], int64[:,:], uint32, int16[:,:])" ,
32- "void(int32[:,:], int64[:,:], uint32, int32[:,:])" ,
33- "void(int64[:,:], int64[:,:], uint32, int64[:,:])" ,
34- ],
35- "(n,k),(n,p),()->(n,k)" ,
36- )
37- def genedrop_diploid (
38- genotypes : ArrayLike ,
39- parent : ArrayLike ,
40- seed : ArrayLike ,
41- out : ArrayLike ,
42- ) -> None : # pragma: no cover
43- n_sample , n_parent = parent .shape
44- _ , ploidy = genotypes .shape
45- if n_parent != 2 :
46- raise ValueError ("The parents dimension must be length 2" )
47- if ploidy != 2 :
48- raise ValueError ("Genotypes are not diploid" )
49- order = topological_argsort (parent )
50- np .random .seed (seed )
51- for i in range (n_sample ):
52- t = order [i ]
53- unknown_parent = 0
54- for j in range (n_parent ):
55- p = parent [t , j ]
56- if p < 0 :
57- # founder
58- unknown_parent += 1
59- else :
60- idx = np .random .randint (2 )
61- out [t , j ] = out [p , idx ]
62- if unknown_parent == 1 :
63- raise ValueError ("Pedigree contains half-founders" )
64- elif unknown_parent == 2 :
65- # copy founder
66- out [t , 0 ] = genotypes [t , 0 ]
67- out [t , 1 ] = genotypes [t , 1 ]
68-
69-
70- @numba_jit (nogil = True )
71- def _random_gamete_Hamilton_Kerr (
72- genotype : ArrayLike , ploidy : int , tau : int , lambda_ : float
73- ) -> ArrayLike :
74- if ploidy < len (genotype ):
75- # remove fill values
76- genotype = genotype [genotype > - 2 ]
77- if ploidy != len (genotype ):
78- raise ValueError ("Genotype ploidy does not match number of alleles" )
79- if tau > ploidy :
80- # TODO: this can be an option for encoding somatic genome duplication (with suitable lambda)
81- raise NotImplementedError ("Gamete tau cannot exceed parental ploidy" )
82- gamete = np .random .choice (genotype , tau , replace = False )
83- if lambda_ > 0.0 :
84- if tau == 2 :
85- if np .random .rand () <= lambda_ :
86- gamete [1 ] = gamete [0 ]
87- elif tau != 2 :
88- raise NotImplementedError ("Non-zero lambda is only implemented for tau = 2" )
89- return gamete
90-
91-
92- @numba_guvectorize ( # type: ignore
93- [
94- "void(int8[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int8[:,:])" ,
95- "void(int16[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int16[:,:])" ,
96- "void(int32[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int32[:,:])" ,
97- "void(int64[:,:], int64[:,:], uint64[:,:], float64[:,:], uint32, int64[:,:])" ,
98- ],
99- "(n,k),(n,p),(n,p),(n,p),()->(n,k)" ,
100- )
101- def genedrop_Hamilton_Kerr (
102- genotypes : ArrayLike ,
103- parent : ArrayLike ,
104- tau : ArrayLike ,
105- lambda_ : ArrayLike ,
106- seed : int ,
107- out : ArrayLike ,
108- ) -> None : # pragma: no cover
109- if parent .shape [1 ] != 2 :
110- parent , tau , lambda_ = _compress_hamilton_kerr_parameters (parent , tau , lambda_ )
111- n_sample , n_parent = parent .shape
112- _ , max_ploidy = genotypes .shape
113- order = topological_argsort (parent )
114- np .random .seed (seed )
115- for i in range (n_sample ):
116- t = order [i ]
117- alleles = 0
118- unknown_alleles = 0
119- for j in range (n_parent ):
120- p = parent [t , j ]
121- tau_p = tau [t , j ]
122- lambda_p = lambda_ [t , j ]
123- if p < 0 :
124- unknown_alleles += tau_p
125- elif tau_p > 0 :
126- ploidy_p = tau [p , 0 ] + tau [p , 1 ]
127- gamete = _random_gamete_Hamilton_Kerr (out [p ], ploidy_p , tau_p , lambda_p )
128- out [t , alleles : alleles + tau_p ] = gamete
129- alleles += tau_p
130- if unknown_alleles == 0 :
131- if alleles < max_ploidy :
132- # pad with fill value
133- out [t , alleles :] = - 2
134- elif unknown_alleles == alleles :
135- # founder
136- out [t ] = genotypes [t ]
137- else :
138- # partial founder
139- raise ValueError ("Pedigree contains half-founders" )
140-
141-
14223def simulate_genedrop (
14324 ds : Dataset ,
14425 * ,
@@ -165,7 +46,7 @@ def simulate_genedrop(
16546 which is only suitable for pedigrees in which all samples are
16647 diploids resulting from sexual reproduction.
16748 The "Hamilton-Kerr" method is suitable for autopolyploid and
168- mixed-ploidy datasets following Kerr et al. 2012 and [2]
49+ mixed-ploidy datasets following Kerr et al. 2012 [2] and
16950 Hamilton and Kerr 2017 [3].
17051 call_genotype
17152 Input variable name holding call_genotype as defined by
@@ -217,8 +98,8 @@ def simulate_genedrop(
21798 If the Hamilton-Kerr method is used and a sample has more than
21899 two contributing parents.
219100 ValueError
220- If the Hamilton-Kerr method is used and the number of alleles
221- in a genotype does not match the sum of tau values (i.e., ploidy).
101+ If the Hamilton-Kerr method is used and the number of alleles in a
102+ founder genotype does not match the sum of its tau values (i.e., ploidy).
222103 NotImplementedError
223104 If the Hamilton-Kerr method is used and a tau value exceeds the
224105 parental ploidy.
@@ -295,6 +176,8 @@ def simulate_genedrop(
295176 "Computation of the inverse additive relationship matrix for autopolyploid
296177 and multiple-ploidy populations." Theoretical and Applied Genetics 131: 851-860.
297178 """
179+ from .genedrop_numba_fns import genedrop_diploid , genedrop_Hamilton_Kerr
180+
298181 ds = define_variable_if_absent (ds , variables .parent , parent , parent_indices )
299182 variables .validate (
300183 ds , {parent : variables .parent_spec , call_genotype : variables .call_genotype_spec }
0 commit comments