@@ -111,12 +111,6 @@ function _erf(x::Float64)
111111 ix:: UInt32 = reinterpret (UInt64,x)>> 32
112112 # # top 32, without sign bit
113113 ia:: UInt32 = ix & 0x7fffffff
114- # # sign
115- # sign::UInt32=ix>>31
116-
117- sign:: Bool = x< 0
118-
119-
120114
121115 if (ia < 0x3feb0000 )
122116 # a = |x| < 0.84375.
@@ -130,7 +124,6 @@ function _erf(x::Float64)
130124 PA= (0.12837916709551256 , - 0.3761263890318287 , 0.11283791670896592 , - 0.026866170630075903 , 0.005223977428649761 , - 0.0008548312229989974 , 0.00012054654502151287 , - 1.4906315067498891e-5 , 1.6126444349070824e-6 , - 1.3074259056679966e-7 )
131125
132126 r= fma (x,evalpoly (x2,PA),x) # # This fma is crucial for accuracy.
133-
134127 return r
135128 else
136129 # # 0.5 <= a < 0.84375 - Use rational approximation.
@@ -158,12 +151,9 @@ function _erf(x::Float64)
158151 P= evalpoly (a,NB)
159152 Q= evalpoly (a,DB)
160153
154+ r= C + P / Q
155+ return copysign (r,x)
161156
162- if (sign)
163- return - C - P / Q
164- else
165- return C + P / Q
166- end
167157 elseif (ia < 0x40000000 )
168158 # # 1.25 <= |x| < 2.0.
169159 a = abs (x)
@@ -174,13 +164,9 @@ function _erf(x::Float64)
174164 PC= ( 0x1 .3 bcd133aa0ffcp- 4 , - 0x1 . e4652fadcb702p- 3 , 0x1 .2 ebf3dcca0446p- 2 , - 0x1 .571 d01c62d66p- 3 , 0x1 .93 a9a8f5b3413p- 8 , 0x1 .8281 cbcc2cd52p- 5 , - 0x1 .5 cffd86b4de16p- 6 , - 0x1 . db4ccf595053ep- 9 , 0x1 .757 cbf8684edap- 8 , - 0x1 . ce7dfd2a9e56ap- 11 , - 0x1 .99 ee3bc5a3263p- 11 , 0x1 .3 c57cf9213f5fp- 12 , 0x1 .60692996 bf254p- 14 , - 0x1 .6e44 cb7c1fa2ap- 14 , 0x1 .9 d4484ac482b2p- 16 , - 0x1 .578 c9e375d37p- 19 )
175165
176166 # Obtains erfc of |x|
177- r= evalpoly (a,PC)
167+ r= 1.0 - evalpoly (a,PC)
168+ return copysign (r,x)
178169
179- if (sign)
180- return - 1.0 + r
181- else
182- return 1.0 - r
183- end
184170 elseif (ia < 0x400a0000 )
185171 # # 2 <= |x| < 3.25.
186172 a = abs (x)
@@ -189,16 +175,10 @@ function _erf(x::Float64)
189175 # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
190176 PD= ( 0x1 .328f5 ec350e5p- 8 , - 0x1 .529 b9e8cf8e99p- 5 , 0x1 .529 b9e8cd9e71p- 3 , - 0x1 .8 b0ae3a023bf2p- 2 , 0x1 .1 a2c592599d82p- 1 , - 0x1 . ace732477e494p- 2 , - 0x1 . e1a06a27920ffp- 6 , 0x1 . bae92a6d27af6p- 2 , - 0x1 . a15470fcf5ce7p- 2 , 0x1 . bafe45d18e213p- 6 , 0x1 .0 d950680d199ap- 2 , - 0x1 .8 c9481e8f22e3p- 3 , - 0x1 .158450 ed5c899p- 4 , 0x1 . c01f2973b44p- 3 , - 0x1 .73 ed2827546a7p- 3 , 0x1 .47733687 d1ff7p- 4 , - 0x1 .2 dec70d00b8e1p- 6 , 0x1 . a947ab83cd4fp- 10 )
191177
192-
193178 # Obtains erfc of |x|
194- r= evalpoly (a,PD)
179+ r= 1.0 - evalpoly (a,PD)
180+ return copysign (r,x)
195181
196-
197- if (sign)
198- return - 1.0 + r
199- else
200- return 1.0 - r
201- end
202182 elseif (ia < 0x40100000 )
203183 # # 3.25 <= |x| < 4.0.
204184 a = abs (x)
@@ -207,14 +187,9 @@ function _erf(x::Float64)
207187 # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25
208188 PE= ( 0x1 .20 c13035539e4p- 18 , - 0x1 . e9b5e8d16df7ep- 16 , 0x1 .8 de3cd4733bf9p- 14 , - 0x1 .9 aa48beb8382fp- 13 , 0x1 .2 c7d713370a9fp- 12 , - 0x1 .490 b12110b9e2p- 12 , 0x1 .1459 c5d989d23p- 12 , - 0x1 .64 b28e9f1269p- 13 , 0x1 .57 c76d9d05cf8p- 14 , - 0x1 . bf271d9951cf8p- 16 , 0x1 . db7ea4d4535c9p- 19 , 0x1 .91 c2e102d5e49p- 20 , - 0x1 . e9f0826c2149ep- 21 , 0x1 .60 eebaea236e1p- 23 )
209189
210- r= evalpoly (a,PE)
211-
190+ r= 1.0 - evalpoly (a,PE)
191+ return copysign (r,x)
212192
213- if (sign)
214- return - 1.0 + r
215- else
216- return 1.0 - r
217- end
218193 elseif (ia < 0x4017a000 )
219194 # # 4 <= |x| < 5.90625.
220195 a = abs (x)
@@ -223,31 +198,19 @@ function _erf(x::Float64)
223198 # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4
224199 PF= ( 0x1 .08 ddd130d1fa6p- 26 , - 0x1 .10 b146f59ff06p- 22 , 0x1 .10 b135328b7b2p- 19 , - 0x1 .6039988e7575 fp- 17 , 0x1 .497 d365e19367p- 15 , - 0x1 . da48d9afac83ep- 14 , 0x1 .1024 c9b1fbb48p- 12 , - 0x1 . fc962e7066272p- 12 , 0x1 .87297282 d4651p- 11 , - 0x1 . f057b255f8c59p- 11 , 0x1 .0228 d0eee063p- 10 , - 0x1 . b1b21b84ec41cp- 11 , 0x1 .1 ead8ae9e1253p- 11 , - 0x1 .1e708 fba37fccp- 12 , 0x1 .9559363991 edap- 14 , - 0x1 .68 c827b783d9cp- 16 , 0x1 .2 ec4adeccf4a2p- 19 )
225200
226- r= evalpoly (a,PF)
227-
228-
229- if (sign)
230- return - 1.0 + r
231- else
232- return 1.0 - r
233- end
201+ r= 1.0 - evalpoly (a,PF)
202+ return copysign (r,x)
234203 else
235-
236204 if (isnan (x))
237205 return NaN
238206 end
239-
240- if (sign)
241- return - 1.0
242- else
243- return 1.0
244- end
245-
207+ copysign (1.0 ,x)
246208 end
247-
248-
249209end
250210
211+ # Fast erf implementation using
212+ # polynomial approximations of erf and erfc.
213+ # Highest measured error is 1.12 ULPs at x = 1.232469
251214function _erf (x:: Float32 )
252215 xabs= abs (x)
253216
@@ -256,47 +219,38 @@ function _erf(x::Float32)
256219 # # erf approximation using erf(x)=x+x*P(x^2) with degree 6
257220 # Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);
258221
259- p= (0.128379167084072442015971708878153077794812875442854468252262977759538466466026108f0 , - 0.37612638677173360887708232154133809982218085424044576090554860522057182310851056f0 , 0.112837843774249243616280397250157777250507394914770984188915122197580134574091983f0 , - 2.68652865375831097248410803484628824158061934615600565461807890879374835870381413f-2 , 5.2188560068141289500478726517878958032487316358404909126782459380536157982980317f-3 , - 8.3948481565340715162646401327604259604058983841328238470236141987304564416187075f-4 )
222+ p= (0.12837917f0 , - 0.37612638f0 , 0.11283784f0 , - 0.026865287f0 , 0.005218856f0 , - 0.0008394848f0 )
260223 return copysign (fma (evalpoly (x^ 2 ,p),x,x),x)
261224
262-
263225 elseif (xabs< 1.25 )
264226 # range [0.5;1.25]
265227 # # erfc approximation with degree 11
266228 # Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);
267229
268- p=(0.47950012218627633266759004480928346323282597139306026470914603497045110845486174f0 ,-0.87878257867905935820797978014037919344903083907274027447236660730320123742574287f0 ,0.43939127340837007019212165648359529028950189241566682841945406944095273511955846f0 ,0.14646415648788187125149469711213532491861248937046075996222718522704324115126361f0 ,-0.18308466657525286729680773848609379638263457954975049708494183866284386220432776f0 ,-7.2864220919698610613876940798326935208926368515022370715399462640511442447273458f-3 ,4.9870470155967931820488830916894059744198728056328087545059900897748278238141407f-2 ,-4.8868244174386309272067176761136942655656143898910274020685204636212063119576779f-3 ,-1.1067663329874328692394302924580932358903686528252386541750628802256971128485086f-2 ,3.422346846104011114882331840937679141622349918363223030054135870224315763897819f-3 ,7.3027064271433070734900533407935697302293380332736344236451943042328241926267078f-4 ,-3.75817085020390291674591289563810393435170791862443379088867595819930602228183736f-4)
230+ p= (0.47950011f0 , - 0.8787826f0 , 0.4393913f0 , 0.14646415f0 , - 0.18308467f0 , - 0.007286422f0 , 0.04987047f0 , - 0.0048868246f0 , - 0.011067663f0 , 0.003422347f0 , 0.00073027064f0 , - 0.0003758171f0 )
269231 return copysign (1f0 - evalpoly (xabs- 0.5f0 ,p),x)
270232
271-
272233 elseif (xabs< 2.5 )
273234 # range [1.25;2.5]
274235 # erfc approximation with degree 13
275236 # Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);
276237
277- p=(7.7099871743416328126356260566547494286908999516428835479787235777958146662395595f-2 ,-0.236521122401763914017434484798130978411662107330375092318490618856395191655478216f0,0.29565140031735793699573224918347147906505950384598003349376150036910634397925794f0 ,-0.1675357298956664512006671881841788827839151996664942041368823179421003600967294f0 ,6.1585929514334837247453472606059635765901344910897544510201614098797139034234648f-3 ,4.7187118101037459805673209483303989110029907488612006887134009408435903419517292f-2 ,-2.13310233800625824496628789189780598311678632855392393834221656949343403265787497f-2 ,-3.5262544206616132931628291419365561301271536625777609043061691511329815920675659f-3 ,5.4618311186313877025711961981462734764321122794521259327727086934478142942850845f-3 ,-4.785867288141399400801105017099155449896231371492841611827465311139149075249888f-4 ,-1.27638529129849191843323454760991411930129610353375308047589690520960413915824394f-3 ,7.3386942792237055752542546076918383420248583637581325208367761430501498054660812f-4 ,-1.78316578653402766600700240536421560298308014794597944402973993319799701405750455f-4 ,1.7451623823305231302742688272669315889569143556707803172195238203096188492888552f-5)
238+ p= (0.077099875f0 , - 0.23652112f0 , 0.2956514f0 , - 0.16753574f0 , 0.006158593f0 , 0.04718712f0 , - 0.021331023f0 , - 0.0035262543f0 , 0.005461831f0 , - 0.00047858673f0 , - 0.0012763853f0 , 0.00073386944f0 , - 0.00017831658f0 , 1.7451624f-5 )
278239 return copysign (1f0 - evalpoly (xabs- 1.25f0 ,p),x)
279240
280-
281241 elseif (xabs< 4.0 )
282242 # range [2.5;4.0]
283243 # erfc approximation with degree 13
284244 # Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);
285245
286- p=(4.0695201730499156824979069048504659592517281936379802348048709133497117253173134f-4 ,-2.17828418992563158453260234674605666557610132217183615223667886784864793391972216f-3 ,5.4457086379161451966966115384340866958878328106709954104596733367011474611248579f-3 ,-8.3500528577413591092464067696190605106058226371648352228707616455735161644530442f-3 ,8.6220108431254600006776817786647116978577973677436276371791579294890933467761521f-3 ,-6.1151670580428489239821330412642128179703517796977571695036324539244245861906337f-3 ,2.7899458310817905157828387844634029030573127219171061924068857965376725014125688f-3 ,-5.1939500417227631511066424033635849427151215499427143347485235667556121428433924f-4 ,-3.0461048275173680879818513198875622675191891190039217228437020935156706025853604f-4 ,3.1068457426894474803882479013975662189036326925351567029013595934579911165234626f-4 ,-1.38668990773606991790025193904557899401436779771288081224199889420050208965460715f-4 ,3.6909690646602578301139502069782980693656088359691610474313757700491321631191208f-5 ,-5.6828887569336940854242289035551153938157998442490411471025066283558049904037765f-6 ,3.9297630357752347428080345871297027499036518175420666529979940028638158683656671f-7)
246+ p= (0.00040695202f0 , - 0.002178284f0 , 0.0054457085f0 , - 0.008350053f0 , 0.008622011f0 , - 0.006115167f0 , 0.0027899458f0 , - 0.000519395f0 , - 0.00030461047f0 , 0.00031068458f0 , - 0.00013866898f0 , 3.6909692f-5 , - 5.682889f-6 , 3.929763f-7 )
287247 return copysign (1f0 - evalpoly (xabs- 2.5f0 ,p),x)
288248
289-
290249 else
291- # |x| >= 4.0.
292-
293-
294- r= copysign (1f0 ,x)
295-
296- return r
297-
250+ # range [4.0,inf)
251+ r= copysign (1f0 ,x)
252+ return r
298253 end
299-
300254end
301255
302256_erf (x:: Float16 )= Float16 (_erf (Float32 (x)))
0 commit comments