Skip to content

Commit 693f788

Browse files
committed
cleaned up float32 polynomials, added accuracy of float32, removed unnecessary whitespace, and removed explicit copysigns
1 parent 26b3b1f commit 693f788

File tree

1 file changed

+21
-67
lines changed

1 file changed

+21
-67
lines changed

src/erf.jl

Lines changed: 21 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,6 @@ function _erf(x::Float64)
111111
ix::UInt32=reinterpret(UInt64,x)>>32
112112
# # top 32, without sign bit
113113
ia::UInt32=ix & 0x7fffffff
114-
# # sign
115-
# sign::UInt32=ix>>31
116-
117-
sign::Bool=x<0
118-
119-
120114

121115
if (ia < 0x3feb0000)
122116
# a = |x| < 0.84375.
@@ -130,7 +124,6 @@ function _erf(x::Float64)
130124
PA=(0.12837916709551256, -0.3761263890318287, 0.11283791670896592, -0.026866170630075903, 0.005223977428649761, -0.0008548312229989974, 0.00012054654502151287, -1.4906315067498891e-5, 1.6126444349070824e-6, -1.3074259056679966e-7)
131125

132126
r= fma(x,evalpoly(x2,PA),x) ## This fma is crucial for accuracy.
133-
134127
return r
135128
else
136129
## 0.5 <= a < 0.84375 - Use rational approximation.
@@ -158,12 +151,9 @@ function _erf(x::Float64)
158151
P=evalpoly(a,NB)
159152
Q=evalpoly(a,DB)
160153

154+
r= C + P / Q
155+
return copysign(r,x)
161156

162-
if (sign)
163-
return -C - P / Q
164-
else
165-
return C + P / Q
166-
end
167157
elseif (ia < 0x40000000)
168158
## 1.25 <= |x| < 2.0.
169159
a = abs(x)
@@ -174,13 +164,9 @@ function _erf(x::Float64)
174164
PC=( 0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2, -0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5, -0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8, -0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12, 0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16, -0x1.578c9e375d37p-19)
175165

176166
# Obtains erfc of |x|
177-
r=evalpoly(a,PC)
167+
r=1.0-evalpoly(a,PC)
168+
return copysign(r,x)
178169

179-
if (sign)
180-
return -1.0 + r
181-
else
182-
return 1.0 - r
183-
end
184170
elseif (ia < 0x400a0000)
185171
## 2 <= |x| < 3.25.
186172
a = abs(x)
@@ -189,16 +175,10 @@ function _erf(x::Float64)
189175
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
190176
PD=( 0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3, -0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2, -0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2, 0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3, -0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3, 0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10 )
191177

192-
193178
# Obtains erfc of |x|
194-
r=evalpoly(a,PD)
179+
r=1.0-evalpoly(a,PD)
180+
return copysign(r,x)
195181

196-
197-
if (sign)
198-
return -1.0 + r
199-
else
200-
return 1.0 - r
201-
end
202182
elseif (ia < 0x40100000)
203183
## 3.25 <= |x| < 4.0.
204184
a = abs(x)
@@ -207,14 +187,9 @@ function _erf(x::Float64)
207187
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25
208188
PE=( 0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14, -0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12, 0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14, -0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20, -0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23 )
209189

210-
r=evalpoly(a,PE)
211-
190+
r=1.0-evalpoly(a,PE)
191+
return copysign(r,x)
212192

213-
if (sign)
214-
return -1.0 + r
215-
else
216-
return 1.0 - r
217-
end
218193
elseif (ia < 0x4017a000)
219194
## 4 <= |x| < 5.90625.
220195
a = abs(x)
@@ -223,31 +198,19 @@ function _erf(x::Float64)
223198
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4
224199
PF=( 0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19, -0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14, 0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11, -0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11, 0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14, -0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19 )
225200

226-
r=evalpoly(a,PF)
227-
228-
229-
if (sign)
230-
return -1.0 + r
231-
else
232-
return 1.0 - r
233-
end
201+
r=1.0-evalpoly(a,PF)
202+
return copysign(r,x)
234203
else
235-
236204
if(isnan(x))
237205
return NaN
238206
end
239-
240-
if (sign)
241-
return -1.0
242-
else
243-
return 1.0
244-
end
245-
207+
copysign(1.0,x)
246208
end
247-
248-
249209
end
250210

211+
# Fast erf implementation using
212+
# polynomial approximations of erf and erfc.
213+
# Highest measured error is 1.12 ULPs at x = 1.232469
251214
function _erf(x::Float32)
252215
xabs=abs(x)
253216

@@ -256,47 +219,38 @@ function _erf(x::Float32)
256219
# # erf approximation using erf(x)=x+x*P(x^2) with degree 6
257220
# Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);
258221

259-
p=(0.128379167084072442015971708878153077794812875442854468252262977759538466466026108f0 ,-0.37612638677173360887708232154133809982218085424044576090554860522057182310851056f0 ,0.112837843774249243616280397250157777250507394914770984188915122197580134574091983f0 ,-2.68652865375831097248410803484628824158061934615600565461807890879374835870381413f-2 ,5.2188560068141289500478726517878958032487316358404909126782459380536157982980317f-3 ,-8.3948481565340715162646401327604259604058983841328238470236141987304564416187075f-4 )
222+
p=(0.12837917f0, -0.37612638f0, 0.11283784f0, -0.026865287f0, 0.005218856f0, -0.0008394848f0)
260223
return copysign(fma(evalpoly(x^2,p),x,x),x)
261224

262-
263225
elseif(xabs<1.25)
264226
# range [0.5;1.25]
265227
# # erfc approximation with degree 11
266228
# Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);
267229

268-
p=(0.47950012218627633266759004480928346323282597139306026470914603497045110845486174f0 ,-0.87878257867905935820797978014037919344903083907274027447236660730320123742574287f0 ,0.43939127340837007019212165648359529028950189241566682841945406944095273511955846f0 ,0.14646415648788187125149469711213532491861248937046075996222718522704324115126361f0 ,-0.18308466657525286729680773848609379638263457954975049708494183866284386220432776f0 ,-7.2864220919698610613876940798326935208926368515022370715399462640511442447273458f-3 ,4.9870470155967931820488830916894059744198728056328087545059900897748278238141407f-2 ,-4.8868244174386309272067176761136942655656143898910274020685204636212063119576779f-3 ,-1.1067663329874328692394302924580932358903686528252386541750628802256971128485086f-2 ,3.422346846104011114882331840937679141622349918363223030054135870224315763897819f-3 ,7.3027064271433070734900533407935697302293380332736344236451943042328241926267078f-4 ,-3.75817085020390291674591289563810393435170791862443379088867595819930602228183736f-4)
230+
p=(0.47950011f0, -0.8787826f0, 0.4393913f0, 0.14646415f0, -0.18308467f0, -0.007286422f0, 0.04987047f0, -0.0048868246f0, -0.011067663f0, 0.003422347f0, 0.00073027064f0, -0.0003758171f0)
269231
return copysign(1f0-evalpoly(xabs-0.5f0,p),x)
270232

271-
272233
elseif(xabs<2.5)
273234
# range [1.25;2.5]
274235
# erfc approximation with degree 13
275236
# Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);
276237

277-
p=(7.7099871743416328126356260566547494286908999516428835479787235777958146662395595f-2 ,-0.236521122401763914017434484798130978411662107330375092318490618856395191655478216f0,0.29565140031735793699573224918347147906505950384598003349376150036910634397925794f0 ,-0.1675357298956664512006671881841788827839151996664942041368823179421003600967294f0 ,6.1585929514334837247453472606059635765901344910897544510201614098797139034234648f-3 ,4.7187118101037459805673209483303989110029907488612006887134009408435903419517292f-2 ,-2.13310233800625824496628789189780598311678632855392393834221656949343403265787497f-2 ,-3.5262544206616132931628291419365561301271536625777609043061691511329815920675659f-3 ,5.4618311186313877025711961981462734764321122794521259327727086934478142942850845f-3 ,-4.785867288141399400801105017099155449896231371492841611827465311139149075249888f-4 ,-1.27638529129849191843323454760991411930129610353375308047589690520960413915824394f-3 ,7.3386942792237055752542546076918383420248583637581325208367761430501498054660812f-4 ,-1.78316578653402766600700240536421560298308014794597944402973993319799701405750455f-4 ,1.7451623823305231302742688272669315889569143556707803172195238203096188492888552f-5)
238+
p=(0.077099875f0, -0.23652112f0, 0.2956514f0, -0.16753574f0, 0.006158593f0, 0.04718712f0, -0.021331023f0, -0.0035262543f0, 0.005461831f0, -0.00047858673f0, -0.0012763853f0, 0.00073386944f0, -0.00017831658f0, 1.7451624f-5)
278239
return copysign(1f0-evalpoly(xabs-1.25f0,p),x)
279240

280-
281241
elseif (xabs<4.0)
282242
# range [2.5;4.0]
283243
# erfc approximation with degree 13
284244
# Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);
285245

286-
p=(4.0695201730499156824979069048504659592517281936379802348048709133497117253173134f-4 ,-2.17828418992563158453260234674605666557610132217183615223667886784864793391972216f-3 ,5.4457086379161451966966115384340866958878328106709954104596733367011474611248579f-3 ,-8.3500528577413591092464067696190605106058226371648352228707616455735161644530442f-3 ,8.6220108431254600006776817786647116978577973677436276371791579294890933467761521f-3 ,-6.1151670580428489239821330412642128179703517796977571695036324539244245861906337f-3 ,2.7899458310817905157828387844634029030573127219171061924068857965376725014125688f-3 ,-5.1939500417227631511066424033635849427151215499427143347485235667556121428433924f-4 ,-3.0461048275173680879818513198875622675191891190039217228437020935156706025853604f-4 ,3.1068457426894474803882479013975662189036326925351567029013595934579911165234626f-4 ,-1.38668990773606991790025193904557899401436779771288081224199889420050208965460715f-4 ,3.6909690646602578301139502069782980693656088359691610474313757700491321631191208f-5 ,-5.6828887569336940854242289035551153938157998442490411471025066283558049904037765f-6 ,3.9297630357752347428080345871297027499036518175420666529979940028638158683656671f-7)
246+
p=(0.00040695202f0, -0.002178284f0, 0.0054457085f0, -0.008350053f0, 0.008622011f0, -0.006115167f0, 0.0027899458f0, -0.000519395f0, -0.00030461047f0, 0.00031068458f0, -0.00013866898f0, 3.6909692f-5, -5.682889f-6, 3.929763f-7)
287247
return copysign(1f0-evalpoly(xabs-2.5f0,p),x)
288248

289-
290249
else
291-
# |x| >= 4.0.
292-
293-
294-
r=copysign(1f0,x)
295-
296-
return r
297-
250+
# range [4.0,inf)
251+
r=copysign(1f0,x)
252+
return r
298253
end
299-
300254
end
301255

302256
_erf(x::Float16)=Float16(_erf(Float32(x)))

0 commit comments

Comments
 (0)