00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <math.h>
00033 #include "ls_fft.h"
00034 #include "sharp_ylmgen_c.h"
00035 #include "sharp_internal.h"
00036 #include "c_utils.h"
00037 #include "sharp_core.h"
00038 #include "sharp_vecutil.h"
00039 #include "walltime_c.h"
00040 #include "sharp_almhelpers.h"
00041 #include "sharp_geomhelpers.h"
00042
00043 typedef complex double dcmplx;
00044 typedef complex float fcmplx;
00045
00046 static const double sqrt_one_half = 0.707106781186547572737310929369;
00047 static const double sqrt_two = 1.414213562373095145474621858739;
00048
00049 static int chunksize_min=500, nchunks_max=10;
00050
00051 static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
00052 {
00053 *chunksize = (ndata+nchunks_max-1)/nchunks_max;
00054 if (*chunksize>=chunksize_min)
00055 *chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
00056 else
00057 {
00058 *nchunks = (ndata+chunksize_min-1)/chunksize_min;
00059 *chunksize = (ndata+(*nchunks)-1)/(*nchunks);
00060 if (*nchunks>1)
00061 *chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
00062 }
00063 *nchunks = (ndata+(*chunksize)-1)/(*chunksize);
00064 }
00065
00066 int sharp_get_mlim (int lmax, int spin, double sth, double cth)
00067 {
00068 double ofs=lmax*0.01;
00069 if (ofs<100.) ofs=100.;
00070 double b = -2*spin*fabs(cth);
00071 double t1 = lmax*sth+ofs;
00072 double c = (double)spin*spin-t1*t1;
00073 double discr = b*b-4*c;
00074 if (discr<=0) return lmax;
00075 double res=(-b+sqrt(discr))/2.;
00076 if (res>lmax) res=lmax;
00077 return (int)(res+0.5);
00078 }
00079
00080 typedef struct
00081 {
00082 double phi0_;
00083 dcmplx *shiftarr, *work;
00084 int s_shift, s_work;
00085 real_plan plan;
00086 int norot;
00087 } ringhelper;
00088
00089 static void ringhelper_init (ringhelper *self)
00090 {
00091 static ringhelper rh_null = { 0, NULL, NULL, 0, 0, NULL, 0 };
00092 *self = rh_null;
00093 }
00094
00095 static void ringhelper_destroy (ringhelper *self)
00096 {
00097 if (self->plan) kill_real_plan(self->plan);
00098 DEALLOC(self->shiftarr);
00099 DEALLOC(self->work);
00100 ringhelper_init(self);
00101 }
00102
00103 static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
00104 {
00105 self->norot = (fabs(phi0)<1e-14);
00106 if (!(self->norot))
00107 if ((mmax!=self->s_shift-1) || (!FAPPROX(phi0,self->phi0_,1e-12)))
00108 {
00109 RESIZE (self->shiftarr,dcmplx,mmax+1);
00110 self->s_shift = mmax+1;
00111 self->phi0_ = phi0;
00112 for (int m=0; m<=mmax; ++m)
00113 self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
00114 }
00115 if (!self->plan) self->plan=make_real_plan(nph);
00116 if (nph!=(int)self->plan->length)
00117 {
00118 kill_real_plan(self->plan);
00119 self->plan=make_real_plan(nph);
00120 }
00121 GROW(self->work,dcmplx,self->s_work,nph);
00122 }
00123
00124 static int ringinfo_compare (const void *xa, const void *xb)
00125 {
00126 const sharp_ringinfo *a=xa, *b=xb;
00127 return (a->sth < b->sth) ? -1 : (a->sth > b->sth) ? 1 : 0;
00128 }
00129 static int ringpair_compare (const void *xa, const void *xb)
00130 {
00131 const sharp_ringpair *a=xa, *b=xb;
00132 if (a->r1.nph==b->r1.nph)
00133 return (a->r1.phi0 < b->r1.phi0) ? -1 :
00134 ((a->r1.phi0 > b->r1.phi0) ? 1 :
00135 (a->r1.cth>b->r1.cth ? -1 : 1));
00136 return (a->r1.nph<b->r1.nph) ? -1 : 1;
00137 }
00138
00139 void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
00140 const ptrdiff_t *mstart, int flags, sharp_alm_info **alm_info)
00141 {
00142 sharp_alm_info *info = RALLOC(sharp_alm_info,1);
00143 info->lmax = lmax;
00144 info->nm = nm;
00145 info->mval = RALLOC(int,nm);
00146 info->mvstart = RALLOC(ptrdiff_t,nm);
00147 info->stride = stride;
00148 info->flags = flags;
00149 for (int mi=0; mi<nm; ++mi)
00150 {
00151 info->mval[mi] = mval[mi];
00152 info->mvstart[mi] = mstart[mi];
00153 }
00154 *alm_info = info;
00155 }
00156
00157 void sharp_make_alm_info (int lmax, int mmax, int stride,
00158 const ptrdiff_t *mstart, sharp_alm_info **alm_info)
00159 {
00160 int *mval=RALLOC(int,mmax+1);
00161 for (int i=0; i<=mmax; ++i)
00162 mval[i]=i;
00163 sharp_make_general_alm_info (lmax, mmax+1, stride, mval, mstart, 0, alm_info);
00164 DEALLOC(mval);
00165 }
00166
00167 ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi)
00168 {
00169 UTIL_ASSERT(!(self->flags & SHARP_PACKED),
00170 "sharp_alm_index not applicable with SHARP_PACKED alms");
00171 return self->mvstart[mi]+self->stride*l;
00172 }
00173
00174 void sharp_destroy_alm_info (sharp_alm_info *info)
00175 {
00176 DEALLOC (info->mval);
00177 DEALLOC (info->mvstart);
00178 DEALLOC (info);
00179 }
00180
00181 void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
00182 const int *stride, const double *phi0, const double *theta,
00183 const double *wgt, sharp_geom_info **geom_info)
00184 {
00185 sharp_geom_info *info = RALLOC(sharp_geom_info,1);
00186 sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
00187
00188 int pos=0;
00189 info->pair=RALLOC(sharp_ringpair,nrings);
00190 info->npairs=0;
00191 *geom_info = info;
00192
00193 for (int m=0; m<nrings; ++m)
00194 {
00195 infos[m].theta = theta[m];
00196 infos[m].cth = cos(theta[m]);
00197 infos[m].sth = sin(theta[m]);
00198 infos[m].weight = (wgt != NULL) ? wgt[m] : 1.;
00199 infos[m].phi0 = phi0[m];
00200 infos[m].ofs = ofs[m];
00201 infos[m].stride = stride[m];
00202 infos[m].nph = nph[m];
00203 }
00204 qsort(infos,nrings,sizeof(sharp_ringinfo),ringinfo_compare);
00205 while (pos<nrings)
00206 {
00207 info->pair[info->npairs].r1=infos[pos];
00208 if ((pos<nrings-1) && FAPPROX(infos[pos].cth,-infos[pos+1].cth,1e-12))
00209 {
00210 if (infos[pos].cth>0)
00211 info->pair[info->npairs].r2=infos[pos+1];
00212 else
00213 {
00214 info->pair[info->npairs].r1=infos[pos+1];
00215 info->pair[info->npairs].r2=infos[pos];
00216 }
00217 ++pos;
00218 }
00219 else
00220 info->pair[info->npairs].r2.nph=-1;
00221 ++pos;
00222 ++info->npairs;
00223 }
00224 DEALLOC(infos);
00225
00226 qsort(info->pair,info->npairs,sizeof(sharp_ringpair),ringpair_compare);
00227 }
00228
00229 void sharp_destroy_geom_info (sharp_geom_info *geom_info)
00230 {
00231 DEALLOC (geom_info->pair);
00232 DEALLOC (geom_info);
00233 }
00234
00235
00236
00237
00238 static int sharp_get_mmax (int *mval, int nm)
00239 {
00240 int *mcheck=RALLOC(int,nm);
00241 SET_ARRAY(mcheck,0,nm,0);
00242 for (int i=0; i<nm; ++i)
00243 {
00244 int m_cur=mval[i];
00245 UTIL_ASSERT((m_cur>=0) && (m_cur<nm), "not all m values are present");
00246 UTIL_ASSERT(mcheck[m_cur]==0, "duplicate m value");
00247 mcheck[m_cur]=1;
00248 }
00249 DEALLOC(mcheck);
00250 return nm-1;
00251 }
00252
00253 static void ringhelper_phase2ring (ringhelper *self,
00254 const sharp_ringinfo *info, void *data, int mmax, const dcmplx *phase,
00255 int pstride, int flags)
00256 {
00257 int nph = info->nph;
00258 int stride = info->stride;
00259
00260 ringhelper_update (self, nph, mmax, info->phi0);
00261 self->work[0]=phase[0];
00262
00263 if (nph>=2*mmax+1)
00264 {
00265 for (int m=1; m<=mmax; ++m)
00266 {
00267 dcmplx tmp = phase[m*pstride];
00268 if(!self->norot) tmp*=self->shiftarr[m];
00269 self->work[m]=tmp;
00270 self->work[nph-m]=conj(tmp);
00271 }
00272 for (int m=mmax+1; m<(nph-mmax); ++m)
00273 self->work[m]=0.;
00274 }
00275 else
00276 {
00277 SET_ARRAY(self->work,1,nph,0.);
00278
00279 int idx1=1, idx2=nph-1;
00280 for (int m=1; m<=mmax; ++m)
00281 {
00282 dcmplx tmp = phase[m*pstride];
00283 if(!self->norot) tmp*=self->shiftarr[m];
00284 self->work[idx1]+=tmp;
00285 self->work[idx2]+=conj(tmp);
00286 if (++idx1>=nph) idx1=0;
00287 if (--idx2<0) idx2=nph-1;
00288 }
00289 }
00290 real_plan_backward_c (self->plan, (double *)(self->work));
00291 double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.;
00292 if (flags&SHARP_REAL_HARMONICS)
00293 wgt *= sqrt_one_half;
00294 if (flags&SHARP_DP)
00295 for (int m=0; m<nph; ++m)
00296 ((double *)data)[m*stride+info->ofs]+=creal(self->work[m])*wgt;
00297 else
00298 for (int m=0; m<nph; ++m)
00299 ((float *)data)[m*stride+info->ofs] += (float)(creal(self->work[m])*wgt);
00300 }
00301
00302 static void ringhelper_ring2phase (ringhelper *self,
00303 const sharp_ringinfo *info, const void *data, int mmax, dcmplx *phase,
00304 int pstride, int flags)
00305 {
00306 int nph = info->nph;
00307 #if 1
00308 int maxidx = mmax;
00309 #else
00310 int maxidx = IMIN(nph-1,mmax);
00311 #endif
00312
00313 ringhelper_update (self, nph, mmax, -info->phi0);
00314 double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1;
00315 if (flags&SHARP_REAL_HARMONICS)
00316 wgt *= sqrt_two;
00317 if (flags&SHARP_DP)
00318 for (int m=0; m<nph; ++m)
00319 self->work[m] = ((double *)data)[info->ofs+m*info->stride]*wgt;
00320 else
00321 for (int m=0; m<nph; ++m)
00322 self->work[m] = ((float *)data)[info->ofs+m*info->stride]*wgt;
00323
00324 real_plan_forward_c (self->plan, (double *)self->work);
00325
00326 if (maxidx<nph)
00327 {
00328 if (self->norot)
00329 for (int m=0; m<=maxidx; ++m)
00330 phase[m*pstride] = self->work[m];
00331 else
00332 for (int m=0; m<=maxidx; ++m)
00333 phase[m*pstride]=self->work[m]*self->shiftarr[m];
00334 }
00335 else
00336 {
00337 if (self->norot)
00338 for (int m=0; m<=maxidx; ++m)
00339 phase[m*pstride] = self->work[m%nph];
00340 else
00341 for (int m=0; m<=maxidx; ++m)
00342 phase[m*pstride]=self->work[m%nph]*self->shiftarr[m];
00343 }
00344
00345 for (int m=maxidx+1;m<=mmax; ++m)
00346 phase[m*pstride]=0.;
00347 }
00348
00349 static void ringhelper_pair2phase (ringhelper *self, int mmax,
00350 const sharp_ringpair *pair, const void *data, dcmplx *phase1, dcmplx *phase2,
00351 int pstride, int flags)
00352 {
00353 ringhelper_ring2phase (self,&(pair->r1),data,mmax,phase1,pstride,flags);
00354 if (pair->r2.nph>0)
00355 ringhelper_ring2phase (self,&(pair->r2),data,mmax,phase2,pstride,flags);
00356 }
00357
00358 static void ringhelper_phase2pair (ringhelper *self, int mmax,
00359 const dcmplx *phase1, const dcmplx *phase2, int pstride,
00360 const sharp_ringpair *pair, void *data, int flags)
00361 {
00362 ringhelper_phase2ring (self,&(pair->r1),data,mmax,phase1,pstride,flags);
00363 if (pair->r2.nph>0)
00364 ringhelper_phase2ring (self,&(pair->r2),data,mmax,phase2,pstride,flags);
00365 }
00366
00367 static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
00368 int flags)
00369 {
00370 for (int j=0;j<ginfo->npairs;++j)
00371 {
00372 if (flags&SHARP_DP)
00373 {
00374 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
00375 ((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
00376 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
00377 ((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=value;
00378 }
00379 else
00380 {
00381 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
00382 ((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
00383 =(float)value;
00384 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
00385 ((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
00386 =(float)value;
00387 }
00388 }
00389 }
00390
00391 static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags)
00392 {
00393 #define CLEARLOOP(real_t,body) \
00394 { \
00395 real_t *talm = (real_t *)alm; \
00396 for (int l=m;l<=ainfo->lmax;++l) \
00397 body \
00398 }
00399
00400 for (int mi=0;mi<ainfo->nm;++mi)
00401 {
00402 int m=ainfo->mval[mi];
00403 ptrdiff_t mvstart = ainfo->mvstart[mi];
00404 ptrdiff_t stride = ainfo->stride;
00405 if (!(ainfo->flags&SHARP_PACKED))
00406 mvstart*=2;
00407 if ((ainfo->flags&SHARP_PACKED)&&(m==0))
00408 {
00409 if (flags&SHARP_DP)
00410 CLEARLOOP(double, talm[mvstart+l*stride] = 0.;)
00411 else
00412 CLEARLOOP(float, talm[mvstart+l*stride] = 0.;)
00413 }
00414 else
00415 {
00416 stride*=2;
00417 if (flags&SHARP_DP)
00418 CLEARLOOP(double,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
00419 else
00420 CLEARLOOP(float,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
00421 }
00422
00423 #undef CLEARLOOP
00424 }
00425 }
00426
00427 static void init_output (sharp_job *job)
00428 {
00429 if (job->flags&SHARP_ADD) return;
00430 if (job->type == SHARP_MAP2ALM)
00431 for (int i=0; i<job->ntrans*job->nalm; ++i)
00432 clear_alm (job->ainfo,job->alm[i],job->flags);
00433 else
00434 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00435 fill_map (job->ginfo,job->map[i],0.,job->flags);
00436 }
00437
00438 static void alloc_phase (sharp_job *job, int nm, int ntheta)
00439 {
00440 if (job->type==SHARP_MAP2ALM)
00441 {
00442 if ((nm&1023)==0) nm+=3;
00443 job->s_m=2*job->ntrans*job->nmaps;
00444 job->s_th=job->s_m*nm;
00445 }
00446 else
00447 {
00448 if ((ntheta&1023)==0) ntheta+=3;
00449 job->s_th=2*job->ntrans*job->nmaps;
00450 job->s_m=job->s_th*ntheta;
00451 }
00452 job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*nm*ntheta);
00453 }
00454
00455 static void dealloc_phase (sharp_job *job)
00456 { DEALLOC(job->phase); }
00457
00458
00459 static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
00460 {
00461 if (job->type != SHARP_MAP2ALM) return;
00462 int pstride = job->s_m;
00463 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
00464 {
00465 ringhelper helper;
00466 ringhelper_init(&helper);
00467 #pragma omp for schedule(dynamic,1)
00468 for (int ith=llim; ith<ulim; ++ith)
00469 {
00470 int dim2 = job->s_th*(ith-llim);
00471 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00472 ringhelper_pair2phase(&helper,mmax,&job->ginfo->pair[ith], job->map[i],
00473 &job->phase[dim2+2*i], &job->phase[dim2+2*i+1], pstride, job->flags);
00474 }
00475 ringhelper_destroy(&helper);
00476 }
00477 }
00478
00479 static void alloc_almtmp (sharp_job *job, int lmax)
00480 { job->almtmp=RALLOC(dcmplx,job->ntrans*job->nalm*(lmax+1)); }
00481
00482 static void dealloc_almtmp (sharp_job *job)
00483 { DEALLOC(job->almtmp); }
00484
00485 static void alm2almtmp (sharp_job *job, int lmax, int mi)
00486 {
00487
00488 #define COPY_LOOP(real_t, source_t, expr_of_x) \
00489 for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
00490 for (int i=0; i<job->ntrans*job->nalm; ++i) \
00491 { \
00492 source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \
00493 job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \
00494 }
00495
00496 if (job->type!=SHARP_MAP2ALM)
00497 {
00498 ptrdiff_t ofs=job->ainfo->mvstart[mi];
00499 int stride=job->ainfo->stride;
00500 int m=job->ainfo->mval[mi];
00501
00502
00503
00504 double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_two : 1.;
00505 if (!(job->ainfo->flags&SHARP_PACKED))
00506 ofs *= 2;
00507 if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0)))
00508 stride *= 2;
00509 if (job->spin==0)
00510 {
00511 if (m==0)
00512 {
00513 if (job->flags&SHARP_DP)
00514 COPY_LOOP(double, double, x*norm_m0)
00515 else
00516 COPY_LOOP(float, float, x*norm_m0)
00517 }
00518 else
00519 {
00520 if (job->flags&SHARP_DP)
00521 COPY_LOOP(double, dcmplx, x)
00522 else
00523 COPY_LOOP(float, fcmplx, x)
00524 }
00525 }
00526 else
00527 {
00528 if (m==0)
00529 {
00530 if (job->flags&SHARP_DP)
00531 COPY_LOOP(double, double, x*job->norm_l[l]*norm_m0)
00532 else
00533 COPY_LOOP(float, float, x*job->norm_l[l]*norm_m0)
00534 }
00535 else
00536 {
00537 if (job->flags&SHARP_DP)
00538 COPY_LOOP(double, dcmplx, x*job->norm_l[l])
00539 else
00540 COPY_LOOP(float, fcmplx, x*job->norm_l[l])
00541 }
00542 }
00543 }
00544 else
00545 SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi],
00546 job->ntrans*job->nalm*(lmax+1),0.);
00547
00548 #undef COPY_LOOP
00549 }
00550
00551 static void almtmp2alm (sharp_job *job, int lmax, int mi)
00552 {
00553
00554 #define COPY_LOOP(real_t, target_t, expr_of_x) \
00555 for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
00556 for (int i=0; i<job->ntrans*job->nalm; ++i) \
00557 { \
00558 dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \
00559 *(target_t *)(((real_t *)job->alm[i])+ofs+l*stride) += expr_of_x; \
00560 }
00561
00562 if (job->type != SHARP_MAP2ALM) return;
00563 ptrdiff_t ofs=job->ainfo->mvstart[mi];
00564 int stride=job->ainfo->stride;
00565 int m=job->ainfo->mval[mi];
00566
00567
00568
00569 double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_one_half : 1.;
00570 if (!(job->ainfo->flags&SHARP_PACKED))
00571 ofs *= 2;
00572 if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0)))
00573 stride *= 2;
00574 if (job->spin==0)
00575 {
00576 if (m==0)
00577 {
00578 if (job->flags&SHARP_DP)
00579 COPY_LOOP(double, double, creal(x)*norm_m0)
00580 else
00581 COPY_LOOP(float, float, crealf(x)*norm_m0)
00582 }
00583 else
00584 {
00585 if (job->flags&SHARP_DP)
00586 COPY_LOOP(double, dcmplx, x)
00587 else
00588 COPY_LOOP(float, fcmplx, (fcmplx)x)
00589 }
00590 }
00591 else
00592 {
00593 if (m==0)
00594 {
00595 if (job->flags&SHARP_DP)
00596 COPY_LOOP(double, double, creal(x)*job->norm_l[l]*norm_m0)
00597 else
00598 COPY_LOOP(float, fcmplx, (float)(creal(x)*job->norm_l[l]*norm_m0))
00599 }
00600 else
00601 {
00602 if (job->flags&SHARP_DP)
00603 COPY_LOOP(double, dcmplx, x*job->norm_l[l])
00604 else
00605 COPY_LOOP(float, fcmplx, (fcmplx)(x*job->norm_l[l]))
00606 }
00607 }
00608
00609 #undef COPY_LOOP
00610 }
00611
00612 static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
00613 {
00614 if (job->type == SHARP_MAP2ALM) return;
00615 int pstride = job->s_m;
00616 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
00617 {
00618 ringhelper helper;
00619 ringhelper_init(&helper);
00620 #pragma omp for schedule(dynamic,1)
00621 for (int ith=llim; ith<ulim; ++ith)
00622 {
00623 int dim2 = job->s_th*(ith-llim);
00624 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00625 ringhelper_phase2pair(&helper,mmax,&job->phase[dim2+2*i],
00626 &job->phase[dim2+2*i+1],pstride,&job->ginfo->pair[ith],job->map[i],
00627 job->flags);
00628 }
00629 ringhelper_destroy(&helper);
00630 }
00631 }
00632
00633 static void sharp_execute_job (sharp_job *job)
00634 {
00635 double timer=wallTime();
00636 job->opcnt=0;
00637 int lmax = job->ainfo->lmax,
00638 mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
00639
00640 job->norm_l = (job->type==SHARP_ALM2MAP_DERIV1) ?
00641 sharp_Ylmgen_get_d1norm (lmax) :
00642 sharp_Ylmgen_get_norm (lmax, job->spin);
00643
00644
00645 init_output (job);
00646
00647 int nchunks, chunksize;
00648 get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
00649 &chunksize);
00650 alloc_phase (job,mmax+1,chunksize);
00651
00652
00653 for (int chunk=0; chunk<nchunks; ++chunk)
00654 {
00655 int llim=chunk*chunksize, ulim=IMIN(llim+chunksize,job->ginfo->npairs);
00656 int *ispair = RALLOC(int,ulim-llim);
00657 int *mlim = RALLOC(int,ulim-llim);
00658 double *cth = RALLOC(double,ulim-llim), *sth = RALLOC(double,ulim-llim);
00659 for (int i=0; i<ulim-llim; ++i)
00660 {
00661 ispair[i] = job->ginfo->pair[i+llim].r2.nph>0;
00662 cth[i] = job->ginfo->pair[i+llim].r1.cth;
00663 sth[i] = job->ginfo->pair[i+llim].r1.sth;
00664 mlim[i] = sharp_get_mlim(lmax, job->spin, sth[i], cth[i]);
00665 }
00666
00667
00668 map2phase (job, mmax, llim, ulim);
00669
00670 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
00671 {
00672 sharp_job ljob = *job;
00673 ljob.opcnt=0;
00674 sharp_Ylmgen_C generator;
00675 sharp_Ylmgen_init (&generator,lmax,mmax,ljob.spin);
00676 alloc_almtmp(&ljob,lmax);
00677
00678 #pragma omp for schedule(dynamic,1)
00679 for (int mi=0; mi<job->ainfo->nm; ++mi)
00680 {
00681
00682 alm2almtmp (&ljob, lmax, mi);
00683
00684 inner_loop (&ljob, ispair, cth, sth, llim, ulim, &generator, mi, mlim);
00685
00686
00687 almtmp2alm (&ljob, lmax, mi);
00688 }
00689
00690 sharp_Ylmgen_destroy(&generator);
00691 dealloc_almtmp(&ljob);
00692
00693 #pragma omp critical
00694 job->opcnt+=ljob.opcnt;
00695 }
00696
00697
00698 phase2map (job, mmax, llim, ulim);
00699
00700 DEALLOC(ispair);
00701 DEALLOC(mlim);
00702 DEALLOC(cth);
00703 DEALLOC(sth);
00704 }
00705
00706 DEALLOC(job->norm_l);
00707 dealloc_phase (job);
00708 job->time=wallTime()-timer;
00709 }
00710
00711 static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
00712 int spin, void *alm, void *map, const sharp_geom_info *geom_info,
00713 const sharp_alm_info *alm_info, int ntrans, int flags)
00714 {
00715 UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS),
00716 "bad number of simultaneous transforms");
00717 if (type==SHARP_ALM2MAP_DERIV1) spin=1;
00718 if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
00719 if (type==SHARP_Yt) type=SHARP_MAP2ALM;
00720 if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
00721
00722 UTIL_ASSERT((spin>=0)&&(spin<=alm_info->lmax), "bad spin");
00723 job->type = type;
00724 job->spin = spin;
00725 job->norm_l = NULL;
00726 job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
00727 job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
00728 job->ginfo = geom_info;
00729 job->ainfo = alm_info;
00730 job->flags = flags;
00731 if ((job->flags&SHARP_NVMAX)==0)
00732 job->flags|=sharp_nv_oracle (type, spin, ntrans);
00733 job->time = 0.;
00734 job->opcnt = 0;
00735 job->ntrans = ntrans;
00736 job->alm=alm;
00737 job->map=map;
00738 }
00739
00740 void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
00741 const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
00742 int flags, double *time, unsigned long long *opcnt)
00743 {
00744 sharp_job job;
00745 sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
00746 ntrans, flags);
00747
00748 sharp_execute_job (&job);
00749 if (time!=NULL) *time = job.time;
00750 if (opcnt!=NULL) *opcnt = job.opcnt;
00751 }
00752
00753 void sharp_set_chunksize_min(int new_chunksize_min)
00754 { chunksize_min=new_chunksize_min; }
00755 void sharp_set_nchunks_max(int new_nchunks_max)
00756 { nchunks_max=new_nchunks_max; }
00757
00758 int sharp_get_nv_max (void)
00759 { return 6; }
00760
00761 static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
00762 {
00763 int lmax=511;
00764 int mmax=(lmax+1)/2;
00765 int nrings=(lmax+1)/4;
00766 int ppring=1;
00767
00768 spin = (spin!=0) ? 2 : 0;
00769
00770 ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
00771 sharp_geom_info *tinfo;
00772 sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
00773
00774 ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
00775 int ncomp = ntrans*((spin==0) ? 1 : 2);
00776
00777 double **map;
00778 ALLOC2D(map,double,ncomp,npix);
00779 SET_ARRAY(map[0],0,npix*ncomp,0.);
00780
00781 sharp_alm_info *alms;
00782 sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
00783
00784 dcmplx **alm;
00785 ALLOC2D(alm,dcmplx,ncomp,nalms);
00786 SET_ARRAY(alm[0],0,nalms*ncomp,0.);
00787
00788 double time=1e30;
00789 int nvbest=-1;
00790
00791 for (int nv=1; nv<=sharp_get_nv_max(); ++nv)
00792 {
00793 double time_acc=0.;
00794 double jtime;
00795 int ntries=0;
00796 do
00797 {
00798 sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,
00799 nv|SHARP_DP|SHARP_NO_OPENMP,&jtime,NULL);
00800
00801 if (jtime<time) { time=jtime; nvbest=nv; }
00802 time_acc+=jtime;
00803 ++ntries;
00804 }
00805 while ((time_acc<0.02)&&(ntries<2));
00806 }
00807
00808 DEALLOC2D(map);
00809 DEALLOC2D(alm);
00810
00811 sharp_destroy_alm_info(alms);
00812 sharp_destroy_geom_info(tinfo);
00813 return nvbest;
00814 }
00815
00816 int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans)
00817 {
00818 static const int maxtr = 6;
00819 static int nv_opt[6][2][5] = {
00820 {{0,0,0,0,0},{0,0,0,0,0}},
00821 {{0,0,0,0,0},{0,0,0,0,0}},
00822 {{0,0,0,0,0},{0,0,0,0,0}},
00823 {{0,0,0,0,0},{0,0,0,0,0}},
00824 {{0,0,0,0,0},{0,0,0,0,0}},
00825 {{0,0,0,0,0},{0,0,0,0,0}} };
00826
00827 if (type==SHARP_ALM2MAP_DERIV1) spin=1;
00828 UTIL_ASSERT(type<5,"bad type");
00829 UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
00830 UTIL_ASSERT(spin>=0, "bad spin");
00831 ntrans=IMIN(ntrans,maxtr);
00832
00833 if (nv_opt[ntrans-1][spin!=0][type]==0)
00834 nv_opt[ntrans-1][spin!=0][type]=sharp_oracle(type,spin,ntrans);
00835 return nv_opt[ntrans-1][spin!=0][type];
00836 }
00837
00838 #ifdef USE_MPI
00839 #include "sharp_mpi.c"
00840 #endif