Logo Search packages:      
Sourcecode: fastlink version File versions  Download package

compar.c

/* This file contains common parallel routines for use with the parallel
   versions of the FASTLINK programs ILINK, LINKMAP, and MLINK.
   Sequential FASTLINK is an improved version of LINKAGE.  Improvements
   are described in described in: R. W. Cottingham, Jr., R. M. Idury, and
   A. A. Schaffer Faster Sequential Genetic Linkage Computations American
   Journal of Human Genetics, 53(1993), pp. 252--263 and A. A. Schaffer,
   S. K. Gupta, K. Shriram, and R. W. Cottingham, Jr., Avoiding
   Recomputation in Linkage Analysis Human Heredity 44(1994),
   pp. 225-237.  The parallel implementations of ILINK are described in:
   S. Dwarkadas, A. A. Schaffer, R. W. Cottingham Jr., A. L. Cox,
   P. Keleher, and W. Zwaenepoel, Parallelization of General Linkage
   Analysis Problems, Human Heredity 44(1994), pp. 127-141 and
   S. K. Gupta, A. A. Schaffer, A. L. Cox, S. Dwarkadas, and
   W. Zwaenepoel, Integerating Parallelization Strategies for Linkage
   Analysis, Computers and Biomedical Research, to appear.
   The code in this file was written by Chris Hyams. */
   
#include "commondefs.h"
#include "gemdefs.h"
#if !defined(LESSMEMORY)
#include "moddefs.h"
#endif  /* if !defined(LESSMEMORY) */
#if defined(MLINK)
#include "mldefs.h"
#endif  /* if defined(MLINK) */
#if defined(LINKMAP)
#include "lidefs.h"
#endif  /* if defined(LINKMAP) */

#include <string.h>
#include "compar.h"

/* these routines are specific to MLINK and LINKMAP */
#if (defined(MLINK) || defined(LINKMAP))

/* Figure out which processors will be masters for which theta
   evaluations.  This function is based on AssignThetas() in ilink.c */
void AssignThetas()
{
  int i, j, procNum;
  
  /* figure out how many thetas are left */
  int nonzeroThetasLeft = numNonzeroThetas - nextThetaToAssign;

  /* remember the first theta in this group */
  *firstThetanum = nextThetaToAssign;

#ifdef ASSIGNTHETA_DEBUG
  printf("-- In AssignThetas, with nonzeroThetasLeft = %d, and firstThetanum = %d\n",
       nonzeroThetasLeft, *firstThetanum);
#endif  /* ifdef ASSIGNTHETA_DEBUG */
  
  if (nonzeroThetasLeft >= Tmk_nprocs) {
    /* At least as many thetas as processors */
    *slavesPerGroup = 1;
    *numGroups = Tmk_nprocs;
  } else {
    /* More processors than thetas */
    for (i = 2; i <= Tmk_nprocs; i++)
      if (((Tmk_nprocs % i) == 0) && ((Tmk_nprocs / i) <= nonzeroThetasLeft))
      break;
    *slavesPerGroup = i;
    *numGroups = Tmk_nprocs / i;
  }
  *thetasPerGroup = nonzeroThetasLeft / (*numGroups);
  nextThetaToAssign += (*numGroups) * (*thetasPerGroup);
  
  for (i = 0, procNum = 0; i < (*numGroups); i++)
    for (j = 0; j < (*slavesPerGroup); j++, procNum++) {
      whoIsMyMaster[procNum] = i * (*slavesPerGroup);
#ifdef ASSIGNTHETA_DEBUG
      printf("-- processor %d made master of processor %d\n",
           i * (*slavesPerGroup), procNum);
#endif /* ifdef ASSIGNTHETA_DEBUG */
      if (j == 0) {
      iAmAMaster[procNum] = true;
      thetanumbase[procNum] = i * (*thetasPerGroup);
      thetanumfence[procNum] = thetanumbase[procNum] + (*thetasPerGroup);
#ifdef ASSIGNTHETA_DEBUG
      printf("-- Master processor %d assigned thetas %d through %d\n",
           procNum, (i * (*thetasPerGroup)),
           thetanumbase[procNum] + (*thetasPerGroup));
#endif /* ifdef ASSIGNTHETA_DEBUG */
      }
      else
      iAmAMaster[procNum] = false;
    }
}  /* AssignThetas */


/* this function was adapted from fun() in ilink.c, and iterpeds()
   in mlink.c and linkmap.c */
static void multiPedLike()
{
  int i;
  int thisped;
  
#if PARALLEL
#if FUNTIMES
  struct timeval starttime, endtime;
  double time;
#endif  /* if FUNTIMES */
#endif  /* if PARALLEL */

#if defined(MLINK)  /* cgh */
  static double eachlod[maxped]; /* C. Haynes */
  int II=0;   /* C. Haynes */
#endif  /* cgh -- if defined(MLINK) */
  
#if PARALLEL
#if FUNTIMES
  gettimeofday(&starttime, NULL); /* Get start time */
#endif  /* if FUNTIMES */
#endif  /* if PARALLEL */

#if PARALLEL
  accumfamilytime = 0;
  infun = 1;
  if (!parallelThetas) {
    OneMaster();
#if BARRIER_OUTPUT
    printBarrier("(%d) Full barrier (!parallelThetas) at beginning of multiPedLike\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
   BARRIER(gMem->full_barrier,Tmk_nprocs); 
#else
   Tmk_barrier(Tmk_mask);
#endif  /* if IS_SHMEM */
  }
#endif  /* if PARALLEL */

  tlike = 0.0;   /* cgh -- added this from iterpeds */
  alike = 0.0;
  
  for (i = 1; i <= totperson; i++) {
    person[i]->done = false;
    person[i]->firstpass = true;
  }
  
  thisc = minint;

  if (sexdif && readfemale)
    for(j = 0; j < mlocus - 1; j++) {
      maletheta->theta[j] = gmaletheta[absoluteThetanum][j];
      femaletheta->theta[j] = gfemaletheta[absoluteThetanum][j];
    }
  else
    for(j = 0; j < mlocus - 1; j++) 
      maletheta->theta[j] = gmaletheta[absoluteThetanum][j];
  
  recombination();
  checkzero();    /* cgh -- added this from iterpeds */

  /* buffer output from before calls to likelihood() */
  preLikeBufferOutput();

#if LOOPSPEED
  open_loop_file();
#endif
  /* likelihood loop */
  for (thisped = 0; thisped < nuped; thisped++) {
#if LOOPSPEED
    read_loop_file(thisped+1);
#endif
    likelihood((thisped + 1), proband[thisped]);
    /* store likelihood for later */
    likebyped[thisped] = like;

    /* buffer pedigree output */
    bufferPedOutput(thisped, like);
    
    alike += like;
    like /= log10_;

#if defined(MLINK)
    /* C. Haynes */   
    if (maletheta->theta[which - 1] == 0.5) {
      eachlod[II] = like;
      unlinkedLikeByPed[thisped] = like;
    }

    /* buffer likelihood output */
    bufferLikeOutput(like, unlinkedLikeByPed[thisped]);
#else  /* if defined(MLINK) */
    bufferLikeOutput(like);
#endif  /* if defined(MLINK) */
    
    tlike += like;
#if defined(MLINK)    
    II++;  /* C. Haynes */
#endif  /* if defined(MLINK) */
  }

#if LOOPSPEED
  close_loop_file();
#endif
  /* buffer totals output */
  bufferTotalsOutput(alike, tlike);
  alike *= -2;
  /* buffer lodscore and likelihood output */
  bufferLodOutput(alike, tlike, firsttime);

#if PARALLEL
  /*    BARRIER     */
  funfinished[Tmk_proc_id]++;
  infun = 0;
  if ((*slavesPerGroup) > 1) {
#if BARRIER_OUTPUT
    printBarrier("(%d) Group barrier at end of multiPedLike\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
    BARRIER(partial_barrier[mymaster]->barrier, *slavesPerGroup);
#else
    Tmk_barrier(MakeMask(mymaster, mymaster + (*slavesPerGroup) - 1));
#endif  /* if IS_SHMEM */
  }
  if (!parallelThetas) {
#if BARRIER_OUTPUT
    printBarrier("(%d) Full barrier (!parallelThetas) at end of multiPedLike\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
    BARRIER(gMem->full_barrier, Tmk_nprocs);
#else
    Tmk_barrier(Tmk_mask);
#endif  /* if IS_SHMEM */
  }
#endif  /* if PARALLEL */

  firsttime = false;

#if PARALLEL
#if FUNTIMES
  gettimeofday(&endtime, NULL);  /* Get end time */
  if (!parallelThetas) {
    time = (endtime.tv_sec - starttime.tv_sec)
         + (endtime.tv_usec - starttime.tv_usec)/1000000.0;
    /* add message about which absolute theta we're on, as
       well as master's number */
    printf("Execution time (all processors working together) = %7.3f\n", time);
    fflush(stdout);
  } else {
    /* update currentthetanum */
    executionTimes[absoluteThetanum][0] =
      (endtime.tv_sec - starttime.tv_sec) +
      (endtime.tv_usec - starttime.tv_usec)/1000000.0;
  }
#endif  /* if FUNTIMES */
#endif  /* if PARALLEL */
}  /* multiPedLike */


/* This function was adapted from gforward() in ilink.c.
   All master processors call this function. */
void masterPedLike()
{
  int i;

#if DO_PARALLELTHETAS
  parallelThetas = 1;       /* Turn on parallel thetas */
    
  /* Iterate over each theta */
  for (i = thetanumbase[Tmk_proc_id]; i < thetanumfence[Tmk_proc_id]; i++) {
    eTimeIndex = i;
    /* schaffer && cgh -- indexing bugfix */
    if ((*slavesPerGroup) == Tmk_nprocs)
      currentthetanum = 0;
    else
      currentthetanum = Tmk_proc_id / (*slavesPerGroup);
    absoluteThetanum = absoluteThetaIndex[i + (*firstThetanum)];
    thischild = gMem->gthischild[mymaster];
    pnonzgens = gpnonzgens[currentthetanum];
    qnonzgens = gqnonzgens[currentthetanum];
          
    /* spawn likelihood evaluations */
    multiPedLike();
  }
  
  parallelThetas = 0;   /* Turn off parallelthetas */
#else  /* if DO_PARALLELTHETAS */
  /* cgh -- not sure if this is right anymore */
  *nextTheta = 0;
#endif  /* if DO_PARALLELTHETAS */
}  /* masterPedLike */ 


/* Assign thetas, etc.  Function is called only by processor 0.
   This function is analagous to gemini() in ILINK */
void iterpedsControl()
{
  int i;

  temprowfence = (fencearray *) Malloc(sizeof(fencearray));
  tempqfence = (fencearray *) Malloc(sizeof(fencearray));
  
  *secondfe = 0;
  *numNuclear = 0;
  *firstfe = 1;

  parallelThetas = 0;
  
  /* Iterate over each theta */
  currentthetanum = 0;
  absoluteThetanum = absoluteThetaIndex[0];
  thischild = gMem->gthischild[mymaster];
  pnonzgens = gpnonzgens[currentthetanum];
  qnonzgens = gqnonzgens[currentthetanum];

#if DO_PARALLELTHETAS
  
  /* spawn likelihood evaluations */
  multiPedLike();

  /* If there are too many nuclear families (usually due to multiple
     loops), then dynamic load balancing should not be enabled */
  if ((*numNuclear) <= MAXNUMFAM)   
   *firstfe = 0;

  /* start off with the first theta vector */
  nextThetaToAssign = 1;
  
  while (nextThetaToAssign < numNonzeroThetas) {
    /* assign thetas, and setup global info */
    AssignThetas();

    *checkmaster = 1;
#if BARRIER_OUTPUT
    printBarrier("(%d) Full barrier at beginning of iterpedsControl\n",
             ++barnum);
#endif  /* BARRIER_OUTPUT */
#if IS_SHMEM
    BARRIER(gMem->full_barrier,Tmk_nprocs);
#else  /* IS_SHMEM */
    Tmk_barrier(Tmk_mask);
#endif  /* IS_SHMEM */

    /* call master likelihood evaluation loop */
    masterPedLike();

    
#if BARRIER_OUTPUT
    printBarrier("(%d) Full barrier at end of masterPedLike\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
    BARRIER(gMem->full_barrier,Tmk_nprocs);
#else  /* if IS_SHMEM */
    Tmk_barrier(Tmk_mask);/* Wait for everybody to finish */
#endif  /* IS_SHMEM */
#if FUNTIMES
    for (i = *firstThetanum; i < nextThetaToAssign; i++)
      printf("Execution time (likelihood evaluation done by %d processor(s)) for %d = %7.3f\n",
             *(slavesPerGroup),
           absoluteThetaIndex[i],
           executionTimes[absoluteThetaIndex[i]][0]);
    fflush(stdout);
#endif  /* if FUNTIMES */
  }
  
  /* need to compute loadbalancing information on each function
     evaluation from now on */
  *firstfe = 1;
  
  currentthetanum = 0;
  thischild = gMem->gthischild[mymaster];
  pnonzgens = gpnonzgens[currentthetanum];
  qnonzgens = gqnonzgens[currentthetanum];

#endif  /* if DO_PARALLELTHETAS */
  
  /* Finish rest of thetas sequentially */
  while (nextThetaToAssign < numIpeds) {
    /* Iterate over each theta */
    absoluteThetanum = absoluteThetaIndex[nextThetaToAssign++];
    multiPedLike();
  }
}  /* iterpedsControl */

#endif  /* if (defined(MLINK) || defined(LINKMAP)) */


/* procedure to manage child processes in parallel computation */
void child()
{
  struct timeval start, finish;
  int i,j;
#if ALLELE_SPEED
  int l;
  boolean needToRecomputeTables;
#endif /*ALLELE_SPEED*/
  int oldfun; /* stores old value of funfinished */
#if defined(ILINK)
  int funind;
  int numfuns; /* number of function evaluations in gradient */
#endif  /* defined(ILINK) */
  
  if (Tmk_proc_id == 0) {
    gettimeofday(&start, NULL);
    mymaster = 0;

#if defined(ILINK)
    gemini();
    outf();
#else  /* defined(ILINK) */    
    iterpedsControl();
#endif  /* defined(ILINK) */
    
    gettimeofday(&finish, NULL);
    printf("Elapsed time: %.2f seconds\n",
         (((finish.tv_sec * 1000000.0) + finish.tv_usec) -
          ((start.tv_sec * 1000000.0) + start.tv_usec)) / 1000000.0);
  } else {
    while (1) {
#if BARRIER_OUTPUT
      printBarrier("(%d) Full barrier at beginning of child\n",
               ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
      BARRIER(gMem->full_barrier, Tmk_nprocs);
#else  /* IS_SHMEM */
      Tmk_barrier(Tmk_mask); /* Wait to find out if master or slave */
#endif  /* if IS_SHMEM */
      mymaster = 0;   /* Dummy line */
      if (gMem->finished)
        break;
      
      if ((*checkmaster) && (iAmAMaster[Tmk_proc_id] == true)) {
      /* I am a master */
      mymaster = Tmk_proc_id;
      
#if defined(ILINK)
      if (*usecentral)
        gcentral();
      else
        gforward();
#else  /* defined(ILINK) */
      masterPedLike();
#endif  /* defined(ILINK) */  
      } else {
      /* I am a slave */
#if defined(ILINK)
      if (*usecentral)
        numfuns = 2;
      else 
        numfuns =1;
#endif  /* defined(ILINK) */
      if (*checkmaster)
        mymaster = whoIsMyMaster[Tmk_proc_id];
      else
        /* Sequential function evaluation */
        mymaster = 0;
      for (i = thetanumbase[mymaster];
           i < thetanumfence[mymaster]; i++) {
#if DO_PARALLELTHETAS
        eTimeIndex = i;
        /* schaffer && cgh -- indexing bugfix */
        if ((*slavesPerGroup) == Tmk_nprocs)
          currentthetanum = 0;
        else
          currentthetanum = Tmk_proc_id / (*slavesPerGroup);
#else
        currentthetanum = 0;
#endif  /* if DO_PARALLEL_THETAS */
        thischild = gMem->gthischild[mymaster];
        pnonzgens = gpnonzgens[currentthetanum];
        qnonzgens = gqnonzgens[currentthetanum];

#if defined(ILINK)
#if !PARALLEL_GCENTRAL
        for(funind = 1; funind <= numfuns; funind++) {
#endif /* !PARALLEL_GCENTRAL */
#endif  /* defined(ILINK) */  

        /* Moved this assignment up here to fix problem with the
           shared memory version.  TreadMarks update semantics
           are lazy, but P4 can be greedier.  -- cgh */
        oldfun = funfinished[mymaster];
        
        if ((*slavesPerGroup) > 1) {
#if BARRIER_OUTPUT
          printBarrier("(%d) Group barrier 1 at middle of child\n",
                   ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
          BARRIER(partial_barrier[mymaster]->barrier,
                *slavesPerGroup);
#else
          Tmk_barrier(MakeMask(mymaster, mymaster +
                         (*slavesPerGroup) - 1));
#endif  /* if IS_SHMEM */
        }
        
        if ((sexdif) && readfemale)
          for (j = 0; j < mlocus - 1; j++) {
            maletheta->theta[j] = gmaletheta[i][j];
            femaletheta->theta[j] = gfemaletheta[i][j];
          }
        else 
          for (j = 0; j < mlocus - 1; j++) 
            maletheta->theta[j] = gmaletheta[i][j];
        
        for (j = 0; j < nuneed; j++) {
          maletheta->segprob[j] = gmalesegprob[currentthetanum][j];
          femaletheta->segprob[j] = gfemalesegprob[currentthetanum][j];
        }
        
        while (1) {
          if ((*slavesPerGroup) > 1) {
#if BARRIER_OUTPUT
            printBarrier("(%d) Group barrier 2 at middle of child\n",
                     ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
            BARRIER(partial_barrier[mymaster]->barrier,
                  *slavesPerGroup);
#else
            Tmk_barrier(MakeMask(mymaster, mymaster +
                           (*slavesPerGroup) - 1));
#endif  /* if IS_SHMEM */
          }
          if (funfinished[mymaster] > oldfun)
            break;
#if ALLELE_SPEED
            if ((*slavesPerGroup) > 1) {
              /*Test if need to recompute*/
              if (sharedCurrentPed[mymaster] != currentped) {
                needToRecomputeTables = TRUE;
            ped_must_change_locations[sharedCurrentPed[mymaster] - 1]
                = true;
            currentped = sharedCurrentPed[mymaster];
            recompute_haps(currentped, true);
            }
            }
#endif /*ALLELE_SPEED*/
          switch(gMem->trav_flag[mymaster]) {
          case ONEDOWN:
            onechilddown();
            break;
          case MANYDOWN:
            manychilddown();
            break;
          case ONEUP:
#if defined(LESSMEMORY)
            manychildup();
#else
            onechildup();
#endif
            break;
          case MANYUP:
            manychildup();
            break;
          default:
            printf("ERROR: unknown flag in child\n");
            exit(1);
            break;
          }
        }
#if defined(ILINK)
#if !PARALLEL_GCENTRAL
        }
#endif /* !PARALLEL_GCENTRAL */
#endif  /* defined(ILINK) */    
      }  /* end for loop */
      }  /* end else */                              
      /* This barrier is for all processers except for 0 */
#if BARRIER_OUTPUT
      printBarrier("(%d) Full barrier at end of child\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
      BARRIER(gMem->full_barrier, Tmk_nprocs);
#else  /* IS_SHMEM */
      Tmk_barrier(Tmk_mask); /* Wait for everybody to finish */
#endif  /* if IS_SHMEM */
      mymaster = 0;   /* Dummy line */
    }

#if IS_P4
#if BARRIER_OUTPUT
    printBarrier("(%d) Full barrier at WAIT_FOR_END() in child\n",
             ++barnum);
#endif  /* if BARRIER_OUTPUT */
    BARRIER(gMem->full_barrier, Tmk_nprocs);
    /* Child process is done */
    /* cgh -- we used to call WAIT_FOR_END() here, due to some
       semantic unclarities in the P4 docs.  Even though we haven't
       gotten any official word back from Lusk, et. al. I decided
       that we really don't want this call. */       
    /* WAIT_FOR_END(Tmk_nprocs-1) */
#endif  /* IS_P4 */
  }
}  /* child */




/* Set up data structures for parallel theta evaluations.
   Used to be SandeepSetup(). */
void parThetaSetup()
{
#if defined(ILINK)
  gg = (vector *) Tmk_malloc(sizeof(vector));
  if (!gg)
    Tmk_errexit("MALLOC ERROR - gg\n");
  Tmk_distribute((char *) &gg,sizeof(gg));

  gitp = (itertype *) Tmk_malloc(sizeof(itertype));
  if (!gitp)
    Tmk_errexit("MALLOC ERROR - gitp\n");
  Tmk_distribute((char *) &gitp,sizeof(gitp));

  gnfe = (int *) Tmk_malloc(sizeof(int) * maxn);
  if (!gnfe)
    Tmk_errexit("MALLOC ERROR - gnfe\n");
  Tmk_distribute((char *) &gnfe,sizeof(gnfe));

  for(i = 0; i < maxn; i++)
    gnfe[i] = 0;

  usecentral = (int *) Tmk_malloc(sizeof(int) * Tmk_nprocs);
  if (!usecentral)
    Tmk_errexit("MALLOC ERROR - usecentral\n");
  Tmk_distribute((char *) &usecentral,sizeof(usecentral));
  *usecentral = 0;

#if PARALLEL_GCENTRAL
  gcentralf = (double **) Tmk_malloc(sizeof(double*) * maxn);
  if (!gcentralf)
    Tmk_errexit("MALLOC ERROR - gcentralf\n");
  Tmk_distribute((char *) &gcentralf,sizeof(gcentralf));
  for(i = 0; i < maxn; i++) {
    gcentralf[i] = (double *) Tmk_malloc(sizeof(double) * 2);
    if (!gcentralf[i])
      Tmk_errexit("MALLOC ERROR - gcentralf[%d]\n",i);
  }
#endif /* PARALLEL_GCENTRAL */
#endif  /* defined(ILINK) */

#if !defined(ILINK)
  unlinkedLike = (double*) Tmk_malloc(sizeof(double));
  if (!unlinkedLike)
    Tmk_errexit("MALLOC ERROR - unlinkedLike\n");
  Tmk_distribute((char*) &unlinkedLike, sizeof(unlinkedLike));

  *unlinkedLike = 0.0;
#endif  /* !defined(ILINK) */

#if defined(MLINK)
  if (lodbyfamily) {
    unlinkedLikeByPed = (double *) Tmk_malloc(maxped * sizeof(double));
    if (!unlinkedLikeByPed)
      Tmk_errexit("MALLOC ERROR - unlinkedLikeByPed\n");
    Tmk_distribute((char *) &unlinkedLikeByPed, sizeof(unlinkedLikeByPed));
    for(i = 0; i < nuped; i++)
      unlinkedLikeByPed[i] = 0.0;
  }
#endif
  
  gx = (vector *) Tmk_malloc(sizeof(vector));
  if (!gx)
    Tmk_errexit("MALLOC ERROR - gx\n");
  Tmk_distribute((char *) &gx,sizeof(gx));
  
  gxall = (vector *) Tmk_malloc(sizeof(vector));
  if (!gxall)
    Tmk_errexit("MALLOC ERROR - gxall\n");
  Tmk_distribute((char *) &gxall,sizeof(gxall));
  
  rowtime = (double *) Tmk_malloc(sizeof(double) * fgeno);
  if (!rowtime)
    Tmk_errexit("MALLOC ERROR - rowtime\n");
  Tmk_distribute((char *) &rowtime,sizeof(rowtime));
  
  qrowtime = (double *) Tmk_malloc(sizeof(double) * fgeno);
  if (!qrowtime)
    Tmk_errexit("MALLOC ERROR - qrowtime\n");
  Tmk_distribute((char *) &qrowtime,sizeof(qrowtime));
  
  timeExecutions = (int *) Tmk_malloc(sizeof(int));
  if (!timeExecutions)
    Tmk_errexit("MALLOC ERROR - timeExecutions\n");
  Tmk_distribute((char *) &timeExecutions,sizeof(timeExecutions));
  
  numNuclear = (int *) Tmk_malloc(sizeof(int));
  if (!numNuclear)
    Tmk_errexit("MALLOC ERROR - numNuclear\n");
  Tmk_distribute((char *) &numNuclear,sizeof(numNuclear));
  
  firstfe = (int *) Tmk_malloc(sizeof(int));
  if (!firstfe)
    Tmk_errexit("MALLOC ERROR - firstfe\n");
  Tmk_distribute((char *) &firstfe,sizeof(firstfe));
  
  secondfe = (int *) Tmk_malloc(sizeof(int));
  if (!secondfe)
    Tmk_errexit("MALLOC ERROR - secondfe\n");
  Tmk_distribute((char *) &secondfe,sizeof(secondfe));
  
  rowfence = (fencearray *) Tmk_malloc(sizeof(fencearray));
  if (!rowfence)
    Tmk_errexit("MALLOC ERROR - rowfence\n");
  Tmk_distribute((char *) &rowfence,sizeof(rowfence));
  
  qfence = (fencearray *) Tmk_malloc(sizeof(fencearray));
  if (!qfence)
    Tmk_errexit("MALLOC ERROR - qfence\n");
  Tmk_distribute((char *) &qfence,sizeof(qfence));
  
  funfinished = (int *) Tmk_malloc(sizeof(int) * Tmk_nprocs);
  if (!funfinished)
    Tmk_errexit("MALLOC ERROR - funfinished\n");
  Tmk_distribute((char *) &funfinished,sizeof(funfinished));
  
  for (i = 0; i < Tmk_nprocs; i++)
    funfinished[i] = 0;

  gf = (double *) Tmk_malloc(sizeof(double));
  if (!gf)
    Tmk_errexit("MALLOC ERROR - gf\n");
  Tmk_distribute((char *) &gf, sizeof(gf));
  
  thetanumbase = (int *) Tmk_malloc(sizeof(int) * Tmk_nprocs);
  if (!thetanumbase)
    Tmk_errexit("MALLOC ERROR - thetanumbase\n");
  Tmk_distribute((char *) &thetanumbase,sizeof(thetanumbase));
  
  thetanumfence = (int *) Tmk_malloc(sizeof(int) * Tmk_nprocs);
  if (!thetanumfence)
    Tmk_errexit("MALLOC ERROR - thetanumfence\n");
  Tmk_distribute((char *) &thetanumfence,sizeof(thetanumfence));
  
  iAmAMaster = (boolean *) Tmk_malloc(sizeof(boolean) * Tmk_nprocs);
  if (!iAmAMaster)
    Tmk_errexit("MALLOC ERROR - iAmAMaster\n");
  Tmk_distribute((char *) &iAmAMaster,sizeof(iAmAMaster));
  
  whoIsMyMaster = (int *) Tmk_malloc(sizeof(int) * Tmk_nprocs);
  if (!whoIsMyMaster)
    Tmk_errexit("MALLOC ERROR - whoIsMyMaster\n");
  Tmk_distribute((char *) &whoIsMyMaster,sizeof(whoIsMyMaster));
  
  checkmaster = (int *) Tmk_malloc(sizeof(int) * Tmk_nprocs);
  if (!checkmaster)
    Tmk_errexit("MALLOC ERROR - checkmaster\n");
  Tmk_distribute((char *) &checkmaster,sizeof(checkmaster));
  
  slavesPerGroup = (int *) Tmk_malloc(sizeof(int));
  if (!slavesPerGroup)
    Tmk_errexit("MALLOC ERROR - slavesPerGroup\n");
  Tmk_distribute((char *) &slavesPerGroup,sizeof(slavesPerGroup));
  
  thetasPerGroup = (int *) Tmk_malloc(sizeof(int));
  if (!thetasPerGroup)
    Tmk_errexit("MALLOC ERROR - thetasPerGroup\n");
  Tmk_distribute((char *) &thetasPerGroup,sizeof(thetasPerGroup));
  
  nextTheta = (int *) Tmk_malloc(sizeof(int));
  if (!nextTheta)
    Tmk_errexit("MALLOC ERROR - nextTheta\n");
  Tmk_distribute((char *) &nextTheta,sizeof(nextTheta));
  
  numGroups = (int *) Tmk_malloc(sizeof(int));
  if (!numGroups)
    Tmk_errexit("MALLOC ERROR - numGroups\n");
  Tmk_distribute((char *) &numGroups,sizeof(numGroups));
  
  *checkmaster = 0;
}  /* parThetaSetup */



/* parallel startup/initialization code */
void parStartup(argc, argv)
int argc;
char** argv;
{
  int c;

#if !IS_SHMEM
  boolean seenOptN = false;
#endif  
  
#if (IS_P4 || !IS_SHMEM)
  reportMemStats = false;
#endif  /* (IS_P4 || !IS_SHMEM) */

  maxworkingset = 0;

#if IS_SHMEM
  Tmk_proc_id = 0;
  Tmk_nprocs = 4;

#if IS_P4
  while ((c = getopt(argc, argv, "w:n:p:mi")) != -1)
#else  /* IS_P4 */
  while ((c = getopt(argc, argv, "w:n:i")) != -1)
#endif  /* IS_P4 */
#else  /* IS_SHMEM */
  while ((c = getopt(argc, argv, "w:n:mi")) != -1)
#endif  /* IS_SHMEM */
  
    switch (c) {
    case 'w':
      maxworkingset = atoi(optarg);
      break;
    case 'i':
      printInfo();
      break;
#if (IS_P4 || !IS_SHMEM)
    case 'm':
      reportMemStats = true;
      break;
#endif  /* (IS_P4 || !IS_SHMEM) */
    case 'n':
      Tmk_nprocs = atoi(optarg);
#if !IS_SHMEM
      seenOptN = true;
      Tmk_proc_id = 0;
#endif  /* !IS_SHMEM */      
      break;
#if IS_P4
    case 'p':
      if (*optarg == '4')
        /* it's a p4 command-line switch */
      break;
      /* otherwise, it *is* an unrecognized option... */
#endif  /* IS_P4 */
    case '?':
      /* fprintf(stderr,"Unrecognized option\n"); */
      exit(-1);
    default:
      break;
    }

#if !IS_SHMEM
  /* make sure options make sense */
  if (reportMemStats == true) {
    if (seenOptN == false)
      Tmk_errexit("To use the -m option with TreadMarks, you must also\nprovide -n <nprocs> to specify the number of processors.\n");
  } else if (seenOptN == true) {
    Tmk_errexit("The -n option before the \"--\" can only be used with TreadMarks\nin conjunction with the -m option.  Refer to the README.TreadMarks\nfor details on specifying processors.\n");
  } else {
    /* everything is normal -- no memory stat reporting, and no -n */
    Tmk_startup(argc, argv);
  }
#endif  /* !IS_SHMEM */
  
  if (Tmk_nprocs > maxprocs)
    Tmk_errexit("In order to use more than %d processors, you must increase the value\nof maxprocs in commondefs.h\n", maxprocs);
}  /* parStartup */


/* Allocate and initialize barriers, and allocate global memory */
void gMemAndBarrierAlloc()
{
#if IS_SHMEM
#if IS_PARMACS
  MAIN_INITENV();
#endif  /* IS_PARMACS */
  partial_barrier = (Barrier **) G_MALLOC(Tmk_nprocs * sizeof(Barrier *));
  for (i = 0; i < Tmk_nprocs; i++)
    partial_barrier[i] = (Barrier *) G_MALLOC(sizeof(Barrier));
/* Don't need barrier with SHMEM */
#else  /* IS_SHMEM */
#if BARRIER_OUTPUT
  printBarrier("(%d) Full barrier at beginning of main\n", ++barnum);
#endif  /* BARRIER_OUTPUT */
  Tmk_barrier(Tmk_mask);
#endif  /* if IS_SHMEM */

  onlymaster = 0;

  if (Tmk_proc_id == 0) {
    gMem = (GlobalMemory *) Tmk_malloc(sizeof(GlobalMemory));
    if (!gMem)
      Tmk_errexit("MALLOC ERROR - gMem\n");
    Tmk_distribute((char *) &gMem, sizeof(gMem));
    gMem->finished = 0;
  }
#if IS_SHMEM
  BARINIT(gMem->full_barrier);
  for (i=0;i<Tmk_nprocs;i++)
    BARINIT(partial_barrier[i]->barrier);
/* Don't need barrier with SHMEM */
#else  /* IS_SHMEM */
#if BARRIER_OUTPUT
  printBarrier("(%d) Full barrier at middle of main\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
  Tmk_barrier(Tmk_mask);
#endif  /* if IS_SHMEM */
}  /* gMemAndBarrierAlloc */

#if IS_SHMEM
/* This function *must* be called right before the fork for shared
   memory versions.  Otherwise, child processes inherit buffer state
   (which may not be flushed) at the time of the fork.  When the
   functions exit, the buffers will then be flushed, potentially
   leading to invalid output. -- cgh */
static void flushOutputFiles()
{
  if (final != NULL)
    fflush(final);
  if (stream != NULL)
    fflush(stream);
  if (outfile != NULL)
    fflush(outfile);
  fflush(stdout);
}
#endif  /* IS_SHMEM */


/* iterates over processes, creating children */
void childLoop()
{
#if IS_SHMEM
 flushOutputFiles();  /* cgh -- see comment above */

  for (Tmk_proc_id = 1; Tmk_proc_id < Tmk_nprocs; Tmk_proc_id++) {
    CREATE(child)
  }
  Tmk_proc_id = 0;
#endif  /* if IS_SHMEM */

  child();

  if (Tmk_proc_id == 0) {
    gMem->finished = 1;
#if BARRIER_OUTPUT
    printBarrier("(%d) Full barrier at end of main\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
#if IS_SHMEM
    BARRIER(gMem->full_barrier,Tmk_nprocs);
#else
    Tmk_barrier(Tmk_mask);
#endif  /* if IS_SHMEM */
  }
#if IS_PARMACS
  /* in P4, processor 0 is not quite done yet */
  WAIT_FOR_END(Tmk_nprocs-1)
#endif  /* if IS_PARMACS */
}  /* childLoop */


/* Exit safely from parallel code, writing an error message to
   stderr, as well as writing to the FASTLINK.err file */
void parErrExit(errmesg)
char* errmesg;
{
  time_t secondsNow;

  FILE* errFile = fopen("FASTLINK.err", "a");
  if (errFile != NULL) {
    time(&secondsNow);
    fprintf(errFile, "\n%s", ctime(&secondsNow));
    fprintf(errFile, errmesg);
    fclose(errFile);
  }
  
  Tmk_errexit(errmesg);
}  /* parErrExit */


/* This function should be called after inputdata() to check to
   see if there are any constants set that provide situations we
   have not yet implemented */
void checkNotImplemented()
{
#if (PARALLEL && !defined(DOS))
  parErrExit("Parallel FASTLINK (compiled with PARALLEL=1)\ncurrently requires that DOS be defined as well.\nPlease recompile with -DDOS, or use sequential\nFASTLINK if you want to use checkpointing.\n");
#endif   /* (PARALLEL && !defined(DOS)) */
  
  if (sexlink)
    parErrExit("FASTLINK has not yet been parallelized for sexlinked data --\nuse the sequential version instead.\n");
  
  if (interfer)
    parErrExit("FASTLINK has not yet been parallelized for a model with interference --\nuse the sequential version instead.\n");
  
  if (mutsys != 0)
    parErrExit("FASTLINK has not yet been parallelized for the mutation model --\nuse the sequential version instead.\n");
}  /* checkNotImplemented */



void printBarrier(format, barrierNum)
char* format;
int barrierNum;
{
  printf("%d: ", Tmk_proc_id);
  printf(format, barrierNum);
}


#if PARALLEL
void parFinish()
{
#if defined(GMEM_DEBUG)
  if (Tmk_proc_id == 0)
    printf("Total memory used: %d\n", totMem);
#endif  /* defined(GMEM_DEBUG) */
  
#if IS_P4
#if BARRIER_OUTPUT
  printBarrier("(%d) Full barrier at WAIT_FOR_END() in main\n", ++barnum);
#endif  /* if BARRIER_OUTPUT */
  BARRIER(gMem->full_barrier, Tmk_nprocs);
  /* Master process is done */
  WAIT_FOR_END(Tmk_nprocs-1)  
#elif IS_PARMACS  /* if IS_P4 */
  MAIN_END
#else  /* if IS_P4 */
  Tmk_exit(0);
#endif  /* if IS_P4 */
}  /* parFinish */


#if (defined(MLINK) || defined(LINKMAP))
/* Determine the proper value for numparams in allocgen(). This value
   represents the maximum value that currentthetanum can take on.  It
   is calculated as the maximum of (the maximum of 1 and the largest
   number smaller than the number of non-zero thetas that divides
   Tmk_nprocs evenly) and the value of numgroups calculated in
   AssignThetas() for the first theta evaluation. */
int calcNumparams()
{
  int i = 1;
  
  /* This loop is similar to one in AssignThetas().  It determines
     the the value of numgroups for the first theta evaluation. */
  if ((numIpeds - numZeroThetas) < Tmk_nprocs) {
    for (i = 2; i <= Tmk_nprocs; i++)
      if (((Tmk_nprocs % i) == 0) && 
          ((Tmk_nprocs / i) <= (numIpeds - numZeroThetas)))
        break;
  }
  return(Max(1,(Tmk_nprocs / i)));
}
#endif  /* (defined(MLINK) || defined(LINKMAP)) */


#endif  /* if PARALLEL */



/* Determine amount of memory to request given the value of our
   computed estimate */
unsigned addFudge(memNeeded, fmemNeeded)
unsigned *memNeeded, *fmemNeeded;
{
   int p;
  /* First, add a fudge factor to take into account other run-time
     allocations, like the string buffers, etc.  The fudge factor is
     either some percentage of the total usage calculated, or the
     maximum we allow -- whichever is smaller. */
  for(p =0; p < Tmk_nprocs; p++) {
    fmemNeeded[p] += Min((memFudgePercentage * memNeeded[p]), maxMemFudge);
  /* Next, we make sure to allocate at least the minimum we allow. */
    fmemNeeded[p] = memNeeded[p] + ((fmemNeeded[p] < minMemNeeded) ? minMemNeeded : fmemNeeded[p]);
  }
  return(fmemNeeded[Tmk_nprocs -1]);    
}


/* print out memory use statistics given memory usage calculated in
   computeMemNeeded() */
static void printMemStats(memCalculated)
unsigned *memCalculated;
{
  int p;
  unsigned fmemCalculated[maxprocs];

  for(p=0; p < Tmk_nprocs; p++)
    fmemCalculated[p] = 0;  

#if IS_P4
  addFudge(memCalculated,fmemCalculated);
  printf("\tmemory usage          total request\n");
  printf("\tcalculated            (including fudge factor)\n");
  printf("\t----------------------------------------------\n");
for (p = Tmk_nprocs; p> 0; p--)
  printf("\t%-11d bytes     %-11d bytes on %d processor(s)\n",
       memCalculated[p-1], fmemCalculated[p-1], p);
#else  /* IS_P4 */
for (p = Tmk_nprocs; p> 0; p--)
  printf("\t%d bytes of shared memory on %d processor(s)\n", memCalculated[p -1], p);
#endif  /* IS_P4 */
  printf("\n");
}


/* report memory usage statistics */
static void reportMemoryStats(memNeeded, allocprobMem)
unsigned *memNeeded, *allocprobMem;
{
#if PRECOMPUTE
  int precompute = 1;
#else
  int precompute = 0;
#endif  /* PRECOMPUTE */

  int p;

  printf("----------------------------------\n");
  printf("\n\n%s is currently compiled with PRECOMPUTE=%d.\n",
       PROGRAM, precompute);
#if IS_P4  
  printf("Shared memory usage for this run will be as follows:\n\n");
#else  /* IS_P4 */
  printf("This run will require at least:\n\n");
#endif  /* IS_P4 */
  printMemStats(memNeeded);

  printf("Recompiling with PRECOMPUTE=%d would yield:\n\n",
       (precompute + 1) & 1);
  for(p =0; p < Tmk_nprocs; p++) {
  if (precompute == 1)
    memNeeded[p] = memNeeded[p] - allocprobMem[p];
  else
    memNeeded[p] = memNeeded[p] + allocprobMem[p];
  }
  printMemStats(memNeeded);

  printf("Please refer to the README.makefile and ");
#if IS_P4
  printf("README.p4 for details.\n");
#else  /* IS_P4 */
  printf("README.TreadMarks for details.\n");
#endif  /* IS_P4 */
}  /* reportMemoryStats */


/* Approximate how much shared memory we will need so we don't need to
   tell p4 on the command line.  Essentially, we just add up all of
   the arguments to Tmk_malloc() in the routines parThetaSetup(),
   allocgen() and allocprob(), and add half again as much to the
   result to be safe. -- cgh */
static unsigned computeMemNeeded()
{
  unsigned memNeeded[maxprocs], allocprobMem[maxprocs],
           fmemNeeded[maxprocs];
  unsigned numparams, maxThetas;
 
  int numprocs;

  for(numprocs = 1; numprocs <= Tmk_nprocs; numprocs++) {

    fmemNeeded[numprocs - 1] = 
      memNeeded[numprocs - 1] = allocprobMem[numprocs - 1] = 0;

  /* from allocgen() in comlike.c */
#if PARALLEL_GCENTRAL
  numparams = maxThetas = 2 * maxn;
#elif (defined(MLINK) || defined(LINKMAP))
  numparams = calcNumparams();
  maxThetas = numIpeds;

  memNeeded[numprocs -1] =
    (sizeof(boolean) * numIpeds) +
    (sizeof(int) * numIpeds) +
    (sizeof(int)) +
#else  /* if PARALLEL_GCENTRAL ... */
  numparams = maxThetas = maxn;

  memNeeded[numprocs - 1] =
#endif  /* if PARALLEL_GCENTRAL */
    (sizeof(thetarray) * maxThetas) +
    (sizeof(thetarray) * maxThetas) +
    (sizeof(double *) * numparams) +
    (numparams * (sizeof(double) * nuneed)) +
    (sizeof(double *) * numparams) +
    (numparams * (sizeof(double) * nuneed)) +
    (numparams * sizeof(char*)) +
    (numparams * sizeof(thisarray*)) +
    (numparams * ((maxworkingset * sizeof(char)) +
                  (maxworkingset * maxfemgen * sizeof(unsigned char)) +
              (maxworkingset * maxfemgen * sizeof(double)))) +
    (numprocs * (sizeof(double *))) +
    (numprocs * (maxfemgen * sizeof(double))) +
    (numprocs * (sizeof(double *))) +
    (numprocs * (sizeof(double) * maxhaplo)) +
    (numprocs * (sizeof(double *))) +
    (numprocs * (sizeof(double) * maxhaplo)) +
    (numparams * sizeof(int*)) +
    (numparams * sizeof(int*)) +
    (numparams * ((maxfemgen * sizeof(int)) +
              (maxfemgen * sizeof(int))));
#if FUNTIMES
#if PARALLEL_GCENTRAL
  memNeeded[numprocs - 1] += (2 * maxn * sizeof(double));
#else /* !PARALLEL_GCENTRAL */
  memNeeded[numprocs - 1] +=
    (maxThetas * sizeof(double*)) +
    (maxThetas * (2 * sizeof(double)));
#endif /* PARALLEL_GCENTRAL */
#endif /*FUNTIMES*/
      
    /* from parThetaSetup() in compar.c */
  memNeeded[numprocs - 1] +=
#if defined(ILINK)
    (sizeof(vector)) +
    (sizeof(itertype)) +
    (sizeof(int) * maxn) +
    (sizeof(int) * numprocs) +
#if PARALLEL_GCENTRAL
    (sizeof(double*) * maxn) +
    (maxn * (sizeof(double) * 2)) +
#endif  /* PARALLEL_GCENTRAL */
#else  /* defined(ILINK) */
    (sizeof(double)) +
#endif  /* defined(ILINK) */
#if defined(MLINK)
    (maxped * sizeof(double)) +
#endif
    (sizeof(vector)) +
    (sizeof(vector)) +
    (sizeof(double) * fgeno) +
    (sizeof(double) * fgeno) +
    (sizeof(int)) +
    (sizeof(int)) +
    (sizeof(int)) +
    (sizeof(int)) +
    (sizeof(fencearray)) +
    (sizeof(fencearray)) +
    (sizeof(int) * numprocs) +
    (sizeof(double)) +
    (sizeof(int) * numprocs) +
    (sizeof(int) * numprocs) +
    (sizeof(boolean) * numprocs) +
    (sizeof(int) * numprocs) +
    (sizeof(int) * numprocs) +
    (sizeof(int)) +
    (sizeof(int)) +
    (sizeof(int)) +
    (sizeof(int)) +

    /* Global memory */  
    (sizeof(GlobalMemory));

  /* from allocprob() in parmodified.c */
  /* Note: this memory usage is only counted in the total usage if
     PRECOMPUTE=1.  However, for statists reporting, we figure it out
     anyway. */
  allocprobMem[numprocs -1] +=
    (numprocs * (sizeof(double *))) +
    (numprocs * ((maxisozygclass * nuneed * nuneed) *
               sizeof(double)));
  if (!sexlink) {
    allocprobMem[numprocs -1] +=
      (numprocs * (sizeof(unsigned *))) +
      (numprocs * ((nuneed * nuneed) * sizeof(unsigned)));
    if (sexdif)
      allocprobMem[numprocs - 1] +=      
      (numprocs * (sizeof(double *))) +
      (numprocs * ((maxisozygclass * nuneed * nuneed) *
                   sizeof(double)));
  }
  allocprobMem[numprocs - 1] +=
    (nuprobclass * sizeof(unsigned)) +
    (nuprobclass  * sizeof(unsigned));

#if PRECOMPUTE
  /* This is where we add in the usage. */
  memNeeded[numprocs - 1] += allocprobMem[numprocs - 1];
#endif  /* if PRECOMPUTE */  
  }
  if (reportMemStats == true) {
    reportMemoryStats(memNeeded, allocprobMem);
    exit(0);
  }

  /* add the fudge */
  return addFudge(memNeeded, fmemNeeded);
}  /* computeMemNeeded */


#if IS_P4

/* p4's command line option for setting global memory size */
static char* p4gmOpt = "-p4gm";

#define maxNumStrlen 19

/* convert an unsigned into it's ASCII representation */
static void itoa(str, num)
char* str;
unsigned num;
{
  unsigned digit;
  char buffer[maxNumStrlen + 1];
  int i = maxNumStrlen;

  buffer[i--] = '\0';

  do {
    digit = num % 10;
    buffer[i--] = digit + '0';
    num = (num - digit) / 10;
  } while (num > 0);

  strcpy(str, &buffer[i+1]);
}


#define maxArgs 8192   /* this should be sufficient */

/* p4 only allows us to declare the amount of global memory we'll need 
   on the command line.  Since we can determine it simply at run-time,
   we create a bogus argc and argv, and call the p4 initialization
   routine with an appropriate command-line flag. */
void initP4(argc, argv)
int argc;
char** argv;
{
  int i;
  char memNeededStr[maxNumStrlen];
  char* p4_argv[maxArgs+1];
  int p4_argc = argc + 2;         /* we're adding 2 arguments */

  /* first, check to see if memsize was specified on the command line
     already */
  for (i = 0; i < argc; i++) {
    if (!strcmp(argv[i], p4gmOpt)) {
      /* it was, so just initialize p4 the normal way and return */
      p4_initenv(&argc, argv);
      return;
    }
  }
     
  /* generate string representing memory usage */
  itoa(memNeededStr, computeMemNeeded());

  /* concoct our new argv */

  /* first, copy the old argv */
  for (i = 0; i < argc; i++)
    p4_argv[i] = argv[i];

  /* now, add the flag for global memory size */
  p4_argv[i++] = p4gmOpt;
  p4_argv[i++] = memNeededStr;
  p4_argv[p4_argc] = NULL;

#if defined(GMEM_DEBUG)
  printf("Allocating %s bytes of shared memory\n", memNeededStr);
#endif  /* defined(GMEM_DEBUG) */
  
  /* initialize p4 */
  p4_initenv(&p4_argc, p4_argv);
}  /* initP4 */

#endif  /* IS_P4 */


void initParLib(argc, argv)
int argc;
char** argv;
{
#if IS_P4
  initP4(argc, argv);
#endif  /* IS_P4 */
#if !IS_SHMEM
  if (reportMemStats == true)
    computeMemNeeded();
#endif  /* !IS_SHMEM */
}
  

/* end */

Generated by  Doxygen 1.6.0   Back to index