| /*************************************************************************/ |
| /* */ |
| /* Copyright (c) 1994 Stanford University */ |
| /* */ |
| /* All rights reserved. */ |
| /* */ |
| /* Permission is given to use, copy, and modify this software for any */ |
| /* non-commercial purpose as long as this copyright notice is not */ |
| /* removed. All other uses, including redistribution in whole or in */ |
| /* part, are forbidden without prior written permission. */ |
| /* */ |
| /* This software is provided with absolutely no warranty and no */ |
| /* support. */ |
| /* */ |
| /*************************************************************************/ |
| |
| /* |
| * FMM.C |
| * |
| * This file contains the entry to Greengard's adaptive algorithm. |
| * |
| |
| Usage: FMM <options> < inputfile |
| |
| Command line options: |
| |
| -o : Print out final particle positions. |
| -s : Print out individual processor timing statistics. |
| -h : Print out command line options |
| |
| Input file parameter description: |
| There are a total of nine parameters, with parameters |
| three through seven having no default values. |
| |
| 1) Cluster Type : Particles are distributed either in one cluster, |
| or two interacting clusters of size (# of particles)/ 2. |
| These two options are selected by the strings "one cluster" or |
| "two cluster". The default is for two clusters. |
| 2) Distribution Type : Particles are distributed in a cluster |
| either in a spherical uniform distribution, or according to |
| the Plummer model which typically has a large percentage of the |
| particles close to the center of the sphere and fewer particles |
| farther from the center. There two options are selected by |
| the strings "uniform" or "plummer". The default is for a |
| plummer distribution. |
| 3) Number Of Particles : Should be an integer greater than 0. |
| 4) Precision : A measure of how accurate the calculation should be. |
| A precision of 1e-3 means that the results will be accurate to |
| within three decimal places regardless of the relative magnitude |
| of the positions. The precision should be a real number greater |
| than 0. |
| 5) Number of Processors : Should be an integer greater than 0. |
| 6) Number of Time Steps : Should be an integer greater than 0. |
| 7) Duration of a Time Step : How long each time step lasts. |
| Should be a double greater than 0. |
| 8) Softening Parameter : This value sets the minimum distance in |
| each direction that two particles can be separated by. If two |
| particles are closer than this, the distance used for the |
| calculation is changed to the softening parameter. The particle |
| positions themselves are NOT changed. This number should be a |
| real number greater than 0 and defaults to DBL_MIN or FLT_MIN, |
| depending on what type of data is being used. |
| 9) Partitioning Scheme : Sets which type of partitioning scheme |
| is used. There are currently two : "cost zones" and "orb". |
| The default is cost zones. |
| |
| */ |
| |
| #include <stdio.h> |
| #include <math.h> |
| #include <errno.h> |
| #include <stdlib.h> |
| #include "defs.h" |
| #include "memory.h" |
| #include "particle.h" |
| #include "box.h" |
| #include "partition_grid.h" |
| #include "cost_zones.h" |
| #include "construct_grid.h" |
| #include "interactions.h" |
| |
| #define BASE ((((double) 4) - sqrt((double) 2)) / sqrt((double) 2)) |
| #define MAX_LINE_SIZE 100 |
| /* OCCUPANCY * maximum particles per box = avg number of particles per box */ |
| #define OCCUPANCY ((MAX_PARTICLES_PER_BOX > 5) ? .375 : .750) |
| /* Some processors will be given more than the average number of particles. |
| * PDF (Particle Distribution Factor) is the ratio of the maximum to the avg */ |
| #define PDF 4.0 |
| /* A nonuniform distribution will require more boxes than a uniform |
| * distribution of the same size. TOLERANCE is used to account for this */ |
| #define TOLERANCE 1.5 |
| /* Save as PDF, but for boxes */ |
| /* define BDF (((Total_Particles/Number_Of_Processors) > 128) ? 2.0 : 3.0)*/ |
| #define BDF (((Total_Particles/Number_Of_Processors) > 128) ? 4.0 : 8.0) |
| |
| static partition_alg Partition_Flag; |
| static real Precision; |
| static int Time_Steps; |
| static cluster_type Cluster; |
| static model_type Model; |
| int do_stats = 0; |
| int do_output = 0; |
| unsigned long starttime; |
| unsigned long endtime; |
| |
| void ParallelExecute(); |
| void StepSimulation(int my_id, time_info *local_time, int time_all); |
| void PartitionGrid(int my_id, time_info *local_time, int time_all); |
| void GetArguments(); |
| void PrintTimes(); |
| void Help(); |
| |
| |
| void |
| main (int argc, char *argv[]) |
| { |
| int i; |
| int c; |
| extern char *optarg; |
| |
| CLOCK(starttime); |
| |
| while ((c = getopt(argc, argv, "osh")) != -1) { |
| switch(c) { |
| case 'o': do_output = 1; break; |
| case 's': do_stats = 1; break; |
| case 'h': Help(); break; |
| } |
| } |
| |
| MAIN_INITENV(,40000000); |
| |
| GetArguments(); |
| InitGlobalMemory(); |
| InitExpTables(); |
| CreateDistribution(Cluster, Model); |
| |
| //for (i = 1; i < Number_Of_Processors; i++) { |
| CREATE(ParallelExecute, Number_Of_Processors); |
| //} |
| //ParallelExecute(); |
| WAIT_FOR_END(Number_Of_Processors); |
| printf("Finished FMM\n"); |
| PrintTimes(); |
| if (do_output) { |
| PrintAllParticles(); |
| } |
| MAIN_END; |
| } |
| |
| |
| void |
| ParallelExecute () |
| { |
| int my_id; |
| int num_boxes; |
| box *b, *b_list; |
| int i; |
| int start_index, end_index; |
| unsigned long start, finish; |
| time_info *local_time; |
| int time_all = 0; |
| time_info *timing; |
| unsigned int local_init_done; |
| |
| local_time = (time_info *) malloc(sizeof(struct _Time_Info) * MAX_TIME_STEPS); |
| BARRIER(G_Memory->synch, Number_Of_Processors); |
| LOCK(G_Memory->count_lock); |
| my_id = G_Memory->id; |
| G_Memory->id++; |
| UNLOCK(G_Memory->count_lock); |
| |
| /* POSSIBLE ENHANCEMENT: Here is where one might pin processes to |
| processors to avoid migration */ |
| |
| if (my_id == 0) { |
| time_all = 1; |
| } else if (do_stats) { |
| time_all = 1; |
| } |
| |
| if (my_id == 0) { |
| /* have to allocate extra space since it will construct the grid by |
| * itself for the first time step */ |
| CreateParticleList(my_id, Total_Particles); |
| InitParticleList(my_id, Total_Particles, 0); |
| } |
| else { |
| CreateParticleList(my_id, ((Total_Particles * PDF) |
| / Number_Of_Processors)); |
| InitParticleList(my_id, 0, 0); |
| } |
| num_boxes = 1.333 * (Total_Particles / (OCCUPANCY * MAX_PARTICLES_PER_BOX)); |
| if (my_id == 0) |
| CreateBoxes(my_id, TOLERANCE * num_boxes); |
| else |
| CreateBoxes(my_id, TOLERANCE * num_boxes * BDF / Number_Of_Processors); |
| |
| if (my_id == 0) { |
| LockedPrint("Starting FMM with %d processor%s\n", Number_Of_Processors, |
| (Number_Of_Processors == 1) ? "" : "s"); |
| } |
| BARRIER(G_Memory->synch, Number_Of_Processors); |
| Local[my_id].Time = 0.0; |
| for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) { |
| |
| if (MY_TIME_STEP == 2) { |
| /* POSSIBLE ENHANCEMENT: Here is where one might reset the |
| statistics that one is measuring about the parallel execution */ |
| } |
| |
| if (MY_TIME_STEP == 2) { |
| if (do_stats || my_id == 0) { |
| CLOCK(local_init_done); |
| } |
| } |
| |
| if (MY_TIME_STEP == 0) { |
| CLOCK(start); |
| } |
| else |
| start = finish; |
| ConstructGrid(my_id,local_time,time_all); |
| ConstructLists(my_id,local_time,time_all); |
| PartitionGrid(my_id,local_time,time_all); |
| StepSimulation(my_id,local_time,time_all); |
| DestroyGrid(my_id,local_time,time_all); |
| CLOCK(finish); |
| Local[my_id].Time += Timestep_Dur; |
| MY_TIMING[MY_TIME_STEP].total_time = finish - start; |
| } |
| if (my_id == 0) { |
| CLOCK(endtime); |
| } |
| BARRIER(G_Memory->synch, Number_Of_Processors); |
| for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) { |
| timing = &(MY_TIMING[MY_TIME_STEP]); |
| timing->other_time = local_time[MY_TIME_STEP].other_time; |
| timing->construct_time = local_time[MY_TIME_STEP].construct_time; |
| timing->list_time = local_time[MY_TIME_STEP].list_time; |
| timing->partition_time = local_time[MY_TIME_STEP].partition_time; |
| timing->pass_time = local_time[MY_TIME_STEP].pass_time; |
| timing->inter_time = local_time[MY_TIME_STEP].inter_time; |
| timing->barrier_time = local_time[MY_TIME_STEP].barrier_time; |
| timing->intra_time = local_time[MY_TIME_STEP].intra_time; |
| } |
| Local[my_id].init_done_times = local_init_done; |
| BARRIER(G_Memory->synch, Number_Of_Processors); |
| } |
| |
| |
| void |
| PartitionGrid (int my_id, time_info *local_time, int time_all) |
| { |
| unsigned long start, finish; |
| |
| if (time_all) |
| CLOCK(start); |
| if (Partition_Flag == COST_ZONES) |
| CostZones(my_id); |
| else { |
| } |
| if (time_all) |
| CLOCK(finish); |
| |
| if (time_all) { |
| local_time[MY_TIME_STEP].partition_time = finish - start; |
| } |
| } |
| |
| |
| void |
| StepSimulation (int my_id, time_info *local_time, int time_all) |
| { |
| unsigned long start, finish; |
| unsigned long upward_end, interaction_end, downward_end, barrier_end; |
| |
| if (time_all) |
| CLOCK(start); |
| PartitionIterate(my_id, UpwardPass, BOTTOM); |
| if (time_all) |
| CLOCK(upward_end); |
| PartitionIterate(my_id, ComputeInteractions, BOTTOM); |
| if (time_all) |
| CLOCK(interaction_end); |
| BARRIER(G_Memory->synch, Number_Of_Processors); |
| if (time_all) |
| CLOCK(barrier_end); |
| PartitionIterate(my_id, DownwardPass, TOP); |
| if (time_all) |
| CLOCK(downward_end); |
| PartitionIterate(my_id, ComputeParticlePositions, CHILDREN); |
| if (time_all) |
| CLOCK(finish); |
| |
| if (time_all) { |
| local_time[MY_TIME_STEP].pass_time = upward_end - start; |
| local_time[MY_TIME_STEP].inter_time = interaction_end - upward_end; |
| local_time[MY_TIME_STEP].barrier_time = barrier_end - interaction_end; |
| local_time[MY_TIME_STEP].pass_time += downward_end - barrier_end; |
| local_time[MY_TIME_STEP].intra_time = finish - downward_end; |
| } |
| } |
| |
| |
| void |
| GetArguments () |
| { |
| char *input; |
| |
| input = (char *) malloc(MAX_LINE_SIZE * sizeof(char)); |
| if (input == NULL) { |
| fprintf(stderr, "ERROR\n"); |
| exit(-1); |
| } |
| gets(input); |
| if (strcmp(input, "one cluster") == 0) |
| Cluster = ONE_CLUSTER; |
| else { |
| if ((*input == '\0') || (strcmp(input, "two cluster") == 0)) |
| Cluster = TWO_CLUSTER; |
| else { |
| fprintf(stderr, "ERROR: The only cluster types available are "); |
| fprintf(stderr, "\"one cluster\" or \"two cluster\".\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| } |
| |
| gets(input); |
| if (strcmp(input, "uniform") == 0) |
| Model = UNIFORM; |
| else { |
| if ((*input == '\0') || (strcmp(input, "plummer") == 0)) |
| Model = PLUMMER; |
| else { |
| fprintf(stderr, "ERROR: The only distributions available are "); |
| fprintf(stderr, "\"uniform\" or \"plummer\".\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| } |
| |
| Total_Particles = atoi(gets(input)); |
| if (Total_Particles <= 0) { |
| fprintf(stderr, "ERROR: The number of particles should be an int "); |
| fprintf(stderr, "greater than 0.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| |
| Precision = atof(gets(input)); |
| if (Precision == 0.0) { |
| fprintf(stderr, "ERROR: The precision has no default value.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| /* Determine number of multipole expansion terms needed for specified |
| * precision and flag an error if it is too precise */ |
| Expansion_Terms = (int) ceil(-(log(Precision) / log(BASE))); |
| if (Expansion_Terms > MAX_EXPANSION_TERMS) { |
| fprintf(stderr, "ERROR: %g (%d terms) is too great a precision.\n", |
| Precision, Expansion_Terms); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| |
| Number_Of_Processors = atoi(gets(input)); |
| if (Number_Of_Processors == 0) { |
| fprintf(stderr, "ERROR: The Number_Of_Processors has no default.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| if (Number_Of_Processors < 0) { |
| fprintf(stderr, "ERROR: Number of processors should be an int greater "); |
| fprintf(stderr, "than 0.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| |
| Time_Steps = atoi(gets(input)); |
| if (Time_Steps == 0) { |
| fprintf(stderr, "ERROR: The number of time steps has no default.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| if (Time_Steps < 0) { |
| fprintf(stderr, "ERROR: The number of time steps should be an int "); |
| fprintf(stderr, "greater than 0.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| |
| Timestep_Dur = atof(gets(input)); |
| if (Timestep_Dur == 0.0) { |
| fprintf(stderr, "ERROR: The duration of a time step has no default "); |
| fprintf(stderr, "value.\n If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| if (Timestep_Dur < 0) { |
| fprintf(stderr, "ERROR: The duration of a time step should be a "); |
| fprintf(stderr, "double greater than 0.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| |
| Softening_Param = atof(gets(input)); |
| if (Softening_Param == 0.0) |
| Softening_Param = MIN_REAL; |
| if (Softening_Param < 0) { |
| fprintf(stderr, "ERROR: The softening parameter should be a double "); |
| fprintf(stderr, "greater than 0.\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| |
| gets(input); |
| if ((*input == '\0') || (strcmp(input, "cost zones") == 0)) |
| Partition_Flag = COST_ZONES; |
| else { |
| if (strcmp(input, "orb") == 0) |
| Partition_Flag = ORB; |
| else { |
| fprintf(stderr, "ERROR: The only partitioning schemes available "); |
| fprintf(stderr, "are \"cost zones\" \n\t or \"orb\".\n"); |
| fprintf(stderr, "If you need help, type \"nbody -help\".\n"); |
| exit(-1); |
| } |
| } |
| } |
| |
| |
| void |
| PrintTimes () |
| { |
| int i, j; |
| time_info *timing; |
| FILE *fp; |
| double t_total_time = 0; |
| double t_tree_time = 0; |
| double t_list_time = 0; |
| double t_part_time = 0; |
| double t_pass_time = 0; |
| double t_inter_time = 0; |
| double t_bar_time = 0; |
| double t_intra_time = 0; |
| double t_other_time = 0; |
| double total_time; |
| double tree_time; |
| double list_time; |
| double part_time; |
| double pass_time; |
| double inter_time; |
| double bar_time; |
| double intra_time; |
| double other_time; |
| double overall_total = 0; |
| int P; |
| int init_done; |
| |
| if ((fp = fopen("times", "w")) == NULL) { |
| fprintf(stderr, "Error opening output file\n"); |
| fflush(stderr); |
| exit(-1); |
| } |
| fprintf(fp, "TIMING:\n"); |
| fprintf(fp, "%d\t%d\t%.2e\t%d\n", Number_Of_Processors, Total_Particles, |
| Precision, Time_Steps); |
| for (i = 0; i < Time_Steps; i++) { |
| fprintf(fp, "Time Step %d\n", i); |
| for (j = 0; j < Number_Of_Processors; j++) { |
| timing = &(Local[j].Timing[i]); |
| fprintf(fp, "Processor %d\n", j); |
| fprintf(fp, "\tTotal Time = %lu\n", timing->total_time); |
| if (do_stats) { |
| fprintf(fp, "\tTree Construction Time = %lu\n", |
| timing->construct_time); |
| fprintf(fp, "\tList Construction Time = %lu\n", timing->list_time); |
| fprintf(fp, "\tPartition Time = %lu\n", timing->partition_time); |
| fprintf(fp, "\tTree Pass Time = %lu\n", timing->pass_time); |
| fprintf(fp, "\tInter Particle Time = %lu\n", timing->inter_time); |
| fprintf(fp, "\tBarrier Time = %lu\n", timing->barrier_time); |
| fprintf(fp, "\tIntra Particle Time = %lu\n", timing->intra_time); |
| fprintf(fp, "\tOther Time = %lu\n", timing->other_time); |
| } |
| fflush(fp); |
| } |
| } |
| fprintf(fp, "END\n"); |
| fclose(fp); |
| |
| printf(" PROCESS STATISTICS\n"); |
| printf(" Track Tree List Part Pass Inter Bar Intra Other\n"); |
| printf(" Proc Time Time Time Time Time Time Time Time Time\n"); |
| total_time = tree_time = list_time = part_time = pass_time = |
| inter_time = bar_time = intra_time = other_time = 0; |
| for (i = 2; i < Time_Steps; i++) { |
| timing = &(Local[0].Timing[i]); |
| total_time += timing->total_time; |
| tree_time += timing->construct_time; |
| list_time += timing->list_time; |
| part_time += timing->partition_time; |
| pass_time += timing->pass_time; |
| inter_time += timing->inter_time; |
| bar_time += timing->barrier_time; |
| intra_time += timing->intra_time; |
| other_time += timing->other_time; |
| } |
| printf(" %4d %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n", |
| 0,total_time,tree_time,list_time,part_time,pass_time, |
| inter_time,bar_time,intra_time,other_time); |
| t_total_time += total_time; |
| t_tree_time += tree_time; |
| t_list_time += list_time; |
| t_part_time += part_time; |
| t_pass_time += pass_time; |
| t_inter_time += inter_time; |
| t_bar_time += bar_time; |
| t_intra_time += intra_time; |
| t_other_time += other_time; |
| if (total_time > overall_total) { |
| overall_total = total_time; |
| } |
| for (j = 1; j < Number_Of_Processors; j++) { |
| total_time = tree_time = list_time = part_time = pass_time = |
| inter_time = bar_time = intra_time = other_time = 0; |
| for (i = 2; i < Time_Steps; i++) { |
| timing = &(Local[j].Timing[i]); |
| total_time += timing->total_time; |
| tree_time += timing->construct_time; |
| list_time += timing->list_time; |
| part_time += timing->partition_time; |
| pass_time += timing->pass_time; |
| inter_time += timing->inter_time; |
| bar_time += timing->barrier_time; |
| intra_time += timing->intra_time; |
| other_time += timing->other_time; |
| } |
| if (do_stats) { |
| printf(" %4d %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n", |
| j,total_time,tree_time,list_time,part_time,pass_time, |
| inter_time,bar_time,intra_time,other_time); |
| } |
| t_total_time += total_time; |
| t_tree_time += tree_time; |
| t_list_time += list_time; |
| t_part_time += part_time; |
| t_pass_time += pass_time; |
| t_inter_time += inter_time; |
| t_bar_time += bar_time; |
| t_intra_time += intra_time; |
| t_other_time += other_time; |
| if (total_time > overall_total) { |
| overall_total = total_time; |
| } |
| } |
| if (do_stats) { |
| P = Number_Of_Processors; |
| printf(" Avg %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n", |
| t_total_time/P,t_tree_time/P,t_list_time/P,t_part_time/P, |
| t_pass_time/P,t_inter_time/P,t_bar_time/P,t_intra_time/P, |
| t_other_time/P); |
| } |
| printf("\n"); |
| if (Time_Steps > 2) { |
| init_done = Local[0].init_done_times; |
| if (do_stats) { |
| for (j = 1; j < Number_Of_Processors; j++) { |
| if (Local[j].init_done_times > init_done) { |
| init_done = Local[j].init_done_times; |
| } |
| } |
| } |
| printf(" TIMING INFORMATION\n"); |
| printf("Start time : %16ld\n", |
| starttime); |
| printf("Initialization finish time : %16ld\n", |
| init_done); |
| printf("Overall finish time : %16ld\n", |
| endtime); |
| printf("Total time with initialization : %16ld\n", |
| endtime - starttime); |
| printf("Total time without initialization : %16ld\n", |
| (int) (overall_total)); |
| printf("\n"); |
| |
| printf("Total time for steps %d to %d : %12.0f\n",3,Time_Steps, |
| overall_total); |
| printf("\n"); |
| } |
| } |
| |
| |
| void |
| Help () |
| { |
| printf("Usage: FMM <options> < inputfile\n\n"); |
| printf("options:\n"); |
| printf(" -o : Print out final particle positions.\n"); |
| printf(" -s : Print out individual processor timing statistics.\n"); |
| printf(" -h : Print out command line options\n"); |
| printf("\n"); |
| printf("Input parameter descriptions:\n"); |
| printf(" There are nine parameters, and parameters three through\n"); |
| printf(" have no default values.\n"); |
| printf("1) Cluster Type : Distribute particles in one cluster\n"); |
| |
| printf(" (\"one cluster\") or two interacting clusters (\"two cluster\")\n"); |
| printf(" Default is two cluster.\n"); |
| printf("2) Distribution Type : Distribute particles in either a\n"); |
| printf(" uniform spherical distribution (\"uniform\"), or in a\n"); |
| printf(" Plummer model (\"plummer\"). Default is plummer.\n"); |
| printf("3) Number Of Particles : Integer greater than 0.\n"); |
| printf("4) Precision : Precision of results. Should be a double.\n"); |
| printf("5) Number of Processors : Integer greater than 0.\n"); |
| printf("6) Number of Time Steps : Integer greater than 0.\n"); |
| printf("7) Time Step Duration : Double greater than 0.\n"); |
| printf("8) Softening Parameter : Real number greater than 0.\n"); |
| printf(" Defaults is DBL_MIN or FLT_MIN.\n"); |
| printf("9) Partitioning Scheme : \"cost zones\" or \"orb\".\n"); |
| printf(" Default is cost zones.\n"); |
| exit(0); |
| } |
| |
| #undef MAX_LINE_SIZE |
| #undef BASE |