/***************************************************************************
 *cr
 *cr            (C) Copyright 1995-2019 The Board of Trustees of the
 *cr                        University of Illinois
 *cr                         All Rights Reserved
 *cr
 ***************************************************************************/
/***************************************************************************
 * RCS INFORMATION:
 *
 *      $RCSfile: VMDMPI.C,v $
 *      $Author: johns $      $Locker: $      $State: Exp $
 *      $Revision: 1.24 $     $Date: 2020/07/23 04:10:10 $
 *
 ***************************************************************************/

#ifdef VMDMPI
#include <mpi.h>

// Check to see if we have to pass the MPI_IN_PLACE flag
// for in-place allgather reductions (same approach as Tachyon)
#if !defined(USE_MPI_IN_PLACE)
#if (MPI_VERSION >= 2) || defined(MPI_IN_PLACE)
#define USE_MPI_IN_PLACE 1
#endif
#endif

#include <stdio.h>
#include <string.h>
#include <Inform.h>
#include <utilities.h>
#include <WKFThreads.h>
#include "VMDMPI.h"

typedef struct {
  int numcpus;          /* number of processors on this node           */
  int numgpus;          /* number of GPUs on this node                 */
  float cpuspeed;       /* relative speed of the CPUs on this node     */
  float nodespeed;      /* relative speed index for this node          */
  char machname[512];   /* machine/node name                           */
  long corefree;        /* available physical memory (MB)              */
  long corepcnt;        /* available physical memory (percent)         */
} nodeinfo;


// Initialize MPI (requesting serialized thread support when VMD is built
// with threads) and mute console output on all non-root ranks.
int vmd_mpi_init(int *argc, char ***argv) {
  int numnodes=0, mynode=0;

#if defined(VMDTHREADS)
  int provided=0;
  MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, &provided);
  if (provided != MPI_THREAD_SERIALIZED) {
    msgWarn << "MPI not providing thread-serial access." << sendmsg;
  }
#else
  MPI_Init(argc, argv);
#endif
  MPI_Comm_rank(MPI_COMM_WORLD, &mynode);
  MPI_Comm_size(MPI_COMM_WORLD, &numnodes);

  // mute console output for all VMD instances other than the root node
  if (mynode != 0) {
    msgInfo.mute();
    msgWarn.mute();
    msgErr.mute();
  }

  return 0;
}


// Block until all MPI ranks have reached this point.
int vmd_mpi_barrier() {
  MPI_Barrier(MPI_COMM_WORLD);
  return 0;
}


// Synchronize all ranks, then shut down MPI.
int vmd_mpi_fini() {
  vmd_mpi_barrier();
  msgInfo << "All nodes have reached the MPI shutdown barrier." << sendmsg;

  MPI_Finalize();

  return 0;
}


// Report this rank's index and the total number of ranks to the caller.
int vmd_mpi_nodeinfo(int *noderank, int *nodecount) {
  int numnodes=0, mynode=0;

  MPI_Comm_rank(MPI_COMM_WORLD, &mynode);
  MPI_Comm_size(MPI_COMM_WORLD, &numnodes);

  if (noderank != NULL)
    *noderank=mynode;
  if (nodecount != NULL)
    *nodecount=numnodes;

  return 0;
}


// Gather per-node hardware information (CPU count, GPU count, free memory,
// and hostname) from all ranks and print a cluster summary on the root node.
int vmd_mpi_nodescan(int *noderank, int *nodecount,
                     char *nodename, int maxnodenamelen,
                     int localgpucount) {
  int numnodes=0, mynode=0;
  int namelen;
  char namebuf[MPI_MAX_PROCESSOR_NAME];

  MPI_Comm_rank(MPI_COMM_WORLD, &mynode);
  MPI_Comm_size(MPI_COMM_WORLD, &numnodes);
  msgInfo << "Initializing parallel VMD instances via MPI..."
          << sendmsg;

  // collect this node's hardware stats into a table indexed by MPI rank
  nodeinfo *nodes;
  nodes = (nodeinfo *) malloc(numnodes * sizeof(nodeinfo));
  nodes[mynode].numcpus = wkf_thread_numprocessors();
  nodes[mynode].numgpus = localgpucount;
  nodes[mynode].cpuspeed = 1.0;
  nodes[mynode].nodespeed = nodes[mynode].numcpus * nodes[mynode].cpuspeed;
  nodes[mynode].corefree = vmd_get_avail_physmem_mb();
  nodes[mynode].corepcnt = vmd_get_avail_physmem_percent();

  MPI_Get_processor_name((char *) &namebuf, &namelen);

  // prepare for all-to-all gather
  strncpy((char *) &nodes[mynode].machname, namebuf,
          (((namelen + 1) < 511) ? (namelen+1) : 511));

  // provide to caller
  strncpy(nodename, namebuf,
          (((namelen + 1) < (maxnodenamelen-1)) ? (namelen+1) : (maxnodenamelen-1)));

#if defined(USE_MPI_IN_PLACE)
  // MPI >= 2.x implementations (e.g. NCSA/Cray Blue Waters)
  MPI_Allgather(MPI_IN_PLACE, sizeof(nodeinfo), MPI_BYTE,
                &nodes[0], sizeof(nodeinfo), MPI_BYTE,
                MPI_COMM_WORLD);
#else
  // MPI 1.x
  MPI_Allgather(&nodes[mynode], sizeof(nodeinfo), MPI_BYTE,
                &nodes[0], sizeof(nodeinfo), MPI_BYTE,
                MPI_COMM_WORLD);
#endif

  if (mynode == 0) {
    char msgtxt[1024];
    float totalspeed = 0.0;
    int totalcpus = 0;
    int totalgpus = 0;
    int i;

    for (i=0; i<numnodes; i++) {
      totalcpus += nodes[i].numcpus;
      totalgpus += nodes[i].numgpus;
      totalspeed += nodes[i].nodespeed;
    }

    sprintf(msgtxt, "Found %d VMD MPI node%s containing a total of %d CPU%s and %d GPU%s:",
            numnodes, (numnodes > 1) ? "s" : "",
            totalcpus, (totalcpus > 1) ? "s" : "",
            totalgpus, (totalgpus != 1) ? "s" : "");
    msgInfo << msgtxt << sendmsg;

    for (i=0; i<numnodes; i++) {
      sprintf(msgtxt,
              "%4d: %3d CPUs, %4.1fGB (%2ld%%) free mem, "
              "%d GPUs, "
//            "CPU Speed %4.2f, Node Speed %6.2f "
              "Name: %s",
              i, nodes[i].numcpus,
              nodes[i].corefree / 1024.0f, nodes[i].corepcnt,
              nodes[i].numgpus,
//            nodes[i].cpuspeed, nodes[i].nodespeed,
              nodes[i].machname);
      msgInfo << msgtxt << sendmsg;
    }
  }

  // wait for node 0 console output to complete before peers
  // continue with startup process
  MPI_Barrier(MPI_COMM_WORLD);

  if (noderank != NULL)
    *noderank=mynode;
  if (nodecount != NULL)
    *nodecount=numnodes;

  free(nodes);
  return 0;
}

#endif
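
/*
 * Usage sketch: a hypothetical caller built with -DVMDMPI might wire these
 * entry points into startup and shutdown roughly as follows. The buffer
 * size and the zero GPU count are illustrative assumptions only; the GPU
 * count would normally come from the caller's own device detection.
 *
 *   int rank = 0, size = 1;
 *   char hostname[512];
 *
 *   vmd_mpi_init(&argc, &argv);              // start MPI, mute non-root consoles
 *   vmd_mpi_nodescan(&rank, &size, hostname,
 *                    sizeof(hostname), 0);   // gather and report per-node info
 *   // ... parallel work ...
 *   vmd_mpi_fini();                          // barrier, then MPI_Finalize()
 */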