repartition.hh

00001 #ifndef DUNE_REPARTITION_HH
00002 #define DUNE_REPARTITION_HH
00003 
00004 #if HAVE_PARMETIS
00005 #include <parmetis.h>
00006 #endif
00007 #include "config.h"
00008 #include <dune/istl/owneroverlapcopy.hh>
00009 #include <dune/istl/mpitraits.hh>
00010 #include <dune/istl/indexset.hh>
00011 #include <dune/istl/remoteindices.hh>
00012 #include <dune/istl/indicessyncer.hh>
00013 #include <dune/istl/communicator.hh>
00014 #include <dune/istl/paamg/graph.hh>
00015 #include <dune/common/enumset.hh>
00016 #include <map>
00017 #include <utility>
00018 #include <cassert>
00019 
00028 namespace Dune 
00029 {
00030 #if HAVE_MPI
00031 
00044     template<class G, class T1, class T2>
00045     void fillIndexSetHoles(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm)
00046     {
00047       typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet;
00048       IndexSet& indexSet = oocomm.indexSet();
00049       const typename Dune::OwnerOverlapCopyCommunication<T1,T2>::GlobalLookupIndexSet& lookup =oocomm.globalLookup();
00050 
00051       // The type of the const vertex iterator.
00052       typedef typename G::ConstVertexIterator VertexIterator;
00053       
00054       
00055       std::size_t sum=0, needed = graph.noVertices()-indexSet.size();
00056       std::vector<std::size_t> neededall(oocomm.communicator().size(), 0);
00057       
00058       MPI_Allgather(&needed, 1, MPITraits<std::size_t>::getType(), &(neededall[0]), 1, MPITraits<std::size_t>::getType(), oocomm.communicator());
00059       for(int i=0; i<oocomm.communicator().size(); ++i)
00060         sum=sum+neededall[i]; // TODO: make this more generic
00061       
00062       if(sum==0)
00063         // Nothing to do
00064         return;
00065       
00066       //Compute Maximum Global Index
00067       T1 maxgi=0;
00068       typedef typename IndexSet::const_iterator Iterator;
00069       Iterator end;
00070       end = indexSet.end();
00071       for(Iterator it = indexSet.begin(); it != end; ++it) 
00072         maxgi=std::max(maxgi,it->global());
00073 
00074       // Process p creates its new global indices consecutively,
00075       // starting at maxgi + sum of neededall[i] for all i < p.
00076       // All created indices are owned by the creating process.
00077       maxgi=oocomm.communicator().max(maxgi);
00078       ++maxgi; // Start with the next free index.
00079       
00080       for(int i=0; i<oocomm.communicator().rank(); ++i)
00081         maxgi=maxgi+neededall[i]; // TODO: make this more generic
00082       
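      // Worked example (added): if the largest existing global index is 41 and
      // neededall = {2, 0, 3}, then maxgi becomes 42 and rank 0 creates the
      // indices 42,43, rank 1 creates none, and rank 2 creates 44,45,46, so the
      // new indices are disjoint and consecutive per rank.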
00083       // Store the global index information for repairing the remote index information
00084       std::map<int,SLList<T1> > globalIndices;
00085       storeGlobalIndicesOfRemoteIndices(globalIndices, oocomm.remoteIndices(), indexSet);
00086       indexSet.beginResize();
00087       
00088       for(VertexIterator vertex = graph.begin(), vend=graph.end(); vertex != vend; ++vertex){
00089         const typename IndexSet::IndexPair* pair=lookup.pair(*vertex);
00090         if(pair==0){
00091           // No index yet, add new one
00092           indexSet.add(maxgi, typename IndexSet::LocalIndex(*vertex, OwnerOverlapCopyAttributeSet::owner, false));
00093           ++maxgi;
00094         }
00095       }
00096       
00097       indexSet.endResize();
00098 
00099       repairLocalIndexPointers(globalIndices, oocomm.remoteIndices(), indexSet);
00100             
00101       oocomm.freeGlobalLookup();
00102       oocomm.buildGlobalLookup();
00103 #ifdef DEBUG_REPART
00104       std::cout<<"Holes are filled!"<<std::endl;
00105       std::cout<<oocomm.communicator().rank()<<": "<<oocomm.indexSet()<<std::endl;
00106 #endif
00107     }
00108 
00109   namespace 
00110   {
00111     
00112     class ParmetisDuneIndexMap
00113     {
00114     public:
00115       template<class Graph, class OOComm>
00116       ParmetisDuneIndexMap(const Graph& graph, const OOComm& com);
00117       int toParmetis(int i) const
00118       {
00119         return duneToParmetis[i];
00120       }
00121       int toLocalParmetis(int i) const
00122       {
00123         return duneToParmetis[i]-base_;
00124       }
00125       int operator[](int i) const
00126       {
00127         return duneToParmetis[i];
00128       }
00129       int toDune(int i) const
00130       {
00131         return parmetisToDune[i];
00132       }
00133       std::vector<int>::size_type numOfOwnVtx() const
00134       {
00135         return parmetisToDune.size();
00136       }
00137       int* vtxDist()
00138       {
00139         return &vtxDist_[0];
00140       }
00141       int globalOwnerVertices;
00142     private:
00143       int base_;
00144       std::vector<int> duneToParmetis;
00145       std::vector<int> parmetisToDune;
00146       // range of vertices for processor i: vtxdist[i] to vtxdist[i+1] (parmetis global)
00147       std::vector<int> vtxDist_;
00148     };
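    // Illustration (added): with three ranks owning 4, 3 and 5 vertices the
    // constructor below produces vtxDist_ = {0, 4, 7, 12}; rank 1 then numbers
    // its owner vertices 4, 5, 6 in the ParMETIS ordering (base_ = 4), i.e.
    // exactly the range vtxDist_[1] .. vtxDist_[2]-1.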
00149     
00150     template<class G, class OOComm>
00151     ParmetisDuneIndexMap::ParmetisDuneIndexMap(const G& graph, const OOComm& oocomm)
00152       : duneToParmetis(graph.noVertices(), -1), vtxDist_(oocomm.communicator().size()+1)
00153     {
00154       int npes=oocomm.communicator().size(), mype=oocomm.communicator().rank();
00155             
00156       typedef typename OOComm::ParallelIndexSet::const_iterator Iterator;
00157       typedef typename OOComm::OwnerSet OwnerSet;
00158 
00159       int numOfOwnVtx=0;
00160       Iterator end = oocomm.indexSet().end();
00161       for(Iterator index = oocomm.indexSet().begin(); index != end; ++index) {
00162         if (OwnerSet::contains(index->local().attribute())) {
00163           numOfOwnVtx++;
00164         }
00165       }
00166       parmetisToDune.resize(numOfOwnVtx);
00167       std::vector<int> globalNumOfVtx(npes);
00168       // make this number available to all processes
00169       MPI_Allgather(&numOfOwnVtx, 1, MPI_INT, &(globalNumOfVtx[0]), 1, MPI_INT, oocomm.communicator()); 
00170 
00171       int base=0;
00172       vtxDist_[0] = 0;
00173       for(int i=0; i<npes; i++) {
00174         if (i<mype) {
00175           base += globalNumOfVtx[i];
00176         }
00177         vtxDist_[i+1] = vtxDist_[i] + globalNumOfVtx[i];
00178       }
00179       globalOwnerVertices=vtxDist_[npes];
00180       base_=base;
00181       
00182       // The type of the const vertex iterator.
00183       typedef typename G::ConstVertexIterator VertexIterator;
00184 #ifdef DEBUG_REPART
00185       std::cout << oocomm.communicator().rank()<<" vtxDist: ";
00186       for(int i=0; i<= npes;++i)
00187         std::cout << vtxDist_[i]<<" ";
00188       std::cout<<std::endl;
00189 #endif
00190 
00191       // Traverse the graph and assign a new consecutive number/index
00192       // starting at "base" to all owner vertices.
00193       // The new index is used as the ParMETIS global index and is
00194       // stored in the vector "duneToParmetis"
00195       VertexIterator vend = graph.end();
00196       for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex) {
00197         const typename OOComm::ParallelIndexSet::IndexPair* index=oocomm.globalLookup().pair(*vertex);
00198         assert(index);
00199         if (OwnerSet::contains(index->local().attribute())) {
00200           // assign and count the index
00201           parmetisToDune[base-base_]=index->local();
00202           duneToParmetis[index->local()] = base++;
00203         }
00204       }
00205 
00206       // At this point, every process knows the ParMETIS global index
00207       // of its owner vertices. The next step is to get the
00208       // ParMETIS global index of the overlap vertices from the 
00209       // associated processes. To do this, the Dune::Interface class
00210       // is used.
00211 #ifdef DEBUG_REPART
00212       std::cout <<oocomm.communicator().rank()<<": before ";
00213       for(std::size_t i=0; i<duneToParmetis.size(); ++i)
00214         std::cout<<duneToParmetis[i]<<" ";
00215       std::cout<<std::endl;
00216 #endif
00217       oocomm.copyOwnerToAll(duneToParmetis,duneToParmetis);
00218 #ifdef DEBUG_REPART
00219       std::cout <<oocomm.communicator().rank()<<": after ";
00220       for(std::size_t i=0; i<duneToParmetis.size(); ++i)
00221         std::cout<<duneToParmetis[i]<<" ";
00222       std::cout<<std::endl;
00223 #endif
00224     }
00225   }
00226   
00227   struct RedistributeInterface
00228     : public Interface
00229   {
00230     void setCommunicator(MPI_Comm comm)
00231     {
00232       communicator_=comm;
00233     }
00234     template<class Flags,class IS>
00235     void buildSendInterface(const std::vector<int>& toPart, const ParmetisDuneIndexMap& pdmap,
00236                             const IS& idxset)
00237     {
00238       typedef std::vector<int>::const_iterator Iter;
00239       std::map<int,int> sizes;
00240       
00241       typedef typename IS::const_iterator IIter;
00242       for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i)
00243         if(Flags::contains(i->local().attribute()))
00244           ++sizes[toPart[i->local()]];
00245 
00246       // Allocate the necessary space
00247       typedef std::map<int,int>::const_iterator MIter;
00248       for(MIter i=sizes.begin(), end=sizes.end(); i!=end; ++i)
00249         interfaces()[i->first].first.reserve(i->second);
00250       
00251       //Insert the interface information
00252       typedef typename IS::const_iterator IIter;
00253       for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i)
00254         if(Flags::contains(i->local().attribute()))
00255           interfaces()[toPart[i->local()]].first.add(i->local());
00256     }
00257     
00258     void reserveSpaceForReceiveInterface(int proc, int size)
00259     {
00260       interfaces()[proc].second.reserve(size);
00261     }
00262     void addReceiveIndex(int proc, std::size_t idx)
00263     {
00264       interfaces()[proc].second.add(idx);
00265     }
00266     template<typename TG>
00267     void buildReceiveInterface(std::vector<std::pair<TG,int> >& indices)
00268     {
00269       typedef typename std::vector<std::pair<TG,int> >::const_iterator VIter;
00270       std::size_t i=0;
00271       for(VIter idx=indices.begin(); idx!= indices.end(); ++idx){
00272           interfaces()[idx->second].second.add(i++);
00273         }
00274     }
00275     
00276     ~RedistributeInterface()
00277     {
00278     }
00279     
00280   };
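  // Added note: the send half of this interface is built on the old index set
  // from the computed target partition (buildSendInterface), while the receive
  // half is filled with the local positions that the received owner vertices
  // get in the new index set (reserveSpaceForReceiveInterface/addReceiveIndex).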
00281 
00282   namespace
00283   {
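    // Added note on the packed message format used by createSendBuf and
    // saveRecvBuf below:
    //   [std::size_t #owner][#owner global indices]
    //   [std::size_t #overlap][#overlap global indices]
    //   [int #neighbors][#neighbors neighbor ranks]
    // everything is packed with MPI_Pack and sent/received as MPI_PACKED.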
00293     template<class GI>
00294     void createSendBuf(std::vector<GI>& ownerVec, std::set<GI>& overlapVec, std::set<int>& neighbors, char *sendBuf, int buffersize, MPI_Comm comm) {
00295       // Pack owner vertices
00296       std::size_t s=ownerVec.size();
00297       int pos=0;
00298       MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
00299       MPI_Pack(&(ownerVec[0]), s, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
00300       s = overlapVec.size();
00301       MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
00302       typedef typename std::set<GI>::iterator Iter;
00303       for(Iter i=overlapVec.begin(), end= overlapVec.end(); i != end; ++i)
00304         MPI_Pack(const_cast<GI*>(&(*i)), 1, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
00305 
00306       int is=neighbors.size();
00307       MPI_Pack(&is, 1, MPI_INT, sendBuf, buffersize, &pos, comm);
00308       typedef typename std::set<int>::iterator IIter;
00309 
00310       for(IIter i=neighbors.begin(), end= neighbors.end(); i != end; ++i)
00311         MPI_Pack(const_cast<int*>(&(*i)), 1, MPI_INT, sendBuf, buffersize, &pos, comm);
00312     }
00321     template<class GI>
00322     void saveRecvBuf(char *recvBuf, int bufferSize, std::vector<std::pair<GI,int> >& ownerVec, 
00323                      std::set<GI>& overlapVec, std::set<int>& neighbors, RedistributeInterface& inf, int from, MPI_Comm comm) {
00324       std::size_t size; // must match the std::size_t that was packed
00325       int pos=0;
00326       // unpack owner vertices
00327       MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
00328       inf.reserveSpaceForReceiveInterface(from, size);
00329       ownerVec.reserve(ownerVec.size()+size);
00330       
00331       for(;size>0;--size){
00332         GI gi;
00333         MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
00334         ownerVec.push_back(std::make_pair(gi,from));
00335       }
00336       // unpack overlap vertices
00337       MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
00338       typename std::set<GI>::iterator ipos = overlapVec.begin();
00339       for(;size>0;--size){
00340         GI gi;
00341         MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
00342         ipos=overlapVec.insert(ipos, gi);
00343       }
00344       //unpack neighbors
00345       int s;
00346       MPI_Unpack(recvBuf, bufferSize, &pos, &s, 1, MPI_INT, comm);
00347       
00348       typename std::set<int>::iterator npos = neighbors.begin();
00349       for(;s>0;--s){
00350         int n;
00351         MPI_Unpack(recvBuf, bufferSize, &pos, &n, 1, MPI_INT, comm);
00352         npos=neighbors.insert(npos, n);
00353       }
00354     }
00355 
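    // Added note on getDomain below: every process counts how many of its own
    // vertices ParMETIS assigned to each domain, the counts are circulated in a
    // ring so that every process knows the full npes x nparts matrix, and each
    // domain is then given greedily to the not yet assigned process holding
    // most of its vertices. E.g. with counts P0:{5,1} and P1:{2,4}, domain 0
    // goes to P0 and domain 1 to P1, so myDomain is 0 on P0 and 1 on P1.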
00369     template<typename T>
00370     void getDomain(const MPI_Comm& comm, T *part, int numOfOwnVtx, int nparts, int *myDomain, int domainMapping[]) {
00371       int npes, mype;
00372       MPI_Comm_size(comm, &npes);
00373       MPI_Comm_rank(comm, &mype);
00374       MPI_Status status;
00375   
00376       *myDomain = -1;
00377       int i=0;
00378       int j=0;
00379   
00380       int domain[nparts];
00381       int assigned[npes];
00382       // init
00383       for (i=0; i<nparts; i++) {
00384         domainMapping[i] = -1;
00385         domain[i] = 0;
00386       }
00387       for (i=0; i<npes; i++) {
00388         assigned[i] = 0;
00389       }
00390       // count the occurrences of each domain
00391       for (i=0; i<numOfOwnVtx; i++) {
00392         domain[part[i]]++;
00393       }
00394   
00395       int *domainMatrix = new int[npes * nparts];
00396       // init
00397       for(i=0; i<npes*nparts; i++) {
00398         domainMatrix[i]=-1;
00399       }
00400   
00401       // initialize the buffer with our own domain counts
00402       int *buf = new int[nparts];
00403       for (i=0; i<nparts; i++) {
00404         buf[i] = domain[i];
00405         domainMatrix[mype*nparts+i] = domain[i];
00406       }
00407       int pe=0;
00408       int src = (mype-1+npes)%npes;
00409       int dest = (mype+1)%npes;
00410       // ring communication, we need n-1 communications for n processors
00411       for (i=0; i<npes-1; i++) {
00412         MPI_Sendrecv_replace(buf, nparts, MPI_INT, dest, 0, src, 0, comm, &status); 
00413         // pe is the rank whose buffer we just received
00414         pe = ((mype-1-i)+npes)%npes;
00415         for(j=0; j<nparts; j++) {
00416           // save the values to the domain matrix
00417           domainMatrix[pe*nparts+j] = buf[j];
00418         }
00419       }
00420       delete[] buf;
00421       
00422       // Start the domain calculation.
00423       // The process that contains the maximum number of vertices of a
00424       // particular domain is assigned that domain.
00425       int maxOccurance = 0;
00426       pe = -1;
00427       for(i=0; i<nparts; i++) {
00428         for(j=0; j<npes; j++) {
00429           // process has no domain assigned
00430           if (assigned[j]==0) {
00431             if (maxOccurance < domainMatrix[j*nparts+i]) {
00432               maxOccurance = domainMatrix[j*nparts+i];
00433               pe = j;
00434             }
00435           }
00436       
00437         }
00438         if (pe!=-1) {
00439           // process got a domain, ...
00440           domainMapping[i] = pe;
00441           // ...mark as assigned
00442           assigned[pe] = 1;
00443           if (pe==mype) {
00444             *myDomain = i;
00445           }
00446           pe = -1;
00447         }
00448         maxOccurance = 0;
00449       }
00450     
00451       delete[] domainMatrix;
00452 
00453     }
00454   
00455     struct SortFirst
00456     {
00457       template<class T>
00458       bool operator()(const T& t1, const T& t2) const
00459       {
00460         return t1<t2;
00461       }
00462     };
00463     
00464 
00475     template<class GI>
00476     void mergeVec(std::vector<std::pair<GI, int> >& ownerVec, std::set<GI>& overlapSet) {
00477       
00478       typedef typename std::vector<std::pair<GI,int> >::const_iterator VIter;
00479 #ifdef DEBUG_REPART
00480       // Safety check for duplicates.
00481       if(ownerVec.size()>0)
00482         {
00483           VIter old=ownerVec.begin();
00484           for(VIter i=old+1, end=ownerVec.end(); i != end; old=i++)
00485             {
00486               if(i->first==old->first)
00487                 {
00488                   std::cerr<<"Value at index "<<old-ownerVec.begin()<<" is the same as at index "
00489                            <<i-ownerVec.begin()<<" ["<<old->first<<","<<old->second<<"]==["
00490                            <<i->first<<","<<i->second<<"]"<<std::endl;  
00491                   throw "Duplicate global index in ownerVec!";
00492                 }
00493             }
00494         }
00495     
00496 #endif
00497     
00498       typedef typename std::set<GI>::iterator SIter;
00499       VIter v=ownerVec.begin(), vend=ownerVec.end();
00500       for(SIter s=overlapSet.begin(), send=overlapSet.end(); s!=send;)
00501         {
00502           while(v!=vend && v->first<*s) ++v;
00503           if(v!=vend && v->first==*s){
00504             // Advance to the next element before erasing,
00505             // so that s stays valid.
00506             SIter tmp=s;
00507             ++s;
00508             overlapSet.erase(tmp);
00509           }else
00510             ++s;
00511         }
00512     }
00513 
00514 
00528     template<class OwnerSet, class Graph, class IS, class GI>
00529     void getNeighbor(const Graph& g, std::vector<int>& part,
00530                      typename Graph::VertexDescriptor vtx, const IS& indexSet, 
00531                      int toPe, std::set<GI>& neighbor, std::set<int>& neighborProcs) {
00532       typedef typename Graph::ConstEdgeIterator Iter;
00533       for(Iter edge=g.beginEdges(vtx), end=g.endEdges(vtx); edge!=end; ++edge)
00534         {
00535           const typename IS::IndexPair* pindex = indexSet.pair(edge.target());
00536           assert(pindex);
00537           if(part[pindex->local()]!=toPe || !OwnerSet::contains(pindex->local().attribute()))
00538             {
00539               // is sent to another process and therefore becomes overlap
00540               neighbor.insert(pindex->global());
00541               neighborProcs.insert(toPe);
00542             }
00543         }
00544     }
00545 
00546     template<class T, class I>
00547     void my_push_back(std::vector<T>& ownerVec, const I& index, int proc)
00548     {
00549       ownerVec.push_back(index);
00550     }
00551     
00552     template<class T, class I>
00553     void my_push_back(std::vector<std::pair<T,int> >& ownerVec, const I& index, int proc)
00554     {
00555       ownerVec.push_back(std::make_pair(index,proc));
00556     }
00557     template<class T>
00558     void reserve(std::vector<T>&, RedistributeInterface&, int)
00559     {
00560     }
00561     template<class T>
00562     void reserve(std::vector<std::pair<T,int> >& ownerVec, RedistributeInterface& redist, int proc)
00563     {
00564       redist.reserveSpaceForReceiveInterface(proc, ownerVec.size());
00565     }
00566     
00567 
00585     template<class OwnerSet, class G, class IS, class T, class GI>
00586     void getOwnerOverlapVec(const G& graph, std::vector<int>& part, IS& indexSet, 
00587                             int myPe, int toPe, std::vector<T>& ownerVec, std::set<GI>& overlapSet,
00588                             RedistributeInterface& redist, std::set<int>& neighborProcs) {
00589 
00590       //typedef typename IndexSet::const_iterator Iterator;
00591       typedef typename IS::const_iterator Iterator;
00592       for(Iterator index = indexSet.begin(); index != indexSet.end(); ++index) {
00593         // Only process owner vertices; the others are not in the ParMETIS graph.
00594         if(OwnerSet::contains(index->local().attribute()))
00595           {
00596             if(part[index->local()]==toPe)
00597               {
00598                 getNeighbor<OwnerSet>(graph, part, index->local(), indexSet, 
00599                                       toPe, overlapSet, neighborProcs);
00600                 my_push_back(ownerVec, index->global(), toPe);
00601               }
00602           }
00603       }
00604       reserve(ownerVec, redist, toPe);
00605       
00606     }
00607     
00608 
00615     template<class F, class IS>
00616     inline bool isOwner(IS& indexSet, int index) {
00617 
00618       const typename IS::IndexPair* pindex=indexSet.pair(index);
00619       
00620       assert(pindex);
00621       return F::contains(pindex->local().attribute());
00622     }
00623 
00624     
00625     class BaseEdgeFunctor
00626     {
00627     public:
00628       BaseEdgeFunctor(int* adj,const ParmetisDuneIndexMap& data)
00629         :i_(), adj_(adj), data_(data)
00630       {}
00631       
00632       template<class T>
00633       void operator()(const T& edge)
00634       {
00635         // Get the edge weight
00636         // const Weight& weight=edge.weight();
00637         adj_[i_] = data_.toParmetis(edge.target());
00638         i_++;
00639       }
00640       std::size_t index()
00641       {
00642         return i_;
00643       }
00644       
00645     private:
00646       std::size_t i_;
00647       int* adj_;
00648       const ParmetisDuneIndexMap& data_;
00649     };
00650     
00651     template<typename G>
00652     struct EdgeFunctor
00653       : public BaseEdgeFunctor
00654     {
00655       EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s)
00656         : BaseEdgeFunctor(adj, data)
00657       {}
00658 
00659       int* getWeights()
00660       {
00661         return NULL;
00662       }
00663       void free(){}
00664     };
00665       
00666     template<class G, class V, class E, class VM, class EM>
00667     class EdgeFunctor<Dune::Amg::PropertiesGraph<G,V,E,VM,EM> >
00668       :  public BaseEdgeFunctor
00669     {
00670     public:
00671       EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s)
00672         :BaseEdgeFunctor(adj, data)
00673         {
00674           weight_=new int[s];
00675         }
00676       
00677       template<class T>
00678       void operator()(const T& edge)
00679       {
00680         weight_[index()]=edge.properties().depends()?1:0;
00681         BaseEdgeFunctor::operator()(edge);
00682       }
00683       int* getWeights()
00684       {
00685         return weight_;
00686       }
00687       void free(){
00688         if(weight_!=0){
00689           delete[] weight_; // allocated with new[]
00690           weight_=0;
00691         }
00692       }
00693     private:
00694       int* weight_;
00695     };
00696     
00697       
00698 
00712     template<class F, class G, class IS, class EW>
00713     void getAdjArrays(G& graph, IS& indexSet, int *xadj,
00714                       EW& ew)
00715     {
00716       int j=0;
00717   
00718       // The type of the const vertex iterator.
00719       typedef typename G::ConstVertexIterator VertexIterator;
00720       //typedef typename IndexSet::const_iterator Iterator;
00721       typedef typename IS::const_iterator Iterator;
00722 
00723       VertexIterator vend = graph.end();
00724       Iterator end;
00725 
00726       for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex){
00727         if (isOwner<F>(indexSet,*vertex)) {  
00728           // The type of const edge iterator.
00729           typedef typename G::ConstEdgeIterator EdgeIterator;
00730           EdgeIterator eend = vertex.end();
00731           xadj[j] = ew.index();
00732           j++;
00733           for(EdgeIterator edge = vertex.begin(); edge != eend; ++edge){
00734             ew(edge);
00735           }
00736         }
00737       }
00738       xadj[j] = ew.index();
00739     }
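    // Added CSR illustration for getAdjArrays above: with three owner vertices
    // having 2, 1 and 2 graph edges, the edge functor is invoked once per edge
    // and one obtains
    //   xadj   = {0, 2, 3, 5}   (one entry per owner vertex plus an end marker)
    //   adjncy = {ParMETIS indices of the five edge targets}
    // which is the (xadj, adjncy) pair handed to ParMETIS_V3_PartKway below.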
00740   }// end anonymous namespace
00741 
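  // Illustrative usage sketch (added; "BCRSMat", "A" and "comm" are
  // placeholders for the caller's matrix type, matrix and existing
  // communication object and are not defined in this file):
  //
  //   typedef Dune::OwnerOverlapCopyCommunication<int,int> Comm;
  //   typedef Dune::Amg::MatrixGraph<const BCRSMat> Graph;
  //
  //   Graph graph(A);                 // graph of the locally stored matrix A
  //   Comm* newComm = 0;
  //   RedistributeInterface redistInf;
  //   bool active = graphRepartition(graph, comm, comm.communicator().size(),
  //                                  newComm, redistInf);
  //
  //   // Afterwards redistInf describes which vertices move where, newComm
  //   // holds the index set of the new partition, and "active" tells whether
  //   // this rank is part of the new communicator.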
00756   template<class G, class T1, class T2>
00757   bool graphRepartition(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, int nparts,
00758                         Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
00759                         RedistributeInterface& redistInf)
00760   {
00761     MPI_Comm comm=oocomm.communicator();
00762     oocomm.buildGlobalLookup(graph.noVertices());
00763     fillIndexSetHoles(graph, oocomm);
00764       
00765     // simple precondition checks 
00766      
00767 #ifdef PERF_REPART  
00768     // Profiling variables
00769     double t1=0.0, t2=0.0, t3=0.0, t4=0.0, tSum=0.0;
00770 #endif  
00771   
00772     // Common variables
00773     std::size_t i=0, j=0;
00774   
00775     // MPI variables
00776     int npes = oocomm.communicator().size();
00777     int mype = oocomm.communicator().rank();
00778       
00779     MPI_Status status;
00780       
00781     assert(nparts<=npes);
00782   
00783     typedef typename  Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet::GlobalIndex GI;
00784     typedef std::vector<GI> GlobalVector;
00785     int myDomain;
00786 
00787     //
00788     // 1) Prepare the required parameters for using ParMETIS
00789     //    Especially the arrays that represent the graph must be 
00790     //    generated by the DUNE Graph and IndexSet input variables.
00791     //    These are the arrays:
00792     //    - vtxdist 
00793     //    - xadj
00794     //    - adjncy
00795     //
00796     //
00797 #ifdef PERF_REPART  
00798     // reset timer for step 1)
00799     t1=MPI_Wtime();
00800 #endif  
00801 
00802 
00803     typedef typename  Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm;
00804     typedef typename  OOComm::OwnerSet OwnerSet;  
00805 
00806     // Create the vtxdist array and parmetisVtxMapping.
00807     // Global communications are necessary
00808     // The parmetis global identifiers for the owner vertices.
00809     ParmetisDuneIndexMap indexMap(graph,oocomm);
00810 #if HAVE_PARMETIS
00811     idxtype *part = new idxtype[indexMap.numOfOwnVtx()];
00812 #else
00813     std::size_t *part = new std::size_t[indexMap.numOfOwnVtx()];
00814 #endif
00815 
00816     int numOfVtx = oocomm.indexSet().size();
00817 
00818 #if !HAVE_PARMETIS
00819     if(oocomm.communicator().rank()==0 && nparts>1)
00820       std::cerr<<"ParMETIS not activated. Will repartition to 1 domain instead of requested "
00821                <<nparts<<" domains."<<std::endl;
00822     nparts=1; // No parmetis available, fallback to agglomerating to 1 process
00823     typedef std::size_t idxtype;
00824     
00825 #else
00826 
00827     if(nparts>1){
00828       // Create the xadj and adjncy arrays
00829       int *xadj = new  int[indexMap.numOfOwnVtx()+1];
00830       int *adjncy = new int[graph.noEdges()];
00831       EdgeFunctor<G> ef(adjncy, indexMap, graph.noEdges());
00832       getAdjArrays<OwnerSet>(graph, oocomm.globalLookup(), xadj, ef);
00833                
00834       //
00835       // 2) Call ParMETIS
00836       //
00837       //
00838       int numflag=0, wgtflag=0, options[3], edgecut=0, ncon=1;
00839       float *tpwgts = NULL;
00840       float ubvec[1];
00841       options[0] = 1; // 0=default, 1=options are defined in [1]+[2]
00842 #ifdef DEBUG_REPART
00843       options[1] = 3; // show info: 0=no message
00844 #else
00845       options[1] = 0; // show info: 0=no message
00846 #endif
00847       options[2] = 1; // random number seed, default is 15
00848       wgtflag = (ef.getWeights()!=NULL)?1:0;
00849       numflag = 0;
00850       edgecut = 0;
00851       ncon=1;
00852       ubvec[0]=1.05; // recommended by ParMETIS
00853 
00854 #ifdef DEBUG_REPART
00855       if (mype == 0) {
00856         std::cout<<std::endl;
00857         std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
00858                  <<options[1]<<" "<<options[2]<<"}, Ncon: "
00859                  <<ncon<<", Nparts: "<<nparts<<std::endl;
00860       }
00861 #endif
00862 #ifdef PERF_REPART  
00863       // stop the time for step 1)
00864       t1=MPI_Wtime()-t1;
00865       // reset timer for step 2)
00866       t2=MPI_Wtime();
00867 #endif  
00868 
00869       //=======================================================
00870       // ParMETIS_V3_PartKway
00871       //=======================================================
00872       ParMETIS_V3_PartKway(indexMap.vtxDist(), xadj, adjncy,
00873                            NULL, ef.getWeights(), &wgtflag,
00874                            &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &const_cast<MPI_Comm&>(comm));
00875 
00876       
00877       delete[] xadj;
00878       delete[] adjncy;
00879       ef.free();
00880     
00881 #ifdef DEBUG_REPART
00882       if (mype == 0) {
00883         std::cout<<std::endl;
00884         std::cout<<"ParMETIS_V3_PartKway reported a cut of "<<edgecut<<std::endl;
00885         std::cout<<std::endl;
00886       }
00887       std::cout<<mype<<": PARMETIS-Result: ";
00888       for(int i=0; i < indexMap.vtxDist()[mype+1]-indexMap.vtxDist()[mype]; ++i) {
00889         std::cout<<part[i]<<" ";
00890       }
00891       std::cout<<std::endl;
00892       std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
00893                <<options[1]<<" "<<options[2]<<"}, Ncon: "
00894                <<ncon<<", Nparts: "<<nparts<<std::endl;
00895 #endif
00896 #ifdef PERF_REPART  
00897       // stop the time for step 2)
00898       t2=MPI_Wtime()-t2;
00899       // reset timer for step 3)
00900       t3=MPI_Wtime();
00901 #endif  
00902 
00903     }else
00904 #endif
00905       {
00906       // Everything goes to process 0!
00907       for(std::size_t i=0; i<indexMap.numOfOwnVtx();++i)
00908         part[i]=0;
00909       }
00910     
00911 
00912     //
00913     // 3) Find an optimal domain based on the ParMETIS repartitioning
00914     //    result
00915     //
00916   
00917     int domainMapping[nparts];
00918     if(nparts>1)
00919       getDomain(comm, part, indexMap.numOfOwnVtx(), nparts, &myDomain, domainMapping);
00920     else
00921       domainMapping[0]=0;
00922     
00923 #ifdef DEBUG_REPART
00924     std::cout<<mype<<": myDomain: "<<myDomain<<std::endl;
00925     std::cout<<mype<<": DomainMapping: ";
00926     for(int j=0; j<nparts; j++) {
00927       std::cout<<" do: "<<j<<" pe: "<<domainMapping[j]<<" ";
00928     }
00929     std::cout<<std::endl;
00930 #endif  
00931     // translate the domain number to real process number
00932     std::vector<int> newPartition(indexMap.numOfOwnVtx());
00933     for(j=0; j<indexMap.numOfOwnVtx(); j++) 
00934       newPartition[j] = domainMapping[part[j]];
00935 
00936     // Make a domain mapping for the index set and translate
00937     // the domain numbers to real process numbers.
00938     // domainMapping refers to the ParMETIS numbering, i.e. it does not
00939     // include the overlap/copy vertices.
00940     std::vector<int> setPartition(oocomm.indexSet().size(), -1);
00941       
00942     typedef typename  OOComm::ParallelIndexSet::const_iterator Iterator;
00943     i=0; // parmetis index
00944     for(Iterator index = oocomm.indexSet().begin(); index != oocomm.indexSet().end(); ++index)
00945       if(OwnerSet::contains(index->local().attribute())){
00946         setPartition[index->local()]=domainMapping[part[i++]];
00947       }
00948 
00949     oocomm.copyOwnerToAll(setPartition, setPartition);
00950     
00951     // Build the send interface
00952     redistInf.buildSendInterface<OwnerSet>(setPartition, indexMap, oocomm.indexSet());
00953 
00954 #ifdef PERF_REPART  
00955     // stop the time for step 3)
00956     t3=MPI_Wtime()-t3;
00957     // reset timer for step 4)
00958     t4=MPI_Wtime();
00959 #endif  
00960   
00961   
00962     //
00963     // 4) Create the output IndexSet and RemoteIndices 
00964     //    4.1) Determine the "send to" and "receive from" relation
00965     //         according to the new partition using a MPI ring 
00966     //         communication.
00967     // 
00968     //    4.2) Based on the "send to" and "receive from" vectors,
00969     //         the processes exchange their vertices with each other
00970     //
00971     //    4.3) Create the IndexSet, RemoteIndices and the new MPI 
00972     //         communicator
00973     //
00974 
00975     //
00976     // 4.1) Let's start...
00977     //
00978 
00979     int *sendTo = new int[npes];
00980     int *recvFrom = new int[npes];
00981     int *buf = new int[npes];
00982     // init the buffers
00983     for(int j=0; j<npes; j++) {
00984       sendTo[j] = 0;
00985       recvFrom[j] = 0;
00986       buf[j] = 0;
00987     }
00988   
00989     // Note: sendTo stores the maximum number of vertices (numOfVtx),
00990     // not the exact number of vertices to send, because this maximum
00991     // is used as the fixed buffer size for the MPI send/receive calls.
00992 
00993     // TODO: optimize buffer size
00994     bool existentOnNextLevel=false;
00995 
00996     for(i=0; i<indexMap.numOfOwnVtx(); ++i) {
00997       if (newPartition[i]!=mype) {
00998         if (sendTo[newPartition[i]]==0) {
00999           sendTo[newPartition[i]] = numOfVtx;
01000           buf[newPartition[i]] = numOfVtx;
01001         }
01002       }
01003       else
01004         existentOnNextLevel=true;
01005     }
01006   
01007     // Each process passes its "send to" array around the ring of processes.
01008     // Every process receives such an array and picks up the
01009     // corresponding "receive from" value. This value defines the size
01010     // of the buffer containing the vertices to be received in the next step.
01011     // TODO: not really a ring communication
01012     int pe=0;
01013     int src = (mype-1+npes)%npes;
01014     int dest = (mype+1)%npes;
01015 
01016     // ring communication, we need n-1 communications for n processors
01017     for (int i=0; i<npes-1; i++) {
01018       MPI_Sendrecv_replace(buf, npes, MPI_INT, dest, 0, src, 0, comm, &status); 
01019       // pe is the rank whose buffer we just received
01020       pe = ((mype-1-i)+npes)%npes;
01021       recvFrom[pe] = buf[mype]; // pick up the "recv from" value for myself
01022       if(recvFrom[pe]>0)
01023         existentOnNextLevel=true;
01024     }
01025     delete[] buf;
01026       
01027 #ifdef DEBUG_REPART
01028     std::cout<<mype<<": recvFrom: ";
01029     for(int i=0; i<npes; i++) {
01030       std::cout<<recvFrom[i]<<" ";
01031     }
01032     std::cout<<std::endl<<std::endl;
01033     std::cout<<mype<<": sendTo: ";
01034     for(int i=0; i<npes; i++) {
01035       std::cout<<sendTo[i]<<" ";
01036     }
01037     std::cout<<std::endl<<std::endl;
01038 #endif
01039   
01040     //
01041     // 4.2) Start the communication
01042     //
01043   
01044     // Get all the owner and overlap vertices for myself and save
01045     // them in myOwnerVec and myOverlapSet.
01046     // The vertices received from the other processes are simply
01047     // added to these containers.
01048     // 
01049 
01050       
01051     typedef typename OOComm::ParallelIndexSet::GlobalIndex GI;
01052     std::vector<std::pair<GI,int> > myOwnerVec;
01053     std::set<GI> myOverlapSet;
01054     GlobalVector sendOwnerVec;
01055     std::set<GI> sendOverlapSet;
01056     std::set<int> myNeighbors;
01057     
01058     getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(),
01059                                  mype, mype, myOwnerVec, myOverlapSet, redistInf, myNeighbors);
01060 
01061     for(int i=0; i < npes; ++i) {
01062       // the rank of the process defines the sending order,
01063       // so it naturally starts at 0
01064 
01065       if (i==mype) {
01066         for(int j=0; j < npes; ++j) {
01067           if (sendTo[j]>0) {
01068             // clear the vector for sending
01069             sendOwnerVec.clear();
01070             sendOverlapSet.clear();
01071             // get all owner and overlap vertices for process j and save these
01072             // in the vectors sendOwnerVec and sendOverlapSet
01073             std::set<int> neighbors;
01074             getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(),
01075                                          mype, j, sendOwnerVec, sendOverlapSet, redistInf,
01076                                          neighbors);
01077             // We need two additional integers for the lengths of the
01078             // two parts (owner/overlap) of the array.
01079             int buffersize=0;
01080             int tsize;
01081             MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &buffersize);
01082             MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize);
01083             buffersize +=tsize;
01084             MPI_Pack_size(sendTo[j], MPITraits<GI>::getType(), oocomm.communicator(), &tsize);
01085             buffersize += tsize;
01086             MPI_Pack_size(1, MPI_INT, oocomm.communicator(), &tsize);
01087             buffersize += tsize;
01088             MPI_Pack_size(neighbors.size(), MPI_INT, oocomm.communicator(), &tsize);
01089             buffersize += tsize;
01090               
01091             char* sendBuf = new char[buffersize]; 
01092 #ifdef DEBUG_REPART
01093             std::cout<<mype<<" sending "<<sendOwnerVec.size()<<" owner and "<<
01094               sendOverlapSet.size()<<" overlap to "<<j<<" buffersize="<<buffersize<<std::endl;
01095 #endif
01096             createSendBuf(sendOwnerVec, sendOverlapSet, neighbors, sendBuf, buffersize, oocomm.communicator());
01097             MPI_Send(sendBuf, buffersize, MPI_PACKED, j, 0, oocomm.communicator()); 
01098             delete[] sendBuf;
01099           }
01100         }
01101       } else { // All the other processes have to wait for receive...
01102         if (recvFrom[i]>0) {
01103           // Get buffer size
01104           MPI_Probe(i, 0,oocomm.communicator(), &status);
01105           int buffersize=0;
01106           MPI_Get_count(&status, MPI_PACKED, &buffersize);
01107           char* recvBuf = new char[buffersize];
01108 #ifdef DEBUG_REPART
01109           std::cout<<mype<<" receiving "<<recvFrom[i]<<" from "<<i<<" buffersize="<<buffersize<<std::endl;
01110 #endif
01111           MPI_Recv(recvBuf, buffersize, MPI_PACKED, i, 0, oocomm.communicator(), &status); 
01112           saveRecvBuf(recvBuf, buffersize, myOwnerVec, myOverlapSet, myNeighbors, redistInf, i, oocomm.communicator());
01113           delete[] recvBuf;
01114         }
01115       }
01116     }
01117     
01118     redistInf.setCommunicator(oocomm.communicator());
01119 
01120     // Trick to free memory
01121     // (swapping with an empty temporary actually releases the capacity)
01122     std::vector<int>().swap(newPartition);
01123              
01124     //
01125     // 4.3) Create the IndexSet etc.
01126     //
01127 
01128     // build the new outputIndexSet
01129       
01130       
01131     int color=0;
01132       
01133     if (!existentOnNextLevel) {
01134       // this process is not used anymore
01135       color= MPI_UNDEFINED;
01136     }
01137     MPI_Comm outputComm;
01138     
01139     MPI_Comm_split(oocomm.communicator(), color, oocomm.communicator().rank(), &outputComm);
01140     outcomm = new OOComm(outputComm);
01141 
01142     // translate neighbor ranks.
01143     int newrank=outcomm->communicator().rank();
01144     int *newranks=new int[oocomm.communicator().size()];
01145     std::vector<int> tneighbors;
01146     tneighbors.reserve(myNeighbors.size());
01147     
01148     typename OOComm::ParallelIndexSet& outputIndexSet = outcomm->indexSet();
01149     
01150     MPI_Allgather(&newrank, 1, MPI_INT, newranks, 1, 
01151                   MPI_INT, oocomm.communicator());
01152     typedef typename std::set<int>::const_iterator IIter;
01153 
01154 #ifdef DEBUG_REPART
01155     std::cout<<oocomm.communicator().rank()<<" ";
01156     for(IIter i=myNeighbors.begin(), end=myNeighbors.end();
01157         i!=end; ++i){
01158       assert(newranks[*i]>=0);
01159       std::cout<<*i<<"->"<<newranks[*i]<<" ";
01160       tneighbors.push_back(newranks[*i]);
01161     }
01162     std::cout<<std::endl;
01163 #else
    // tneighbors has to be filled also when DEBUG_REPART is not defined.
    for(IIter i=myNeighbors.begin(), end=myNeighbors.end(); i!=end; ++i)
      tneighbors.push_back(newranks[*i]);
#endif
01164     delete[] newranks;
01165     myNeighbors.clear();
01166     
01167     outputIndexSet.beginResize();
01168     // 1) add the owner vertices
01169     // Sort the owners
01170     std::sort(myOwnerVec.begin(), myOwnerVec.end(), SortFirst());
01171     // The owners are sorted according to their global index.
01172     // Therefore the entries of ownerVec are the same as the
01173     // ones in the resulting index set.
01174     typedef typename OOComm::ParallelIndexSet::LocalIndex LocalIndex;
01175     typedef typename std::vector<std::pair<GI,int> >::const_iterator VIter;
01176     i=0;
01177     for(VIter g=myOwnerVec.begin(), end =myOwnerVec.end(); g!=end; ++g, ++i ) {
01178       outputIndexSet.add(g->first,LocalIndex(i, OwnerOverlapCopyAttributeSet::owner, true));
01179       redistInf.addReceiveIndex(g->second, i);
01180     }
01181 
01182     // After all the vertices are received, the vectors must
01183     // be "merged" together to create the final vectors.
01184     // Because some vertices that were sent as overlap may now
01185     // already be included as owner vertices in the new partition.
01186     mergeVec(myOwnerVec, myOverlapSet);
01187     
01188     // Trick to free memory
01189     // (swapping with an empty temporary actually releases the capacity)
01190     std::vector<std::pair<GI,int> >().swap(myOwnerVec);
01191 
01192     
01193 
01194     // 2) add the overlap vertices
01195     typedef typename std::set<GI>::const_iterator SIter;
01196     for(SIter g=myOverlapSet.begin(), end=myOverlapSet.end(); g!=end; ++g, i++) {
01197       outputIndexSet.add(*g,LocalIndex(i, OwnerOverlapCopyAttributeSet::copy, true));
01198     }
01199     myOverlapSet.clear();
01200     outputIndexSet.endResize();
01201 
01202 #ifdef DUNE_ISTL_WITH_CHECKING
01203     int numOfOwnVtx =0;
01204     Iterator end = outputIndexSet.end();
01205     for(Iterator index = outputIndexSet.begin(); index != end; ++index) {
01206       if (OwnerSet::contains(index->local().attribute())) {
01207         numOfOwnVtx++;
01208       }
01209     }
01210     numOfOwnVtx = oocomm.communicator().sum(numOfOwnVtx);
01211     if(numOfOwnVtx!=indexMap.globalOwnerVertices)
01212       {
01213         std::cerr<<numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones!"<<std::endl;
01214         DUNE_THROW(ISTLError, numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones"
01215                    <<" during repartitioning.");
01216       }
01217     Iterator index=outputIndexSet.begin();
01218     if(index!=end){
01219       ++index;
01220       for(Iterator old = outputIndexSet.begin(); index != end; old=index++) {
01221         if(old->global()>index->global())
01222           DUNE_THROW(ISTLError, "Index set's globalindex not sorted correctly");
01223       }
01224     }
01225 #endif
01226     
01227     if(color != MPI_UNDEFINED){
01228       outcomm->remoteIndices().setNeighbours(tneighbors);
01229       outcomm->remoteIndices().template rebuild<true>();
01230 
01231     }
01232   
01233     // release the memory
01234     delete[] sendTo;
01235     delete[] recvFrom;
01236     delete[] part;
01237 
01238       
01239 #ifdef PERF_REPART  
01240     // stop the time for step 4) and print the results
01241     t4=MPI_Wtime()-t4;
01242     tSum = t1 + t2 + t3 + t4;
01243     std::cout<<std::endl
01244              <<mype<<": WTime for step 1): "<<t1
01245              <<" 2): "<<t2
01246              <<" 3): "<<t3
01247              <<" 4): "<<t4
01248              <<" total: "<<tSum
01249              <<std::endl;
01250 #endif  
01251 
01252     return color!=MPI_UNDEFINED;
01253       
01254   }
01255 #else
01256   template<class G, class P,class T1, class T2, class R>
01257   bool graphRepartition(const G& graph, P& oocomm, int nparts,
01258                         P*& outcomm,
01259                         R& redistInf)
01260   {
01261     if(nparts!=oocomm.size())
01262       DUNE_THROW(NotImplemented, "only available for MPI programs");
    // Added: without MPI the partition is unchanged, so this process stays active.
    return true;
01263   }
01264 #endif // HAVE_MPI
01265 } // end of namespace Dune
01266 #endif