SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
anf.h
Go to the documentation of this file.
1 // Approximate Neighborhood Function.
3 namespace TSnap {
10 template <class PGraph> void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
17 template <class PGraph> void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32);
20 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox);
23 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const int NRuns=1, int NApprox=-1);
24 } // namespace TSnap
25 
32 template <class PGraph>
33 class TGraphAnf {
34 private:
36  THash<TInt, uint64> NIdToBitPosH; // NId to byte(!) offset in BitV
37  TInt NApprox; // maintain N parallel approximations (multiple of 8)
38  TInt NBits, MoreBits, ApproxBytes; // NBits=logNodes+MoreBits; MoreBits: additional R bits; ApproxBytes: Approx/8;
39  PGraph Graph;
41 private:
43 public:
44  TGraphAnf(const PGraph& GraphPt, const int& Approx=32, const int& moreBits=5, const int& RndSeed=0) :
45  NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0); IAssert(sizeof(uint)==4); }
46  uint64 GetNIdOffset(const int& NId) const { return NIdToBitPosH.GetDat(NId); }
47  void InitAnfBits(TAnfBitV& BitV);
48  void Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const;
49  double AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const;
50  double GetCount(const TAnfBitV& BitV, const uint64& NIdOffset) const {
51  return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }
57  void GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
63  void GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir);
64 };
65 
66 template <class PGraph>
68  const int NNodes = Graph->GetNodes();
69  const int LogNodes = int(ceil(TMath::Log2(NNodes)));
70  ApproxBytes = NApprox / 8;
71  NBits = LogNodes + MoreBits; // bits per node
72  const int BytesPerNd = ApproxBytes * NBits; // total bytes per node
73  int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1;
74  IAssertR(VSize <= TInt::Mx,
75  TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d",
76  TUInt64::GetStr(VSize).CStr(),TInt::Mx));
77  //printf("size %d\n", static_cast<int>(VSize));
78  BitV.Gen((const int)VSize); IAssert(BitV.BegI() != NULL);
79  BitV.PutAll(0);
80  int SetBit = 0;
81  uint64 NodeOff = 0;
82  uchar* BitVPt = (uchar *) BitV.BegI();
83  // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ...
84  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) {
85  NIdToBitPosH.AddDat(NI.GetId(), NodeOff);
86  // init vertex bits
87  for (int approx = 0; approx < NApprox; approx++) {
88  const int RndNum = Rnd.GetUniDevInt();
89  for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { }
90  if (SetBit >= NBits) SetBit = NBits-1;
91  const int BitPos = ApproxBytes * SetBit + approx / 8;
92  *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c)
93  }
94  NodeOff += BytesPerNd;
95  }
96 }
97 
98 template <class PGraph>
99 void TGraphAnf<PGraph>::Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const {
100  uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset;
101  uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset;
102  for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; }
103 }
104 
105 // Average least zero bit position (least significant zero)
106 template <class PGraph>
107 double TGraphAnf<PGraph>::AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const { //average least zero bit position (least significant zero)
108  int approx, bit, AvgBitPos = 0;
109  uchar* BitVPt = (uchar *) BitV.BegI();
110  for (approx = 0; approx < NApprox; approx++) {
111  for (bit = 0; bit < NBits; bit++) {
112  if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero
113  if (bit > NBits) bit = NBits;
114  AvgBitPos += bit;
115  }
116  return AvgBitPos/double(NApprox) ;
117 }
118 
119 template <class PGraph>
120 void TGraphAnf<PGraph>::GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
121  //const int NNodes = Graph->GetNodes();
122  TAnfBitV CurBitsV, LastBitsV;
123  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
124  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
125  DistNbrsV.Clr();
126  DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg()));
127  for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
128  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
129  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
130  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
131  for (int e = 0; e < NI.GetOutDeg(); e++) {
132  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
133  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
134  }
135  if (! IsDir) {
136  for (int e = 0; e < NI.GetInDeg(); e++) {
137  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
138  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
139  }
140  }
141  }
142  DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId))));
143  if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change
144  }
145 }
146 
147 template <class PGraph>
148 void TGraphAnf<PGraph>::GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) {
149  TAnfBitV CurBitsV, LastBitsV;
150  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
151  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
152  int v, e;
153  double NPairs = 0.0;
154  DistNbrsV.Clr();
155  DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes()));
156  //TExeTm ExeTm;
157  for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
158  //printf("ANF dist %d...", dist); ExeTm.Tick();
159  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
160  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
161  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
162  for (e = 0; e < NI.GetOutDeg(); e++) {
163  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
164  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
165  }
166  if (! IsDir) {
167  for (e = 0; e < NI.GetInDeg(); e++) {
168  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
169  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
170  }
171  }
172  }
173  NPairs = 0.0;
174  for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) {
175  NPairs += GetCount(CurBitsV, NIdToBitPosH[v]);
176  }
177  DistNbrsV.Add(TIntFltKd(dist, NPairs));
178  //printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr());
179  if (NPairs == 0) { break; }
180  if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change
181  //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS");
182  }
183 }
185 // Approximate Neighborhood Function
186 namespace TSnap {
187 
188 namespace TSnapDetail {
190 double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile=0.9);
192 double CalcEffDiam(const TFltPrV& DistNbrsCdfV, const double& Percentile=0.9);
194 double CalcEffDiamPdf(const TIntFltKdV& DistNbrsPdfV, const double& Percentile=0.9);
196 double CalcEffDiamPdf(const TFltPrV& DistNbrsPdfV, const double& Percentile=0.9);
198 double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV);
200 double CalcAvgDiamPdf(const TFltPrV& DistNbrsPdfV);
201 } // TSnapDetail
202 
203 template <class PGraph>
204 void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
205  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
206  Anf.GetNodeAnf(SrcNId, DistNbrsV, MxDist, IsDir);
207 }
208 
209 template <class PGraph>
210 void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) {
211  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
212  Anf.GetGraphAnf(DistNbrsV, MxDist, IsDir);
213 }
214 
215 template <class PGraph>
216 double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox) {
217  TIntFltKdV DistNbrsV;
218  TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0);
219  Anf.GetGraphAnf(DistNbrsV, -1, IsDir);
220  return TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, Percentile);
221 }
222 
223 template<class PGraph>
224 double GetAnfEffDiam(const PGraph& Graph, const int NRuns, int NApprox) {
225  //return TSnap::GetEffDiam(Graph, IsDir, 0.9, 32);
226  TMom Mom;
227  if (NApprox == -1) {
228  if (Graph->GetNodes() < 100000) { NApprox = 64; }
229  else if (Graph->GetNodes() < 1000000) { NApprox = 32; }
230  else { NApprox = 16; }
231  }
232  const bool IsDir = false;
233  for (int r = 0; r < NRuns; r++) {
234  Mom.Add(TSnap::GetAnfEffDiam(Graph, IsDir, 0.9, NApprox));
235  }
236  Mom.Def();
237  return Mom.GetMean();
238 }
239 
240 template <class PGraph> void TestAnf() {
241  PGraph Graph = PGraph::TObj::New();
242  //Graph:
243  // 0 2 ----> 3
244  // ^ |
245  // | |
246  // | ^
247  // 1 5 <---- 4
248  for (int v = 0; v < 6; v++) { Graph->AddNode(v); }
249  Graph->AddEdge(2, 3);
250  Graph->AddEdge(3, 4);
251  Graph->AddEdge(4, 5);
252  Graph->AddEdge(5, 2);
253  TFltV AnfV;
254  for (int t = 0; t < 10; t++) {
255  TGraphAnf<PGraph> Anf(Graph, 128, 5, t+1);
256  TIntFltKdV DistToNbrsV;
257  Anf.GetGraphAnf(DistToNbrsV, 5, true);
258  printf("\n--seed: %d---------------------\n", t+1);
259  for (int i = 0; i < DistToNbrsV.Len(); i++) {
260  printf("dist: %d\t hops:%f\n", DistToNbrsV[i].Key(), DistToNbrsV[i].Dat());
261  }
262  AnfV.Add(DistToNbrsV.Last().Dat);
263  }
264  TMom Mom(AnfV);
265  printf("-----------\nAvgAnf: %f StDev: %f\n", Mom.GetMean(), Mom.GetSDev());//*/
266  // const int NApprox = 32;
267  /*printf("\nANF vs. SAMPLE diam test (10 runs of ANF, NApprox=%d):\n", NApprox);
268  //Graph = TGGen<PGraph>::GenGrid(20, 20);
269  Graph = TGAlg::GetMxWcc(TGGen<PGraph>::GenRnd(1000, 10000));
270  TFltV FullAnf, EffAnf;
271  for (int tryn = 0; tryn < 10; tryn++) {
272  FullAnf.Add(GetEffDiam(Graph, false, 1.0, NApprox));
273  EffAnf.Add(GetEffDiam(Graph, false, 0.9, NApprox));
274  }
275  TMom FullMom(FullAnf);
276  TMom AnfMom(EffAnf);
277  printf(" Sample FullDiam: %d\n", TGAlg::GetBfsFullDiam(Graph, 100, false));
278  printf(" Anf FullDiam: %f [%f]\n", FullMom.GetMean(), FullMom.GetSDev());
279  printf(" Sample EffDiam [90%%]: %f\n", TGAlg::GetBfsEffDiam(Graph, 100, false));
280  printf(" Anf EffDiam [90%%]: %f [%f]\n", AnfMom.GetMean(), AnfMom.GetSDev());
281  // epinions
282  printf("\nEpinions graph:\n");
283  { typedef PNGraph PGraph;
284  PGraph G = TGGen<PGraph>::GenEpinions();
285  TIntFltKdV DistToPairsV;
286  GetAnf(G, DistToPairsV, 50, true);
287  for(int i = 0; i < DistToPairsV.Len(); i++) {
288  printf("\t%d\t%f\n", DistToPairsV[i].Key, DistToPairsV[i].Dat); }
289  printf("\nUndir\n");
290  TAnf<PGraph>::GetAnf(G, DistToPairsV, 50, false);
291  for(int j = 0; j < DistToPairsV.Len(); j++) {
292  printf("\t%d\t%f\n", DistToPairsV[j].Key, DistToPairsV[j].Dat); }
293  }//*/
294 }
295 
296 } // namespace TSnap
TRnd Rnd
Definition: anf.h:40
void GetGraphAnf(TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
Definition: anf.h:148
#define IAssert(Cond)
Definition: bd.h:262
PGraph Graph
Definition: anf.h:39
Main namespace for all the Snap global entities.
Definition: alg.h:1
#define IAssertR(Cond, Reason)
Definition: bd.h:265
Definition: dt.h:11
unsigned int uint
Definition: bd.h:11
static const int Mx
Definition: dt.h:1142
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TKeyDat< TInt, TFlt > TIntFltKd
Definition: ds.h:381
void GetAnf(const PGraph &Graph, const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir, const int &NApprox=32)
Definition: anf.h:204
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:216
void Union(TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const
Definition: anf.h:99
TInt ApproxBytes
Definition: anf.h:38
Definition: xmath.h:129
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
void TestAnf()
Definition: anf.h:240
double GetSDev() const
Definition: xmath.h:242
TGraphAnf(const PGraph &GraphPt, const int &Approx=32, const int &moreBits=5, const int &RndSeed=0)
Definition: anf.h:44
double AvgLstZero(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:107
TInt NBits
Definition: anf.h:38
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1229
unsigned long long uint64
Definition: bd.h:38
const TVal & LastLast() const
Returns a reference to the one before last element of the vector.
Definition: ds.h:585
double CalcEffDiamPdf(const TIntFltKdV &DistNbrsPdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) probability distribution functio...
Definition: anf.cpp:29
void InitAnfBits(TAnfBitV &BitV)
Definition: anf.h:67
void GetNodeAnf(const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
Definition: anf.h:120
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
unsigned char uchar
Definition: bd.h:10
Definition: dt.h:1137
double CalcAvgDiamPdf(const TIntFltKdV &DistNbrsPdfV)
Helper function for computing the mean of a (unnormalized) probability distribution function...
Definition: anf.cpp:41
UndefDefaultCopyAssign(TGraphAnf)
double CalcEffDiam(const TIntFltKdV &DistNbrsCdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) cumulative distribution function...
Definition: anf.cpp:7
TStr GetStr() const
Definition: dt.h:1363
long long int64
Definition: bd.h:27
double GetMean() const
Definition: xmath.h:240
uint64 GetNIdOffset(const int &NId) const
Definition: anf.h:46
TIter BegI() const
Returns an iterator pointing to the first element in the vector.
Definition: ds.h:593
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
static double Log2(const double &Val)
Definition: xmath.h:15
THash< TInt, uint64 > NIdToBitPosH
Definition: anf.h:36
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TVec< uint64 > TAnfBitV
Definition: anf.h:35
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Definition: anf.h:33
TInt MoreBits
Definition: anf.h:38
void Def()
Definition: xmath.cpp:339
TInt NApprox
Definition: anf.h:37
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
double GetCount(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:50