SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
word2vec.h File Reference

Go to the source code of this file.

Functions

void LearnEmbeddings (TVVec< TInt, int64 > &WalksVV, const int &Dimensions, const int &WinSize, const int &Iter, const bool &Verbose, TIntFltVH &EmbeddingsHV)
 Learns embeddings using SGD, Skip-gram with negative sampling. More...
 

Variables

const int MaxExp = 6
 
const int ExpTablePrecision = 10000
 
const int TableSize = MaxExp*ExpTablePrecision*2
 
const int NegSamN = 5
 
const double StartAlpha = 0.025
 

Function Documentation

void LearnEmbeddings ( TVVec< TInt, int64 > &  WalksVV,
const int &  Dimensions,
const int &  WinSize,
const int &  Iter,
const bool &  Verbose,
TIntFltVH &  EmbeddingsHV 
)

Learns embeddings with stochastic gradient descent (SGD), using the skip-gram model with negative sampling.

Definition at line 160 of file word2vec.cpp.

162  {
163  TIntIntH RnmH;
164  TIntIntH RnmBackH;
165  int64 NNodes = 0;
166  //renaming nodes into consecutive numbers
167  for (int i = 0; i < WalksVV.GetXDim(); i++) {
168  for (int64 j = 0; j < WalksVV.GetYDim(); j++) {
169  if ( RnmH.IsKey(WalksVV(i, j)) ) {
170  WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j));
171  } else {
172  RnmH.AddDat(WalksVV(i,j),NNodes);
173  RnmBackH.AddDat(NNodes,WalksVV(i, j));
174  WalksVV(i, j) = NNodes++;
175  }
176  }
177  }
178  TIntV Vocab(NNodes);
179  LearnVocab(WalksVV, Vocab);
180  TIntV KTable(NNodes);
181  TFltV UTable(NNodes);
182  TVVec<TFlt, int64> SynNeg;
183  TVVec<TFlt, int64> SynPos;
184  TRnd Rnd(time(NULL));
185  InitPosEmb(Vocab, Dimensions, Rnd, SynPos);
186  InitNegEmb(Vocab, Dimensions, SynNeg);
187  InitUnigramTable(Vocab, KTable, UTable);
188  TFltV ExpTable(TableSize);
189  double Alpha = StartAlpha; //learning rate
190 #pragma omp parallel for schedule(dynamic)
191  for (int i = 0; i < TableSize; i++ ) {
192  double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision);
193  ExpTable[i] = TMath::Power(TMath::E, Value);
194  }
195  int64 WordCntAll = 0;
196 // op RS 2016/09/26, collapse does not compile on Mac OS X
197 //#pragma omp parallel for schedule(dynamic) collapse(2)
198  for (int j = 0; j < Iter; j++) {
199 #pragma omp parallel for schedule(dynamic)
200  for (int64 i = 0; i < WalksVV.GetXDim(); i++) {
201  TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
202  WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos);
203  }
204  }
205  if (Verbose) { printf("\n"); fflush(stdout); }
206  for (int64 i = 0; i < SynPos.GetXDim(); i++) {
207  TFltV CurrV(SynPos.GetYDim());
208  for (int j = 0; j < SynPos.GetYDim(); j++) { CurrV[j] = SynPos(i, j); }
209  EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV);
210  }
211 }
TRnd
Definition: dt.h:11
void InitUnigramTable(TIntV &Vocab, TIntV &KTable, TFltV &UTable)
Definition: word2vec.cpp:18
void InitNegEmb(TIntV &Vocab, const int &Dimensions, TVVec< TFlt, int64 > &SynNeg)
Definition: word2vec.cpp:73
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
const int MaxExp
Definition: word2vec.h:10
TVVec
Definition: ds.h:2223
TSizeTy GetYDim() const
Definition: ds.h:2251
static double Power(const double &Base, const double &Exponent)
Definition: xmath.h:25
void TrainModel(TVVec< TInt, int64 > &WalksVV, const int &Dimensions, const int &WinSize, const int &Iter, const bool &Verbose, TIntV &KTable, TFltV &UTable, int64 &WordCntAll, TFltV &ExpTable, double &Alpha, int64 CurrWalk, TRnd &Rnd, TVVec< TFlt, int64 > &SynNeg, TVVec< TFlt, int64 > &SynPos)
Definition: word2vec.cpp:92
long long int64
Definition: bd.h:27
const double StartAlpha
Definition: word2vec.h:20
TSizeTy GetXDim() const
Definition: ds.h:2250
THash
Definition: hash.h:97
const int ExpTablePrecision
Definition: word2vec.h:13
void LearnVocab(TVVec< TInt, int64 > &WalksVV, TIntV &Vocab)
Definition: word2vec.cpp:8
const int TableSize
Definition: word2vec.h:14
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static double E
Definition: xmath.h:7
void InitPosEmb(TIntV &Vocab, const int &Dimensions, TRnd &Rnd, TVVec< TFlt, int64 > &SynPos)
Definition: word2vec.cpp:83

Variable Documentation

const int ExpTablePrecision = 10000

Definition at line 13 of file word2vec.h.

const int MaxExp = 6

Definition at line 10 of file word2vec.h.

const int NegSamN = 5

Definition at line 17 of file word2vec.h.

const double StartAlpha = 0.025

Definition at line 20 of file word2vec.h.

const int TableSize = MaxExp*ExpTablePrecision*2

Definition at line 14 of file word2vec.h.