9 for(
int64 i = 0; i < Vocab.
Len(); i++) { Vocab[i] = 0; }
11 for(
int j = 0; j < WalksVV.
GetYDim(); j++) {
12 Vocab[WalksVV(i,j)]++;
19 double TrainWordsPow = 0;
22 for (
int64 i = 0; i < Vocab.
Len(); i++) {
24 TrainWordsPow += ProbV[i];
28 for (
int64 i = 0; i < ProbV.Len(); i++) {
29 ProbV[i] /= TrainWordsPow;
33 for (
int64 i = 0; i < ProbV.Len(); i++) {
34 UTable[i] = ProbV[i] * ProbV.
Len();
35 if ( UTable[i] < 1 ) {
41 while(UnderV.
Len() > 0 && OverV.
Len() > 0) {
46 KTable[Small] = Large;
47 UTable[Large] = (UTable[Large] + UTable[Small]) - 1;
48 if (UTable[Large] < 1) {
54 while(UnderV.
Len() > 0){
59 while(OverV.
Len() > 0){
69 return Y < UTable[X] ? X : KTable[X];
76 for (
int j = 0; j < SynNeg.
GetYDim(); j++) {
86 for (
int j = 0; j < SynPos.
GetYDim(); j++) {
87 SynPos(i,j) =(Rnd.
GetUniDev()-0.5)/Dimensions;
93 const int& WinSize,
const int& Iter,
const bool& Verbose,
97 TFltV Neu1V(Dimensions);
98 TFltV Neu1eV(Dimensions);
101 for (
int j = 0; j < WalksVV.
GetYDim(); j++) { WalkV[j] = WalksVV(CurrWalk,j); }
102 for (
int64 WordI=0; WordI<WalkV.Len(); WordI++) {
103 if ( WordCntAll%10000 == 0 ) {
105 printf(
"\rLearning Progress: %.2lf%% ",(
double)WordCntAll*100/(
double)(Iter*AllWords));
108 Alpha =
StartAlpha * (1 - WordCntAll /
static_cast<double>(Iter * AllWords + 1));
111 int64 Word = WalkV[WordI];
112 for (
int i = 0; i < Dimensions; i++) {
117 for (
int a = Offset; a < WinSize * 2 + 1 - Offset; a++) {
118 if (a == WinSize) {
continue; }
119 int64 CurrWordI = WordI - WinSize + a;
120 if (CurrWordI < 0){
continue; }
121 if (CurrWordI >= WalkV.Len()){
continue; }
122 int64 CurrWord = WalkV[CurrWordI];
123 for (
int i = 0; i < Dimensions; i++) { Neu1eV[i] = 0; }
125 for (
int j = 0; j <
NegSamN+1; j++) {
132 if (Target == Word) {
continue; }
136 for (
int i = 0; i < Dimensions; i++) {
137 Product += SynPos(CurrWord,i) * SynNeg(Target,i);
140 if (Product >
MaxExp) { Grad = (Label - 1) * Alpha; }
141 else if (Product < -
MaxExp) { Grad = Label * Alpha; }
144 Grad = (Label - 1 + 1 / (1 + Exp)) * Alpha;
146 for (
int i = 0; i < Dimensions; i++) {
147 Neu1eV[i] += Grad * SynNeg(Target,i);
148 SynNeg(Target,i) += Grad * SynPos(CurrWord,i);
151 for (
int i = 0; i < Dimensions; i++) {
152 SynPos(CurrWord,i) += Neu1eV[i];
161 const int& WinSize,
const int& Iter,
const bool& Verbose,
167 for (
int i = 0; i < WalksVV.
GetXDim(); i++) {
169 if ( RnmH.
IsKey(WalksVV(i, j)) ) {
170 WalksVV(i, j) = RnmH.
GetDat(WalksVV(i, j));
172 RnmH.
AddDat(WalksVV(i,j),NNodes);
173 RnmBackH.
AddDat(NNodes,WalksVV(i, j));
174 WalksVV(i, j) = NNodes++;
180 TIntV KTable(NNodes);
181 TFltV UTable(NNodes);
184 TRnd Rnd(time(NULL));
190 #pragma omp parallel for schedule(dynamic)
195 int64 WordCntAll = 0;
198 for (
int j = 0; j < Iter; j++) {
199 #pragma omp parallel for schedule(dynamic)
201 TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
202 WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos);
205 if (Verbose) { printf(
"\n"); fflush(stdout); }
208 for (
int j = 0; j < SynPos.
GetYDim(); j++) { CurrV[j] = SynPos(i, j); }
void InitUnigramTable(TIntV &Vocab, TIntV &KTable, TFltV &UTable)
TSizeTy Len() const
Returns the number of elements in the vector.
void LearnEmbeddings(TVVec< TInt, int64 > &WalksVV, const int &Dimensions, const int &WinSize, const int &Iter, const bool &Verbose, TIntFltVH &EmbeddingsHV)
Learns embeddings using SGD, Skip-gram with negative sampling.
void InitNegEmb(TIntV &Vocab, const int &Dimensions, TVVec< TFlt, int64 > &SynNeg)
const TDat & GetDat(const TKey &Key) const
static double Power(const double &Base, const double &Exponent)
const TVal & Last() const
Returns a reference to the last element of the vector.
void TrainModel(TVVec< TInt, int64 > &WalksVV, const int &Dimensions, const int &WinSize, const int &Iter, const bool &Verbose, TIntV &KTable, TFltV &UTable, int64 &WordCntAll, TFltV &ExpTable, double &Alpha, int64 CurrWalk, TRnd &Rnd, TVVec< TFlt, int64 > &SynNeg, TVVec< TFlt, int64 > &SynPos)
const int ExpTablePrecision
void LearnVocab(TVVec< TInt, int64 > &WalksVV, TIntV &Vocab)
int GetUniDevInt(const int &Range=0)
int64 RndUnigramInt(TIntV &KTable, TFltV &UTable, TRnd &Rnd)
bool IsKey(const TKey &Key) const
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
void DelLast()
Removes the last element of the vector.
TDat & AddDat(const TKey &Key)
void InitPosEmb(TIntV &Vocab, const int &Dimensions, TRnd &Rnd, TVVec< TFlt, int64 > &SynPos)