SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
html.h
Go to the documentation of this file.
1 #include "bd.h"
2 
4 // Forward
7 
9 // Html-Lexical-Chars
10 typedef enum {
13 
15 private:
16  TIntV ChTyV;
17  TChV UcChV;
18  TChV LcChV;
19  TStrStrH EscStrH;
20  void SetUcCh(const char& UcCh, const char& LcCh);
21  void SetUcCh(const TStr& Str);
22  void SetChTy(const THtmlLxChTy& ChTy, const TStr& Str);
23  void SetEscStr(const TStr& SrcStr, const TStr& DstStr);
24 public:
25  THtmlLxChDef();
26  THtmlLxChDef(TSIn& SIn): ChTyV(SIn), UcChV(SIn), LcChV(SIn), EscStrH(SIn){}
27  static PHtmlLxChDef Load(TSIn& SIn){return new THtmlLxChDef(SIn);}
28  void Save(TSOut& SOut){
29  ChTyV.Save(SOut); UcChV.Save(SOut); LcChV.Save(SOut); EscStrH.Save(SOut);}
30 
31  THtmlLxChDef& operator=(const THtmlLxChDef&){Fail; return *this;}
32 
33  // character type operations
34  int GetChTy(const char& Ch) const {return ChTyV[Ch-TCh::Mn];}
35  bool IsEoln(const char& Ch) const {return (Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
36  bool IsWs(const char& Ch) const {
37  return (Ch==' ')||(Ch==TCh::TabCh)||(Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
38  bool IsSpace(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctSpace;}
39  bool IsAlpha(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctAlpha;}
40  bool IsNum(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctNum;}
41  bool IsAlNum(const char& Ch) const {
42  return (int(ChTyV[Ch-TCh::Mn])==hlctAlpha)||(int(ChTyV[Ch-TCh::Mn])==hlctNum);}
43  bool IsSym(const char& Ch) const {return int(ChTyV[Ch-TCh::Mn])==hlctSym;}
44  bool IsUrl(const char& Ch) const {
45  int ChTy=ChTyV[Ch-TCh::Mn];
46  return (ChTy==hlctAlpha)||(ChTy==hlctNum)||
47  (Ch=='.')||(Ch=='-')||(Ch==':')||(Ch=='/')||(Ch=='~');}
48 
49  // upper/lower-case & escape-string operations
50  bool IsUc(const char& Ch) const {return Ch==UcChV[Ch-TCh::Mn];}
51  bool IsLc(const char& Ch) const {return Ch==LcChV[Ch-TCh::Mn];}
52  char GetUc(const char& Ch) const {return UcChV[Ch-TCh::Mn];}
53  char GetLc(const char& Ch) const {return LcChV[Ch-TCh::Mn];}
54  void GetUcChA(TChA& ChA) const {
55  for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetUc(ChA[ChN]));}}
56  void GetLcChA(TChA& ChA) const {
57  for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetLc(ChA[ChN]));}}
58  TStr GetUcStr(const TStr& Str) const {
59  TChA ChA(Str); GetUcChA(ChA); return ChA;}
60  TStr GetLcStr(const TStr& Str) const {
61  TChA ChA(Str); GetLcChA(ChA); return ChA;}
62  TStr GetEscStr(const TStr& Str) const;
63 
64  // standard entry points
66  static PHtmlLxChDef GetChDef(){IAssert(!ChDef.Empty()); return ChDef;}
67  static THtmlLxChDef& GetChDefRef(){IAssert(!ChDef.Empty()); return *ChDef;}
68 
69  // character-set transformations
70  static TStr GetCSZFromYuascii(const TChA& ChA);
71  static TStr GetCSZFromWin1250(const TChA& ChA);
72  static TStr GetWin1250FromYuascii(const TChA& ChA);
73  static TStr GetIsoCeFromYuascii(const TChA& ChA);
74 };
75 
77 // Html-Lexical
78 typedef enum {
81 
82 class THtmlLx{
83 private:
87  bool DoParseArg;
89  char Ch;
90  int ChX;
91  bool EscCh;
95  void GetCh(){
96  if (ChStack.Empty()){
97  if (RSIn.Eof()){Ch=TCh::EofCh;} else {Ch=RSIn.GetCh(); ChX++;}
98  } else {
99  Ch=ChStack.Pop(); ChX++;
100  }
101  SymChA+=Ch;
102  }
103  void GetEscCh();
104  void GetMetaTag();
105  void GetTag();
106 public:
115  TArgNmValV ArgNmValV;
116 public:
117  THtmlLx(const PSIn& _SIn, const bool& _DoParseArg=true):
118  SIn(_SIn), RSIn(*SIn), DoParseArg(_DoParseArg),
119  ChStack(), Ch(' '), ChX(0), EscCh(false),
120  EscChA(), ArgNm(), ArgVal(),
121  Sym(hsyUndef), SymBChX(0), SymEChX(0), ChA(), UcChA(),
122  PreSpaces(0), PreSpaceChA(), ArgNmValV(){}
123 
124  THtmlLx& operator=(const THtmlLx&){Fail; return *this;}
125 
126  void PutCh(const char& _Ch){
127  ChStack.Push(Ch); if (!SymChA.Empty()){SymChA.Pop();} Ch=_Ch; ChX--;}
128  void PutStr(const TStr& Str){
129  for (int ChN=Str.Len()-1; ChN>=0; ChN--){PutCh(Str[ChN]);}}
130  THtmlLxSym GetSym();
131  PHtmlTok GetTok(const bool& DoUc=true);
133  return TStr::GetSpaceStr(PreSpaces);}
134 
135  int GetArgs() const {return ArgNmValV.Len();}
136  TStr GetArgNm(const int& ArgN) const {return ArgNmValV[ArgN].Key;}
137  TStr GetArgVal(const int& ArgN) const {return ArgNmValV[ArgN].Dat;}
138  bool IsArg(const TStr& ArgNm) const {return ArgNmValV.IsIn(TStrKd(ArgNm));}
139  TStr GetArg(const TStr& ArgNm, const TStr& DfArgVal=TStr()) const {
140  int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
141  if (ArgN==-1){return DfArgVal;} else {return ArgNmValV[ArgN].Dat;}}
142  void PutArg(const TStr& ArgNm, const TStr& ArgVal){
143  int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
144  if (ArgN==-1){ArgNmValV.Add(TStrKd(ArgNm, ArgVal));}
145  else {ArgNmValV[ArgN]=TStrKd(ArgNm, ArgVal);}}
146  TStr GetFullBTagStr() const;
147 
148  void MoveToStrOrEof(const TStr& Str);
149  void MoveToBTagOrEof(const TStr& TagNm);
150  void MoveToBTag2OrEof(const TStr& TagNm1, const TStr& TagNm2);
151  void MoveToBTag3OrEof(const TStr& TagNm1, const TStr& TagNm2, const TStr& TagNm3);
152  void MoveToBTagOrETagOrEof(const TStr& BTagNm, const TStr& ETagNm);
153  void MoveToBTagArgOrEof(
154  const TStr& TagNm, const TStr& ArgNm, const TStr& ArgVal);
155  void MoveToBTagArg2OrEof(const TStr& TagNm,
156  const TStr& ArgNm1, const TStr& ArgVal1,
157  const TStr& ArgNm2, const TStr& ArgVal2, const bool& AndOpP=true);
158  void MoveToBTagOrEof(
159  const TStr& TagNm1, const TStr& ArgNm1, const TStr& ArgVal1,
160  const TStr& TagNm2, const TStr& ArgNm2, const TStr& ArgVal2);
161  void MoveToETagOrEof(const TStr& TagNm);
163  TStr GetStrToBTag(const TStr& TagNm, const bool& TxtOnlyP=false);
164  TStr GetStrToBTag(const TStr& TagNm, const TStr& ArgNm,
165  const TStr& ArgVal, const bool& TxtOnlyP=false);
166  TStr GetStrToETag(const TStr& TagNm, const bool& TxtOnlyP=false);
167  TStr GetStrToETag2(const TStr& TagNm1, const TStr& TagNm2, const bool& TxtOnlyP=false);
168  TStr GetStrInTag(const TStr& TagNm, const bool& TxtOnlyP=false);
169  TStr GetHRefBeforeStr(const TStr& Str);
170  bool IsGetBTag(const TStr& TagNm);
171  bool IsGetETag(const TStr& TagNm);
172 
173  static TStr GetSymStr(const THtmlLxSym& Sym);
174  static TStr GetEscapedStr(const TChA& ChA);
175  static TStr GetAsciiStr(const TChA& ChA, const char& GenericCh='_');
176  static void GetTokStrV(const TStr& Str, TStrV& TokStrV);
177  static TStr GetNoTag(const TStr& Str);
178 };
179 
181 // Html-Token
183 private:
185  TStr Str;
186  THtmlLx::TArgNmValV ArgNmValV;
187 public:
188  THtmlTok(): Sym(hsyUndef), Str(), ArgNmValV(){}
189  THtmlTok(const THtmlLxSym& _Sym):
190  Sym(_Sym), Str(), ArgNmValV(){}
191  THtmlTok(const THtmlLxSym& _Sym, const TStr& _Str):
192  Sym(_Sym), Str(_Str), ArgNmValV(){}
193  THtmlTok(const THtmlLxSym& _Sym, const TStr& _Str,
194  const THtmlLx::TArgNmValV& _ArgNmValV):
195  Sym(_Sym), Str(_Str), ArgNmValV(_ArgNmValV){}
197  static PHtmlTok Load(TSIn&){Fail; return NULL;}
198  void Save(TSOut&){Fail;}
199 
200  THtmlTok& operator=(const THtmlTok&){Fail; return *this;}
201 
202  THtmlLxSym GetSym() const {return Sym;}
203  TStr GetStr() const {return Str;}
204  TStr GetFullStr() const;
205  bool IsArg(const TStr& ArgNm) const {
206  return ArgNmValV.SearchForw(TStrKd(ArgNm))!=-1;}
207  TStr GetArg(const TStr& ArgNm) const {
208  return ArgNmValV[ArgNmValV.SearchForw(TStrKd(ArgNm))].Dat;}
209  TStr GetArg(const TStr& ArgNm, const TStr& DfArgVal) const {
210  int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
211  if (ArgN==-1){return DfArgVal;} else {return ArgNmValV[ArgN].Dat;}}
212  bool IsUrlTok(TStr& RelUrlStr) const;
213  bool IsRedirUrlTok() const;
214 
215  void SaveTxt(const PSOut& SOut, const bool& TxtMode=true);
216 
217  static const TStr ATagNm;
218  static const TStr AreaTagNm;
219  static const TStr BrTagNm;
220  static const TStr CardTagNm;
221  static const TStr CenterTagNm;
222  static const TStr FrameTagNm;
223  static const TStr H1TagNm;
224  static const TStr H2TagNm;
225  static const TStr H3TagNm;
226  static const TStr H4TagNm;
227  static const TStr H5TagNm;
228  static const TStr H6TagNm;
229  static const TStr ImgTagNm;
230  static const TStr LiTagNm;
231  static const TStr MetaTagNm;
232  static const TStr PTagNm;
233  static const TStr UlTagNm;
234  static const TStr TitleTagNm;
235  static const TStr TitleETagNm;
236 
237  static const TStr AltArgNm;
238  static const TStr HRefArgNm;
239  static const TStr SrcArgNm;
240  static const TStr TitleArgNm;
241  static const TStr HttpEquivArgNm;
242 
243  static bool IsBreakTag(const TStr& TagNm);
244  static bool IsBreakTok(const PHtmlTok& Tok);
245  static bool IsHTag(const TStr& TagNm, int& HTagN);
246  static PHtmlTok GetHTok(const bool& IsBTag, const int& HTagN);
247 };
248 
250 // Html-Document
251 typedef enum {
253 
255 private:
256  THtmlTokV TokV;
257 public:
258  THtmlDoc(): TokV(){}
259  THtmlDoc(
260  const PSIn& SIn, const THtmlDocType& Type=hdtAll, const bool& DoUc=true);
261  static PHtmlDoc New(
262  const PSIn& SIn, const THtmlDocType& Type=hdtAll, const bool& DoUc=true){
263  return PHtmlDoc(new THtmlDoc(SIn, Type, DoUc));}
265  static PHtmlDoc Load(TSIn&){Fail; return NULL;}
266  void Save(TSOut&){Fail;}
267 
268  THtmlDoc& operator=(const THtmlDoc&){Fail; return *this;}
269 
270  int GetToks() const {return TokV.Len();}
271  PHtmlTok GetTok(const int& TokN) const {return TokV[TokN];}
272  PHtmlTok GetTok(const int& TokN, THtmlLxSym& Sym, TStr& Str) const {
273  Sym=TokV[TokN]->GetSym(); Str=TokV[TokN]->GetStr(); return TokV[TokN];}
274  void AddTokV(const THtmlTokV& _TokV){TokV.AddV(_TokV);}
275 
276  static TStr GetTxtLnDoc(const TStr& HtmlStr);
277  static TStr GetTxtLnDoc(const TStr& HtmlStr, const TStr& BaseUrlStr,
278  const bool& OutUrlP, const bool& OutTagsP);
279 
281  const TStr& FNm, const THtmlDocType& Type=hdtAll, const bool& DoUc=true){
282  PSIn SIn=TFIn::New(FNm); return PHtmlDoc(new THtmlDoc(SIn, Type, DoUc));}
283  void SaveTxt(const PSOut& SOut, const bool& TxtMode=true) const;
284 
285  static void SaveHtmlToTxt(
286  const TStr& HtmlStr, const PSOut& TxtSOut, const TStr& BaseUrlStr,
287  const bool& OutUrlP, const bool& OutToksP);
288  static void SaveHtmlToTxt(
289  const TStr& HtmlStr, const TStr& TxtFNm, const TStr& BaseUrlStr,
290  const bool& OutUrlP, const bool& OutToksP);
291  static void SaveHtmlToXml(
292  const TStr& HtmlStr, const PSOut& XmlSOut, const TStr& BaseUrlStr,
293  const bool& OutTextP, const bool& OutUrlP, const bool& OutToksP,
294  const bool& OutTagsP, const bool& OutArgsP);
295  static void SaveHtmlToXml(
296  const TStr& HtmlStr, const TStr& XmlFNm, const TStr& BaseUrlStr,
297  const bool& OutTextP, const bool& OutUrlP, const bool& OutToksP,
298  const bool& OutTagsP, const bool& OutArgsP);
299 
300  static TLxSym GetLxSym(const THtmlLxSym& HtmlLxSym, const TChA& ChA);
301 
302  static bool _IsTagRedir(
303  const TStr& TagStr, const TStr& ArgNm, THtmlLx& Lx,
304  const TStr& BaseUrlStr, const TStr& RedirUrlStr);
305  static TStr GetRedirHtmlDocStr(const TStr& HtmlStr,
306  const TStr& BaseUrlStr, const TStr& RedirUrlStr);
307 };
308 
310 // Html-Hyper-Link-Document-Vector
312 private:
313  PHtmlDoc RefHtmlDoc;
314  THtmlDocV HldV;
315 public:
316  THtmlHldV(const PHtmlDoc& _RefHtmlDoc, const int& HldWnLen=10);
318  static PHtmlHldV Load(TSIn&){Fail; return NULL;}
319  void Save(TSOut&){Fail;}
320 
321  THtmlHldV& operator=(const THtmlHldV&){Fail; return *this;}
322 
323  PHtmlDoc GetRefHtmlDoc(){return RefHtmlDoc;}
324  int GetHlds(){return HldV.Len();}
325  PHtmlDoc GetHld(const int& HldN){return HldV[HldN];}
326 };
327 
329 // Web-Page
331 private:
332  TStrV UrlStrV;
333  TStrV IpNumV;
334  PHttpResp HttpResp;
335  uint64 FetchMSecs;
336 public:
337  TWebPg(): UrlStrV(), IpNumV(), HttpResp(){}
338  TWebPg(const TStrV& _UrlStrV, const TStrV& _IpNumV, const PHttpResp& _HttpResp):
339  UrlStrV(_UrlStrV), IpNumV(_IpNumV), HttpResp(_HttpResp){}
340  static PWebPg New(const TStrV& UrlStrV, const TStrV& IpNumV, const PHttpResp& HttpResp){
341  return new TWebPg(UrlStrV, IpNumV, HttpResp);}
342  static PWebPg New(const TStrV& UrlStrV, const PHttpResp& HttpResp){
343  return new TWebPg(UrlStrV, TStrV(), HttpResp);}
344  static PWebPg New(const TStr& UrlStr, const PHttpResp& HttpResp){
345  TStrV UrlStrV; UrlStrV.Add(UrlStr);
346  return new TWebPg(UrlStrV, TStrV(), HttpResp);}
349  static PWebPg Load(TSIn&){Fail; return NULL;}
350  void Save(TSOut&){Fail;}
351 
352  TWebPg& operator=(const TWebPg&){Fail; return *this;}
353 
354  int GetUrls() const {return UrlStrV.Len();}
355  TStr GetUrlStr(const int& UrlN=-1) const {
356  if (UrlN==-1){return UrlStrV.Last();} else {return UrlStrV[UrlN];}}
357  PUrl GetUrl(const int& UrlN=-1) const {
358  TStr UrlStr;
359  if (UrlN==-1){UrlStr=UrlStrV.Last();} else {UrlStr=UrlStrV[UrlN];}
360  return TUrl::New(UrlStr);}
361 
362  int GetIps() const {return IpNumV.Len();}
363  TStr GetIpNum(const int& IpN=-1) const {
364  if (IpN==-1){return IpNumV.Last();} else {return IpNumV[IpN];}}
365 
366  PHttpResp GetHttpResp() const {return HttpResp;}
367  TStr GetHttpHdStr() const {return GetHttpResp()->GetHdStr();}
368  TStr GetHttpBodyAsStr() const {return GetHttpResp()->GetBodyAsStr();}
369  //void GetOutUrlStrV(TStrV& OutUrlStrV) const;
370  void GetOutUrlV(TUrlV& OutUrlV, TUrlV& OutRedirUrlV) const;
371  void GetOutUrlV(TUrlV& OutUrlV) const {
372  TUrlV OutRedirUrlV; GetOutUrlV(OutUrlV, OutRedirUrlV);}
373  void GetOutDescUrlStrKdV(TStrKdV& OutDescUrlStrKdV) const;
374 
375  // fetch time
376  void PutFetchMSecs(const uint64& _FetchMSecs){FetchMSecs=_FetchMSecs;}
377  uint64 GetFetchMSecs() const {return FetchMSecs;}
378 
379  void SaveAsHttpBody(const TStr& FNm) const;
380  void SaveAsHttp(const TStr& FNm) const;
381 
382  bool IsTxt() const;
383 };
Definition: html.h:252
THtmlDocType
Definition: html.h:251
#define IAssert(Cond)
Definition: bd.h:262
static PWebPg Load(TSIn &)
Definition: html.h:349
static const TStr H5TagNm
Definition: html.h:227
THtmlLxSym
Definition: html.h:78
bool IsGetETag(const TStr &TagNm)
Definition: html.cpp:547
THtmlDoc(TSIn &)
Definition: html.h:264
static const TStr H4TagNm
Definition: html.h:226
TChA ArgNm
Definition: html.h:93
TStr GetHRefBeforeStr(const TStr &Str)
Definition: html.cpp:530
static TStr GetSpaceStr(const int &Spaces)
Definition: dt.cpp:1608
TStr GetHttpHdStr() const
Definition: html.h:367
static const TStr FrameTagNm
Definition: html.h:222
static THtmlLxChDef ChDef
Definition: html.h:84
static const TStr H3TagNm
Definition: html.h:225
bool IsWs(const char &Ch) const
Definition: html.h:36
int Len() const
Definition: dt.h:490
static const TStr H1TagNm
Definition: html.h:223
#define ClassHdTP(TNm, PNm)
Definition: bd.h:135
static const TStr TitleArgNm
Definition: html.h:240
void GetLcChA(TChA &ChA) const
Definition: html.h:56
TSIn & RSIn
Definition: html.h:86
Definition: html.h:252
PUrl GetUrl(const int &UrlN=-1) const
Definition: html.h:357
static const TStr LiTagNm
Definition: html.h:230
TChA EscChA
Definition: html.h:92
PHtmlDoc GetRefHtmlDoc()
Definition: html.h:323
Definition: html.h:252
Definition: html.h:79
THtmlTok(const THtmlLxSym &_Sym, const TStr &_Str)
Definition: html.h:191
void MoveToETagOrEof(const TStr &TagNm)
Definition: html.cpp:441
static const TStr HRefArgNm
Definition: html.h:238
TStr GetFullBTagStr() const
Definition: html.cpp:358
TStr GetUrlStr(const int &UrlN=-1) const
Definition: html.h:355
int SymBChX
Definition: html.h:108
void PutStr(const TStr &Str)
Definition: html.h:128
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
Definition: ds.h:828
bool IsNum(const char &Ch) const
Definition: html.h:40
Definition: html.h:182
void MoveToBTagArgOrEof(const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal)
Definition: html.cpp:400
bool IsSym(const char &Ch) const
Definition: html.h:43
bool Empty() const
Definition: dt.h:260
#define Fail
Definition: bd.h:238
static void GetTokStrV(const TStr &Str, TStrV &TokStrV)
Definition: html.cpp:595
TStrKdV TArgNmValV
Definition: html.h:114
bool Empty() const
Definition: bd.h:501
void PutCh(const int &ChN, const char &Ch)
Definition: dt.h:278
Definition: html.h:79
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal) const
Definition: html.h:209
void GetUcChA(TChA &ChA) const
Definition: html.h:54
TLxSym
Definition: lx.h:44
bool IsEoln(const char &Ch) const
Definition: html.h:35
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void PutCh(const char &_Ch)
Definition: html.h:126
bool DoParseArg
Definition: html.h:87
THtmlLxSym GetSym()
Definition: html.cpp:277
static THtmlLxChDef & GetChDefRef()
Definition: html.h:67
THtmlLx & operator=(const THtmlLx &)
Definition: html.h:124
void Save(TSOut &)
Definition: html.h:266
TStr GetArg(const TStr &ArgNm) const
Definition: html.h:207
void Save(TSOut &)
Definition: html.h:350
PHtmlTok GetTok(const int &TokN, THtmlLxSym &Sym, TStr &Str) const
Definition: html.h:272
static const TStr MetaTagNm
Definition: html.h:231
Definition: html.h:12
static PUrl New(const TStr &RelUrlStr, const TStr &BaseUrlStr=TStr())
Definition: url.h:25
int Len() const
Definition: dt.h:259
void MoveToBTagArg2OrEof(const TStr &TagNm, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &ArgNm2, const TStr &ArgVal2, const bool &AndOpP=true)
Definition: html.cpp:410
static TStr GetEscapedStr(const TChA &ChA)
Definition: html.cpp:568
void Save(TSOut &)
Definition: html.h:319
int GetHlds()
Definition: html.h:324
bool IsUrl(const char &Ch) const
Definition: html.h:44
TStr GetStr() const
Definition: html.h:203
static PHtmlLxChDef ChDef
Definition: html.h:65
static const TStr TitleTagNm
Definition: html.h:234
static PWebPg New(const TStrV &UrlStrV, const PHttpResp &HttpResp)
Definition: html.h:342
char Ch
Definition: html.h:89
int GetUrls() const
Definition: html.h:354
THtmlTok(const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV)
Definition: html.h:193
void GetTag()
Definition: html.cpp:236
bool IsArg(const TStr &ArgNm) const
Definition: html.h:205
THtmlTok(TSIn &)
Definition: html.h:196
bool IsGetBTag(const TStr &TagNm)
Definition: html.cpp:541
static const TStr AreaTagNm
Definition: html.h:218
TPt< THtmlDoc > PHtmlDoc
Definition: html.h:6
void Save(TSOut &)
Definition: html.h:198
THtmlLxSym GetSym() const
Definition: html.h:202
Definition: html.h:11
Definition: fl.h:58
Definition: html.h:11
static PWebPg New(const TStrV &UrlStrV, const TStrV &IpNumV, const PHttpResp &HttpResp)
Definition: html.h:340
TChA PreSpaceChA
Definition: html.h:113
static const TStr HttpEquivArgNm
Definition: html.h:241
TStr GetLcStr(const TStr &Str) const
Definition: html.h:60
TStr GetArgVal(const int &ArgN) const
Definition: html.h:137
TChA ChStack
Definition: html.h:88
Definition: html.h:330
static const char EofCh
Definition: dt.h:1040
Definition: html.h:252
#define ClassTP(TNm, PNm)
Definition: bd.h:126
Definition: html.h:252
static const TStr H2TagNm
Definition: html.h:224
THtmlDoc & operator=(const THtmlDoc &)
Definition: html.h:268
static PSIn New(const TStr &FNm)
Definition: fl.cpp:290
static const TStr ATagNm
Definition: html.h:217
#define ClassTPV(TNm, PNm, TNmV)
Definition: bd.h:162
static const TStr CenterTagNm
Definition: html.h:221
void Save(TSOut &SOut)
Definition: html.h:28
THtmlTok & operator=(const THtmlTok &)
Definition: html.h:200
static PHtmlHldV Load(TSIn &)
Definition: html.h:318
bool IsArg(const TStr &ArgNm) const
Definition: html.h:138
static const TStr UlTagNm
Definition: html.h:233
virtual bool Eof()=0
static TStr GetNoTag(const TStr &Str)
Definition: html.cpp:606
THtmlLx(const PSIn &_SIn, const bool &_DoParseArg=true)
Definition: html.h:117
static const TStr CardTagNm
Definition: html.h:220
unsigned long long uint64
Definition: bd.h:38
static const char TabCh
Definition: dt.h:1037
static const char Mn
Definition: dt.h:1032
TStr GetArgNm(const int &ArgN) const
Definition: html.h:136
void GetOutUrlV(TUrlV &OutUrlV) const
Definition: html.h:371
TStr GetStrToETag(const TStr &TagNm, const bool &TxtOnlyP=false)
Definition: html.cpp:494
bool IsUc(const char &Ch) const
Definition: html.h:50
void PutArg(const TStr &ArgNm, const TStr &ArgVal)
Definition: html.h:142
static PHtmlDoc LoadTxt(const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
Definition: html.h:280
bool EscCh
Definition: html.h:91
Definition: html.h:80
PHtmlTok GetTok(const bool &DoUc=true)
Definition: html.cpp:353
int SymEChX
Definition: html.h:108
Definition: html.h:12
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
Definition: html.h:139
~TWebPg()
Definition: html.h:347
int GetToks() const
Definition: html.h:270
TWebPg & operator=(const TWebPg &)
Definition: html.h:352
PHttpResp GetHttpResp() const
Definition: html.h:366
Definition: html.h:11
bool IsAlNum(const char &Ch) const
Definition: html.h:41
TStr GetStrInTag(const TStr &TagNm, const bool &TxtOnlyP=false)
Definition: html.cpp:525
TChA UcChA
Definition: html.h:110
static PHtmlDoc Load(TSIn &)
Definition: html.h:265
static const TStr H6TagNm
Definition: html.h:228
void MoveToBTagOrETagOrEof(const TStr &BTagNm, const TStr &ETagNm)
Definition: html.cpp:394
TChA ChA
Definition: html.h:109
Definition: html.h:79
Definition: fl.h:128
TStr GetIpNum(const int &IpN=-1) const
Definition: html.h:363
void MoveToBTag3OrEof(const TStr &TagNm1, const TStr &TagNm2, const TStr &TagNm3)
Definition: html.cpp:388
static const char LfCh
Definition: dt.h:1038
int GetChTy(const char &Ch) const
Definition: html.h:34
void GetMetaTag()
Definition: html.cpp:225
TStr GetStr() const
Definition: dt.h:681
int ChX
Definition: html.h:90
bool IsSpace(const char &Ch) const
Definition: html.h:38
void AddTokV(const THtmlTokV &_TokV)
Definition: html.h:274
Definition: dt.h:201
TStr GetTextOnlyStrToEof()
Definition: html.cpp:447
Definition: html.h:79
TStr GetUcStr(const TStr &Str) const
Definition: html.h:58
Definition: html.h:80
PSIn SIn
Definition: html.h:85
void GetCh()
Definition: html.h:95
TVec< TStr > TStrV
Definition: ds.h:1599
TWebPg(const TStrV &_UrlStrV, const TStrV &_IpNumV, const PHttpResp &_HttpResp)
Definition: html.h:338
uint64 GetFetchMSecs() const
Definition: html.h:377
Definition: html.h:12
int GetIps() const
Definition: html.h:362
static const TStr ImgTagNm
Definition: html.h:229
TStr GetPreSpaceStr() const
Definition: html.h:132
static const TStr PTagNm
Definition: html.h:232
TStr GetStrToETag2(const TStr &TagNm1, const TStr &TagNm2, const bool &TxtOnlyP=false)
Definition: html.cpp:509
void MoveToStrOrEof(const TStr &Str)
Definition: html.cpp:370
bool IsAlpha(const char &Ch) const
Definition: html.h:39
static TStr GetAsciiStr(const TChA &ChA, const char &GenericCh='_')
Definition: html.cpp:584
void GetEscCh()
Definition: html.cpp:195
Definition: html.h:80
static const TStr AltArgNm
Definition: html.h:237
Definition: dt.h:412
TChA SymChA
Definition: html.h:111
static PHtmlLxChDef GetChDef()
Definition: html.h:66
char GetUc(const char &Ch) const
Definition: html.h:52
static PHtmlLxChDef Load(TSIn &SIn)
Definition: html.h:27
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:79
void MoveToBTag2OrEof(const TStr &TagNm1, const TStr &TagNm2)
Definition: html.cpp:382
TSizeTy SearchForw(const TVal &Val, const TSizeTy &BValN=0) const
Returns the position of an element with value Val.
Definition: ds.h:1552
Definition: html.h:252
static const char CrCh
Definition: dt.h:1039
int GetArgs() const
Definition: html.h:135
Definition: html.h:80
void Push(const char &Ch)
Definition: dt.h:264
THtmlHldV & operator=(const THtmlHldV &)
Definition: html.h:321
Definition: html.h:82
TWebPg(TSIn &)
Definition: html.h:348
Definition: bd.h:196
char GetLc(const char &Ch) const
Definition: html.h:53
static const TStr BrTagNm
Definition: html.h:219
THtmlTok(const THtmlLxSym &_Sym)
Definition: html.h:189
virtual char GetCh()=0
Definition: html.h:254
THtmlLxChTy
Definition: html.h:10
TArgNmValV ArgNmValV
Definition: html.h:115
TChA ArgVal
Definition: html.h:94
void PutFetchMSecs(const uint64 &_FetchMSecs)
Definition: html.h:376
PHtmlDoc GetHld(const int &HldN)
Definition: html.h:325
Definition: html.h:11
int PreSpaces
Definition: html.h:112
THtmlLxChDef & operator=(const THtmlLxChDef &)
Definition: html.h:31
char Pop()
Definition: dt.h:265
void MoveToBTagOrEof(const TStr &TagNm)
Definition: html.cpp:376
static PHtmlDoc New(const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
Definition: html.h:261
static PWebPg New(const TStr &UrlStr, const PHttpResp &HttpResp)
Definition: html.h:344
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
static const TStr TitleETagNm
Definition: html.h:235
PHtmlTok GetTok(const int &TokN) const
Definition: html.h:271
static const TStr SrcArgNm
Definition: html.h:239
static PHtmlTok Load(TSIn &)
Definition: html.h:197
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405
static TStr GetSymStr(const THtmlLxSym &Sym)
Definition: html.cpp:553
bool IsLc(const char &Ch) const
Definition: html.h:51
TStr GetStrToBTag(const TStr &TagNm, const bool &TxtOnlyP=false)
Definition: html.cpp:462
TStr GetHttpBodyAsStr() const
Definition: html.h:368