From 13ae890171911f6c6c22e6edb7014a4e4da2af54 Mon Sep 17 00:00:00 2001 From: eson Date: Wed, 15 Jul 2020 19:23:45 +0800 Subject: [PATCH] =?UTF-8?q?TODO:=20=E6=AD=A3=E5=88=99Regexp=20=E8=8E=B7?= =?UTF-8?q?=E5=8F=96Tags=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- extractor/openrec/.gitignore | 2 + extractor/openrec/openrec_test.go | 17 ++++--- extractor_field.go | 75 ++++++++++++++++++------------- sql/intimate_extractor.sql | 2 +- store.go | 6 +-- 5 files changed, 62 insertions(+), 40 deletions(-) create mode 100644 extractor/openrec/.gitignore diff --git a/extractor/openrec/.gitignore b/extractor/openrec/.gitignore new file mode 100644 index 0000000..589ccd4 --- /dev/null +++ b/extractor/openrec/.gitignore @@ -0,0 +1,2 @@ +*.html +screenlog.* \ No newline at end of file diff --git a/extractor/openrec/openrec_test.go b/extractor/openrec/openrec_test.go index 74eb55a..33cb210 100644 --- a/extractor/openrec/openrec_test.go +++ b/extractor/openrec/openrec_test.go @@ -96,6 +96,10 @@ func TestExtractor(t *testing.T) { collect := intimate.NewExtractorStore() store := intimate.NewSourceStore("source_openrec") source, err := store.Pop(string(intimate.TTOpenrecRanking), 100) + if err != nil { + log.Println(err) + return + } anchorId := source.GetSource().String @@ -180,11 +184,11 @@ func TestExtractor(t *testing.T) { // MovieToolbar__Views-g5e6ic-13 iDRGyA livejson := m["user_live"] - // f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm) - // if err != nil { - // panic(err) - // } - // f.WriteString(livejson.String()) + f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm) + if err != nil { + panic(err) + } + f.WriteString(livejson.String()) extractor = hunter.NewExtractor([]byte(livejson.Str)) // xr, err := extractor.XPathResult("//h1[ contains(@class, 'MovieTitle__Title')]") @@ -228,6 +232,9 @@ func TestExtractor(t *testing.T) { } } + 
matheslist := regexp.MustCompile(`TagButton__Button.+>([^<]+)<`).FindAllStringSubmatch(livejson.Str, -1) + t.Error(matheslist) + LiveUrl := "https://www.openrec.tv/live/" + anchorId ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true}) diff --git a/extractor_field.go b/extractor_field.go index ad5127e..9721e1f 100644 --- a/extractor_field.go +++ b/extractor_field.go @@ -6,27 +6,27 @@ import ( ) type ISetAnchorInfo interface { - SetUid(int64) // - SetPlatform(string) // - SetAnchorId(string) // - SetAnchorName(string) // - SetLiveUrl(sql.NullString) // - SetChannel(sql.NullString) // - SetShowType(sql.NullString) // - SetExt(interface{}) // - SetUpdateTime(time.Time) // + SetUid(int64) // + SetPlatform(string) // + SetAnchorId(string) // + SetAnchorName(string) // + SetLiveUrl(sql.NullString) // + SetChannel(sql.NullString) // + SetTags(interface{}) // + SetExt(interface{}) // + SetUpdateTime(time.Time) // } type IGetAnchorInfo interface { - GetUid() int64 // - GetPlatform() string // - GetAnchorId() string // - GetAnchorName() string // - GetLiveUrl() sql.NullString // - GetChannel() sql.NullString // - GetShowType() sql.NullString // - GetExt() interface{} // - GetUpdateTime() time.Time // + GetUid() int64 // + GetPlatform() string // + GetAnchorId() string // + GetAnchorName() string // + GetLiveUrl() sql.NullString // + GetChannel() sql.NullString // + GetTags() interface{} + GetExt() interface{} // + GetUpdateTime() time.Time // } /* @@ -58,9 +58,19 @@ type AnchorInfo struct { AnchorName string // LiveUrl sql.NullString // Channel sql.NullString // - ShowType sql.NullString // - Ext interface{} // - UpdateTime time.Time // + Tags interface{} + Ext interface{} // + UpdateTime time.Time // +} + +// GetTags Get return Tags interface{} +func (ai *AnchorInfo) GetTags() interface{} { + return ai.Tags +} + +// SetTags Set Tags interface{} +func (ai *AnchorInfo) SetTags(Tags interface{}) { + ai.Tags = Tags } // GetUpdateTime Get return UpdateTime time.Time @@ 
-83,16 +93,6 @@ func (ai *AnchorInfo) SetExt(Ext interface{}) { ai.Ext = Ext } -// GetShowType Get return ShowType sql.NullString -func (ai *AnchorInfo) GetShowType() sql.NullString { - return ai.ShowType -} - -// SetShowType Set ShowType sql.NullString -func (ai *AnchorInfo) SetShowType(ShowType sql.NullString) { - ai.ShowType = ShowType -} - // GetChannel Get return Channel sql.NullString func (ai *AnchorInfo) GetChannel() sql.NullString { return ai.Channel @@ -167,6 +167,7 @@ type IGetCollectLog interface { GetShowStartTime() sql.NullTime // GetShowEndTime() sql.NullTime // GetUpdateTime() sql.NullTime // + GetTags() interface{} // GetExt() interface{} // GetErrorMsg() sql.NullString // } @@ -185,6 +186,7 @@ type ISetCollectLog interface { SetShowStartTime(sql.NullTime) // SetShowEndTime(sql.NullTime) // SetUpdateTime(sql.NullTime) // + SetTags(interface{}) // SetExt(interface{}) // SetErrorMsg(sql.NullString) // } @@ -237,10 +239,21 @@ type CollectLog struct { ShowStartTime sql.NullTime // ShowEndTime sql.NullTime // UpdateTime sql.NullTime // + Tags interface{} Ext interface{} // ErrorMsg sql.NullString // } +// GetTags Get return Tags interface{} +func (cl *CollectLog) GetTags() interface{} { + return cl.Tags +} + +// SetTags Set Tags interface{} +func (cl *CollectLog) SetTags(Tags interface{}) { + cl.Tags = Tags +} + // GetErrorMsg Get return Error sql.NullString func (cl *CollectLog) GetErrorMsg() sql.NullString { return cl.ErrorMsg diff --git a/sql/intimate_extractor.sql b/sql/intimate_extractor.sql index bac815a..6402f45 100644 --- a/sql/intimate_extractor.sql +++ b/sql/intimate_extractor.sql @@ -17,7 +17,6 @@ CREATE TABLE IF NOT EXISTS `anchor_info` ( KEY `anchor_id_idx` (`anchor_id`), KEY `anchor_name_idx` (`anchor_name`), KEY `channel_idx` (`channel`), - KEY `show_type_idx` (`show_type`), KEY `update_time_idx` (`update_time`) ); @@ -38,6 +37,7 @@ CREATE TABLE IF NOT EXISTS `collect_log` ( `show_start_time` timestamp NULL DEFAULT NULL, `show_end_time` 
timestamp NULL DEFAULT NULL, `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `tags` json DEFAULT NULL, `ext` json DEFAULT NULL, `error_msg` text DEFAULT NULL, diff --git a/store.go b/store.go index 904f4a4..48b30c0 100644 --- a/store.go +++ b/store.go @@ -198,7 +198,7 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64 return uid, nil } - result, err := tx.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, show_type, ext) values(?,?,?,?,?,?,?);", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetShowType(), isource.GetExt()) + result, err := tx.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, tags, ext) values(?,?,?,?,?,?,?);", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetTags(), isource.GetExt()) log.Println(result.LastInsertId()) if err != nil { @@ -249,8 +249,8 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64 // InsertCollectLog CollectLog表插入数据 func (store *ExtractorStore) InsertCollectLog(isource IGetCollectLog) error { - _, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_showing, is_error, followers, views, giver, gratuity, show_title, show_start_time, show_end_time, update_time, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", - isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsShowing(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetShowTitle(), isource.GetShowStartTime(), isource.GetShowEndTime(), isource.GetUpdateTime(), isource.GetExt(), isource.GetErrorMsg(), + _, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_showing, is_error, followers, 
views, giver, gratuity, show_title, show_start_time, show_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", + isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsShowing(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetShowTitle(), isource.GetShowStartTime(), isource.GetShowEndTime(), isource.GetUpdateTime(), isource.GetTags(), isource.GetExt(), isource.GetErrorMsg(), ) store.errorAlarm(err) return err