TODO: 正则Regexp 获取Tags错误

This commit is contained in:
eson 2020-07-15 19:23:45 +08:00
parent b63e180499
commit 13ae890171
5 changed files with 62 additions and 40 deletions

2
extractor/openrec/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*.html
screenlog.*

View File

@ -96,6 +96,10 @@ func TestExtractor(t *testing.T) {
collect := intimate.NewExtractorStore()
store := intimate.NewSourceStore("source_openrec")
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
if err != nil {
log.Println(err)
return
}
anchorId := source.GetSource().String
@ -180,11 +184,11 @@ func TestExtractor(t *testing.T) {
// MovieToolbar__Views-g5e6ic-13 iDRGyA
livejson := m["user_live"]
// f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm)
// if err != nil {
// panic(err)
// }
// f.WriteString(livejson.String())
f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm)
if err != nil {
panic(err)
}
f.WriteString(livejson.String())
extractor = hunter.NewExtractor([]byte(livejson.Str))
// xr, err := extractor.XPathResult("//h1[ contains(@class, 'MovieTitle__Title')]")
@ -228,6 +232,9 @@ func TestExtractor(t *testing.T) {
}
}
// Collect the label of every element whose class contains TagButton__Button.
// The count argument must be negative to get all matches: with n == 0
// FindAllStringSubmatch returns at most zero matches, i.e. always nil.
// The lazy `.+?` stops at the first `>text<` after each class name, so several
// tags rendered on one line yield one match each instead of a single match.
matheslist := regexp.MustCompile(`TagButton__Button.+?>([^<]+)<`).FindAllStringSubmatch(livejson.Str, -1)
t.Error(matheslist)
LiveUrl := "https://www.openrec.tv/live/" + anchorId
ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true})

View File

@ -6,27 +6,27 @@ import (
)
// ISetAnchorInfo declares the setter methods for anchor-info fields.
// NOTE(review): the method list below appears twice, once with SetShowType and
// once with SetTags. That looks like the pre-change and post-change lines of a
// diff shown together; confirm against the actual file before relying on it.
type ISetAnchorInfo interface {
SetUid(int64) //
SetPlatform(string) //
SetAnchorId(string) //
SetAnchorName(string) //
SetLiveUrl(sql.NullString) //
SetChannel(sql.NullString) //
SetShowType(sql.NullString) //
SetExt(interface{}) //
SetUpdateTime(time.Time) //
SetUid(int64) //
SetPlatform(string) //
SetAnchorId(string) //
SetAnchorName(string) //
SetLiveUrl(sql.NullString) //
SetChannel(sql.NullString) //
SetTags(interface{}) //
SetExt(interface{}) //
SetUpdateTime(time.Time) //
}
// IGetAnchorInfo declares the getter methods for anchor-info fields.
// NOTE(review): the method list below appears twice, once with GetShowType and
// once with GetTags. That looks like the pre-change and post-change lines of a
// diff shown together; confirm against the actual file before relying on it.
type IGetAnchorInfo interface {
GetUid() int64 //
GetPlatform() string //
GetAnchorId() string //
GetAnchorName() string //
GetLiveUrl() sql.NullString //
GetChannel() sql.NullString //
GetShowType() sql.NullString //
GetExt() interface{} //
GetUpdateTime() time.Time //
GetUid() int64 //
GetPlatform() string //
GetAnchorId() string //
GetAnchorName() string //
GetLiveUrl() sql.NullString //
GetChannel() sql.NullString //
GetTags() interface{}
GetExt() interface{} //
GetUpdateTime() time.Time //
}
/*
@ -58,9 +58,19 @@ type AnchorInfo struct {
AnchorName string //
LiveUrl sql.NullString //
Channel sql.NullString //
ShowType sql.NullString //
Ext interface{} //
UpdateTime time.Time //
Tags interface{}
Ext interface{} //
UpdateTime time.Time //
}
// GetTags returns the value currently held in the receiver's Tags field,
// exactly as it was stored.
func (ai *AnchorInfo) GetTags() interface{} {
	tags := ai.Tags
	return tags
}
// SetTags stores the given value in the receiver's Tags field, replacing
// whatever was there before.
func (ai *AnchorInfo) SetTags(Tags interface{}) {
	ai.Tags = Tags
}
// GetUpdateTime Get return UpdateTime time.Time
@ -83,16 +93,6 @@ func (ai *AnchorInfo) SetExt(Ext interface{}) {
ai.Ext = Ext
}
// GetShowType returns the sql.NullString currently held in the receiver's
// ShowType field.
func (ai *AnchorInfo) GetShowType() sql.NullString {
	showType := ai.ShowType
	return showType
}
// SetShowType stores the given sql.NullString in the receiver's ShowType
// field, replacing the previous value.
func (ai *AnchorInfo) SetShowType(ShowType sql.NullString) {
	ai.ShowType = ShowType
}
// GetChannel Get return Channel sql.NullString
func (ai *AnchorInfo) GetChannel() sql.NullString {
return ai.Channel
@ -167,6 +167,7 @@ type IGetCollectLog interface {
GetShowStartTime() sql.NullTime //
GetShowEndTime() sql.NullTime //
GetUpdateTime() sql.NullTime //
GetTags() interface{} //
GetExt() interface{} //
GetErrorMsg() sql.NullString //
}
@ -185,6 +186,7 @@ type ISetCollectLog interface {
SetShowStartTime(sql.NullTime) //
SetShowEndTime(sql.NullTime) //
SetUpdateTime(sql.NullTime) //
SetTags(interface{}) //
SetExt(interface{}) //
SetErrorMsg(sql.NullString) //
}
@ -237,10 +239,21 @@ type CollectLog struct {
ShowStartTime sql.NullTime //
ShowEndTime sql.NullTime //
UpdateTime sql.NullTime //
Tags interface{}
Ext interface{} //
ErrorMsg sql.NullString //
}
// GetTags returns the value currently held in the receiver's Tags field,
// exactly as it was stored.
func (cl *CollectLog) GetTags() interface{} {
	tags := cl.Tags
	return tags
}
// SetTags stores the given value in the receiver's Tags field, replacing
// whatever was there before.
func (cl *CollectLog) SetTags(Tags interface{}) {
	cl.Tags = Tags
}
// GetErrorMsg Get return Error sql.NullString
func (cl *CollectLog) GetErrorMsg() sql.NullString {
return cl.ErrorMsg

View File

@ -17,7 +17,6 @@ CREATE TABLE IF NOT EXISTS `anchor_info` (
KEY `anchor_id_idx` (`anchor_id`),
KEY `anchor_name_idx` (`anchor_name`),
KEY `channel_idx` (`channel`),
KEY `show_type_idx` (`show_type`),
KEY `update_time_idx` (`update_time`)
);
@ -38,6 +37,7 @@ CREATE TABLE IF NOT EXISTS `collect_log` (
`show_start_time` timestamp NULL DEFAULT NULL,
`show_end_time` timestamp NULL DEFAULT NULL,
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`tags` json DEFAULT NULL,
`ext` json DEFAULT NULL,
`error_msg` text DEFAULT NULL,

View File

@ -198,7 +198,7 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
return uid, nil
}
result, err := tx.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, show_type, ext) values(?,?,?,?,?,?,?);", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetShowType(), isource.GetExt())
result, err := tx.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, tags, ext) values(?,?,?,?,?,?,?);", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetTags(), isource.GetExt())
log.Println(result.LastInsertId())
if err != nil {
@ -249,8 +249,8 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
// InsertCollectLog CollectLog表插入数据
func (store *ExtractorStore) InsertCollectLog(isource IGetCollectLog) error {
_, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_showing, is_error, followers, views, giver, gratuity, show_title, show_start_time, show_end_time, update_time, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsShowing(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetShowTitle(), isource.GetShowStartTime(), isource.GetShowEndTime(), isource.GetUpdateTime(), isource.GetExt(), isource.GetErrorMsg(),
_, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_showing, is_error, followers, views, giver, gratuity, show_title, show_start_time, show_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsShowing(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetShowTitle(), isource.GetShowStartTime(), isource.GetShowEndTime(), isource.GetUpdateTime(), isource.GetTags(), isource.GetExt(), isource.GetErrorMsg(),
)
store.errorAlarm(err)
return err