finish: 解析逻辑, 入库正确.

TODO: 整理代码, 让入库提取数据成为基础库.
This commit is contained in:
eson 2020-07-16 11:02:30 +08:00
parent 13ae890171
commit 51fe6f6039
3 changed files with 57 additions and 43 deletions

View File

@ -19,6 +19,20 @@ import (
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
) )
// TestCase0 reads the saved page ./test.html and logs every submatch that the
// TagButton__Button regular expression finds in it.
//
// It is an exploratory check: nothing is asserted about the matches themselves.
// The test fails only when the fixture cannot be opened or read.
func TestCase0(t *testing.T) {
	f, err := os.Open("./test.html")
	if err != nil {
		t.Fatal(err)
	}
	// Release the file handle when the test returns.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	// Each result is [full match, captured text]; the capture is 1-100
	// non-newline characters sitting right before "</a".
	matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`).FindAllStringSubmatch(string(data), -1)

	// Log instead of t.Error so that printing the matches does not mark the
	// test as failed on every run (visible with `go test -v`).
	t.Log(matheslist)
}
func TestCase1(t *testing.T) { func TestCase1(t *testing.T) {
date := "2020-07-13T18:58:24+09:00" date := "2020-07-13T18:58:24+09:00"
@ -196,7 +210,7 @@ func TestExtractor(t *testing.T) {
// t.Error(err) // t.Error(err)
// } // }
mathes := regexp.MustCompile("MovieTitle__Title.*>(.+)</h1>").FindStringSubmatch(livejson.Str) mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(livejson.Str)
if len(mathes) == 2 { if len(mathes) == 2 {
clog.SetShowTitle(sql.NullString{String: mathes[1], Valid: true}) clog.SetShowTitle(sql.NullString{String: mathes[1], Valid: true})
@ -232,8 +246,19 @@ func TestExtractor(t *testing.T) {
} }
} }
matheslist := regexp.MustCompile(`TagButton__Button.+>([^<]+)<`).FindAllStringSubmatch(livejson.Str, 0) var tags []string
t.Error(matheslist) matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`).FindAllStringSubmatch(livejson.Str, -1)
for _, m := range matheslist {
tags = append(tags, m[1])
}
t.Error(tags)
tagsBytes, err := json.Marshal(tags)
if err != nil {
log.Println(err)
}
ai.SetTags(tagsBytes)
ai.SetUpdateTime(source.GetUpdateTime())
LiveUrl := "https://www.openrec.tv/live/" + anchorId LiveUrl := "https://www.openrec.tv/live/" + anchorId
ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true}) ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true})
@ -245,6 +270,7 @@ func TestExtractor(t *testing.T) {
} }
clog.SetUid(Uid) clog.SetUid(Uid)
clog.SetTags(tagsBytes)
clog.SetGratuity(sql.NullInt64{Int64: gratuity, Valid: true}) clog.SetGratuity(sql.NullInt64{Int64: gratuity, Valid: true})
clog.SetPlatform(string(intimate.Popenrec)) clog.SetPlatform(string(intimate.Popenrec))
clog.SetFollowers(sql.NullInt64{Int64: int64(followersInt), Valid: true}) clog.SetFollowers(sql.NullInt64{Int64: int64(followersInt), Valid: true})

View File

@ -2,7 +2,6 @@ package intimate
import ( import (
"database/sql" "database/sql"
"time"
) )
type ISetAnchorInfo interface { type ISetAnchorInfo interface {
@ -14,7 +13,7 @@ type ISetAnchorInfo interface {
SetChannel(sql.NullString) // SetChannel(sql.NullString) //
SetTags(interface{}) // SetTags(interface{}) //
SetExt(interface{}) // SetExt(interface{}) //
SetUpdateTime(time.Time) // SetUpdateTime(sql.NullTime) //
} }
type IGetAnchorInfo interface { type IGetAnchorInfo interface {
@ -26,7 +25,7 @@ type IGetAnchorInfo interface {
GetChannel() sql.NullString // GetChannel() sql.NullString //
GetTags() interface{} GetTags() interface{}
GetExt() interface{} // GetExt() interface{} //
GetUpdateTime() time.Time // GetUpdateTime() sql.NullTime //
} }
/* /*
@ -60,7 +59,7 @@ type AnchorInfo struct {
Channel sql.NullString // Channel sql.NullString //
Tags interface{} Tags interface{}
Ext interface{} // Ext interface{} //
UpdateTime time.Time // UpdateTime sql.NullTime //
} }
// GetTags Get return Tags interface{} // GetTags Get return Tags interface{}
@ -74,12 +73,12 @@ func (ai *AnchorInfo) SetTags(Tags interface{}) {
} }
// GetUpdateTime Get return UpdateTime time.Time // GetUpdateTime Get return UpdateTime sql.NullTime
func (ai *AnchorInfo) GetUpdateTime() time.Time { func (ai *AnchorInfo) GetUpdateTime() sql.NullTime {
return ai.UpdateTime return ai.UpdateTime
} }
// SetUpdateTime Set UpdateTime time.Time // SetUpdateTime Set UpdateTime sql.NullTime
func (ai *AnchorInfo) SetUpdateTime(UpdateTime time.Time) { func (ai *AnchorInfo) SetUpdateTime(UpdateTime sql.NullTime) {
ai.UpdateTime = UpdateTime ai.UpdateTime = UpdateTime
} }

View File

@ -2,7 +2,6 @@ package intimate
import ( import (
"database/sql" "database/sql"
"errors"
"log" "log"
_ "github.com/go-sql-driver/mysql" _ "github.com/go-sql-driver/mysql"
@ -112,26 +111,16 @@ func (store *SourceStore) Pop(targetType string, operators ...int32) (IUpdateSou
} }
}() }()
if row != nil {
s := &Source{} s := &Source{}
// uid, url, target_type, source, ext, operator // uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.Url, &s.TargetType, &s.Source, &s.Ext, &s.Operator, &s.UpdateTime) err = row.Scan(&s.Uid, &s.Url, &s.TargetType, &s.Source, &s.Ext, &s.Operator, &s.UpdateTime)
s.SetLastOperator(s.Operator)
if err != nil { if err != nil {
log.Println(err, targetType) log.Println(err, targetType)
_, err = tx.Exec("update "+store.table+" set error_msg = ?, operator = ? where uid = ?", err.Error(), OperatorError, s.Uid)
if err != nil {
log.Println(err)
}
return nil, err return nil, err
} }
s.SetLastOperator(s.Operator)
_, err = tx.Exec("update "+store.table+" set operator = ? where uid = ?", OperatorWait, s.Uid) _, err = tx.Exec("update "+store.table+" set operator = ? where uid = ?", OperatorWait, s.Uid)
return s, nil return s, nil
}
return nil, errors.New("TaskQueue is nil")
} }
// AnchorTable 主播表名称 // AnchorTable 主播表名称
@ -189,12 +178,11 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
log.Println(err) log.Println(err)
return 0, err return 0, err
} }
log.Println(isource.GetPlatform(), isource.GetAnchorId())
row := tx.QueryRow(selectSQL+` limit 1 for update`, isource.GetPlatform(), isource.GetAnchorId()) row := tx.QueryRow(selectSQL+` limit 1 for update`, isource.GetPlatform(), isource.GetAnchorId())
if row != nil {
var uid int64 var uid int64
row.Scan(&uid) if err = row.Scan(&uid); err == nil {
return uid, nil return uid, nil
} }
@ -213,6 +201,7 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
if err != nil { if err != nil {
log.Println(err) log.Println(err)
} }
return 0, err
} }
return result.LastInsertId() return result.LastInsertId()