finish: 解析逻辑, 入库正确.

TODO: 整理代码, 让入库提取数据成为基础库.
This commit is contained in:
eson
2020-07-16 11:02:30 +08:00
parent 13ae890171
commit 51fe6f6039
3 changed files with 57 additions and 43 deletions

View File

@@ -19,6 +19,20 @@ import (
"github.com/tidwall/gjson"
)
// TestCase0 is an exploratory test: it reads ./test.html and dumps every
// submatch found by the TagButton__Button pattern.
//
// The final t.Error is deliberate: it is used to print the matches, which
// also means this test always reports a failure when it runs to completion.
func TestCase0(t *testing.T) {
	f, err := os.Open("./test.html")
	if err != nil {
		// Fail only this test instead of panicking the whole test binary.
		t.Fatal(err)
	}
	// Release the file handle once the fixture has been read.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	// Capture the 1-100 characters between the opening tag's '>' and "</a".
	matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`).FindAllStringSubmatch(string(data), -1)
	t.Error(matheslist)
}
func TestCase1(t *testing.T) {
date := "2020-07-13T18:58:24+09:00"
@@ -196,7 +210,7 @@ func TestExtractor(t *testing.T) {
// t.Error(err)
// }
mathes := regexp.MustCompile("MovieTitle__Title.*>(.+)</h1>").FindStringSubmatch(livejson.Str)
mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(livejson.Str)
if len(mathes) == 2 {
clog.SetShowTitle(sql.NullString{String: mathes[1], Valid: true})
@@ -232,8 +246,19 @@ func TestExtractor(t *testing.T) {
}
}
matheslist := regexp.MustCompile(`TagButton__Button.+>([^<]+)<`).FindAllStringSubmatch(livejson.Str, 0)
t.Error(matheslist)
var tags []string
matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`).FindAllStringSubmatch(livejson.Str, -1)
for _, m := range matheslist {
tags = append(tags, m[1])
}
t.Error(tags)
tagsBytes, err := json.Marshal(tags)
if err != nil {
log.Println(err)
}
ai.SetTags(tagsBytes)
ai.SetUpdateTime(source.GetUpdateTime())
LiveUrl := "https://www.openrec.tv/live/" + anchorId
ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true})
@@ -245,6 +270,7 @@ func TestExtractor(t *testing.T) {
}
clog.SetUid(Uid)
clog.SetTags(tagsBytes)
clog.SetGratuity(sql.NullInt64{Int64: gratuity, Valid: true})
clog.SetPlatform(string(intimate.Popenrec))
clog.SetFollowers(sql.NullInt64{Int64: int64(followersInt), Valid: true})