TODO: extractor openrec

This commit is contained in:
eson 2020-07-10 12:05:33 +08:00
parent 2e9a803645
commit 4af5430572
17 changed files with 4117 additions and 34 deletions

View File

@ -3,6 +3,7 @@ package intimate
import ( import (
"errors" "errors"
"io/ioutil" "io/ioutil"
"log"
"os" "os"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"
@ -20,20 +21,26 @@ func init() {
// Config 配置 // Config 配置
type Config struct { type Config struct {
Database struct { Database struct {
URI string `yaml:"uri"` // "user:password@/dbname" SourceURI string `yaml:"source_uri"` // "user:password@/dbname"
ExtractorURI string `yaml:"extractor_uri"`
} `yaml:"database"` } `yaml:"database"`
} }
// Load 加载yaml/yml配置 // Load 加载yaml/yml配置
func (conifg *Config) Load() { func (conifg *Config) Load() {
configfile := "./config.yaml" var configfile string
if _, err := os.Stat(configfile); os.IsNotExist(err) { configlist := []string{"./config.yaml", "./config.yml", "../../config.yml", "../../config.yaml"}
configfile = "./config.yml" for _, configfile = range configlist {
if _, err := os.Stat(configfile); os.IsNotExist(err) { if _, err := os.Stat(configfile); err == nil {
panic(errors.New("config.yaml or config.yml is not exists")) log.Println("find config: ", configfile)
break
} }
} }
if len(configfile) <= 4 {
log.Panic(errors.New("can't find config.yaml/config.yml"))
}
f, err := os.Open(configfile) f, err := os.Open(configfile)
if err != nil { if err != nil {
panic(err) panic(err)

View File

@ -1,2 +1,3 @@
database: database:
uri: "root:@tcp(127.0.0.1:4000)/intimate_source" source_uri: "root:@tcp(127.0.0.1:4000)/intimate_source"
extractor_uri: "root:@tcp(127.0.0.1:4000)/intimate_extractor"

View File

@ -6,7 +6,7 @@ func TestConfig(t *testing.T) {
config := &Config{} config := &Config{}
config.Load() config.Load()
if config.Database.URI != "root:@tcp(127.0.0.1:4000)/intimate_source" { if config.Database.SourceURI != "root:@tcp(127.0.0.1:4000)/intimate_source" {
t.Error("error yaml loaded, ", config) t.Error("error yaml loaded, ", config)
} }
} }

View File

@ -0,0 +1,5 @@
package main
// main is currently an empty entry point: it performs no work when run.
func main() {
}

View File

@ -0,0 +1,37 @@
package main
import (
"intimate"
"os"
"testing"
"github.com/tidwall/gjson"
)
// TestExtractor pops one "openrec_user" record from the source_openrec store,
// checks that its ext payload is valid JSON, and writes every top-level JSON
// value to ./openrec_<key>.html so the pages can be inspected by hand.
//
// When Pop hands back a record, store.Restore is deferred so the record's
// operator column is reset to the value it had when it was popped.
func TestExtractor(t *testing.T) {
	store := intimate.NewSourceStore("source_openrec")
	source, err := store.Pop("openrec_user", 100)
	if source != nil {
		defer store.Restore(source)
	}
	if err != nil {
		// Without a record there is nothing to inspect; stop here instead of
		// dereferencing a nil source below.
		t.Fatal(err)
	}
	if source == nil {
		t.Fatal("Pop returned neither a source nor an error")
	}

	ext := source.GetExt()
	sdata, ok := ext.([]byte)
	if !ok {
		t.Fatalf("ext has type %T, want []byte", ext)
	}
	if !gjson.ValidBytes(sdata) {
		t.Fatal("data is not json:\n", string(sdata))
	}

	for key, value := range gjson.ParseBytes(sdata).Map() {
		// Log (rather than Error) the key: listing keys is diagnostic output
		// and must not mark the test as failed.
		t.Log(key)
		if err := dumpToFile("./openrec_"+key+".html", value.String()); err != nil {
			t.Fatal(err)
		}
	}
}

// dumpToFile creates or truncates path, writes content to it, and closes the
// file, returning the first error encountered.
func dumpToFile(path, content string) error {
	f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR|os.O_TRUNC, os.ModePerm)
	if err != nil {
		return err
	}
	if _, err := f.WriteString(content); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}

View File

@ -15,6 +15,18 @@ type Source struct {
UpdateTime time.Time // UpdateTime time.Time //
Operator int32 // Operator int32 //
ErrorMsg sql.NullString // ErrorMsg sql.NullString //
lastOperator int32
}
// GetLastOperator returns the value held in the unexported lastOperator field.
// In this package Pop stores the operator value it scanned from the row there,
// and Restore writes it back to the operator column.
func (so *Source) GetLastOperator() int32 {
return so.lastOperator
}
// SetLastOperator Set lastOperator int32
func (so *Source) SetLastOperator(lastOperator int32) {
so.lastOperator = lastOperator
} }
// GetErrorMsg Get return ErrorMsg sql.NullString // GetErrorMsg Get return ErrorMsg sql.NullString

View File

@ -24,6 +24,8 @@ type IGetSource interface {
type IUpdateSource interface { type IUpdateSource interface {
IGetSource IGetSource
GetLastOperator() int32
SetExt(ext interface{}) // SetExt(ext interface{}) //
SetUpdateTime(ut time.Time) // SetUpdateTime(ut time.Time) //
SetOperator(operator int32) // SetOperator(operator int32) //
@ -42,24 +44,24 @@ const (
OperatorError OperatorFlag = 10000 OperatorError OperatorFlag = 10000
) )
// Store 储存 // SourceStore 储存
type Store struct { type SourceStore struct {
table string table string
db *sql.DB db *sql.DB
errorCount int errorCount int
errorLimit int errorLimit int
} }
// NewStore 创建一个存储实例 // NewSourceStore 创建一个存储实例
func NewStore(table string) *Store { func NewSourceStore(table string) *SourceStore {
db, err := sql.Open("mysql", InitConfig.Database.URI) db, err := sql.Open("mysql", InitConfig.Database.SourceURI)
if err != nil { if err != nil {
panic(err) panic(err)
} }
return &Store{table: table, db: db} return &SourceStore{table: table, db: db}
} }
func (store *Store) errorAlarm(err error) { func (store *SourceStore) errorAlarm(err error) {
if err != nil { if err != nil {
log.Println("store error: ", err) log.Println("store error: ", err)
// 报警. 如果数据插入有问题 // 报警. 如果数据插入有问题
@ -74,20 +76,26 @@ func (store *Store) errorAlarm(err error) {
} }
} }
// Insert 储存数据 // Insert 插入数据
func (store *Store) Insert(isource IGetSource) { func (store *SourceStore) Insert(isource IGetSource) {
_, err := store.db.Exec("insert into `source_openrec`(url, target_type, source, ext, operator, error_msg) values(?,?,?,?,?,?)", isource.GetUrl(), isource.GetTargetType(), isource.GetSource(), isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg()) _, err := store.db.Exec("insert into `source_openrec`(url, target_type, source, ext, operator, error_msg) values(?,?,?,?,?,?)", isource.GetUrl(), isource.GetTargetType(), isource.GetSource(), isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg())
store.errorAlarm(err) store.errorAlarm(err)
} }
// Update 储存数据 // Update 更新数据
func (store *Store) Update(isource IUpdateSource) { func (store *SourceStore) Update(isource IUpdateSource) {
_, err := store.db.Exec("update "+store.table+" set ext = ?, operator = ?, error_msg = ? where uid = ?", isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg(), isource.GetUid()) _, err := store.db.Exec("update "+store.table+" set ext = ?, operator = ?, error_msg = ? where uid = ?", isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg(), isource.GetUid())
store.errorAlarm(err) store.errorAlarm(err)
} }
// Pop 储存数据 // Restore 恢复Operator数据状态
func (store *Store) Pop(targetType string, operators ...int32) (IUpdateSource, error) { func (store *SourceStore) Restore(isource IUpdateSource) {
_, err := store.db.Exec("update "+store.table+" set operator = ? where uid = ?", isource.GetLastOperator(), isource.GetUid())
store.errorAlarm(err)
}
// Pop 弹出一条未处理的数据
func (store *SourceStore) Pop(targetType string, operators ...int32) (IUpdateSource, error) {
tx, err := store.db.Begin() tx, err := store.db.Begin()
if err != nil { if err != nil {
@ -122,8 +130,11 @@ func (store *Store) Pop(targetType string, operators ...int32) (IUpdateSource, e
if row != nil { if row != nil {
s := &Source{} s := &Source{}
// uid, url, target_type, source, ext, operator // uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.Url, &s.TargetType, &s.Source, &s.Ext, &s.Operator) err = row.Scan(&s.Uid, &s.Url, &s.TargetType, &s.Source, &s.Ext, &s.Operator)
s.SetLastOperator(s.Operator)
if err != nil { if err != nil {
log.Println(err, targetType) log.Println(err, targetType)
_, err = tx.Exec("update "+store.table+" set error_msg = ?, operator = ? where uid = ?", OperatorError, s.Uid) _, err = tx.Exec("update "+store.table+" set error_msg = ?, operator = ? where uid = ?", OperatorError, s.Uid)
@ -138,3 +149,12 @@ func (store *Store) Pop(targetType string, operators ...int32) (IUpdateSource, e
return nil, errors.New("TaskQueue is nil") return nil, errors.New("TaskQueue is nil")
} }
// NewExtractorStore builds a SourceStore bound to table whose database handle
// is opened with the "mysql" driver against InitConfig.Database.ExtractorURI.
// It panics if sql.Open reports an error.
func NewExtractorStore(table string) *SourceStore {
	conn, openErr := sql.Open("mysql", InitConfig.Database.ExtractorURI)
	if openErr != nil {
		panic(openErr)
	}
	extractorStore := &SourceStore{db: conn, table: table}
	return extractorStore
}

View File

@ -26,8 +26,8 @@ func TestStoreInsertCase1(t *testing.T) {
} }
func TestStorePopCase1(t *testing.T) { func TestStorePopCase1(t *testing.T) {
store := NewStore("source_openrec") store := NewSourceStore("source_openrec")
source, err := store.Pop("openrec_ranking") source, err := store.Pop(string(TTOpenrecRanking))
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }

10
table_list.go Normal file
View File

@ -0,0 +1,10 @@
package intimate
// SourceTable is the name of a database table that holds source records.
type SourceTable string

// STOpenrec is the name of the table that holds openrec source records.
const STOpenrec SourceTable = "source_openrec"

12
target_type_list.go Normal file
View File

@ -0,0 +1,12 @@
package intimate
// TargetType names the kind of target a source record describes; its string
// form is what callers pass as the record's target_type.
type TargetType string

const (
	// TTOpenrecRanking is the TargetType of openrec ranking records.
	TTOpenrecRanking TargetType = "openrec_ranking"
	// TTOpenrecUser is the TargetType of openrec user records. It must differ
	// from TTOpenrecRanking so that popping user records does not return
	// ranking records.
	TTOpenrecUser TargetType = "openrec_user"
)

View File

@ -1 +0,0 @@
../../../config.yaml

View File

@ -11,12 +11,10 @@ import (
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
) )
var targetTypeRanking = "openrec_ranking"
var targetTypeUser = "openrec_user"
var openrecRanking *OpenrecRanking var openrecRanking *OpenrecRanking
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql // store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var store *intimate.Store = intimate.NewStore("source_openrec") var store *intimate.SourceStore = intimate.NewSourceStore(string(intimate.STOpenrec))
func init() { func init() {
@ -67,7 +65,7 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
data.SetSource(sql.NullString{String: userid, Valid: len(userid) > 0}) data.SetSource(sql.NullString{String: userid, Valid: len(userid) > 0})
data.SetUrl(wf.GetRawURL()) data.SetUrl(wf.GetRawURL())
data.SetTargetType(targetTypeUser) data.SetTargetType(string(intimate.TTOpenrecUser))
store.Insert(data) store.Insert(data)
} }
} }

View File

@ -1,2 +0,0 @@
database:
uri: "root:@tcp(127.0.0.1:4000)/intimate_source"

View File

@ -10,12 +10,10 @@ import (
"github.com/474420502/hunter" "github.com/474420502/hunter"
) )
var targetTypeUser = "openrec_user"
var targetTypeRanking = "openrec_ranking"
var oer *OpenrecExtratorRanking var oer *OpenrecExtratorRanking
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql // store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var store *intimate.Store = intimate.NewStore("source_openrec") var store *intimate.SourceStore = intimate.NewSourceStore(string(intimate.STOpenrec))
func init() { func init() {
oer = &OpenrecExtratorRanking{} oer = &OpenrecExtratorRanking{}
@ -31,7 +29,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
for { for {
source, err := store.Pop(targetTypeUser) source, err := store.Pop(string(intimate.TTOpenrecUser))
if err != nil { if err != nil {
log.Println(err) log.Println(err)
return return

1659
testfile/openrec_user.html Executable file

File diff suppressed because it is too large Load Diff

392
testfile/openrec_user_live.html Executable file

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff