TODO: extractor openrec
This commit is contained in:
parent
2e9a803645
commit
4af5430572
19
config.go
19
config.go
|
@ -3,6 +3,7 @@ package intimate
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
|
@ -20,20 +21,26 @@ func init() {
|
||||||
// Config 配置
|
// Config 配置
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Database struct {
|
Database struct {
|
||||||
URI string `yaml:"uri"` // "user:password@/dbname"
|
SourceURI string `yaml:"source_uri"` // "user:password@/dbname"
|
||||||
|
ExtractorURI string `yaml:"extractor_uri"`
|
||||||
} `yaml:"database"`
|
} `yaml:"database"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load 加载yaml/yml配置
|
// Load 加载yaml/yml配置
|
||||||
func (conifg *Config) Load() {
|
func (conifg *Config) Load() {
|
||||||
configfile := "./config.yaml"
|
var configfile string
|
||||||
if _, err := os.Stat(configfile); os.IsNotExist(err) {
|
configlist := []string{"./config.yaml", "./config.yml", "../../config.yml", "../../config.yaml"}
|
||||||
configfile = "./config.yml"
|
for _, configfile = range configlist {
|
||||||
if _, err := os.Stat(configfile); os.IsNotExist(err) {
|
if _, err := os.Stat(configfile); err == nil {
|
||||||
panic(errors.New("config.yaml or config.yml is not exists"))
|
log.Println("find config: ", configfile)
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(configfile) <= 4 {
|
||||||
|
log.Panic(errors.New("can't find config.yaml/config.yml"))
|
||||||
|
}
|
||||||
|
|
||||||
f, err := os.Open(configfile)
|
f, err := os.Open(configfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
database:
|
database:
|
||||||
uri: "root:@tcp(127.0.0.1:4000)/intimate_source"
|
source_uri: "root:@tcp(127.0.0.1:4000)/intimate_source"
|
||||||
|
extractor_uri: "root:@tcp(127.0.0.1:4000)/intimate_extractor"
|
|
@ -6,7 +6,7 @@ func TestConfig(t *testing.T) {
|
||||||
config := &Config{}
|
config := &Config{}
|
||||||
config.Load()
|
config.Load()
|
||||||
|
|
||||||
if config.Database.URI != "root:@tcp(127.0.0.1:4000)/intimate_source" {
|
if config.Database.SourceURI != "root:@tcp(127.0.0.1:4000)/intimate_source" {
|
||||||
t.Error("error yaml loaded, ", config)
|
t.Error("error yaml loaded, ", config)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
5
extractor/openrec/main.go
Normal file
5
extractor/openrec/main.go
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
// main is the entry point of the openrec extractor command.
// It is currently an empty placeholder and does nothing.
func main() {
|
||||||
|
|
||||||
|
}
|
37
extractor/openrec/openrec_test.go
Normal file
37
extractor/openrec/openrec_test.go
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"intimate"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestExtractor(t *testing.T) {
|
||||||
|
store := intimate.NewSourceStore("source_openrec")
|
||||||
|
source, err := store.Pop("openrec_user", 100)
|
||||||
|
if source != nil {
|
||||||
|
defer store.Restore(source)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
sdata := source.GetExt().([]byte)
|
||||||
|
|
||||||
|
if gjson.ValidBytes(sdata) {
|
||||||
|
result := gjson.ParseBytes(sdata)
|
||||||
|
m := result.Map()
|
||||||
|
for key := range m {
|
||||||
|
t.Error(key)
|
||||||
|
f, err := os.OpenFile("./openrec_"+key+".html", os.O_CREATE|os.O_RDWR|os.O_TRUNC, os.ModePerm)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
f.WriteString(m[key].String())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
t.Error("data is not json:\n", string(sdata))
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
12
source.go
12
source.go
|
@ -15,6 +15,18 @@ type Source struct {
|
||||||
UpdateTime time.Time //
|
UpdateTime time.Time //
|
||||||
Operator int32 //
|
Operator int32 //
|
||||||
ErrorMsg sql.NullString //
|
ErrorMsg sql.NullString //
|
||||||
|
|
||||||
|
lastOperator int32
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetLastOperator returns the value of the unexported lastOperator field.
|
||||||
|
func (so *Source) GetLastOperator() int32 {
|
||||||
|
return so.lastOperator
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetLastOperator stores lastOperator in the unexported lastOperator field.
|
||||||
|
func (so *Source) SetLastOperator(lastOperator int32) {
|
||||||
|
so.lastOperator = lastOperator
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetErrorMsg Get return ErrorMsg sql.NullString
|
// GetErrorMsg Get return ErrorMsg sql.NullString
|
||||||
|
|
46
store.go
46
store.go
|
@ -24,6 +24,8 @@ type IGetSource interface {
|
||||||
type IUpdateSource interface {
|
type IUpdateSource interface {
|
||||||
IGetSource
|
IGetSource
|
||||||
|
|
||||||
|
GetLastOperator() int32
|
||||||
|
|
||||||
SetExt(ext interface{}) //
|
SetExt(ext interface{}) //
|
||||||
SetUpdateTime(ut time.Time) //
|
SetUpdateTime(ut time.Time) //
|
||||||
SetOperator(operator int32) //
|
SetOperator(operator int32) //
|
||||||
|
@ -42,24 +44,24 @@ const (
|
||||||
OperatorError OperatorFlag = 10000
|
OperatorError OperatorFlag = 10000
|
||||||
)
|
)
|
||||||
|
|
||||||
// Store 储存
|
// SourceStore 储存
|
||||||
type Store struct {
|
type SourceStore struct {
|
||||||
table string
|
table string
|
||||||
db *sql.DB
|
db *sql.DB
|
||||||
errorCount int
|
errorCount int
|
||||||
errorLimit int
|
errorLimit int
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewStore 创建一个存储实例
|
// NewSourceStore 创建一个存储实例
|
||||||
func NewStore(table string) *Store {
|
func NewSourceStore(table string) *SourceStore {
|
||||||
db, err := sql.Open("mysql", InitConfig.Database.URI)
|
db, err := sql.Open("mysql", InitConfig.Database.SourceURI)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
return &Store{table: table, db: db}
|
return &SourceStore{table: table, db: db}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (store *Store) errorAlarm(err error) {
|
func (store *SourceStore) errorAlarm(err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println("store error: ", err)
|
log.Println("store error: ", err)
|
||||||
// 报警. 如果数据插入有问题
|
// 报警. 如果数据插入有问题
|
||||||
|
@ -74,20 +76,26 @@ func (store *Store) errorAlarm(err error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert 储存数据
|
// Insert 插入数据
|
||||||
func (store *Store) Insert(isource IGetSource) {
|
func (store *SourceStore) Insert(isource IGetSource) {
|
||||||
_, err := store.db.Exec("insert into `source_openrec`(url, target_type, source, ext, operator, error_msg) values(?,?,?,?,?,?)", isource.GetUrl(), isource.GetTargetType(), isource.GetSource(), isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg())
|
_, err := store.db.Exec("insert into `source_openrec`(url, target_type, source, ext, operator, error_msg) values(?,?,?,?,?,?)", isource.GetUrl(), isource.GetTargetType(), isource.GetSource(), isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg())
|
||||||
store.errorAlarm(err)
|
store.errorAlarm(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update 储存数据
|
// Update 更新数据
|
||||||
func (store *Store) Update(isource IUpdateSource) {
|
func (store *SourceStore) Update(isource IUpdateSource) {
|
||||||
_, err := store.db.Exec("update "+store.table+" set ext = ?, operator = ?, error_msg = ? where uid = ?", isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg(), isource.GetUid())
|
_, err := store.db.Exec("update "+store.table+" set ext = ?, operator = ?, error_msg = ? where uid = ?", isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg(), isource.GetUid())
|
||||||
store.errorAlarm(err)
|
store.errorAlarm(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pop 储存数据
|
// Restore 恢复Operator数据状态
|
||||||
func (store *Store) Pop(targetType string, operators ...int32) (IUpdateSource, error) {
|
// Restore resets the operator column of the row identified by
// isource.GetUid() in store.table to isource.GetLastOperator().
// Any error returned by the update is passed to store.errorAlarm.
func (store *SourceStore) Restore(isource IUpdateSource) {
|
||||||
|
_, err := store.db.Exec("update "+store.table+" set operator = ? where uid = ?", isource.GetLastOperator(), isource.GetUid())
|
||||||
|
store.errorAlarm(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pop 弹出一条未处理的数据
|
||||||
|
func (store *SourceStore) Pop(targetType string, operators ...int32) (IUpdateSource, error) {
|
||||||
|
|
||||||
tx, err := store.db.Begin()
|
tx, err := store.db.Begin()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -122,8 +130,11 @@ func (store *Store) Pop(targetType string, operators ...int32) (IUpdateSource, e
|
||||||
|
|
||||||
if row != nil {
|
if row != nil {
|
||||||
s := &Source{}
|
s := &Source{}
|
||||||
|
|
||||||
// uid, url, target_type, source, ext, operator
|
// uid, url, target_type, source, ext, operator
|
||||||
err = row.Scan(&s.Uid, &s.Url, &s.TargetType, &s.Source, &s.Ext, &s.Operator)
|
err = row.Scan(&s.Uid, &s.Url, &s.TargetType, &s.Source, &s.Ext, &s.Operator)
|
||||||
|
s.SetLastOperator(s.Operator)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println(err, targetType)
|
log.Println(err, targetType)
|
||||||
_, err = tx.Exec("update "+store.table+" set error_msg = ?, operator = ? where uid = ?", OperatorError, s.Uid)
|
_, err = tx.Exec("update "+store.table+" set error_msg = ?, operator = ? where uid = ?", OperatorError, s.Uid)
|
||||||
|
@ -138,3 +149,12 @@ func (store *Store) Pop(targetType string, operators ...int32) (IUpdateSource, e
|
||||||
|
|
||||||
return nil, errors.New("TaskQueue is nil")
|
return nil, errors.New("TaskQueue is nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewExtractorStore creates a store instance for the given table.
// It opens a "mysql" handle with sql.Open using
// InitConfig.Database.ExtractorURI and panics if sql.Open returns an error.
// NOTE(review): the returned type is *SourceStore, the same type
// NewSourceStore returns — confirm this is intended for the extractor database.
|
||||||
|
func NewExtractorStore(table string) *SourceStore {
|
||||||
|
db, err := sql.Open("mysql", InitConfig.Database.ExtractorURI)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return &SourceStore{table: table, db: db}
|
||||||
|
}
|
||||||
|
|
|
@ -26,8 +26,8 @@ func TestStoreInsertCase1(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestStorePopCase1(t *testing.T) {
|
func TestStorePopCase1(t *testing.T) {
|
||||||
store := NewStore("source_openrec")
|
store := NewSourceStore("source_openrec")
|
||||||
source, err := store.Pop("openrec_ranking")
|
source, err := store.Pop(string(TTOpenrecRanking))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
|
|
10
table_list.go
Normal file
10
table_list.go
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
package intimate
|
||||||
|
|
||||||
|
// SourceTable is the string type used for the list of source table names.
|
||||||
|
type SourceTable string
|
||||||
|
|
||||||
|
const (
|
||||||
|
// STOpenrec is the table name of the openrec source.
|
||||||
|
STOpenrec SourceTable = "source_openrec"
|
||||||
|
)
|
||||||
|
|
12
target_type_list.go
Normal file
12
target_type_list.go
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
package intimate
|
||||||
|
|
||||||
|
// TargetType is the string type used for the list of source target types.
type TargetType string

const (
	// TTOpenrecRanking is the TargetType name of the openrec ranking source.
	TTOpenrecRanking TargetType = "openrec_ranking"

	// TTOpenrecUser is the TargetType name of the openrec user source.
	// It must differ from TTOpenrecRanking; otherwise user records are stored
	// and popped under the ranking target type.
	TTOpenrecUser TargetType = "openrec_user"
)
|
|
@ -1 +0,0 @@
|
||||||
../../../config.yaml
|
|
|
@ -11,12 +11,10 @@ import (
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
var targetTypeRanking = "openrec_ranking"
|
|
||||||
var targetTypeUser = "openrec_user"
|
|
||||||
var openrecRanking *OpenrecRanking
|
var openrecRanking *OpenrecRanking
|
||||||
|
|
||||||
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||||
var store *intimate.Store = intimate.NewStore("source_openrec")
|
var store *intimate.SourceStore = intimate.NewSourceStore(string(intimate.STOpenrec))
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
|
||||||
|
@ -67,7 +65,7 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
|
||||||
|
|
||||||
data.SetSource(sql.NullString{String: userid, Valid: len(userid) > 0})
|
data.SetSource(sql.NullString{String: userid, Valid: len(userid) > 0})
|
||||||
data.SetUrl(wf.GetRawURL())
|
data.SetUrl(wf.GetRawURL())
|
||||||
data.SetTargetType(targetTypeUser)
|
data.SetTargetType(string(intimate.TTOpenrecUser))
|
||||||
store.Insert(data)
|
store.Insert(data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
database:
|
|
||||||
uri: "root:@tcp(127.0.0.1:4000)/intimate_source"
|
|
|
@ -10,12 +10,10 @@ import (
|
||||||
"github.com/474420502/hunter"
|
"github.com/474420502/hunter"
|
||||||
)
|
)
|
||||||
|
|
||||||
var targetTypeUser = "openrec_user"
|
|
||||||
var targetTypeRanking = "openrec_ranking"
|
|
||||||
var oer *OpenrecExtratorRanking
|
var oer *OpenrecExtratorRanking
|
||||||
|
|
||||||
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||||
var store *intimate.Store = intimate.NewStore("source_openrec")
|
var store *intimate.SourceStore = intimate.NewSourceStore(string(intimate.STOpenrec))
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
oer = &OpenrecExtratorRanking{}
|
oer = &OpenrecExtratorRanking{}
|
||||||
|
@ -31,7 +29,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
|
||||||
source, err := store.Pop(targetTypeUser)
|
source, err := store.Pop(string(intimate.TTOpenrecUser))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
return
|
return
|
||||||
|
|
1659
testfile/openrec_user.html
Executable file
1659
testfile/openrec_user.html
Executable file
File diff suppressed because it is too large
Load Diff
392
testfile/openrec_user_live.html
Executable file
392
testfile/openrec_user_live.html
Executable file
File diff suppressed because one or more lines are too long
1935
testfile/openrec_user_supporters.html
Executable file
1935
testfile/openrec_user_supporters.html
Executable file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user