Compare commits

...

39 Commits

Author SHA1 Message Date
eson
d041600663 Merge branch 'release/v0.7.0' 2020-09-17 15:00:51 +08:00
eson
750a83bc53 Merge branch 'feature/add-mirrativ' into develop 2020-09-17 15:00:10 +08:00
eson
0e19b59813 add mirrativ finish 2020-09-17 14:58:50 +08:00
eson
fb3b09fc38 mirrativ finish 90% 2020-09-16 18:53:26 +08:00
eson
ee28db4ad8 todo: profile live 2020-09-15 19:06:03 +08:00
eson
925b7d42c7 add: mirrativ init 2020-09-15 17:09:22 +08:00
eson
36c277c3ce fix: update_interval = 0 if that is error 2020-09-15 10:39:34 +08:00
eson
072468005c pkill 添加-9 强制kill 2020-09-14 15:15:41 +08:00
eson
ef7b59ce3d TODO: streamerlist 2020-09-11 18:52:04 +08:00
eson
c4d0140b42 for save 2020-09-10 17:33:52 +08:00
eson
0b8a6fd810 Merge tag 'v0.6.0' into develop
重构后最高效率版本
2020-09-09 17:27:09 +08:00
eson
4693296671 Merge branch 'release/v0.6.0' 2020-09-09 17:26:51 +08:00
eson
a848f26d65 Merge branch 'feature/autostore' into develop 2020-09-09 17:25:55 +08:00
eson
a9bb448351 重构完成 2020-09-09 17:25:36 +08:00
eson
30a6c35980 finish: twitch restructure 2020-09-09 16:49:44 +08:00
eson
b5d5ec04e7 TODO: 2020-09-08 18:54:37 +08:00
eson
fb07d61353 TODO: twitch_task2 fix 错误 2020-09-08 18:24:51 +08:00
eson
28319bf02a TODO 2020-09-07 18:52:59 +08:00
eson
2c557e3b42 todo: twitcasting 测试 store. 2. 修改streamer. tag 符合要求 2020-09-07 18:12:18 +08:00
eson
7849d09a18 完成autostore.
TODO: 替换old store 类.
2020-09-07 17:13:40 +08:00
eson
1590fa0c82 add: autostore
todo: autostore replace old store
2020-09-07 16:54:03 +08:00
eson
d3689a0c8b TODO: sql to time.Time Type 2020-09-07 15:13:50 +08:00
eson
83dcdf86c9 TODO: 2 2020-09-04 19:01:51 +08:00
eson
2236e13af7 TODO: autostore 2020-09-03 18:50:32 +08:00
eson
f86c255407 finish nimo 2020-09-03 14:17:54 +08:00
eson
be47c77e88 TODO: upgrade store method 2020-09-03 12:01:55 +08:00
eson
5912111f29 add: WaitFor method
add: Channel tag
2020-09-02 19:18:44 +08:00
eson
3b7e8e94ea nimo 插件优化
nimo 数据测试提取成功
2020-09-02 18:56:20 +08:00
a72ba6257c change: gcurl version 2020-08-30 05:31:15 +08:00
eson
5d50119825 add new nimo code
change more
2020-08-28 19:07:12 +08:00
eson
eee4e30585 fix: twistcasting tags error.
change: twistcasting, some filed type to number
2020-08-18 16:50:49 +08:00
eson
b595ea8d19 Merge tag 'v0.5.4' into develop
v0.5.4
整理了构建脚本.
删除冗余代码
2020-08-17 14:53:34 +08:00
eson
d2b05e864c Merge branch 'release/v0.5.4' 2020-08-17 14:53:10 +08:00
eson
5a8eda6f1b create ln to supervisor conf.d path 2020-08-17 14:52:21 +08:00
eson
61634751bf add supervisor conf 2020-08-17 14:50:19 +08:00
eson
9d7c2e1e54 1.修改build.sh
2.删除hunter包引用和使用
2020-08-17 14:38:00 +08:00
eson
d1298dc3f3 finish: 重构openrec 2020-08-17 13:10:29 +08:00
eson
a50c879d83 todo: openrec 重构 2020-08-14 19:26:03 +08:00
eson
b20f97f7c9 1. 修复 store_extractor Update 错误.
2. 重构前两个网站的代码架构不合理.
3. 减少冗余代码
2020-08-13 19:11:53 +08:00
64 changed files with 17549 additions and 1150 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
bin
*.log
log
screenlog.*

578
autostore.go Normal file
View File

@ -0,0 +1,578 @@
package intimate
import (
"database/sql"
"encoding/binary"
"fmt"
"log"
"reflect"
"strconv"
"time"
)
// StoreExtractorDB is the package-wide connection to the extractor database.
var StoreExtractorDB *Store
// TStreamer is the package-wide streamer table. It is initialized during config init.
var TStreamer *Table
// TClog is the package-wide collect-log table.
var TClog *Table
// TStreamerList is the package-wide streamer-list table. Its rows hold URLs
// from which lists of streamers can be found, which makes it possible to
// refresh the set of streamers dynamically.
var TStreamerList *Table
// Store wraps the *sql.DB handle that every Table executes its statements on.
//
// Structs persisted through a Store must carry struct tags: `field` names the
// database column, and a `uid` tag marking the unique-ID field must be present.
type Store struct {
// db is the underlying database handle.
db *sql.DB
}
// Table is a handle on one database table reached through a Store.
type Table struct {
store *Store // store whose db handle executes the statements
name string // table name; concatenated directly into the SQL text
setting interface{} // NOTE(review): not referenced anywhere in the visible code
// SQL templates. They contain fmt-style %s placeholders that are filled in
// with column lists / conditions when a statement is built.
updatesql string
selectsql string
insertsql string
duplicatesql string
}
// NewStore opens a database handle for uri using the "mysql" driver and wraps
// it in a Store. It panics when sql.Open reports an error.
func NewStore(uri string) *Store {
	handle, openErr := sql.Open("mysql", uri)
	if openErr != nil {
		panic(openErr)
	}
	return &Store{db: handle}
}
// Table selects the table called name and returns a handle for it.
//
// The returned Table carries the SQL templates for insert, insert-or-update,
// update and select statements with the table name already embedded; the
// remaining %s placeholders are filled in when a statement is built.
func (store *Store) Table(name string) *Table {
	return &Table{
		store:        store,
		name:         name,
		insertsql:    `INSERT INTO ` + name + `(%s) values(%s)`,
		duplicatesql: `INSERT INTO ` + name + `(%s) values(%s) ON DUPLICATE KEY UPDATE %s`,
		updatesql:    `UPDATE ` + name + ` SET %s WHERE %s = ?`,
		selectsql:    `SELECT %s FROM ` + name + ` WHERE %s `,
	}
}
// Queue is a queue backed by a MySQL table: each Pop takes one row that
// matches cond and hands it out as a task.
type Queue struct {
table *Table // table the rows are popped from
obj reflect.Type // struct type a popped row is decoded into
fieldIndex []int // struct field indexes of the `field`-tagged fields, in column order
selected string // comma-separated column list used in the SELECT
cond CondWhere // WHERE condition and its arguments
uidname string // column name of the `uid`-tagged field
uididx int // struct field index of the `uid`-tagged field
}
// CondWhere is a WHERE clause together with the arguments bound to its
// `?` placeholders.
type CondWhere struct {
// Condition is the SQL text placed after WHERE.
Condition string
// CondArgs are the values passed for the placeholders in Condition.
CondArgs []interface{}
}
// OperatorType is the type of the values stored in the `operator` flag column.
type OperatorType string
const (
// OpOK means the row is in its normal state.
OpOK OperatorType = "0"
// OpWAIT means the row is waiting to be processed.
OpWAIT OperatorType = "1000"
// OpERROR means processing the row failed.
OpERROR OperatorType = "10000"
)
// ConditionDefault returns the default queue condition for platform: rows of
// that platform whose operator is 0 and whose update_time is at least
// update_interval minutes in the past.
func ConditionDefault(platform Platform) CondWhere {
	var cond CondWhere
	cond.Condition = "platform = ? and operator = 0 and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval"
	cond.CondArgs = []interface{}{string(platform)}
	return cond
}
// Queue builds a queue on table t.
//
// obj is a sample of the struct each popped row is decoded into; either a
// struct value or a pointer to one is accepted. Every field carrying a `field`
// tag becomes a selected column, and the field that additionally carries a
// `uid` tag identifies the row (if several fields carry it, the last one
// wins). whereCondition is the caller-defined WHERE clause used by Pop.
//
// Queue panics when obj is not a struct (or pointer to struct) or when none of
// its fields has a `field` tag, because no valid SELECT could be built.
func (t *Table) Queue(obj interface{}, whereCondition CondWhere) *Queue {
	objType := reflect.TypeOf(obj)
	// Accept *T as well as T: Pop always allocates a fresh T.
	for objType != nil && objType.Kind() == reflect.Ptr {
		objType = objType.Elem()
	}
	if objType == nil || objType.Kind() != reflect.Struct {
		panic(fmt.Errorf("table: %s queue object must be a struct, got %T", t.name, obj))
	}

	q := &Queue{
		table:      t,
		cond:       whereCondition,
		obj:        objType,
		fieldIndex: []int{}, // struct indexes of the selected fields, in column order
	}
	for i := 0; i < objType.NumField(); i++ {
		field := objType.Field(i)
		fname, ok := field.Tag.Lookup("field")
		if !ok {
			continue
		}
		if len(q.fieldIndex) > 0 {
			q.selected += ","
		}
		q.selected += fname
		if _, isUID := field.Tag.Lookup("uid"); isUID {
			// uididx is the struct field index, not the column position.
			q.uididx = i
			q.uidname = fname
		}
		q.fieldIndex = append(q.fieldIndex, i)
	}
	if len(q.fieldIndex) == 0 {
		panic(fmt.Errorf("table: %s queue object %s has no `field` tagged fields", t.name, objType))
	}
	return q
}
// Pop takes one row (task) off the queue. Nested structs are not supported.
//
// Inside a single transaction it selects one row matching the queue condition
// with `FOR UPDATE`, marks that row as OpWAIT and decodes it into a new value
// of the queue's struct type. The result is a pointer to that struct.
//
// An error is returned when the queue has no `uid` column, when any statement
// fails, when committing fails, or when no row matches (the queue is empty).
// On every error the transaction is rolled back. Column conversion problems
// are logged and leave the affected field at its zero value; NULL columns are
// left at their zero value silently.
func (queue *Queue) Pop() (result interface{}, err error) {
	if queue.uidname == "" {
		return nil, fmt.Errorf("table: %s queue object has no `uid` tagged field", queue.table.name)
	}
	// uididx is an index into the struct; the scanned values are ordered like
	// fieldIndex, so translate it into a position in that slice.
	uidpos := -1
	for pos, idx := range queue.fieldIndex {
		if idx == queue.uididx {
			uidpos = pos
			break
		}
	}
	if uidpos < 0 {
		return nil, fmt.Errorf("table: %s queue uid column %s is not among the selected columns", queue.table.name, queue.uidname)
	}

	tx, err := queue.table.store.db.Begin()
	if err != nil {
		return nil, err
	}
	defer func() {
		// Commit only a fully successful pop; anything else is rolled back so
		// the row lock is released without side effects.
		if err != nil {
			if rerr := tx.Rollback(); rerr != nil {
				log.Println(rerr)
			}
			return
		}
		if cerr := tx.Commit(); cerr != nil {
			result, err = nil, cerr
		}
	}()

	selectsql := `SELECT ` + queue.selected + ` FROM ` + queue.table.name + ` WHERE ` + queue.cond.Condition + " limit 1 for update"
	rows, err := tx.Query(selectsql, queue.cond.CondArgs...)
	if err != nil {
		return nil, fmt.Errorf("table: %s queue select: %w", queue.table.name, err)
	}

	columntypes, err := rows.ColumnTypes()
	if err != nil {
		rows.Close()
		return nil, err
	}

	fields := make([]interface{}, len(queue.fieldIndex))
	for i := range fields {
		var iv interface{}
		fields[i] = &iv
	}

	if !rows.Next() {
		err = rows.Err()
		rows.Close()
		if err != nil {
			return nil, err
		}
		return nil, fmt.Errorf("table: %s queue is empty", queue.table.name)
	}
	if err = rows.Scan(fields...); err != nil {
		rows.Close()
		return nil, err
	}
	// The result set must be closed before the next statement runs on the
	// same transaction.
	if err = rows.Close(); err != nil {
		return nil, err
	}

	uidvalue := *fields[uidpos].(*interface{})
	_, err = tx.Exec("UPDATE "+queue.table.name+" SET operator = "+string(OpWAIT)+" WHERE "+queue.uidname+" = ?", uidvalue)
	if err != nil {
		return nil, err
	}

	obj := reflect.New(queue.obj).Elem()
	for i, idx := range queue.fieldIndex {
		src := *fields[i].(*interface{})
		// convert is still called for NULL so pointer fields get allocated;
		// its "NULL is unsupported" error is expected and not worth logging.
		if cerr := convert(src, obj.Field(idx), columntypes[i]); cerr != nil && src != nil {
			log.Printf("table: %s column %s: %v", queue.table.name, columntypes[i].Name(), cerr)
		}
	}
	return obj.Addr().Interface(), nil
}
// Insert inserts obj as a new row. obj must be a pointer to a struct; nested
// structs are not supported.
//
// Every field with a `field` tag becomes a column, except that a field tagged
// uid:"auto" is skipped and nil pointer/interface fields are left out of the
// statement. Non-nil pointer/interface fields contribute the value they hold.
//
// It returns an error when there is nothing to insert or when the statement
// fails.
func (t *Table) Insert(obj interface{}) error {
	ov := reflect.ValueOf(obj).Elem()
	ot := ov.Type()

	fieldsql := ""
	argssql := ""
	var args []interface{}
	for i := 0; i < ov.NumField(); i++ {
		ftype := ot.Field(i)
		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}
		if flag, isUID := ftype.Tag.Lookup("uid"); isUID && flag == "auto" {
			// The database generates this value.
			continue
		}

		field := ov.Field(i)
		var value interface{}
		switch field.Kind() {
		case reflect.Ptr, reflect.Interface:
			if field.IsNil() {
				continue
			}
			value = field.Elem().Interface()
		default:
			value = field.Interface()
		}
		args = append(args, value)
		fieldsql += fname + ","
		argssql += "?,"
	}
	if len(args) == 0 {
		// Without this guard the trailing-comma trim below would panic.
		return fmt.Errorf("insert into %s: %T has no insertable `field` tagged values", t.name, obj)
	}

	ssql := fmt.Sprintf(t.insertsql, fieldsql[:len(fieldsql)-1], argssql[:len(argssql)-1])
	_, err := t.store.db.Exec(ssql, args...)
	return err
}
// DUpdate describes one assignment of the ON DUPLICATE KEY UPDATE clause
// built by InsertOrUpdate.
type DUpdate struct {
// Field is the column name; it corresponds to the `field` struct tag.
Field string
// Value is the value assigned on a duplicate key. When it is nil,
// InsertOrUpdate substitutes the value taken from the struct field whose
// `field` tag equals Field.
Value interface{}
}
// InsertOrUpdate inserts obj and, when the row collides with an existing key,
// applies updates instead (INSERT ... ON DUPLICATE KEY UPDATE). obj must be a
// pointer to a struct; nested structs are not supported.
//
// Every field with a `field` tag becomes a column; nil pointer/interface
// fields are left out. An update whose Value is nil takes the value inserted
// for the column of the same name; if that column was left out, the update
// assigns NULL. Updates without a Value are emitted first, followed by the
// ones with an explicit Value.
//
// It returns an error when no updates are given, when there is nothing to
// insert, or when the statement fails. The caller's updates are not modified.
func (t *Table) InsertOrUpdate(obj interface{}, updates ...DUpdate) error {
	if len(updates) == 0 {
		// "ON DUPLICATE KEY UPDATE" with an empty assignment list is not valid SQL.
		return fmt.Errorf("insert or update %s: at least one DUpdate is required", t.name)
	}

	ov := reflect.ValueOf(obj).Elem()
	ot := ov.Type()

	// Work on a private copy and take the address of each element: taking the
	// address of the range variable would make every entry alias the same
	// DUpdate on toolchains older than Go 1.22.
	pending := make([]DUpdate, len(updates))
	copy(pending, updates)
	var sourceUpdate, otherUpdate []*DUpdate
	for i := range pending {
		u := &pending[i]
		if u.Value == nil {
			sourceUpdate = append(sourceUpdate, u)
		} else {
			otherUpdate = append(otherUpdate, u)
		}
	}

	fieldsql := ""
	argssql := ""
	var args []interface{}
	for i := 0; i < ov.NumField(); i++ {
		fname, ok := ot.Field(i).Tag.Lookup("field")
		if !ok {
			continue
		}

		field := ov.Field(i)
		var value interface{}
		switch field.Kind() {
		case reflect.Ptr, reflect.Interface:
			if field.IsNil() {
				continue
			}
			value = field.Elem().Interface()
		default:
			value = field.Interface()
		}
		args = append(args, value)
		fieldsql += fname + ","
		argssql += "?,"

		// Only a value that was really inserted may be reused by an update.
		for _, u := range sourceUpdate {
			if u.Field == fname {
				u.Value = value
			}
		}
	}
	if len(args) == 0 {
		return fmt.Errorf("insert or update %s: %T has no insertable `field` tagged values", t.name, obj)
	}

	duplicateSet := ""
	for _, u := range sourceUpdate {
		duplicateSet += u.Field + " = ?,"
		args = append(args, u.Value)
	}
	for _, u := range otherUpdate {
		duplicateSet += u.Field + " = ?,"
		args = append(args, u.Value)
	}

	ssql := fmt.Sprintf(t.duplicatesql, fieldsql[:len(fieldsql)-1], argssql[:len(argssql)-1], duplicateSet[:len(duplicateSet)-1])
	_, err := t.store.db.Exec(ssql, args...)
	return err
}
// InsertRetAutoID inserts obj as a new row and returns the auto-generated id
// reported by the database. obj must be a pointer to a struct; nested structs
// are not supported.
//
// Every field with a `field` tag becomes a column, except that a field tagged
// uid:"auto" is skipped and nil pointer/interface fields are left out of the
// statement.
//
// It returns an error when there is nothing to insert, when the statement
// fails, or when the last insert id cannot be obtained.
func (t *Table) InsertRetAutoID(obj interface{}) (int64, error) {
	ov := reflect.ValueOf(obj).Elem()
	ot := ov.Type()

	fieldsql := ""
	argssql := ""
	var args []interface{}
	for i := 0; i < ov.NumField(); i++ {
		ftype := ot.Field(i)
		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}
		if flag, isUID := ftype.Tag.Lookup("uid"); isUID && flag == "auto" {
			// The database generates this value.
			continue
		}

		field := ov.Field(i)
		var value interface{}
		switch field.Kind() {
		case reflect.Ptr, reflect.Interface:
			if field.IsNil() {
				continue
			}
			value = field.Elem().Interface()
		default:
			value = field.Interface()
		}
		args = append(args, value)
		fieldsql += fname + ","
		argssql += "?,"
	}
	if len(args) == 0 {
		// Without this guard the trailing-comma trim below would panic.
		return 0, fmt.Errorf("insert into %s: %T has no insertable `field` tagged values", t.name, obj)
	}

	ssql := fmt.Sprintf(t.insertsql, fieldsql[:len(fieldsql)-1], argssql[:len(argssql)-1])
	result, err := t.store.db.Exec(ssql, args...)
	if err != nil {
		return 0, err
	}
	return result.LastInsertId()
}
// Update writes the struct obj back to the row identified by its `uid` field.
// obj must be a pointer to a struct.
//
// Every field with a `field` tag other than the uid field is written; nil
// pointer/interface fields are left untouched in the database.
//
// It panics when more than one field carries the `uid` tag or when no usable
// uid is present. It returns an error when there is no column to update or
// when the statement fails.
func (t *Table) Update(obj interface{}) error {
	ov := reflect.ValueOf(obj).Elem()
	ot := ov.Type()

	var (
		uidname  string
		uidvalue interface{}
		hasUID   bool
		fieldsql string
		args     []interface{}
	)
	for i := 0; i < ov.NumField(); i++ {
		ftype := ot.Field(i)
		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}
		field := ov.Field(i)

		if _, isUID := ftype.Tag.Lookup("uid"); isUID {
			if hasUID {
				panic(fmt.Errorf("uid must unique, %s and %s", uidname, fname))
			}
			hasUID = true
			uidname = fname
			uidvalue = field.Interface()
			continue
		}

		switch field.Kind() {
		case reflect.Ptr, reflect.Interface:
			if field.IsNil() {
				continue
			}
			args = append(args, field.Elem().Interface())
		default:
			args = append(args, field.Interface())
		}
		fieldsql += fname + " = ?,"
	}
	if !hasUID || uidvalue == nil {
		panic(fmt.Errorf("update must contain `uid` tag"))
	}
	if len(args) == 0 {
		// Without this guard the trailing-comma trim below would panic.
		return fmt.Errorf("update %s: %T has no updatable `field` tagged values", t.name, obj)
	}

	usql := fmt.Sprintf(t.updatesql, fieldsql[:len(fieldsql)-1], uidname)
	args = append(args, uidvalue)
	_, err := t.store.db.Exec(usql, args...)
	return err
}
// UpdateError marks the row identified by obj's `uid` field as failed: it sets
// operator to 10000 (the value of OpERROR) and stores err's message in
// error_msg. obj must be a pointer to a struct. A nil err stores NULL as the
// message.
//
// It panics when more than one field carries the `uid` tag, when no usable uid
// is present, or when the database statement fails.
func (t *Table) UpdateError(obj interface{}, err error) {
	ov := reflect.ValueOf(obj).Elem()
	ot := ov.Type()

	var (
		uidname  string
		uidvalue interface{}
		hasUID   bool
	)
	for i := 0; i < ov.NumField(); i++ {
		ftype := ot.Field(i)
		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}
		if _, isUID := ftype.Tag.Lookup("uid"); !isUID {
			continue
		}
		// Keep scanning after the first hit so a duplicated uid tag is
		// detected, as Update does.
		if hasUID {
			panic(fmt.Errorf("uid must unique, %s and %s", uidname, fname))
		}
		hasUID = true
		uidname = fname
		uidvalue = ov.Field(i).Interface()
	}
	if !hasUID || uidvalue == nil {
		// An empty column name would produce malformed SQL.
		panic(fmt.Errorf("update must contain `uid` tag"))
	}

	var msg sql.NullString
	if err != nil {
		msg = sql.NullString{String: err.Error(), Valid: true}
	}

	_, dberr := t.store.db.Exec("update "+t.name+" set operator = ?, error_msg = ? where "+uidname+" = ?", 10000, msg, uidvalue)
	if dberr != nil {
		// email tell owner to deal with
		panic(dberr)
	}
}
// assign stores the driver value src into field according to field's kind.
//
// Integer, unsigned and float kinds are parsed from the textual form of src
// using the bit size of the field's type; string kinds receive the textual
// form of src. Other kinds are left untouched.
//
// The boolean result is true only when field is of interface kind: nothing has
// been stored and the caller is expected to store a value of its own default
// type. An error is returned when src cannot be parsed as the numeric kind.
func assign(field reflect.Value, src interface{}) (bool, error) {
	switch field.Kind() {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		s := asString(src)
		i64, err := strconv.ParseInt(s, 10, field.Type().Bits())
		if err != nil {
			err = strconvErr(err)
			return false, fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
		}
		field.SetInt(i64)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		s := asString(src)
		u64, err := strconv.ParseUint(s, 10, field.Type().Bits())
		if err != nil {
			err = strconvErr(err)
			return false, fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
		}
		field.SetUint(u64)
	case reflect.Float32, reflect.Float64:
		s := asString(src)
		f64, err := strconv.ParseFloat(s, field.Type().Bits())
		if err != nil {
			err = strconvErr(err)
			return false, fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
		}
		field.SetFloat(f64)
	case reflect.String:
		// asString accepts []byte as well as string and numeric driver values,
		// where a bare src.([]byte) assertion would panic.
		field.SetString(asString(src))
	case reflect.Interface:
		return true, nil
	}
	return false, nil
}
// convert stores the scanned driver value src into the struct field field,
// choosing the conversion from the column's database type name.
//
// A pointer field is first given a freshly allocated element, and the element
// becomes the conversion target; this happens even when src is NULL. A NULL
// src then yields an error and leaves the target at its zero value.
//
// Numeric, text and binary columns go through assign; when the target is of
// interface kind it receives an int64, float64, string or the raw driver value
// respectively. BIT columns are decoded as a big-endian integer. Date and time
// columns are stored as time.Time, or formatted with RFC3339Nano for string
// and []byte targets; when the driver did not deliver a time.Time the raw
// value is stored through assign instead.
//
// Finally, when the target's address implements sql.Scanner, src is handed to
// its Scan method as well.
func convert(src interface{}, field reflect.Value, columntype *sql.ColumnType) error {
	if field.Kind() == reflect.Ptr {
		// Allocate the element and convert into it.
		field.Set(reflect.New(field.Type().Elem()))
		field = field.Elem()
	}

	if src == nil {
		return fmt.Errorf("converting NULL to %s is unsupported", field.Kind())
	}

	switch columntype.DatabaseTypeName() {
	case "TINYINT", "SMALLINT", "MEDIUMINT", "INT", "BIGINT":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			s := asString(src)
			i64, err := strconv.ParseInt(s, 10, 64)
			if err != nil {
				err = strconvErr(err)
				return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
			}
			field.Set(reflect.ValueOf(i64))
		}
	case "FLOAT", "DOUBLE", "DECIMAL":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			s := asString(src)
			f64, err := strconv.ParseFloat(s, 64)
			if err != nil {
				err = strconvErr(err)
				return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
			}
			field.Set(reflect.ValueOf(f64))
		}
	case "BINARY", "VARBINARY", "TINYBLOB", "BLOB", "MEDIUMBLOB", "LONGBLOB", "JSON":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			// src already holds the driver's []byte; storing it as is avoids
			// a panicking type assertion for other driver value types.
			field.Set(reflect.ValueOf(src))
		}
	case "CHAR", "VARCHAR", "TINYTEXT", "TEXT", "MEDIUMTEXT", "LONGTEXT":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			field.Set(reflect.ValueOf(asString(src)))
		}
	case "BIT":
		raw, ok := src.([]byte)
		if !ok {
			return fmt.Errorf("converting driver.Value type %T to a %s: BIT value must be []byte", src, field.Kind())
		}
		if len(raw) > 8 {
			return fmt.Errorf("converting BIT value of %d bytes to a %s: at most 8 bytes are supported", len(raw), field.Kind())
		}
		// BIT values arrive most significant byte first, so right-align them
		// in the buffer and read it big-endian.
		var bits [8]byte
		copy(bits[8-len(raw):], raw)
		u64 := binary.BigEndian.Uint64(bits[:])
		switch field.Kind() {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			field.SetInt(int64(u64))
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			field.SetUint(u64)
		case reflect.Interface:
			field.Set(reflect.ValueOf(u64))
		}
	case "YEAR", "TIME", "DATE", "DATETIME", "TIMESTAMP":
		ts, isTime := src.(time.Time)
		if !isTime {
			// The driver delivered the raw value (no time parsing for this
			// column type or connection); store it like any other column.
			isdefault, err := assign(field, src)
			if err != nil {
				return err
			}
			if isdefault {
				field.Set(reflect.ValueOf(src))
			}
		} else if field.Kind() == reflect.Interface {
			if reflect.TypeOf(ts).AssignableTo(field.Type()) {
				field.Set(reflect.ValueOf(ts))
			}
		} else {
			switch field.Interface().(type) {
			case time.Time:
				field.Set(reflect.ValueOf(ts))
			case string:
				field.SetString(ts.Format(time.RFC3339Nano))
			case []byte:
				field.SetBytes([]byte(ts.Format(time.RFC3339Nano)))
			default:
			}
		}
	}

	// Types such as sql.NullString take the raw value through their own Scan.
	if field.CanAddr() {
		if iscan, ok := field.Addr().Interface().(sql.Scanner); ok {
			if err := iscan.Scan(src); err != nil {
				return err
			}
		}
	}
	return nil
}

57
autostore_test.go Normal file
View File

@ -0,0 +1,57 @@
package intimate
import (
"database/sql"
"encoding/json"
"testing"
"time"
)
// TestAutoStore writes one streamer row through InsertOrUpdate, asking for the
// userid column to be refreshed when the key already exists. It needs a
// database reachable at the hard-coded URI.
func TestAutoStore(t *testing.T) {
	const uri = "root:@tcp(127.0.0.1:4000)/test?parseTime=true&loc=Local&charset=utf8mb4&collation=utf8mb4_unicode_ci"
	store := NewStore(uri)

	// JSON payload stored in the tag column.
	payload := map[string]interface{}{
		"json": true,
		"name": "test",
	}
	btag, err := json.Marshal(payload)
	if err != nil {
		t.Error(err)
	}

	now := time.Now()
	streamer := &TSreamer{
		Uid:        1,
		UserID:     &sql.NullString{String: "xixi", Valid: true},
		Name:       "streamer",
		Operator:   0,
		Bit:        0b11,
		Iface:      btag,
		UpdateTime: &now,
	}

	if err = store.Table("streamer").InsertOrUpdate(streamer, DUpdate{Field: "userid"}); err != nil {
		t.Error(err)
	}
}
// TSreamer is the row struct used by TestAutoStore. Each `field` tag names the
// column the struct field maps to; Uid carries the `uid` tag with the value
// "auto". The pointer and interface fields are left out of a statement when
// they are nil.
type TSreamer struct {
Uid int `field:"uid" uid:"auto"`
Name interface{} `field:"name"`
UserID *sql.NullString `field:"userid"`
Ext *sql.NullString `field:"ext"`
Iface interface{} `field:"tag"`
Bit uint64 `field:"bit"`
Operator int `field:"operator"`
UpdateTime *time.Time `field:"update_time"`
}

View File

@ -7,13 +7,17 @@ src=`pwd`
for path in `ls -d $source_tasks`
do
echo $path
cd $path && go build
projectname=${path##*/}
projectworkspace=$src/bin/$projectname
cd $path && mkdir $projectworkspace -p && go build -o $projectworkspace/$projectname
cd $src
done
for path in `ls -d $extractor_tasks`
do
echo $path
cd $path && go build
projectname=${path##*/}
projectworkspace=$src/bin/$projectname
cd $path && mkdir $projectworkspace -p && go build -o $projectworkspace/$projectname
cd $src
done
done

View File

@ -18,6 +18,18 @@ func init() {
// storeOpenrec = NewStore()
log.SetFlags(log.Llongfile | log.Ltime)
// StoreExtractorDB 全局的Extractor DB 库链接
StoreExtractorDB = NewStore(InitConfig.Database.ExtractorURI)
// TStreamer 全局的Streamer
TStreamer = StoreExtractorDB.Table("streamer")
// TClog 全局的Clog
TClog = StoreExtractorDB.Table("collect_log")
// TStreamerList 全局的streamer list 这个表存的url. 进去可以找到主播的列表. 便于动态更新
TStreamerList = StoreExtractorDB.Table("streamer_list")
}
// Config 配置

376
convert.go Normal file
View File

@ -0,0 +1,376 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Type conversions for Scan.
package intimate
import (
"database/sql/driver"
"errors"
"fmt"
"reflect"
"strconv"
)
// errNilPtr reports that a nil destination pointer was supplied.
// NOTE(review): in the visible part of this file it is referenced only from
// commented-out code.
var errNilPtr = errors.New("destination pointer is nil") // embedded in descriptive error
// convertAssignRows copies to dest the value in src, converting it if possible.
// An error is returned if the copy would result in loss of information.
// dest should be a pointer type. If rows is passed in, the rows will
// be used as the parent for any cursor values converted from a
// driver.Rows to a *Rows.
// func convertAssignRows(dest, src interface{}, rows *sql.Rows) error {
// // Common cases, without reflect.
// switch s := src.(type) {
// case string:
// switch d := dest.(type) {
// case *string:
// if d == nil {
// return errNilPtr
// }
// *d = s
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = []byte(s)
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = append((*d)[:0], s...)
// return nil
// }
// case []byte:
// switch d := dest.(type) {
// case *string:
// if d == nil {
// return errNilPtr
// }
// *d = string(s)
// return nil
// case *interface{}:
// if d == nil {
// return errNilPtr
// }
// *d = cloneBytes(s)
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = cloneBytes(s)
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = s
// return nil
// }
// case time.Time:
// switch d := dest.(type) {
// case *time.Time:
// *d = s
// return nil
// case *string:
// *d = s.Format(time.RFC3339Nano)
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = []byte(s.Format(time.RFC3339Nano))
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = s.AppendFormat((*d)[:0], time.RFC3339Nano)
// return nil
// }
// case decimalDecompose:
// switch d := dest.(type) {
// case decimalCompose:
// return d.Compose(s.Decompose(nil))
// }
// case nil:
// switch d := dest.(type) {
// case *interface{}:
// if d == nil {
// return errNilPtr
// }
// *d = nil
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = nil
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = nil
// return nil
// }
// // The driver is returning a cursor the client may iterate over.
// }
// var sv reflect.Value
// switch d := dest.(type) {
// case *string:
// sv = reflect.ValueOf(src)
// switch sv.Kind() {
// case reflect.Bool,
// reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
// reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
// reflect.Float32, reflect.Float64:
// *d = asString(src)
// return nil
// }
// case *[]byte:
// sv = reflect.ValueOf(src)
// if b, ok := asBytes(nil, sv); ok {
// *d = b
// return nil
// }
// case *sql.RawBytes:
// sv = reflect.ValueOf(src)
// if b, ok := asBytes([]byte(*d)[:0], sv); ok {
// *d = sql.RawBytes(b)
// return nil
// }
// case *bool:
// bv, err := driver.Bool.ConvertValue(src)
// if err == nil {
// *d = bv.(bool)
// }
// return err
// case *interface{}:
// *d = src
// return nil
// }
// if scanner, ok := dest.(sql.Scanner); ok {
// return scanner.Scan(src)
// }
// dpv := reflect.ValueOf(dest)
// if dpv.Kind() != reflect.Ptr {
// return errors.New("destination not a pointer")
// }
// if dpv.IsNil() {
// return errNilPtr
// }
// if !sv.IsValid() {
// sv = reflect.ValueOf(src)
// }
// dv := reflect.Indirect(dpv)
// if sv.IsValid() && sv.Type().AssignableTo(dv.Type()) {
// switch b := src.(type) {
// case []byte:
// dv.Set(reflect.ValueOf(cloneBytes(b)))
// default:
// dv.Set(sv)
// }
// return nil
// }
// if dv.Kind() == sv.Kind() && sv.Type().ConvertibleTo(dv.Type()) {
// dv.Set(sv.Convert(dv.Type()))
// return nil
// }
// // The following conversions use a string value as an intermediate representation
// // to convert between various numeric types.
// //
// // This also allows scanning into user defined types such as "type Int int64".
// // For symmetry, also check for string destination types.
// switch dv.Kind() {
// case reflect.Ptr:
// if src == nil {
// dv.Set(reflect.Zero(dv.Type()))
// return nil
// }
// dv.Set(reflect.New(dv.Type().Elem()))
// return convertAssignRows(dv.Interface(), src, rows)
// case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// s := asString(src)
// i64, err := strconv.ParseInt(s, 10, dv.Type().Bits())
// if err != nil {
// err = strconvErr(err)
// return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, dv.Kind(), err)
// }
// dv.SetInt(i64)
// return nil
// case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// s := asString(src)
// u64, err := strconv.ParseUint(s, 10, dv.Type().Bits())
// if err != nil {
// err = strconvErr(err)
// return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, dv.Kind(), err)
// }
// dv.SetUint(u64)
// return nil
// case reflect.Float32, reflect.Float64:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// s := asString(src)
// f64, err := strconv.ParseFloat(s, dv.Type().Bits())
// if err != nil {
// err = strconvErr(err)
// return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, dv.Kind(), err)
// }
// dv.SetFloat(f64)
// return nil
// case reflect.String:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// switch v := src.(type) {
// case string:
// dv.SetString(v)
// return nil
// case []byte:
// dv.SetString(string(v))
// return nil
// }
// }
// return fmt.Errorf("unsupported Scan, storing driver.Value type %T into type %T", src, dest)
// }
// strconvErr unwraps a *strconv.NumError to the underlying error it carries.
// Any other error (including nil) is returned unchanged.
func strconvErr(err error) error {
	numErr, isNumErr := err.(*strconv.NumError)
	if !isNumErr {
		return err
	}
	return numErr.Err
}
// cloneBytes returns an independent copy of b. A nil slice stays nil; an empty
// non-nil slice yields an empty non-nil slice.
func cloneBytes(b []byte) []byte {
	if b == nil {
		return nil
	}
	return append(make([]byte, 0, len(b)), b...)
}
// asString renders src as text. Strings and byte slices are returned as they
// are; integer, unsigned, float and bool kinds (including named types built on
// them) are formatted with strconv; everything else falls back to the %v
// formatting of fmt.
func asString(src interface{}) string {
	if text, isText := src.(string); isText {
		return text
	}
	if raw, isRaw := src.([]byte); isRaw {
		return string(raw)
	}

	val := reflect.ValueOf(src)
	switch val.Kind() {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return strconv.FormatInt(val.Int(), 10)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return strconv.FormatUint(val.Uint(), 10)
	case reflect.Float32, reflect.Float64:
		// The bit size of the type (32 or 64) selects the shortest exact form.
		return strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits())
	case reflect.Bool:
		return strconv.FormatBool(val.Bool())
	default:
		return fmt.Sprintf("%v", src)
	}
}
// asBytes appends the textual form of rv to buf and returns the extended
// slice with ok set to true. Integer, unsigned, float, bool and string kinds
// are supported; for any other kind it returns (nil, false).
func asBytes(buf []byte, rv reflect.Value) (b []byte, ok bool) {
	switch rv.Kind() {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		b = strconv.AppendInt(buf, rv.Int(), 10)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		b = strconv.AppendUint(buf, rv.Uint(), 10)
	case reflect.Float32, reflect.Float64:
		// The bit size of the type (32 or 64) selects the shortest exact form.
		b = strconv.AppendFloat(buf, rv.Float(), 'g', -1, rv.Type().Bits())
	case reflect.Bool:
		b = strconv.AppendBool(buf, rv.Bool())
	case reflect.String:
		b = append(buf, rv.String()...)
	default:
		return nil, false
	}
	return b, true
}
// valuerReflectType is the reflect.Type of the driver.Valuer interface.
var valuerReflectType = reflect.TypeOf((*driver.Valuer)(nil)).Elem()

// callValuerValue returns vr.Value(), with one exception: when vr is a nil
// pointer whose element type implements driver.Valuer (so the method the
// pointer exposes is the auto-generated wrapper, which would panic on a nil
// receiver), it returns (nil, nil) instead. Issue 8415.
//
// This lets people implement driver.Valuer on value types and still use nil
// pointers to those types to mean nil/NULL, just like string/*string.
//
// This function is mirrored in the database/sql/driver package.
func callValuerValue(vr driver.Valuer) (v driver.Value, err error) {
	rv := reflect.ValueOf(vr)
	if rv.Kind() != reflect.Ptr || !rv.IsNil() {
		return vr.Value()
	}
	if !rv.Type().Elem().Implements(valuerReflectType) {
		return vr.Value()
	}
	return nil, nil
}
// decimal composes or decomposes a decimal value to and from individual parts.
// There are four parts: a boolean negative flag, a form byte with three possible states
// (finite=0, infinite=1, NaN=2), a base-2 big-endian integer
// coefficient (also known as a significand) as a []byte, and an int32 exponent.
// These are composed into a final value as "decimal = (neg) (form=finite) coefficient * 10 ^ exponent".
// A zero length coefficient is a zero value.
// The big-endian integer coefficient stores the most significant byte first (at coefficient[0]).
// If the form is not finite the coefficient and exponent should be ignored.
// The negative parameter may be set to true for any form, although implementations are not required
// to respect the negative parameter in the non-finite form.
//
// Implementations may choose to set the negative parameter to true on a zero or NaN value,
// but implementations that do not differentiate between negative and positive
// zero or NaN values should ignore the negative parameter without error.
// If an implementation does not support Infinity it may be converted into a NaN without error.
// If a value is set that is larger than what is supported by an implementation,
// an error must be returned.
// Implementations must return an error if a NaN or Infinity is attempted to be set while neither
// are supported.
//
// NOTE(kardianos): This is an experimental interface. See https://golang.org/issue/30870
//
// NOTE(review): no live code in the visible part of this file refers to this
// interface.
type decimal interface {
decimalDecompose
decimalCompose
}
// decimalDecompose is the reading half of the decimal interface: it exposes a
// decimal value as its individual parts.
type decimalDecompose interface {
// Decompose returns the internal decimal state in parts.
// If the provided buf has sufficient capacity, buf may be returned as the coefficient with
// the value set and length set as appropriate.
Decompose(buf []byte) (form byte, negative bool, coefficient []byte, exponent int32)
}
// decimalCompose is the writing half of the decimal interface: it sets a
// decimal value from its individual parts.
type decimalCompose interface {
// Compose sets the internal decimal value from parts. If the value cannot be
// represented then an error should be returned.
Compose(form byte, negative bool, coefficient []byte, exponent int32) error
}

Binary file not shown.

View File

@ -1,28 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDSG09DSvB03TOe
eOmQwfiCIf0wa2WRB31ewxa6i/PRgEKeJSUvIsIuaECUer2ss+J3rwSS2lDpGuiw
FnsVyZqKI/+Rcuc83YJGYg6OAzVMz6UL8YCWhXu3huTJ+V+a5iNereIC69ZERRJt
nXlWqsq6HKya+6BP9sX9CI4GTHQrnWBysAxsswhdnnnRvu+GxglWafSIzuS6OizT
1M1CmkZxNvDJhTSOR7SJlIYm2kM5/fIL53BdndF2IGAjfV1WV7AjwhTfun5cViEO
i8niQUIMY4L0AiO9grFD1g1xIYkeuVBoLxOUBzPxJwQmb64gseb9Dvt0BKLRGoou
SIOyE+KVAgMBAAECggEAI4b6J2kR0VUBEDwmVHO0K38HUstqNHSVgrNO0dLt8sAz
I44o5DhGqPW4a9L4ZS5SrkWyKonPcic6buISRIwfPVoacjQBfVWAXJnil6lbtyYK
ZMNcqLcgBRfCcpOgEq91DiKta6yIwekDFXVyCdFd78v+9ML1J+hUsLVkXJTLdP88
PGamRWVd6vGy3QMRjyM29GLPgS+/6Vrp1cptSuYNqYhlszohmu8lBvzjH9jbPh9d
GFrrd8Bs7IRCdtKZig/3fbln4JEyyOYE+gcT2jplPksB6mR/5DBIdkVbeuFwGB0+
h1/PKlprNQt7+Ei0HhHnTib7lZP8WGo4HkSi7PsAGQKBgQD1Ptho0wJiI2+6gL1O
iNsEJVKIQ2Sxdx3wI/qudphM99t6xKCpPyVI2Nd9PBf2jbZjGAaz+P/KQYxEqb6i
PRcQ+i99wCQoRfnRvUbKA4goEpKwRXmvn+499dm6D5pEuumOXGQYCmaFXuLTRN/I
BL6GNgLtoZAlLjUXaWtk8TszGQKBgQDbUf3p3HLpCjRvRDW/vA5xj+08t7xtF9uO
NilGK79uOA4VnxE2w3ioYqQ7t3I8J/0rAzGKq3tylg4QX6UpQ4b2koRr2B3cqoAk
dsRdNWAHwCNepz8hTLsZyuihzbNv2nHmoqhzjK/FcrBHx5NAM+T6OBpLzQBnbUzk
3wIcqm223QKBgQDo/IRxyY0pGMtLXoT6ODACF0b6JzRhGG37tuKvngGAlbQQRP7w
6wmL1F2cH1wQon7UU34CupqfVnhgvvZZgToJqfU2PTTcgeYc6Pl4b7SJhWOQTOCX
BZQ7jvYCulHv27aIxaNd53uQVx2cYoFKr58lN+i+QtADUoujq0YYxshb+QKBgQDW
ZOti7kZCeuBRGIu2V56C8uBFp5MBzf2polZsqx1iIFfcWPfZ4fGUIYFMgwKfvbOl
lWSbmxB9LiSnaugoU0OezBG43rYqXV4Qxy0jtKagTPoGcFWtNrX7+7e3XD8Zi6Am
hkFHW3MEAB5EvNq8Oz6OP8Os78SCVn2BimMlJJFF3QKBgQCF+aEAiBv+ivcmHUeP
2eBq9nLltPFAfXJ/p31MMQ6Jgo36DBqUeoLeyq/WfIXvwqbVbP9fANZrKoTPbI97
dilCHUoO33rafXJy6jtaggtpz14tt9soecTop0vM/rU7tGtfBe6NXg9LRl+oDJCU
37I3a9Is+2CLyAUXWCk9mLfFsQ==
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDA6O6owySvzaJ8
iv6aXRMcuk0lnNKquPNVHeraadSkBVhtaJqPAT7saOVnn9g4s/21OBV6wGmWdl5W
5+8h+F4qXrlGO8N1Tl8mmBDetqg6T6RrI9vB2Th9hjlp2PCGCEg/lX+tx25qBiDG
9uCB3pU4KMyfof1Z2jbo7aOfttgxZ1YdssenB//+31QjeZVw7wZACKaSzl6Z+WvY
pastMZ95YgTQvUq1AP93ny8AG+/bKUlE2vZpDygUJAXT8R7PGVD6T0/hUjTZk9HM
hrLShNjsBMxWWsrc7rdfW+R44pSuw5kHtIBrjRdrr8m1W0BCGTlGjq7Sk0OOZbsO
sh7x/1kvAgMBAAECggEAQYFXOTs6g/AB0uSn2+brDIA+nh0IdCxlhGuqSgxRxniC
NHSmtxSnN0PPUka291o3Noga0xnvQMZk9oG/c/uQjjaoFy9ICddY/BP69oVzzoko
vbDpeIO4aPdhG44s7nZ1ghHTz0C/P4VhL+65AaCBbxZ7lWE+VHqC1smLHDrMYsmr
PfOxk5LZwdNE/YmZuOj1zX3nQGX+wtK0EMslRbyYjg3EwXUa9d5WNnhyFxt5MD9m
w9G1rJh5uDUIS60xHUB7ET4BKJMLj+pfy8Ahf3SDFfIzinjuvKg+z1St0nTSNqI7
QP5KFNvfxqcrtYwf0wU6MGmjJ98FFiDoPY2QQfIlwQKBgQDj3eWDHA02Kr9DAde9
CuR0y4LixFhVDp7mYBnYxod3GMkPCGhSfuRcA/Tu7JgpACK/zsLi9gbzPkUAS2Y/
ly8lBFyCYlY96mRF0FgzOUCwP5Xjc9roDgLi1RVM3+Tnv1FcIa0CkoXPuWtmoVYS
W0S/Y9JKOrV2CEJvirvM3hysHwKBgQDYuipzsV+i3qx4STKXa7F0F8jGp+jr4Pgc
GqnMo5ysFePFFd4QZ1UHJ8HNmRlXsD+hAawzljVL8yCns8dZOE99xvSjgFzA/p/N
QToOgCRy6YZ5kZQA1ldZOhoVS5xTJRf7eavLwnV/8VGWxFNXdlvzdZecC8YvwEbh
+gONR9Sw8QKBgQDiBwnSs9CnGl/+J6XIbzWOZRC/v+Oe+5HDEzCQRt241W0a7EwP
CK+cVpBvL2D/ypKtkHrghwr93Za0ZBZYna62GU1qSAEkCYzNc40JwpDYQzp7FEDg
ToWuW491YeG9v5kEuqLTGmSBWHugogiuzLm031+JIhM9sZO08Pi4jtoCIQKBgEKC
XyTB+idxTlxbmjr9wadu5R6RyNkaYQm+5Pz3+MyWqx+AZnWCQIXBnagKo8xVOjGe
4cmy/wgfxJWyhirLLRncRJcofU581W2X5sksagMZxzrnghP/sy6etgJzAbZCW+nA
nQga1RQ+altqRGlC2VUNcUliE5/z0cznfM+Oi6cBAoGBAK68wjkperl8+ARQrqII
Zg41PVrsFv6XmuLWv3bqnsx81BTbnHdqmPT4pScPLZvj+6c3iX8D8wzhzbgZtS0I
h+r1yMEdzyRZMGtvdi/f/+1TuhOTr/ZeoqYHWEduo8iRYLQBN9gGEkkKBQ4d14zi
8elaha7vrG/teCrFoeFcXIPr
-----END PRIVATE KEY-----

View File

@ -0,0 +1,159 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// main polls the Mirrativ streamer queue until shutdown is requested.
//
// For every streamer popped it:
//  1. calls the profile API and records the follower count;
//  2. if the profile reports an ongoing live ("onlive.live_id"), calls the
//     live API and records views, title, start/heartbeat times, channel
//     (app_id) and the first timeline short_title as the tag;
//  3. walks the paged monthly gift ranking and sums every "point" value
//     into the gratuity figure;
//  4. inserts the CollectLog row and updates the streamer row.
func main() {
	// NOTE(review): the cookie and x-csrf-token below look like a captured
	// browser session committed to source control — confirm, rotate, and move
	// them to configuration.
	sessionstr := `
-H 'authority: www.mirrativ.com'
-H 'accept: application/json'
-H 'x-timezone: Asia/Shanghai'
-H 'x-csrf-token: F3Ojd6RBtApP6YAZzVn-9jWN1of159VxAqOQL1Zn'
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
-H 'content-type: application/json'
-H 'sec-fetch-site: same-origin'
-H 'sec-fetch-mode: cors'
-H 'sec-fetch-dest: empty'
-H 'referer: https://www.mirrativ.com/live/O5Ia4iX9c5CeZj7DFtg52Q'
-H 'accept-language: zh-CN,zh;q=0.9,ja;q=0.8'
-H 'cookie: f=A2D75F0E-D218-11EA-A042-452BF6D21CE8; _ga=GA1.2.689947597.1596081392; mr_id=kxb65LddGMZf5C28jkR_tGCZD_ZFOAepD5gfXO7eNjfPMB8EKYvU1Vg_Y29V1lsa; _gid=GA1.2.2116692650.1600139685; lang=ja'`

	ps := intimate.NewPerfectShutdown()

	// Request templates: the user_id / live_id query parameters are
	// overwritten for every streamer before the request is executed.
	gprofile := gcurl.Parse(`curl 'https://www.mirrativ.com/api/user/profile?user_id=103383701'` + sessionstr)
	tpProfile := gprofile.CreateTemporary(nil)
	tpProfileUserID := tpProfile.QueryParam("user_id")

	g := gcurl.Parse(`curl 'https://www.mirrativ.com/api/live/live?live_id=O5Ia4iX9c5CeZj7DFtg52Q'` + sessionstr)
	tpLive := g.CreateTemporary(nil)
	tpLiveID := tpLive.QueryParam("live_id")

	var lasterr error
	queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PMirrativ))

	for !ps.IsClose() {
		istreamer, err := queue.Pop()
		if err != nil {
			// Log a repeating queue error only once, then back off.
			if lasterr != err {
				lasterr = err
				log.Println(err)
			}
			ps.Wait(time.Second * 5)
			continue
		}

		now := &sql.NullTime{Time: time.Now(), Valid: true}
		streamer := istreamer.(*intimate.Streamer)
		streamer.UpdateTime = now

		userid := *streamer.UserId
		log.Println(userid)

		tpProfileUserID.StringSet(userid)
		resp, err := tpProfile.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second)
			continue
		}

		clog := &intimate.CollectLog{}
		clog.Platform = intimate.PMirrativ
		clog.UpdateTime = now
		clog.UserId = userid
		clog.StreamerUid = streamer.Uid

		profilejson := gjson.ParseBytes(resp.Content())
		if result := profilejson.Get("follower_num"); result.Exists() {
			clog.Followers = &sql.NullInt64{Int64: result.Int(), Valid: true}
		}

		// Only streamers that are currently live produce a CollectLog row.
		if result := profilejson.Get("onlive.live_id"); result.Exists() {
			liveID := result.String()
			tpLiveID.StringSet(liveID)
			resp, err = tpLive.Execute()
			if err != nil {
				log.Println(err)
				continue
			}

			livejson := gjson.ParseBytes(resp.Content())
			if result := livejson.Get("total_viewer_num"); result.Exists() {
				clog.Views = &sql.NullInt64{Int64: result.Int(), Valid: true}
			}
			if result := livejson.Get("title"); result.Exists() {
				clog.LiveTitle = &sql.NullString{String: result.String(), Valid: true}
			}
			if result := livejson.Get("started_at"); result.Exists() {
				clog.LiveStartTime = &sql.NullTime{Time: time.Unix(result.Int(), 0), Valid: true}
			}
			// The last heartbeat is stored as the (provisional) end time.
			if result := livejson.Get("heartbeated_at"); result.Exists() {
				clog.LiveEndTime = &sql.NullTime{Time: time.Unix(result.Int(), 0), Valid: true}
			}
			if result := livejson.Get("app_id"); result.Exists() {
				streamer.Channel = &sql.NullString{String: result.String(), Valid: true}
			}

			if result := livejson.Get("timeline.#.app.short_title"); result.Exists() {
				// Only the first timeline entry is used as the tag.
				if titles := result.Array(); len(titles) > 0 {
					tags := []string{titles[0].String()}
					// Marshalling a []string cannot fail, so the error is dropped.
					jtags, _ := json.Marshal(tags)
					streamer.Tags = jtags
					clog.Tags = jtags
				}
			} else {
				// NOTE(review): this dumps the response and ends the whole
				// program when a live has no timeline titles — looks like a
				// debugging leftover; confirm whether it should skip instead.
				log.Println(string(resp.Content()))
				return
			}

			if result := livejson.Get("gift_ranking_url"); result.Exists() {
				gifturl := "curl '" + result.String() + "&type=monthly&cursor='" + sessionstr
				ggift := gcurl.Parse(gifturl)
				tp := ggift.CreateTemporary(nil)
				tp.SetURLRawPath("/api/gift/ranking")
				pcursor := tp.QueryParam("cursor")

				var gratuity int64
				for {
					giftdata, err := tp.Execute()
					if err != nil {
						// Without a response there is no body to parse and no
						// next_cursor to follow; keep what was summed so far.
						log.Println(err)
						break
					}

					giftjson := gjson.ParseBytes(giftdata.Content())
					for _, rpoint := range giftjson.Get("ranking.#.point").Array() {
						gratuity += rpoint.Int()
					}

					ncursor := giftjson.Get("next_cursor").String()
					if ncursor == "" {
						break
					}
					pcursor.StringSet(ncursor)
				}
				// Example ranking page:
				// https://www.mirrativ.com/gift/ranking?live_id=O5Ia4iX9c5CeZj7DFtg52Q&obfuscated_user_id=PgIBEgc6jVc
				clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
			}

			cid, err := intimate.TClog.InsertRetAutoID(clog)
			if err != nil {
				log.Println(err)
			} else {
				// Point the streamer at the new log row only if it was stored.
				streamer.LatestLogUid = cid
			}
		}

		if err := intimate.TStreamer.Update(streamer); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Second * 2)
	}
}

View File

@ -0,0 +1,9 @@
package main
import (
"testing"
)
// TestDo invokes main directly so the program can be started with `go test`.
// NOTE(review): presumably main here is the long-running collector loop that
// talks to the live API and the database — if so this is a manual smoke run,
// not a unit test; confirm before enabling it in CI.
func TestDo(t *testing.T) {
main()
}

View File

@ -0,0 +1,125 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/extractor"
"github.com/tebeka/selenium"
)
// sstore is the source-store instance that persists raw source data; for the table layout see sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STNimo))
// // estore is the extractor-store connection instance
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()

// main is the program entry point; it only delegates to Execute.
func main() {
Execute()
}
// LiveInfo is the set of values pulled out of a Nimo live page. It is passed
// to the extractor's GetObjectByTag, which fills it from the struct tags:
// `exp` holds the XPath expression for the field and `mth` names a
// post-processing method (presumably resolved by the extractor library —
// verify against its documentation).
type LiveInfo struct {
// Followers comes from the span inside the 'nimo-rm_followers' block.
Followers int64 `exp:"//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2']" mth:"r:ExtractNumber"`
// Views comes from the span inside the 'nimo-rm_audience' block.
Views int64 `exp:"//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']" mth:"r:ExtractNumber"`
// Channel is the text of the span inside the 'nimo-rm_type' block.
Channel string `exp:"//div[contains(@class,'nimo-rm_type')]//span"`
// Gratuity collects every currency count found in 'rank-item-after3' rows;
// the caller sums the slice.
// NOTE(review): unlike Followers/Views this field has no mth tag — confirm
// the extractor converts the text to int64 on its own.
Gratuity []int64 `exp:"//div[contains(@class,'rank-item-after3')]//span[contains(@class,'nimo-currency__count')]"`
}
// Execute runs the Nimo collector loop until shutdown is requested.
//
// For every streamer popped from the queue it opens the streamer's live URL
// in Chrome, waits for the follower/audience widgets, clicks the top-3 rank
// item, extracts a LiveInfo from the page source, stores a CollectLog row and
// updates the streamer (channel, latest log uid, update time and an
// UpdateInterval chosen from the follower count).
//
// After every countlimit processed streamers the Chrome driver is closed and
// recreated. A failed CollectLog insert or streamer update panics, as before.
func Execute() {
	adriver := intimate.GetChromeDriver()
	// Close whichever driver is current when the loop ends (or panics) so the
	// browser is not left running. The closure reads adriver at exit time, so
	// it sees the most recently created driver.
	defer func() { adriver.Close() }()

	count := 0
	countlimit := 200

	wd := adriver.Webdriver
	waitfor := intimate.NewWaitFor(wd)
	ps := intimate.NewPerfectShutdown()
	queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PNimo))

	for !ps.IsClose() {
		istreamer, err := queue.Pop()
		if err != nil {
			log.Println(err)
			// Pop may fail without handing back a row; only record the error
			// when there is something to record it on.
			if istreamer != nil {
				intimate.TStreamer.UpdateError(istreamer, err)
			}
			// Back off instead of spinning on a persistently failing queue.
			ps.Wait(time.Second * 5)
			continue
		}

		streamer := istreamer.(*intimate.Streamer)
		if err := wd.Get(streamer.LiveUrl.String); err != nil {
			log.Println(streamer.Uid, err)
			intimate.TStreamer.UpdateError(streamer, err)
			continue
		}

		// Wait for the follower counter to be rendered with non-empty text,
		// then give the ranking widget up to 7 seconds to appear.
		waitfor.Default("//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2' and text() != '']", nil)
		waitfor.WaitWithTimeout("//div[contains(@class,'rank-item-top3')]", 7*time.Second, nil)

		element, err := wd.FindElement(selenium.ByXPATH, "//div[contains(@class,'rank-item-top3')]")
		if err != nil {
			log.Println(streamer.Uid, err)
		} else {
			// The click is still attempted when MoveTo fails, as before; both
			// errors are now reported.
			if err = element.MoveTo(50, 50); err != nil {
				log.Println(streamer.Uid, err)
			}
			if err = element.Click(); err != nil {
				log.Println(streamer.Uid, err)
			}
		}

		waitfor.Default("//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']", nil)

		pagesource, err := wd.PageSource()
		if err != nil {
			log.Println(streamer.Uid, err)
			intimate.TStreamer.UpdateError(streamer, err)
			continue
		}

		etor := extractor.ExtractHtmlString(pagesource)
		li, ok := etor.GetObjectByTag(LiveInfo{}).(*LiveInfo)
		if !ok || li == nil {
			// Nothing could be extracted from this page; skip the streamer
			// rather than dereferencing a nil result.
			log.Println(streamer.Uid, streamer.LiveUrl.String)
			continue
		}

		utime := sql.NullTime{Time: time.Now(), Valid: true}

		clog := &intimate.CollectLog{}
		clog.Platform = intimate.PNimo
		clog.Followers = &sql.NullInt64{Int64: li.Followers, Valid: true}
		clog.Views = &sql.NullInt64{Int64: li.Views, Valid: true}
		clog.UpdateTime = &utime
		clog.StreamerUid = streamer.Uid

		var sum int64
		for _, v := range li.Gratuity {
			sum += v
		}
		clog.Gratuity = &sql.NullInt64{Int64: sum, Valid: true}

		cuid, err := intimate.TClog.InsertRetAutoID(clog)
		if err != nil {
			panic(err)
		}

		streamer.Channel = &sql.NullString{String: li.Channel, Valid: true}
		streamer.LatestLogUid = cuid
		streamer.UpdateTime = &utime
		streamer.Operator = 0

		// The more followers a streamer has, the smaller the update interval.
		switch {
		case li.Followers <= 1000:
			streamer.UpdateInterval = 720
		case li.Followers <= 10000:
			streamer.UpdateInterval = 360
		case li.Followers <= 100000:
			streamer.UpdateInterval = 180
		case li.Followers <= 1000000:
			streamer.UpdateInterval = 90
		default:
			streamer.UpdateInterval = 60
		}

		if err = intimate.TStreamer.Update(streamer); err != nil {
			panic(err)
		}

		count++
		if count >= countlimit {
			count = 0
			adriver.Close()
			adriver = intimate.GetChromeDriver()
			// Rebind the handles to the new driver; the old ones refer to the
			// browser session that was just closed.
			wd = adriver.Webdriver
			waitfor = intimate.NewWaitFor(wd)
		}
	}
}

View File

@ -0,0 +1,9 @@
package main
import (
"testing"
)
// TestMain starts the collector via Execute so it can be launched with
// `go test`.
// NOTE(review): the name TestMain is normally reserved for the
// func TestMain(m *testing.M) hook; with a *testing.T parameter it appears to
// be run as an ordinary test — confirm, and consider a less confusing name.
// NOTE(review): presumably Execute is the collector loop in this package,
// which drives a real browser and writes to the database; treat this as a
// manual smoke run.
func TestMain(t *testing.T) {
Execute()
}

View File

@ -15,6 +15,5 @@ import (
*/
func main() {
oe := &OpenrecExtractor{}
oe.Execute()
Execute()
}

View File

@ -3,236 +3,260 @@ package main
import (
"database/sql"
"encoding/json"
"errors"
"intimate"
"log"
"os"
"os/signal"
"regexp"
"strconv"
"strings"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/extractor"
"github.com/474420502/gcurl"
"github.com/474420502/requests"
"github.com/tidwall/gjson"
)
var estore = intimate.NewStoreExtractor()
var sstore = intimate.NewStoreSource(string(intimate.STOpenrec))
// OpenrecExtractor 提取方法
type OpenrecExtractor struct {
user *intimate.ExtractorSource
userLive *intimate.ExtractorSource
supporters *intimate.ExtractorSource
//UserInfo 提取信息的结构体
type UserInfo struct {
UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"`
Followers int64 `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"`
Views int64 `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"`
}
func (oe *OpenrecExtractor) Execute() {
//UserLive 提取信息的结构体
type UserLive struct {
Title string `exp:"//h1[contains(@class,'MovieTitle__Title')]"`
LiveStartTime string `exp:"//meta[@itemprop='uploadDate']/@content"`
LiveEndTime string `exp:"//meta[@itemprop='duration']/@content"`
Tags []string `exp:"//div[contains(@class,'MovieMetaContent__TagContainer')]//a[@role ='button']"`
}
var loop int32 = 1
// Execute 执行
func Execute() {
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
ps := intimate.NewPerfectShutdown()
ses := requests.NewSession()
squeue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Popenrec))
var lasterr error = nil
for atomic.LoadInt32(&loop) > 0 {
var err error
for !ps.IsClose() {
istreamer, err := squeue.Pop()
source, err := sstore.Pop(intimate.TOpenrecUser, 0)
if err != nil {
// streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
if istreamer == nil || err != nil {
if err != lasterr {
log.Println(err, lasterr)
lasterr = err
}
time.Sleep(time.Second * 5)
time.Sleep(time.Second * 2)
continue
}
streamer := istreamer.(*intimate.Streamer)
userId := *streamer.UserId
var updateUrl map[string]string
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) // 反序列化update_url, 里面存了需要采集的url
if err != nil {
log.Println(err)
continue
}
// Check Userid
userUrl := updateUrl["user"]
log.Println(userUrl)
tp := ses.Get(userUrl) // 获取user url页面数据
resp, err := tp.Execute()
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
if err != nil {
log.Println(err)
intimate.TStreamer.UpdateError(streamer, err)
continue
}
sdata := source.Ext.([]byte)
datamap := gjson.ParseBytes(sdata).Map()
cookies := ses.GetCookies(tp.GetParsedURL())
source.Operator = int32(intimate.OperatorError)
userId := datamap["var_user_id"].String()
scurl := updateUrl["supporters"] //获取打赏者的数据
curl := gcurl.Parse(scurl)
supportersSession := curl.CreateSession()
streamer := &intimate.Streamer{}
streamer.UserId = userId
streamer.Platform = intimate.Popenrec
temporary := curl.CreateTemporary(supportersSession)
supportersSession.SetCookies(temporary.GetParsedURL(), cookies)
var supporters []string
for { // supporters 数据需要登录信息. 下面为赋值 supporters链接获取的uid token random码
htmlUser := datamap["html_user"]
oe.user = intimate.NewExtractorSource(&htmlUser)
oe.user.CreateExtractor()
supportersQuery := temporary.GetQuery()
htmlLive := datamap["html_live"]
oe.userLive = intimate.NewExtractorSource(&htmlLive)
oe.userLive.CreateExtractor()
jsonSupporters := datamap["json_supporters"]
oe.supporters = intimate.NewExtractorSource(&jsonSupporters)
clog := &intimate.CollectLog{}
// log.Println(anchorId)
oe.extractFollowers(clog)
oe.extractUserName(streamer)
oe.extractViewsAndLiveStreaming(clog)
oe.extractGiversAndGratuity(clog)
oe.extractLive(clog)
oe.extractTags(clog)
streamer.Uid = source.StreamerId.Int64
streamer.UpdateTime = source.UpdateTime
streamer.Tags = clog.Tags
clog.Platform = intimate.Popenrec
clog.UserId = userId
clog.UpdateTime = source.UpdateTime
logUid := estore.InsertClog(clog)
LiveUrl := "https://www.openrec.tv/live/" + userId
streamer.LiveUrl = sql.NullString{String: LiveUrl, Valid: true}
streamer.LatestLogUid = logUid
streamer.Operator = 0
estore.UpdateStreamer(streamer)
source.Operator = int32(intimate.OperatorExtractorOK)
sstore.UpdateOperator(source)
}
}
func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
extractor := oe.user.GetExtractor()
xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
if err != nil {
log.Println(err)
}
if !xp.NodeIter().Next() {
log.Println("不存在粉丝数")
}
followers := strings.ReplaceAll(xp.String(), ",", "")
followersInt, err := strconv.ParseInt(followers, 10, 64)
if err != nil {
log.Println(err)
}
clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true})
}
func (oe *OpenrecExtractor) extractUserName(streamer intimate.ISet) {
extractor := oe.user.GetExtractor()
xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
if err != nil {
log.Println(err)
} else {
if xp.NodeIter().Next() {
userName := xp.String()
streamer.Set("UserName", sql.NullString{String: userName, Valid: true})
}
}
}
func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
extractor := oe.user.GetExtractor()
// c-contents
xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
if err != nil {
log.Println(err)
}
if xp.NodeIter().Next() {
views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
views = strings.ReplaceAll(views, ",", "")
viewsint, err := strconv.Atoi(views)
if err != nil {
log.Println(err)
}
clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true})
clog.Set("IsLiveStreaming", true)
}
}
func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) {
// extractor := oe.user.GetExtractor()
giverjson := oe.supporters.GetSource()
var givers []interface{}
var gratuity int64 = 0
for _, v := range giverjson.Array() {
giverSource := gjson.Parse(v.String())
for _, item := range giverSource.Get("data.items").Array() {
givers = append(givers, item.Map())
gratuity += item.Get("total_yells").Int()
}
}
giversbytes, err := json.Marshal(givers)
if err != nil {
log.Println(err)
clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
} else {
clog.Set("Giver", giversbytes)
}
clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true})
}
func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) {
extractor := oe.userLive.GetExtractor()
mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(oe.userLive.GetSource().Str)
if len(mathes) == 2 {
clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true})
content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
if err != nil {
log.Println(err)
}
iter := content.NodeIter()
if iter.Next() {
tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
if err != nil {
log.Println(err)
}
// log.Println(iter.Node().NodeValue(), tm.Local())
clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true})
duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
if err != nil {
log.Println(err)
}
diter := duration.NodeIter()
if diter.Next() {
dt, err := intimate.ParseDuration(diter.Node().NodeValue())
if err != nil {
log.Println(err)
for _, cookie := range cookies {
if cookie.Name == "uuid" {
supportersQuery.Set("Uuid", cookie.Value)
continue
}
endtm := tm.Add(dt)
clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true})
if cookie.Name == "token" {
supportersQuery.Set("Token", cookie.Value)
continue
}
if cookie.Name == "random" {
supportersQuery.Set("Random", cookie.Value)
continue
}
}
supportersQuery.Set("identify_id", userId)
temporary.SetQuery(supportersQuery)
resp, err := temporary.Execute()
if err != nil {
log.Println(err)
}
supporterjson := gjson.ParseBytes(resp.Content())
supporterdata := supporterjson.Get("data") //解析supporters获取的json数据
if supporterdata.Type == gjson.Null {
break
}
supporters = append(supporters, string(resp.Content()))
temporary.QueryParam("page_number").IntAdd(1)
}
// cookies := cxt.Session().GetCookies(wf.GetParsedURL())
// ext := make(map[string]interface{})
jsonSupporters := supporters
htmlUser := string(resp.Content())
liveUrl := updateUrl["live"]
tp = ses.Get(liveUrl)
resp, err = tp.Execute()
if err != nil {
log.Println(err)
intimate.TStreamer.UpdateError(streamer, err)
continue
}
htmlLive := string(resp.Content())
// ext["var_user_id"] = userId
// streamer.Platform = intimate.Popenrec
streamer.UpdateInterval = 120
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
streamer.Operator = 0
Extractor(streamer, userId, htmlUser, htmlLive, jsonSupporters)
}
}
func Extractor(streamer *intimate.Streamer, userId string, htmlUser, htmlLive string, jsonSupporters []string) {
// sdata := source.Ext.([]byte)
// datamap := gjson.ParseBytes(sdata).Map()
// userId := datamap["var_user_id"].String()
// streamer := &intimate.Streamer{}
// streamer.UserId = &userId
// streamer.Platform = intimate.Popenrec 不需要更新字段
// htmlUser := datamap["html_user"]
userEtor := extractor.ExtractHtmlString(htmlUser)
ui, ok1 := userEtor.GetObjectByTag(UserInfo{}).(*UserInfo)
// htmlLive := datamap["html_live"]
liveEtor := extractor.ExtractHtmlString(htmlLive)
ul, ok2 := liveEtor.GetObjectByTag(UserLive{}).(*UserLive)
// jsonSupporters := datamap["json_supporters"]
clog := &intimate.CollectLog{}
if ok1 {
clog.Followers = &sql.NullInt64{Int64: ui.Followers, Valid: true}
clog.Views = &sql.NullInt64{Int64: ui.Views, Valid: true}
if ui.Views != 0 {
clog.IsLiveStreaming = true
}
streamer.UserName = &sql.NullString{String: ui.UserName, Valid: true}
// giverjson := jsonSupporters
var givers []interface{}
var gratuity int64 = 0
for _, v := range jsonSupporters {
giverSource := gjson.Parse(v)
for _, item := range giverSource.Get("data.items").Array() {
givers = append(givers, item.Map())
gratuity += item.Get("total_yells").Int()
}
}
}
}
func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) {
var tags []string
matheslist := regexp.MustCompile(`<[^>]+TagButton[^>]+>([^<]{1,100})<`).FindAllStringSubmatch(oe.userLive.GetSource().Str, -1)
for _, m := range matheslist {
tags = append(tags, m[1])
giversbytes, err := json.Marshal(givers)
if err != nil {
log.Println(err)
clog.ErrorMsg = &sql.NullString{String: err.Error(), Valid: true}
} else {
clog.Giver = giversbytes
}
clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
} else {
log.Println("UserInfo may be not exists")
intimate.TStreamer.UpdateError(streamer, errors.New("UserInfo may be not exists"))
return
}
tagsBytes, err := json.Marshal(tags)
//log.Println(ul)
if ok2 {
clog.LiveTitle = &sql.NullString{String: ul.Title, Valid: true}
startTime, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", ul.LiveStartTime, time.Local)
if err != nil {
log.Println(err)
} else {
clog.LiveStartTime = &sql.NullTime{Time: startTime.Local(), Valid: true}
duration, err := intimate.ParseDuration(ul.LiveEndTime)
if err != nil {
log.Println(err)
} else {
endTime := startTime.Add(duration)
clog.LiveEndTime = &sql.NullTime{Time: endTime.Local(), Valid: true}
}
}
if tags, err := json.Marshal(ul.Tags); err == nil {
clog.Tags = tags
} else {
log.Println("json error", ul.Tags, clog.Tags)
}
}
// streamer.Uid = source.StreamerId.Int64
// streamer.UpdateTime = &source.UpdateTime
if clog.Tags != nil {
streamer.Tags = clog.Tags
}
clog.Platform = intimate.Popenrec
clog.UserId = userId
clog.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
clog.StreamerUid = streamer.Uid
logUid, err := intimate.TClog.InsertRetAutoID(clog)
if err != nil {
log.Println(err)
return
}
clog.Set("Tags", tagsBytes)
LiveUrl := "https://www.openrec.tv/live/" + userId
streamer.LiveUrl = &sql.NullString{String: LiveUrl, Valid: true}
streamer.LatestLogUid = logUid
// streamer.Operator = 0
// log.Println(*streamer.UserId)
intimate.TStreamer.Update(streamer)
// source.Operator = int32(intimate.OperatorExtractorOK)
// sstore.UpdateOperator(source)
}

View File

@ -7,7 +7,6 @@ import (
"testing"
"time"
"github.com/474420502/hunter"
"github.com/lestrrat-go/libxml2"
)
@ -91,29 +90,6 @@ func TestCase(t *testing.T) {
t.Error(xr)
}
func TestUserName(t *testing.T) {
f, err := os.Open("test.html")
if err != nil {
panic(err)
}
data, err := ioutil.ReadAll(f)
if err != nil {
panic(err)
}
extractor := hunter.NewExtractor(data)
xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
if err != nil {
t.Error(err)
} else {
if xp.NodeIter().Next() {
userName := xp.String()
t.Error(userName)
}
}
}
func TestExtractor(t *testing.T) {
oe := &OpenrecExtractor{}
oe.Execute()
Execute()
}

View File

@ -2,9 +2,9 @@ package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"strconv"
"strings"
"time"
@ -21,8 +21,8 @@ var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
type LiveData struct {
UserName string `exp:"//span[@class='tw-live-author__info-username']" method:"Text"`
Follower string `exp:"(//span[@class='tw-user-nav-list-count'])[2]" method:"Text"`
MaxViews string `exp:"//span[@id='max_viewer_count']" method:"Text"`
Follower int64 `exp:"(//span[@class='tw-user-nav-list-count'])[2]" method:"r:ExtractNumber"`
MaxViews int64 `exp:"//span[@id='max_viewer_count']/text()" method:"r:ExtractNumber"`
LiveTitle string `exp:"//meta[@property='og:title']" method:"AttributeValue,content"`
LiveStart string `exp:"//time[@data-kind='relative']" method:"AttributeValue,datetime"`
LiveDuration string `exp:"//span[@id='updatetimer']" method:"AttributeValue,data-duration"`
@ -33,25 +33,40 @@ func main() {
ps := intimate.NewPerfectShutdown()
ses := requests.NewSession()
streamerQueue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitcasting))
var lasterr error
for !ps.IsClose() {
streamer, err := estore.Pop(intimate.Ptwitcasting)
// streamer, err := estore.Pop(intimate.Ptwitcasting)
isteamer, err := streamerQueue.Pop()
if err != nil {
log.Println(err, streamer.UserId)
if lasterr != err {
lasterr = err
log.Println(err)
}
time.Sleep(time.Minute)
continue
}
streamer.LiveUrl = sql.NullString{String: "https://twitcasting.tv/" + streamer.UserId, Valid: true}
streamer := isteamer.(*intimate.Streamer)
streamer.LiveUrl = &sql.NullString{String: "https://twitcasting.tv/" + *streamer.UserId, Valid: true}
resp, err := ses.Get(streamer.LiveUrl.String).Execute()
if err != nil {
estore.UpdateError(streamer, err)
log.Println(err, streamer.UserId)
intimate.TStreamer.UpdateError(streamer, err)
log.Println(err, *streamer.UserId)
continue
}
var ldata *LiveData
// f, _ := os.OpenFile("./twistcasting.html", os.O_CREATE|os.O_RDWR|os.O_TRUNC, os.ModePerm)
// f.Write(resp.Content())
etor := extractor.ExtractHtml(resp.Content())
ldata = etor.GetObjectByTag(LiveData{}).(*LiveData)
ldata.MaxViews = regexp.MustCompile("\\d+").FindString(ldata.MaxViews)
ildata := etor.GetObjectByTag(LiveData{})
if ildata == nil {
log.Println(streamer.LiveUrl.String)
continue
}
ldata = ildata.(*LiveData)
// ldata.MaxViews = regexp.MustCompile("\\d+").FindString(ldata.MaxViews)
coincount := 0
for i := 0; ; i++ {
@ -59,14 +74,14 @@ func main() {
giverurl := streamer.LiveUrl.String + "/backers/" + strconv.Itoa(i)
resp, err = ses.Get(giverurl).Execute()
if err != nil {
estore.UpdateError(streamer, err)
intimate.TStreamer.UpdateError(streamer, err)
log.Panic(err)
}
etor := extractor.ExtractHtml(resp.Content())
xp, err := etor.XPaths("//td[@class='tw-memorial-table-recent-point']")
if err != nil {
estore.UpdateError(streamer, err)
intimate.TStreamer.UpdateError(streamer, err)
log.Panic(err)
}
@ -90,63 +105,81 @@ func main() {
}
}
streamer.Platform = intimate.Ptwitcasting
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
streamer.UserName = sql.NullString{String: ldata.UserName, Valid: true}
streamer.Operator = 10
// streamer.UpdateInterval = 60
clog := &intimate.CollectLog{}
clog.UserId = streamer.UserId
clog.Gratuity = sql.NullInt64{Int64: int64(coincount), Valid: true}
clog.Platform = streamer.Platform
clog.UpdateTime = streamer.UpdateTime
clog.LiveTitle = sql.NullString{String: ldata.LiveTitle, Valid: true}
fl, err := intimate.ParseNumberEx(ldata.Follower)
if err == nil {
clog.Followers = sql.NullInt64{Int64: int64(fl), Valid: true}
switch {
case fl <= 100:
streamer.UpdateInterval = 360
case fl <= 1000:
streamer.UpdateInterval = 240
case fl <= 100:
streamer.UpdateInterval = 120
default:
streamer.UpdateInterval = 60
}
} else {
log.Println(err)
}
views, err := strconv.Atoi(ldata.MaxViews)
if err == nil {
clog.Views = sql.NullInt64{Int64: int64(views), Valid: true}
} else {
clog.Views = sql.NullInt64{Int64: int64(0), Valid: true}
// log.Println(err, streamer.UserId)
}
// st, err := strconv.Atoi(ldata.LiveStart)
st, err := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", ldata.LiveStart)
if err == nil {
startTime := st
clog.LiveStartTime = sql.NullTime{Time: startTime, Valid: true}
dt, err := strconv.Atoi(ldata.LiveDuration)
if time.Now().Sub(startTime) >= time.Hour*24*90 {
streamer.Operator = 5
}
if err == nil {
endTime := startTime.Add((time.Duration)(dt) * time.Millisecond)
clog.LiveEndTime = sql.NullTime{Time: endTime, Valid: true}
} else {
log.Println(err, streamer.UserId)
}
} else {
var tags []byte
tags, err = json.Marshal(ldata.Tags)
if err != nil {
log.Println(err, streamer.UserId)
}
streamer.LatestLogUid = estore.InsertClog(clog)
estore.UpdateStreamer(streamer)
streamer.Platform = intimate.Ptwitcasting
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
streamer.UserName = &sql.NullString{String: ldata.UserName, Valid: true}
streamer.Operator = 0
streamer.Tags = tags
// streamer.UpdateInterval = 60
clog := &intimate.CollectLog{}
clog.UserId = *streamer.UserId
clog.Gratuity = &sql.NullInt64{Int64: int64(coincount), Valid: true}
clog.Platform = streamer.Platform
clog.UpdateTime = streamer.UpdateTime
clog.LiveTitle = &sql.NullString{String: ldata.LiveTitle, Valid: true}
clog.Tags = tags
clog.Followers = &sql.NullInt64{Int64: int64(ldata.Follower), Valid: true}
switch {
case ldata.Follower <= 100:
streamer.UpdateInterval = 720
case ldata.Follower <= 1000:
streamer.UpdateInterval = 320
case ldata.Follower <= 10000:
streamer.UpdateInterval = 240
default:
streamer.UpdateInterval = 120
}
clog.Views = &sql.NullInt64{Int64: ldata.MaxViews, Valid: true}
if ldata.LiveStart != "" {
st, err := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", ldata.LiveStart)
if err == nil {
startTime := st
clog.LiveStartTime = &sql.NullTime{Time: startTime, Valid: true}
dt, err := strconv.Atoi(ldata.LiveDuration)
liveduration := time.Now().Sub(startTime)
switch {
case liveduration >= time.Hour*24*240:
streamer.Operator = 5
case liveduration >= time.Hour*24*60:
streamer.UpdateInterval = 60 * 24 * 30
case liveduration >= time.Hour*24*30:
streamer.UpdateInterval = 60 * 24 * 15
case liveduration >= time.Hour*24*15:
streamer.UpdateInterval = 60 * 24 * 7
case liveduration >= time.Hour*24*7:
streamer.UpdateInterval = 60 * 24 * 3
}
if err == nil {
endTime := startTime.Add((time.Duration)(dt) * time.Millisecond)
clog.LiveEndTime = &sql.NullTime{Time: endTime, Valid: true}
} else {
log.Println(err, streamer.UserId)
}
} else {
log.Println(err, streamer.UserId)
}
}
clog.StreamerUid = streamer.Uid
uid, err := intimate.TClog.InsertRetAutoID(clog)
if err != nil {
log.Println(err)
continue
}
streamer.LatestLogUid = uid
intimate.TStreamer.Update(streamer)
// estore.UpdateStreamer(streamer)
log.Println(*streamer.UserId)
}
}

View File

@ -10,7 +10,7 @@ import (
// Follower string `exp:".//span[@class='tw-user-nav-list-count']" method:"Text"`
// }
func estMain(t *testing.T) {
func TestMain(t *testing.T) {
main()
}

View File

@ -3,136 +3,326 @@ package main
import (
"database/sql"
"encoding/json"
"fmt"
"intimate"
"log"
"regexp"
"strings"
"time"
"github.com/tebeka/selenium"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func main() {
wd := intimate.GetChromeDriver(3030)
adriver := intimate.GetChromeDriver()
ps := intimate.NewPerfectShutdown()
counter := intimate.NewCounter()
counter.SetMaxLimit(200)
counter.SetMaxToDo(func(olist ...interface{}) error {
owd := olist[0].(*selenium.WebDriver)
(*owd).Close()
(*owd).Quit()
*owd = intimate.GetChromeDriver(3030)
return nil
}, &wd)
slqueue := intimate.TStreamerList.Queue(intimate.StreamerList{}, intimate.ConditionDefault(intimate.Ptwitch))
squeue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitch))
var count = 0
var countlimt = 200
var recreate = time.Now()
var lasterr error = nil
// var err error
for !ps.IsClose() {
streamer, err := estore.Pop(intimate.Ptwitch, 0)
if streamer == nil || err != nil {
if err != lasterr {
log.Println(err, lasterr)
wd := adriver.Webdriver
// sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
isl, err := slqueue.Pop()
if err != nil {
if lasterr != err {
lasterr = err
log.Println(err)
}
istreamer, err := squeue.Pop()
if err != nil {
if lasterr != err {
lasterr = err
log.Println(err)
ps.Wait(time.Minute)
continue
}
}
streamer := istreamer.(*intimate.Streamer)
Extractor(wd, streamer)
if err = intimate.TStreamer.Update(streamer); err != nil {
log.Println(err)
}
count++
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
}
time.Sleep(time.Second * 2)
continue
}
var updateUrl map[string]string
json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
liveUrl := updateUrl["live"]
liveUrl = strings.Replace(liveUrl, "/watchparty", "", -1)
log.Println(liveUrl)
streamerlist := isl.(*intimate.StreamerList)
// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
err = wd.Get(liveUrl + "/about")
weburl := streamerlist.Url + "?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
log.Println(err)
estore.UpdateError(streamer, err)
time.Sleep(time.Second * 5)
// sstore.UpdateError(sourceChannel, err)
intimate.TStreamerList.UpdateError(streamerlist, err)
time.Sleep(time.Second * 10)
continue
}
streamer.LiveUrl = sql.NullString{String: liveUrl, Valid: true}
clog := &intimate.CollectLog{}
clog.UserId = streamer.UserId
clog.Gratuity = sql.NullInt64{Int64: 0, Valid: false}
time.Sleep(time.Millisecond * 500)
err = extractUserName(wd, streamer)
if err != nil {
_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
if err == nil {
log.Println(streamer.UserId, "may be cancell")
streamer.Operator = 5
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
estore.UpdateStreamer(streamer)
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
return false, err
}
continue
}
err = extractFollowers(wd, clog)
if err != nil {
continue
}
return true, nil
}, time.Second*10)
err = extractViews(wd, clog) // views + tags + gratuity
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
// 不直播时提取礼物 gratuity
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
if (err == nil && channelchat != nil) || btn != nil {
if channelchat != nil {
channelchat.Click()
}
time.Sleep(time.Second)
extractGratuity(wd, clog)
return true, nil
log.Println(err)
continue
}
btn.Click()
var elements []selenium.WebElement
var liveurls = 0
var delayerror = 2
for i := 0; i < 200 && !ps.IsClose(); i++ {
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
log.Println(err)
break
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2000)
if len(elements) == liveurls {
delayerror--
if delayerror <= 0 {
break
}
return false, nil
}, time.Second*4)
} else {
delayerror = 2
}
liveurls = len(elements)
}
articles, err := wd.FindElements(selenium.ByXPATH, "//article")
if err != nil {
log.Println(err)
continue
}
streamer.Platform = intimate.Ptwitch
clog.Platform = streamer.Platform
clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
lastClogId := estore.InsertClog(clog)
var streamers []*intimate.Streamer
for _, article := range articles {
streamer.Operator = 10
streamer.LatestLogUid = lastClogId
if clog.Tags != nil {
streamer.Tags = clog.Tags
e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
if err != nil {
log.Println(err)
continue
}
href, err := e.GetAttribute("href")
if err != nil {
log.Println(err)
continue
}
btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
if err != nil {
log.Println(err)
continue
}
var tags []string
for _, btn := range btns {
tag, err := btn.GetAttribute("data-a-target")
if err == nil {
tags = append(tags, tag)
}
}
streamer := &intimate.Streamer{}
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(href)
if len(matches) == 2 {
mc := matches[1]
streamer.UserId = &mc
} else {
log.Println(href)
continue
}
jtags, err := json.Marshal(tags)
if err != nil {
log.Println(err)
} else {
streamer.Tags = jtags
}
streamer.Platform = intimate.Ptwitch
streamer.LiveUrl = &sql.NullString{String: href, Valid: true}
streamer.Operator = 0
streamers = append(streamers, streamer)
// if estore.InsertStreamer(streamer) {
// // log.Println("streamer update tags", streamer.Uid, tags)
// if streamer.Tags != nil {
// estore.Update(streamer, "Tags", streamer.Tags)
// }
// }
}
switch fl := clog.Followers.Int64; {
case fl > 100000:
streamer.UpdateInterval = 120
case fl > 10000:
streamer.UpdateInterval = 240
case fl > 1000:
streamer.UpdateInterval = 360
case fl > 100:
streamer.UpdateInterval = 720
case fl > 0:
streamer.UpdateInterval = 1440
for _, streamer := range streamers {
Extractor(wd, streamer)
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
if err = intimate.TStreamer.InsertOrUpdate(streamer,
intimate.DUpdate{Field: "tags"},
intimate.DUpdate{Field: "update_time"},
intimate.DUpdate{Field: "update_interval"},
); err != nil {
log.Println(err)
}
}
log.Println("streamer find", len(articles))
if len(articles) == 0 {
intimate.TStreamerList.UpdateError(streamerlist, fmt.Errorf(""))
}
count++
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
}
streamer.UpdateTime = clog.UpdateTime
estore.UpdateStreamer(streamer)
counter.AddWithReset(1)
}
wd.Close()
wd.Quit()
adriver.Close()
}
// Extractor loads the Twitch "about" page of streamer with wd, gathers one
// CollectLog snapshot from it and inserts that snapshot through
// intimate.TClog.
//
// On success the in-memory streamer gets LiveUrl, Platform, LatestLogUid,
// Tags (when the log carries any), UpdateInterval and UpdateTime refreshed.
// This function does not write the streamer row itself.
//
// It returns early, leaving the remaining fields untouched, when:
//   - the page cannot be loaded (error recorded via TStreamer.UpdateError),
//   - the user name cannot be extracted (if the "browse channels" button is
//     present, the streamer is flagged with Operator = 5 and the error is
//     recorded),
//   - the followers cannot be extracted (UpdateInterval is pushed back by 30),
//   - the collect log cannot be inserted (error is logged).
//
// streamer.UserId must be non-nil; it is dereferenced unconditionally.
func Extractor(wd selenium.WebDriver, streamer *intimate.Streamer) {
	liveUrl := "https://www.twitch.tv/" + (*streamer.UserId)
	log.Println(liveUrl)

	err := wd.Get(liveUrl + "/about")
	if err != nil {
		errstr := fmt.Errorf("%s: %s", err.Error(), liveUrl+"/about")
		log.Println(errstr)
		intimate.TStreamer.UpdateError(streamer, errstr)
		time.Sleep(time.Second * 5)
		return
	}

	streamer.LiveUrl = &sql.NullString{String: liveUrl, Valid: true}
	clog := &intimate.CollectLog{}
	clog.UserId = *streamer.UserId
	clog.Gratuity = &sql.NullInt64{Int64: 0, Valid: false}

	// Short pause before querying the page.
	time.Sleep(time.Millisecond * 500)

	err = extractUserName(wd, streamer)
	if err != nil {
		_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
		if err == nil {
			log.Println(*streamer.UserId, "may be cancell")
			// NOTE(review): 5 presumably marks an account that no longer
			// exists — confirm against the Operator constants.
			streamer.Operator = 5
			// The user id is passed as an argument, never as the format
			// string, so a '%' in the id cannot corrupt the message.
			intimate.TStreamer.UpdateError(streamer, fmt.Errorf("%s may be cancell", *streamer.UserId))
		}
		return
	}

	err = extractFollowers(wd, clog)
	if err != nil {
		streamer.UpdateInterval += 30
		return
	}

	err = extractViews(wd, clog) // views + tags + gratuity
	if err != nil {
		// When the channel is not live, extract the gifts (gratuity) instead.
		// Best effort: the wait result and the extractGratuity error are
		// deliberately ignored.
		wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
			channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
			btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
			if (err == nil && channelchat != nil) || btn != nil {
				if channelchat != nil {
					channelchat.Click()
				}
				time.Sleep(time.Second)
				extractGratuity(wd, clog)
				return true, nil
			}
			return false, nil
		}, time.Second*4)
	}

	streamer.Platform = intimate.Ptwitch
	clog.Platform = streamer.Platform
	clog.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}

	lastClogId, err := intimate.TClog.InsertRetAutoID(clog)
	if err != nil {
		log.Println(err)
		return
	}

	streamer.LatestLogUid = lastClogId
	if clog.Tags != nil {
		streamer.Tags = clog.Tags
	}

	// The more followers a streamer has, the smaller the update interval.
	// With zero followers the previous interval is kept.
	switch fl := clog.Followers.Int64; {
	case fl > 100000:
		streamer.UpdateInterval = 120
	case fl > 10000:
		streamer.UpdateInterval = 240 * 2
	case fl > 1000:
		streamer.UpdateInterval = 360 * 2
	case fl > 100:
		streamer.UpdateInterval = 720 * 2
	case fl > 0:
		streamer.UpdateInterval = 1440 * 4
	}

	streamer.UpdateTime = clog.UpdateTime
}
func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
@ -141,7 +331,7 @@ func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
if err == nil {
if ltxt, err := label.Text(); err == nil && ltxt != "" {
// log.Println("label:", ltxt)
streamer.UserName = sql.NullString{String: ltxt, Valid: true}
streamer.UserName = &sql.NullString{String: ltxt, Valid: true}
return true, nil
}
}
@ -161,7 +351,7 @@ func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
}
followers = regexp.MustCompile(`[\d,]+`).FindString(followers)
fint, _ := intimate.ParseNumber(followers)
clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
clog.Followers = &sql.NullInt64{Int64: int64(fint), Valid: true}
// log.Println("followers: ", followers, fint)
return true, nil
}, 4*time.Second)
@ -174,7 +364,7 @@ func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
if txt, err := views.Text(); err == nil {
vint, _ := intimate.ParseNumber(txt)
clog.Views = sql.NullInt64{Int64: vint, Valid: true}
clog.Views = &sql.NullInt64{Int64: vint, Valid: true}
// log.Println("views:", txt)
views.Click()
@ -194,7 +384,7 @@ func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
title, err := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
if err == nil {
if txt, err := title.Text(); err == nil {
clog.LiveTitle = sql.NullString{String: txt, Valid: true}
clog.LiveTitle = &sql.NullString{String: txt, Valid: true}
return true, nil
}
}
@ -246,7 +436,7 @@ func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
log.Println(err)
}
}
clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
}
return true, nil
}

View File

@ -3,29 +3,25 @@ package intimate
import (
"database/sql"
"reflect"
"time"
"github.com/474420502/hunter"
"github.com/tidwall/gjson"
)
type GetSet struct {
}
type StreamerList struct {
UrlHash []byte //
Platform Platform //
Url string //
UrlHash string `field:"urlhash" uid:"true"` //
Platform string `field:"platform" ` //
Url string `field:"url" ` //
Label sql.NullString //
Label *sql.NullString `field:"label" ` //
Serialize interface{}
Serialize interface{} `field:"serialize" `
UpdateInterval int32
UpdateTime time.Time //
UpdateInterval int32 `field:"update_interval" `
UpdateTime *sql.NullTime `field:"update_time" ` //
ErrorMsg sql.NullString
Operator int32
ErrorMsg *sql.NullString `field:"error_msg" ` //
Operator int32 `field:"operator" `
LastOperator int32
}
@ -41,25 +37,26 @@ func (sl *StreamerList) Set(field string, value interface{}) {
}
type Streamer struct {
Uid int64 //
Platform Platform //
UserId string //
Uid int64 `field:"uid" uid:"auto"` //
Platform Platform `field:"platform"` //
UserId *string `field:"user_id"` //
UserName sql.NullString //
LiveUrl sql.NullString //
Channel sql.NullString //
Tags interface{}
Ext interface{} //
UserName *sql.NullString `field:"user_name"` //
LiveUrl *sql.NullString `field:"live_url"` //
Channel *sql.NullString `field:"channel"` //
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
// Comments interface{} `field:"comments"`
IsUpdateStreamer bool // 更新上面的内容
IsUpdateUrl bool
UpdateInterval int32
UpdateUrl interface{}
LatestLogUid int64
UpdateTime sql.NullTime //
UpdateInterval int32 `field:"update_interval"`
UpdateUrl interface{} `field:"update_url"` // TODO: nil
LatestLogUid int64 `field:"latest_log_uid"`
UpdateTime *sql.NullTime `field:"update_time"` //
ErrorMsg sql.NullString
Operator int32
ErrorMsg *sql.NullString `field:"error_msg"`
Operator int32 `field:"operator"`
LastOperator int32
}
@ -75,24 +72,25 @@ func (ai *Streamer) Set(field string, value interface{}) {
}
type CollectLog struct {
LogUid int64 // 日志id
StreamerUid int64 // StreamerId 表id与
LogUid int64 `field:"log_uid"` // 日志id
StreamerUid int64 `field:"streamer_uid"` // StreamerId 表id与
Platform Platform //
UserId string // 平台的UserId
IsLiveStreaming bool //
IsError bool //
Followers sql.NullInt64 //
Views sql.NullInt64 //
Giver interface{} //
Gratuity sql.NullInt64 //
LiveTitle sql.NullString //
LiveStartTime sql.NullTime //
LiveEndTime sql.NullTime //
UpdateTime sql.NullTime //
Tags interface{}
Ext interface{} //
ErrorMsg sql.NullString //
Platform Platform `field:"platform"` //
UserId string `field:"user_id"` // 平台的UserId
IsLiveStreaming bool `field:"is_live_streaming"` //
IsError bool `field:"is_error"` //
Followers *sql.NullInt64 `field:"followers"` //
Views *sql.NullInt64 `field:"views"` //
Giver interface{} `field:"giver"` //
Gratuity *sql.NullInt64 `field:"gratuity"` //
LiveTitle *sql.NullString `field:"live_title"` //
LiveStartTime *sql.NullTime `field:"live_start_time"` //
LiveEndTime *sql.NullTime `field:"live_end_time"` //
UpdateTime *sql.NullTime `field:"update_time"` //
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
ErrorMsg *sql.NullString `field:"error_msg"` //
Comments interface{} `field:"comments"` //
}
// Get Simple Value
@ -104,36 +102,3 @@ func (cl *CollectLog) Get(field string) interface{} {
func (cl *CollectLog) Set(field string, value interface{}) {
reflect.ValueOf(cl).Elem().FieldByName(field).Set(reflect.ValueOf(value))
}
type ExtractorSource struct {
source *gjson.Result
extractor *hunter.Extractor
}
func NewExtractorSource(gr *gjson.Result) *ExtractorSource {
es := &ExtractorSource{}
es.SetSource(gr)
return es
}
func (es *ExtractorSource) SetSource(gr *gjson.Result) {
es.source = gr
es.extractor = nil
}
func (es *ExtractorSource) Clear() {
es.source = nil
es.extractor = nil
}
func (es *ExtractorSource) CreateExtractor() {
es.extractor = hunter.NewExtractor([]byte(es.source.String()))
}
func (es *ExtractorSource) GetSource() *gjson.Result {
return es.source
}
func (es *ExtractorSource) GetExtractor() *hunter.Extractor {
return es.extractor
}

12
go.mod
View File

@ -1,18 +1,18 @@
module intimate
go 1.14
go 1.15
require (
github.com/474420502/extractor v0.7.2
github.com/474420502/extractor v0.9.6
github.com/474420502/focus v0.12.0
github.com/474420502/gcurl v0.1.2
github.com/474420502/hunter v0.3.4
github.com/474420502/requests v1.6.0
github.com/474420502/gcurl v0.4.5
github.com/474420502/requests v1.9.1
github.com/davecgh/go-spew v1.1.1
github.com/go-sql-driver/mysql v1.5.0
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
github.com/tebeka/selenium v0.9.9
github.com/tidwall/gjson v1.6.0
github.com/tidwall/pretty v1.0.1 // indirect
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
golang.org/x/net v0.0.0-20200822124328-c89045814202 // indirect
gopkg.in/yaml.v2 v2.3.0
)

24
go.sum
View File

@ -2,18 +2,20 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
github.com/474420502/extractor v0.7.2 h1:idZnsekOKRV8fpJwsRcr6Ol7KSphKXe9tc+JJXgGqQ4=
github.com/474420502/extractor v0.7.2/go.mod h1:92J6QZKstpAKGhv+DibemhQbR/d6lJ+ftyR/ZHmeJ0w=
github.com/474420502/extractor v0.9.6 h1:mwwzwVeu/sZ4FV190Bl32ta4iVjERBWL6eGLXRDlAtg=
github.com/474420502/extractor v0.9.6/go.mod h1:vJnXWmvO5bJDW4Yag0GoE2GxtHRg03TAxp2oXN1DcSY=
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
github.com/474420502/gcurl v0.4.4 h1:ZILu7RRjDBGHpTGmuWGKf1NZZbZsC7AHPlI8RHqs9As=
github.com/474420502/gcurl v0.4.4/go.mod h1:7w4knyVJa1ia4I1xd0krG51fKLGwMmNn5sfG2zPWbqM=
github.com/474420502/gcurl v0.4.5 h1:4y+NbbBGRIWDpfe/iojdSUzlShcZmnkHV4T4etiWQsw=
github.com/474420502/gcurl v0.4.5/go.mod h1:7w4knyVJa1ia4I1xd0krG51fKLGwMmNn5sfG2zPWbqM=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a h1:E1T6CYQKsUn7fMvNbeKfISjBLfOJjZX4KpWwStT20Kc=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a/go.mod h1:AoSN890esHwNKecV0tCs+W0ele1xgFL1Jqk6UcrdxgU=
github.com/474420502/hunter v0.3.4 h1:fyLAgI84jWe3IcqsISC53j1w3CXI1FERxX//Potns0M=
github.com/474420502/hunter v0.3.4/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA=
github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY=
github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/474420502/requests v1.7.0 h1:oaBwVrxZ7yZ+hDOKwHm2NflYib2y1geIUxBxQ2U48mw=
github.com/474420502/requests v1.7.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/474420502/requests v1.9.1 h1:gCDmBCW1ym8zOvKKBGjEG9wafMY7imYk2u28+Oy1WEc=
github.com/474420502/requests v1.9.1/go.mod h1:x2T9l+e40R6kxxMvNm+YSZ9D6BHAXUDak4kQElIPJ9A=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
@ -22,8 +24,6 @@ github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnRO
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP3VrhEcx8=
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg=
github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
@ -114,8 +114,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=

View File

@ -12,4 +12,10 @@ const (
// Ptwitcasting twitcasting 平台
Ptwitcasting Platform = "twitcasting"
// PNimo PNimo 平台
PNimo Platform = "nimo"
// PMirrativ is the mirrativ platform.
PMirrativ Platform = "mirrativ"
)

View File

@ -228,15 +228,14 @@ func NewStoreExtractor() *StoreExtractor {
return &StoreExtractor{db: db}
}
// Pop 弹出一条未处理的数据
func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Streamer, error) {
// PopNoWait 弹出一个不用按时间间隔更新的主播信息, 主要用来测试.
func (store *StoreExtractor) PopNoWait(platform Platform, condition string, operators ...int32) (*Streamer, error) {
tx, err := store.db.Begin()
if err != nil {
return nil, err
}
var args = []interface{}{string(platform)}
selectSQL := `select uid, update_time, user_id, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and ` + condition
if len(operators) == 0 {
selectSQL += " and operator = ?"
args = append(args, 0)
@ -264,7 +263,52 @@ func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Stream
s := &Streamer{}
// uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
if err != nil {
return nil, err
}
s.Set("LastOperator", s.Operator)
_, err = tx.Exec("update "+StreamerTable+" set operator = ? where uid = ?", OperatorWait, s.Uid)
return s, nil
}
// Pop 弹出一条未处理的数据
func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Streamer, error) {
tx, err := store.db.Begin()
if err != nil {
return nil, err
}
var args = []interface{}{string(platform)}
selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
if len(operators) == 0 {
selectSQL += " and operator = ?"
args = append(args, 0)
} else {
for _, operator := range operators {
selectSQL += " and operator = ?"
args = append(args, operator)
}
}
defer func() {
err := tx.Commit()
if err != nil {
log.Println(err)
err = tx.Rollback()
if err != nil {
log.Println(err)
}
}
store.popCount++
}()
// log.Println(selectSQL + ` limit 1 for update`)
row := tx.QueryRow(selectSQL+` limit 1 for update`, args...)
s := &Streamer{}
// uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
if err != nil {
return nil, err
}
@ -316,9 +360,9 @@ func (store *StoreExtractor) InsertStreamerList(streamerlist IGet) (isExists boo
}
// InsertStreamer Streamer表, 插入数据
func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) {
func (store *StoreExtractor) InsertStreamer(streamer *Streamer) (isExists bool) {
// select uid from table where platform = ? and user_id = ?
selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
// selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
tx, err := store.db.Begin()
if err != nil {
panic(err)
@ -335,18 +379,17 @@ func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) {
}
}()
row := tx.QueryRow(selectSQL+` LIMIT 1 FOR UPDATE`, streamer.Get("Platform"), streamer.Get("UserId"))
var isUpdateUrl bool
var Uid int64
if err = row.Scan(&isUpdateUrl, &Uid); err == nil {
if isUpdateUrl {
tx.Exec("UPDATE "+StreamerTable+" SET update_url = ?", streamer.Get("UpdateUrl"))
}
streamer.(ISet).Set("Uid", Uid)
return true
}
streamer.UpdateTime = &sql.NullTime{Time: time.Now().Add(-time.Hour * 100000), Valid: true}
_, err = tx.Exec("INSERT IGNORE INTO "+StreamerTable+"(platform, user_id, user_name, live_url, update_url, tags, update_time) VALUES(?,?,?,?,?,?,?);",
streamer.Platform,
streamer.UserId,
streamer.UserName,
streamer.LiveUrl,
streamer.UpdateUrl,
streamer.Tags,
streamer.UpdateTime,
)
_, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, tags, update_time) VALUES(?,?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), streamer.Get("Tags"), time.Now().Add(-time.Hour*100000))
if err != nil {
panic(err)
}
@ -382,7 +425,8 @@ func (store *StoreExtractor) UpdateOperator(isource IGet) {
// UpdateStreamer updates the row in the Streamer table that matches the streamer's uid.
func (store *StoreExtractor) UpdateStreamer(streamer IGet) {
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?;",
// log.Printf("UPDATE "+StreamerTable+" SET user_name = %v, live_url = %v, channel = %v, latest_log_uid = %v, tags = %v, ext = %v, operator = %v, update_time = %v, update_interval = %v WHERE uid = %v", streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?",
streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
if err != nil {
panic(err)
@ -396,12 +440,14 @@ func (store *StoreExtractor) Update(streamer IGet, fieldvalues ...interface{}) {
for i := 0; i < len(fieldvalues); i += 2 {
field := fieldvalues[i]
values = append(values, fieldvalues[i+1])
updateSQL += field.(string) + " = ? "
updateSQL += field.(string) + " = ?,"
}
updateSQL = updateSQL[0 : len(updateSQL)-1]
updateSQL += "WHERE uid = ?"
values = append(values, streamer.Get("Uid"))
_, err := store.db.Exec(updateSQL, values...)
if err != nil {
log.Println(updateSQL)
panic(err)
}
}

View File

@ -5,8 +5,7 @@ import (
)
func TestStoreInsert(t *testing.T) {
// ht := hunter.NewHunter(openrecRanking)
// ht.Execute()
}
func TestStoreInsertCase1(t *testing.T) {

1
supervisor_conf/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
conf.d

View File

@ -0,0 +1,7 @@
# Truncate every file named "log" under ../bin (relative to the current
# directory) to zero bytes.
CURPATH=$(pwd)
BINPATH="$(dirname "$CURPATH")/bin"
# Quoted so a path containing spaces or glob characters is passed to find intact.
find "$BINPATH" -type f -name 'log' -exec truncate -s 0 {} +

View File

@ -0,0 +1,8 @@
# Copy the *.conf templates of the current directory into ./conf.d, replace
# the MYPATH placeholder with the parent directory, and symlink the results
# into /etc/supervisor/conf.d/.
CURPATH=$(pwd)
SUPPATH=$(dirname "$CURPATH")
# Variable expansions are quoted so paths containing spaces survive; the
# *.conf globs stay outside the quotes so they still expand.
mkdir -p "$CURPATH/conf.d"
cp *.conf "$CURPATH/conf.d/"
sed -i "s#MYPATH#$SUPPATH#g" "$CURPATH"/conf.d/*.conf
ln -sf "$CURPATH"/conf.d/*.conf /etc/supervisor/conf.d/

View File

@ -0,0 +1,10 @@
[supervisord]
nodaemon=true
[program:mirrativ_extractor]
directory = MYPATH/bin/mirrativ_extractor/
command= MYPATH/bin/mirrativ_extractor/mirrativ_extractor
autorestart=true
stderr_logfile=MYPATH/bin/mirrativ_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:mirrativ_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/mirrativ_task1/
command= MYPATH/bin/mirrativ_task1/mirrativ_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/mirrativ_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,10 @@
[supervisord]
nodaemon=true
[program:nimo_extractor]
directory = MYPATH/bin/nimo_extractor/
command= MYPATH/bin/nimo_extractor/nimo_extractor
autorestart=true
stderr_logfile=MYPATH/bin/nimo_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:nimo_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/nimo_task1/
command= MYPATH/bin/nimo_task1/nimo_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/nimo_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,10 @@
[supervisord]
nodaemon=true
[program:openrec_extractor]
directory = MYPATH/bin/openrec_extractor/
command= MYPATH/bin/openrec_extractor/openrec_extractor
autorestart=true
stderr_logfile=MYPATH/bin/openrec_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,10 @@
[supervisord]
nodaemon=true
[program:openrec_ranking]
directory = MYPATH/bin/openrec_task1
command= MYPATH/bin/openrec_task1/openrec_task1
autorestart=true
stderr_logfile=MYPATH/bin/openrec_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,14 @@
[supervisord]
nodaemon=false
[program:twitcasting_extractor]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitcasting_extractor/
command= MYPATH/bin/twitcasting_extractor/twitcasting_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=3 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitcasting_extractor/log
# stderr_logfile=%(supervisorctl.var.directory)s/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitcasting_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitcasting_task1/
command= MYPATH/bin/twitcasting_task1/twitcasting_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitcasting_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=true
[program:twitch_extractor]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=5 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=true
[program:twitch_extractor_p1]
environment=DISPLAY=":99",pac_proxy=http://localhost:1090/pac
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=2 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitch_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitch_task1
command= MYPATH/bin/twitch_task1/twitch_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,8 @@
[supervisord]
nodaemon=true
[program:xvfb-99]
command=/usr/bin/Xvfb :99 -screen 0 1280x720x24 -ac -nolisten tcp -dpi 96 +extension RANDR -nolisten tcp
autorestart=true

View File

@ -12,4 +12,7 @@ const (
// STTwitcasting STTwitcasting源table名称
STTwitcasting SourceTable = "source_twitcasting"
// STNimo nimo源table名称
STNimo SourceTable = "source_nimo"
)

View File

@ -0,0 +1,83 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// main polls the mirrativ live catalog API page by page and stores every
// owner found as an intimate.Streamer through intimate.TStreamer.InsertOrUpdate
// (refreshing update_time on rows that already exist).
//
// Paging follows the "next_cursor" value of each response. When it is empty
// the loop waits ten minutes and starts over from an empty cursor; otherwise
// it waits two seconds before requesting the next page. The loop ends once
// the PerfectShutdown instance reports it is closed.
func main() {
	// NOTE(review): the CSRF token and session cookie below are hard-coded
	// credentials; consider moving them to configuration.
	bcurl := `curl 'https://www.mirrativ.com/api/live/catalog?id=2&cursor=%s' \
-H 'authority: www.mirrativ.com' \
-H 'accept: application/json' \
-H 'x-timezone: Asia/Shanghai' \
-H 'x-csrf-token: F3Ojd6RBtApP6YAZzVn-9jWN1of159VxAqOQL1Zn' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: application/json' \
-H 'sec-fetch-site: same-origin' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.mirrativ.com/' \
-H 'accept-language: zh-CN,zh;q=0.9,ja;q=0.8' \
-H 'cookie: f=A2D75F0E-D218-11EA-A042-452BF6D21CE8; _ga=GA1.2.689947597.1596081392; mr_id=kxb65LddGMZf5C28jkR_tGCZD_ZFOAepD5gfXO7eNjfPMB8EKYvU1Vg_Y29V1lsa; _gid=GA1.2.2116692650.1600139685; lang=ja' \
--compressed`
	curl := gcurl.Parse(bcurl)
	tp := curl.CreateTemporary(nil)
	cursor := tp.QueryParam(`cursor`)
	cursor.StringSet("")

	ps := intimate.NewPerfectShutdown()
	for !ps.IsClose() {
		log.Println(tp.ParsedURL.String())
		resp, err := tp.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second * 2)
			continue
		}

		apijson := gjson.ParseBytes(resp.Content())
		next := apijson.Get("next_cursor").String()

		for _, liveinfo := range apijson.Get("list").Array() {
			// An entry carries its data under either "live_preview" or "live".
			prekey := "live"
			if liveinfo.Get("live_preview").Exists() {
				prekey = "live_preview"
			}

			owner := liveinfo.Get(prekey + ".owner")
			// Store the same value that is checked: Result.Str is only set
			// for JSON strings, so a numeric user_id would pass the check
			// via String() yet be saved as an empty id.
			userid := owner.Get("user_id").String()
			if userid == "" {
				continue
			}

			streamer := &intimate.Streamer{}
			streamer.Platform = intimate.PMirrativ
			streamer.Operator = 0
			streamer.UserId = &userid
			streamer.UserName = &sql.NullString{String: owner.Get("name").String(), Valid: true}
			streamer.UpdateInterval = 600
			streamer.UpdateTime = intimate.GetUpdateTimeNow()

			err = intimate.TStreamer.InsertOrUpdate(
				streamer,
				intimate.DUpdate{Field: "update_time"},
			)
			if err != nil {
				log.Println(err)
				panic(err)
			}
		}

		if next == "" {
			// Catalog exhausted: pause before starting over from the first page.
			ps.Wait(time.Minute * 10)
		} else {
			ps.Wait(time.Second * 2)
		}
		cursor.StringSet(next)
	}
}

View File

@ -1,6 +1,8 @@
package main
import "testing"
import (
"testing"
)
func TestMain(t *testing.T) {
main()

View File

@ -0,0 +1,5 @@
package main
// main is the program entry point; it only calls Execute.
func main() {
Execute()
}

View File

@ -0,0 +1,109 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// Execute crawls the nimo.tv live-room listing page by page and inserts one
// streamer row per room.
//
// The loop runs until the PerfectShutdown handle reports that the process is
// closing. When a page comes back empty the crawler rewinds to page 1 and idles
// for 15 minutes; the idle period goes through ps.Wait so that a shutdown
// request is noticed while idling instead of only after the full 15 minutes.
// A response whose room list is not a JSON array ends the crawl. Failures while
// encoding tags or inserting a streamer panic.
func Execute() {
	nimoapi := `curl 'https://api.nimo.tv/oversea/nimo/api/v2/liveRoom/liveRoomPage-1-100-/HK/1028/1000' \
-H 'authority: api.nimo.tv' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: multipart/form-data; boundary=----WebKitFormBoundary3bCA1lzvhj4kBR4Q' \
-H 'accept: */*' \
-H 'origin: https://www.nimo.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.nimo.tv/lives' \
-H 'accept-language: zh-CN,zh;q=0.9' \
--data-binary $'------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="keyType"\r\n\r\n0\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="body"\r\n\r\n{"deviceType":7,"requestSource":"WEB","iNetType":5}\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q--\r\n' \
--compressed`

	curl := gcurl.Parse(nimoapi)
	ses := curl.CreateSession()
	tp := curl.CreateTemporary(ses)

	// NOTE(review): the capture group presumably selects the page number in
	// the URL path so it can be set/incremented below — confirm against gcurl.
	param := tp.PathParam("liveRoomPage-(1)-")

	ps := intimate.NewPerfectShutdown()
	for !ps.IsClose() {
		resp, err := tp.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second)
			continue
		}

		result := gjson.ParseBytes(resp.Content())
		roomlist := result.Get("data.result.liveRoomViewList")
		if !roomlist.IsArray() {
			log.Println("json is error")
			log.Println(string(resp.Content()))
			break
		}

		rooms := roomlist.Array()
		log.Println(tp.GetURLRawPath(), "rooms:", len(rooms))
		if len(rooms) == 0 {
			// Past the last page: start over from page 1 after a pause that
			// can be interrupted by a shutdown request.
			param.IntSet(1)
			ps.Wait(time.Minute * 15)
			continue
		}

		for _, room := range rooms {
			userid := room.Get("id").String()
			if userid == "" {
				log.Println("userid is null.", room.String())
				continue
			}

			streamer := &intimate.Streamer{}
			streamer.Platform = intimate.PNimo
			streamer.UserId = &userid
			streamer.LiveUrl = &sql.NullString{String: "https://www.nimo.tv/live/" + userid, Valid: true}

			channel := room.Get("roomTypeName").String()
			streamer.Channel = &sql.NullString{String: channel, Valid: channel != ""}

			username := room.Get("anchorName").String()
			streamer.UserName = &sql.NullString{String: username, Valid: username != ""}

			if rtags := room.Get("anchorLabels"); rtags.IsArray() {
				var tags []string
				for _, r := range rtags.Array() {
					tags = append(tags, r.Get("labelName").String())
				}
				data, err := json.Marshal(tags)
				if err != nil {
					panic(err)
				}
				streamer.Tags = data
			}

			streamer.UpdateInterval = 120
			if err := intimate.TStreamer.Insert(streamer); err != nil {
				panic(err)
			}
		}

		param.IntAdd(1)
	}
}

View File

@ -0,0 +1,55 @@
package main
import (
"fmt"
"io/ioutil"
"os"
"testing"
"github.com/474420502/gcurl"
)
// CrawlContent requests page 1 (30 rooms per page) of the nimo live-room API
// and returns the raw response body. The variadic arguments are accepted but
// not used. It panics when the request fails.
func CrawlContent(args ...interface{}) []byte {
	template := `curl 'https://api.nimo.tv/oversea/nimo/api/v2/liveRoom/liveRoomPage-%d-30-/HK/1028/1000' \
-H 'authority: api.nimo.tv' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: multipart/form-data; boundary=----WebKitFormBoundary3bCA1lzvhj4kBR4Q' \
-H 'accept: */*' \
-H 'origin: https://www.nimo.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.nimo.tv/lives' \
-H 'accept-language: zh-CN,zh;q=0.9' \
--data-binary $'------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="keyType"\r\n\r\n0\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="body"\r\n\r\n{"deviceType":7,"requestSource":"WEB","iNetType":5}\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q--\r\n' \
--compressed`

	// Fill the page placeholder, then issue the request without a session.
	request := gcurl.Parse(fmt.Sprintf(template, 1))
	response, err := request.CreateTemporary(nil).Execute()
	if err != nil {
		panic(err)
	}
	return response.Content()
}
// openTestFile reads the recorded nimo response stored at
// ../../../testfile/nimo1.json (relative to the working directory) and returns
// its bytes. The variadic arguments are ignored. It panics if the file cannot
// be opened or read.
func openTestFile(...interface{}) []byte {
	f, err := os.Open("../../../testfile/nimo1.json")
	if err != nil {
		panic(err)
	}
	// Release the file descriptor once the contents have been read; the
	// previous version never closed it.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		panic(err)
	}
	return data
}
var Crawl func(...interface{}) []byte
// Test runs the package's Execute entry point under the Go test runner; it
// makes no assertions of its own.
// NOTE(review): Execute presumably loops until shutdown is signalled, so this
// test may not return on its own — confirm before running in CI.
func Test(t *testing.T) {
Execute()
}

View File

@ -1,8 +1,5 @@
package main
import "github.com/474420502/hunter"
func main() {
ht := hunter.NewHunter(openrecRanking)
ht.Execute()
Execute()
}

View File

@ -4,29 +4,24 @@ import (
"encoding/json"
"intimate"
"log"
"os"
"os/signal"
"strconv"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/hunter"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
var openrecRanking *OpenrecRanking
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// Execute 执行方法
func Execute() {
func init() {
openrecRanking = &OpenrecRanking{}
openrecRanking.PreCurlUrl = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
ps := intimate.NewPerfectShutdown()
turl := `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
@ -39,43 +34,25 @@ func init() {
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`
}
g := gcurl.Parse(turl)
tp := g.Temporary()
// OpenrecRanking 获取排名任务
type OpenrecRanking struct {
hunter.PreCurlUrl
}
for !ps.IsClose() {
// Execute 执行方法
func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
var loop int32 = 1
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
for atomic.LoadInt32(&loop) > 0 {
resp, err := cxt.Hunt()
resp, err := tp.Execute()
if err != nil {
log.Println(err)
time.Sleep(time.Second * 2)
continue
}
tp := cxt.Temporary()
content := resp.Content()
if len(content) <= 200 { //末页时没有内容返回, 末页退出
finishpoint := time.Now()
log.Println("任务Ranking UserId结束休眠, 下次启动时间:", finishpoint.Add(time.Minute*120))
for time.Now().Sub(finishpoint) < time.Minute*120 {
time.Sleep(time.Second)
if atomic.LoadInt32(&loop) <= 0 {
if ps.IsClose() {
return
}
}
@ -93,15 +70,8 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
for _, User := range result.Array() {
userid := User.Get("channel.id").String()
// data := &intimate.Source{}
// data.Source = sql.NullString{String: userid, Valid: len(userid) > 0}
// data.Url = tp.GetRawURL()
// data.TargetType = string(intimate.TTOpenrecUser)
// sstore.Insert(data)
streamer := &intimate.Streamer{}
streamer.UserId = userid
streamer.UserId = &userid
streamer.Platform = intimate.Popenrec
updateUrl := make(map[string]interface{})
@ -113,27 +83,18 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
updateUrlBytes, err := json.Marshal(updateUrl)
if err != nil {
estore.UpdateError(streamer, err)
intimate.TStreamer.UpdateError(streamer, err)
continue
}
streamer.UpdateUrl = updateUrlBytes
estore.InsertStreamer(streamer)
intimate.TStreamer.Insert(streamer)
}
}
log.Println("streamer count:", len(result.Array()), tp.ParsedURL.String())
// 修改url query 参数的page递增. 遍历所有页面
querys := tp.GetQuery()
page, err := strconv.Atoi(querys.Get("page"))
if err != nil {
log.Println(err)
return
}
page++
querys.Set("page", strconv.Itoa(page))
tp.SetQuery(querys)
tp.QueryParam("page").IntAdd(1)
time.Sleep(time.Second * 1)
}
}

View File

@ -3,67 +3,14 @@ package main
import (
"testing"
"time"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
// OpenrecRanking 获取排名任务
type OpenrecRankingTest struct {
hunter.PreCurlUrl
}
// Execute 执行方法
func (or *OpenrecRankingTest) Execute(cxt *hunter.TaskContext) {
resp, err := cxt.Hunt()
if err != nil {
panic(err)
}
t := cxt.GetShare("test").(*testing.T)
if !gjson.ValidBytes(resp.Content()) {
t.Error("source is not json format.")
}
result := gjson.GetBytes(resp.Content(), "0.rank")
if result.Int() != 1 {
t.Error("rank is error. result raw is ", result.Raw)
}
if cxt.Temporary().GetQuery().Get("page") != "1" {
t.Error("Temporary page error")
}
// t.Error(string(resp.Content()))
}
func TestRanking(t *testing.T) {
curlBash := `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
-H 'origin: https://www.openrec.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.openrec.tv/ranking' \
-H 'accept-language: zh-CN,zh;q=0.9' \
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`
ort := &OpenrecRankingTest{}
ort.PreCurlUrl = hunter.PreCurlUrl(curlBash)
ht := hunter.NewHunter(ort)
ht.SetShare("test", t)
ht.Execute()
}
func TestTimeAdd(t *testing.T) {
finishpoint := time.Now()
time.Sleep(time.Second * 2)
t.Error(time.Now().Sub(finishpoint) > time.Second*1)
}
func TestRankingInsert(t *testing.T) {
ht := hunter.NewHunter(openrecRanking)
ht.Execute()
func TestMain(t *testing.T) {
main()
}

View File

@ -1,2 +0,0 @@
openrec_task2
log

View File

@ -1,8 +0,0 @@
package main
import "github.com/474420502/hunter"
func main() {
ht := hunter.NewHunter(oer)
ht.Execute()
}

View File

@ -1,178 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"os"
"os/signal"
"strconv"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
var oer *OpenrecExtratorRanking
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
// estore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_extractor.sql
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func init() {
oer = &OpenrecExtratorRanking{}
}
// OpenrecExtratorRanking 获取用户信息
type OpenrecExtratorRanking struct {
// Store *intimate.Store
}
// Execute pops openrec streamers from the extractor queue and, for each one,
// downloads the user page, every page of the supporters JSON, and the live
// page, then stores the collected raw content as one source row.
//
// The loop stops after SIGQUIT or SIGTERM is received. An empty queue or a pop
// error is retried every two seconds. Failures while fetching the user or live
// page, or while encoding the result, are recorded with estore.UpdateError and
// the streamer is skipped. A failed supporters request ends the supporters
// paging for that streamer; whatever was collected so far is still stored.
func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {

	var loop int32 = 1

	go func() {
		// signal.Notify does not block when sending, so the channel needs a
		// buffer or a signal arriving before the receive is dropped.
		// SIGKILL and SIGSTOP cannot be caught and are therefore not listed.
		signalchan := make(chan os.Signal, 1)
		signal.Notify(signalchan, syscall.SIGQUIT, syscall.SIGTERM)
		log.Println("accept stop command:", <-signalchan)
		atomic.StoreInt32(&loop, 0)
	}()

	var lasterr error = nil

	for atomic.LoadInt32(&loop) > 0 {

		// Pop one streamer row from the queue for processing.
		streamer, err := estore.Pop(intimate.Popenrec)
		if streamer == nil || err != nil {
			// Only log when the error changes, to avoid flooding the log
			// while the queue stays empty.
			if err != lasterr {
				log.Println(err, lasterr)
				lasterr = err
			}
			time.Sleep(time.Second * 2)
			continue
		}

		userId := streamer.UserId

		// update_url is a JSON object holding the URLs to collect.
		var updateUrl map[string]string
		err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
		if err != nil {
			log.Println(err)
			continue
		}

		// Fetch the user page.
		userUrl := updateUrl["user"]
		tp := cxt.Session().Get(userUrl)
		resp, err := tp.Execute()
		streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
		if err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}

		cookies := cxt.Session().GetCookies(tp.GetParsedURL())

		// Fetch the supporters (donors) data with the cookies obtained above.
		scurl := updateUrl["supporters"]
		curl := gcurl.ParseRawCURL(scurl)
		supportersSession := curl.CreateSession()
		temporary := curl.CreateTemporary(supportersSession)
		supportersSession.SetCookies(temporary.GetParsedURL(), cookies)

		var supporters []string
		for {
			// The supporters endpoint needs login information: copy the
			// uuid / token / random cookie values into the query string.
			supportersQuery := temporary.GetQuery()
			for _, cookie := range cookies {
				switch cookie.Name {
				case "uuid":
					supportersQuery.Set("Uuid", cookie.Value)
				case "token":
					supportersQuery.Set("Token", cookie.Value)
				case "random":
					supportersQuery.Set("Random", cookie.Value)
				}
			}
			supportersQuery.Set("identify_id", userId)
			temporary.SetQuery(supportersQuery)

			sresp, serr := temporary.Execute()
			if serr != nil {
				// The response must not be touched after a failed request;
				// stop paging and keep what has been collected.
				log.Println(serr)
				break
			}

			// A page without a "data" value marks the end of the list.
			supporterjson := gjson.ParseBytes(sresp.Content())
			supporterdata := supporterjson.Get("data")
			if supporterdata.Type == gjson.Null {
				break
			}
			supporters = append(supporters, string(sresp.Content()))

			// Advance page_number by one.
			page := supportersQuery.Get("page_number")
			pageint, perr := strconv.Atoi(page)
			if perr != nil {
				log.Println(perr)
				break
			}
			pageint++
			page = strconv.Itoa(pageint)
			supportersQuery.Set("page_number", page)
			temporary.SetQuery(supportersQuery)
		}

		ext := make(map[string]interface{})
		ext["json_supporters"] = supporters
		ext["html_user"] = string(resp.Content())

		// Fetch the live page.
		liveUrl := updateUrl["live"]
		tp = cxt.Session().Get(liveUrl)
		resp, err = tp.Execute()
		if err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}
		ext["html_live"] = string(resp.Content())
		ext["var_user_id"] = userId

		extJsonBytes, err := json.Marshal(ext)
		if err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}

		streamer.Operator = int32(intimate.OperatorOK)

		source := &intimate.Source{}
		source.Target = intimate.TOpenrecUser
		source.Ext = string(extJsonBytes)
		source.StreamerId = sql.NullInt64{Int64: streamer.Uid, Valid: true}
		sstore.Insert(source)

		estore.UpdateOperator(streamer)
	}

}

View File

@ -1,12 +0,0 @@
package main
import (
"testing"
"github.com/474420502/hunter"
)
func TestOpenrecUser(t *testing.T) {
ht := hunter.NewHunter(oer)
ht.Execute()
}

View File

@ -21,5 +21,5 @@ func TestUpdateTime(t *testing.T) {
}
func TestMain(t *testing.T) {
main()
}

View File

@ -70,13 +70,15 @@ func Execute() {
if ok := queuedict[wurl]; !ok {
log.Println(wurl)
sl := &intimate.StreamerList{}
sl.Platform = intimate.Ptwitcasting
sl.Platform = string(intimate.Ptwitcasting)
sl.Url = wurl
sl.Operator = 0
sl.UpdateInterval = 120
sl.UpdateTime = time.Now()
sl.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
sl.UrlHash = intimate.GetUrlHash(sl.Url)
estore.InsertStreamerList(sl)
intimate.TStreamerList.Insert(sl)
// estore.InsertStreamerList(sl)
queue.Put(wurl)
queuedict[wurl] = true
@ -89,7 +91,7 @@ func Execute() {
continue
}
var splist = xps.ForEachTag(SearchProfile{})
var splist = xps.ForEachObjectByTag(SearchProfile{})
for _, isp := range splist {
sp := isp.(*SearchProfile)
if sp.LiveUrl == "" {
@ -102,12 +104,13 @@ func Execute() {
sp.TagUrl[i] = wurl
if ok := queuedict[wurl]; !ok {
sl := &intimate.StreamerList{}
sl.Platform = intimate.Ptwitcasting
sl.Platform = string(intimate.Ptwitcasting)
sl.Url = wurl
sl.Operator = 0
sl.UpdateInterval = 120
sl.UpdateTime = time.Now()
estore.InsertStreamerList(sl)
sl.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
sl.UrlHash = intimate.GetUrlHash(sl.Url)
intimate.TStreamerList.Insert(sl)
queue.Put(wurl)
queuedict[wurl] = true
@ -122,17 +125,19 @@ func Execute() {
// log.Println(sp)
streamer := &intimate.Streamer{}
streamer.Platform = intimate.Ptwitcasting
streamer.LiveUrl = sql.NullString{String: sp.LiveUrl, Valid: true}
streamer.LiveUrl = &sql.NullString{String: sp.LiveUrl, Valid: true}
if btags, err := json.Marshal(sp.Tag); err != nil {
log.Println(err)
} else {
streamer.Tags = btags
}
streamer.UpdateInterval = 120
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
streamer.UserName = sql.NullString{String: sp.UserName, Valid: true}
streamer.UserId = sp.UserId
estore.InsertStreamer(streamer)
streamer.UpdateTime = intimate.GetUpdateTimeNow()
streamer.UserName = &sql.NullString{String: sp.UserName, Valid: true}
streamer.UserId = &sp.UserId
streamer.Operator = 0
// estore.InsertStreamer(streamer)
intimate.TStreamer.Insert(streamer)
}
log.Println("finish remain", queue.Size())

View File

@ -1,6 +1,5 @@
package main
func main() {
e := ChannelLink{}
e.Execute()
Execute()
}

View File

@ -1,7 +1,6 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
@ -9,104 +8,113 @@ import (
"github.com/tebeka/selenium"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// ChannelLink 频道链接
type ChannelLink struct {
}
// Execute 执行任务
func (cl *ChannelLink) Execute() {
var err error
wd := intimate.GetChromeDriver(3030)
func Execute() {
ps := intimate.NewPerfectShutdown()
var adriver *intimate.AutoCloseDriver
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
panic(err)
}
for !ps.IsClose() {
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
var err error
adriver = intimate.GetChromeDriver()
wd := adriver.Webdriver
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*15)
time.Sleep(time.Second)
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
panic(err)
}
e.Click()
var hrefs map[string]bool = make(map[string]bool)
var delayerror = 5
for i := 0; i <= 200; i++ {
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
log.Println(err)
break
panic(err)
}
if len(hrefs) == 0 {
delayerror--
if delayerror <= 0 {
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*15)
time.Sleep(time.Second)
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
panic(err)
}
e.Click()
var lasthreflen = 0
var hrefs map[string]bool = make(map[string]bool)
var delayerror = 5
for i := 0; i <= 200; i++ {
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
log.Println(err)
break
}
} else {
delayerror = 5
}
for ii := 0; ii < 10; ii++ {
for _, card := range cards {
href, err := card.GetAttribute("href")
if err != nil {
log.Println(href, err)
continue
} else {
hrefs[href] = true
if len(hrefs) == lasthreflen {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 7
}
break
}
lasthreflen = len(hrefs)
if ps.IsClose() {
break
}
for ii := 0; ii < 10; ii++ {
for _, card := range cards {
href, err := card.GetAttribute("href")
if err != nil {
log.Println(href, err)
continue
} else {
hrefs[href] = true
}
}
break
}
if len(cards) > 10 {
log.Println(len(cards))
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
if ps.IsClose() {
break
}
if len(cards) > 10 {
log.Println(len(cards))
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2500)
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2500)
for href := range hrefs {
sl := &intimate.StreamerList{}
sl.Url = href
sl.UrlHash = intimate.GetUrlHash(sl.Url)
sl.Platform = string(intimate.Ptwitch)
sl.UpdateTime = intimate.GetUpdateTimeNow()
err := intimate.TStreamerList.Insert(sl)
if err != nil {
log.Println(err)
}
}
log.Println("hrefs len:", len(hrefs))
adriver.Close()
ps.Wait(time.Minute * 5)
}
for href := range hrefs {
// TODO: Save href
source := &intimate.Source{}
source.Source = sql.NullString{String: href, Valid: true}
source.Operator = 0
source.Target = intimate.TTwitchChannel
source.Url = weburl
sstore.Insert(source)
}
log.Println("hrefs len:", len(hrefs))
sstore.Deduplicate(intimate.TTwitchChannel, "source")
}

View File

@ -5,8 +5,7 @@ import (
)
func TestCase1(t *testing.T) {
e := ChannelLink{}
e.Execute()
Execute()
}
func TestLiveUrl(t *testing.T) {

View File

@ -1,2 +0,0 @@
twitch_task2
log

View File

@ -1,6 +0,0 @@
package main
func main() {
ul := UserList{}
ul.Execute()
}

View File

@ -1,180 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"time"
"github.com/tebeka/selenium"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// UserList 频道链接
type UserList struct {
}
// Execute walks the queued twitch channel (category) pages with a Chrome
// driver, scrolls each page to load its stream cards, and stores every
// streamer found together with the tags shown on the card.
//
// The loop runs until the PerfectShutdown handle reports closing. The counter
// callback quits and replaces the driver once its limit is reached, so the
// cleanup at exit is a single deferred closure that reads the current value of
// wd; the earlier version deferred Close/Quit on the first driver only and
// then closed the current one a second time by hand. A failure to pop a source
// channel panics; the deferred cleanup still runs in that case.
func (cl *UserList) Execute() {
	// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
	//article//a[@data-a-target='preview-card-title-link']

	wd := intimate.GetChromeDriver(3030)
	defer func() {
		if err := wd.Close(); err != nil {
			log.Println(err)
		}
		if err := wd.Quit(); err != nil {
			log.Println(err)
		}
	}()

	ps := intimate.NewPerfectShutdown()

	counter := intimate.NewCounter()
	counter.SetMaxLimit(100)
	counter.SetMaxToDo(func(olist ...interface{}) error {
		owd := olist[0].(*selenium.WebDriver)
		if err := (*owd).Quit(); err != nil {
			log.Println(err)
		}
		*owd = intimate.GetChromeDriver(3030)
		return nil
	}, &wd)

	// Compiled once instead of once per article.
	userIDPattern := regexp.MustCompile(`https://www.twitch.tv/(\w+)`)

	for !ps.IsClose() {

		var err error
		sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
		if err != nil {
			panic(err)
		}

		weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
		err = wd.Get(weburl)
		if err != nil {
			log.Println(err)
			sstore.UpdateError(sourceChannel, err)
			time.Sleep(time.Second * 10)
			continue
		}

		// Wait (up to 10s) until the page shows the element used below as the
		// progress marker while scrolling.
		wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
			_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			if err != nil {
				return false, err
			}
			return true, nil
		}, time.Second*10)

		btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
		if err != nil {
			log.Println(err)
			continue
		}
		btn.Click()

		// Press End repeatedly to trigger lazy loading; stop after two
		// consecutive rounds in which the marker count did not change.
		var elements []selenium.WebElement
		var liveurls = 0
		var delayerror = 2
		for i := 0; i < 200 && !ps.IsClose(); i++ {
			elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			if err != nil {
				log.Println(err)
				break
			}
			time.Sleep(time.Millisecond * 200)
			wd.KeyDown(selenium.EndKey)
			time.Sleep(time.Millisecond * 200)
			wd.KeyUp(selenium.EndKey)
			time.Sleep(time.Millisecond * 2000)
			if len(elements) == liveurls {
				delayerror--
				if delayerror <= 0 {
					break
				}
			} else {
				delayerror = 2
			}
			liveurls = len(elements)
		}

		articles, err := wd.FindElements(selenium.ByXPATH, "//article")
		if err != nil {
			log.Println(err)
			continue
		}

		for _, article := range articles {

			e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
			if err != nil {
				log.Println(err)
				continue
			}

			href, err := e.GetAttribute("href")
			if err != nil {
				log.Println(err)
				continue
			}

			btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
			if err != nil {
				log.Println(err)
				continue
			}

			var tags []string
			for _, btn := range btns {
				tag, err := btn.GetAttribute("data-a-target")
				if err == nil {
					tags = append(tags, tag)
				}
			}

			streamer := &intimate.Streamer{}

			// The user id is the first path segment of the stream link.
			matches := userIDPattern.FindStringSubmatch(href)
			if len(matches) == 2 {
				streamer.UserId = matches[1]
			} else {
				log.Println(href)
				continue
			}

			jtags, err := json.Marshal(tags)
			if err != nil {
				log.Println(err)
			} else {
				streamer.Tags = jtags
			}

			streamer.Platform = intimate.Ptwitch
			updateUrl := make(map[string]string)
			updateUrl["live"] = href
			streamer.LiveUrl = sql.NullString{String: href, Valid: true}
			data, err := json.Marshal(updateUrl)
			if err != nil {
				log.Println(err)
				continue
			}
			streamer.UpdateUrl = data
			streamer.Operator = 0
			if estore.InsertStreamer(streamer) {
				// log.Println("streamer update tags", streamer.Uid, tags)
				estore.Update(streamer, "Tags", streamer.Tags)
			}
		}

		log.Println("streamer find", len(articles))
		if len(articles) == 0 {
			sourceChannel.Operator = 5
			sstore.UpdateOperator(sourceChannel)
		}

		counter.AddWithReset(1)
	}
}

2565
testfile/mirrativ.json Normal file

File diff suppressed because it is too large Load Diff

10205
testfile/nimo1.json Normal file

File diff suppressed because it is too large Load Diff

2015
testfile/nimolive.html Normal file

File diff suppressed because one or more lines are too long

133
utils.go
View File

@ -1,11 +1,14 @@
package intimate
import (
"crypto/md5"
"database/sql"
"fmt"
"log"
"net"
"os"
"os/exec"
"os/signal"
"runtime"
"strconv"
"strings"
"sync/atomic"
@ -28,15 +31,26 @@ func init() {
}
// GetUpdateTimeNow returns a valid NullTime set 100000 hours before the
// current time. The original note says it is meant to be used on first
// insertion so the row is due for an update immediately.
func GetUpdateTimeNow() *sql.NullTime {
	longAgo := time.Now().Add(-100000 * time.Hour)
	stamp := sql.NullTime{Valid: true, Time: longAgo}
	return &stamp
}
// GetUrlHash returns the MD5 digest of urlstr as a lowercase hexadecimal
// string.
func GetUrlHash(urlstr string) string {
	digest := md5.Sum([]byte(urlstr))
	return fmt.Sprintf("%x", digest[:])
}
// ParseNumber 去逗号解析数字
func ParseNumber(number string) (int64, error) {
number = strings.ReplaceAll(number, ",", "")
return strconv.ParseInt(number, 10, 64)
func ParseNumber(num string) (int64, error) {
num = strings.Trim(num, " ")
num = strings.ReplaceAll(num, ",", "")
return strconv.ParseInt(num, 10, 64)
}
// ParseNumberEx 解析带字符的数字
func ParseNumberEx(num string) (float64, error) {
num = strings.Trim(num, " ")
num = strings.ReplaceAll(num, ",", "")
last := num[len(num)-1]
factor := 1.0
switch {
@ -80,7 +94,39 @@ func ParseDuration(dt string) (time.Duration, error) {
return tdt.Sub(zeroTime), nil
}
func GetChromeDriver(port int) selenium.WebDriver {
// AutoCloseDriver bundles a selenium WebDriver session with a port number.
// Its Close method uses Port to locate the processes to kill.
type AutoCloseDriver struct {
// Webdriver is the selenium session handle.
Webdriver selenium.WebDriver
// Port is the number searched for ("port=<Port>") in process command lines
// when the driver is closed.
Port int
}
// Close force-kills the processes whose command line contains
// "port=<adriver.Port>", together with their direct children.
//
// Children are killed first, then the matched processes themselves. Every step
// is best effort: failures are logged and the remaining steps still run. In
// particular, pkill exits with status 1 when there are no children to kill,
// which must not prevent the matched processes from being killed.
func (adriver *AutoCloseDriver) Close() {
	// pgrep is run directly rather than through "bash -c", so no wrapper
	// shell carrying the same pattern on its command line can be matched.
	pattern := fmt.Sprintf("port=%d", adriver.Port)
	data, err := exec.Command("pgrep", "-f", pattern).Output()
	if err != nil {
		log.Println(err)
		log.Println(string(data))
		return
	}

	// pgrep prints one PID per line with a trailing newline; split it into
	// clean tokens instead of splicing the raw output into a command.
	pids := strings.Fields(string(data))
	if len(pids) == 0 {
		return
	}

	// pkill -P accepts a comma-separated list of parent PIDs.
	if err := exec.Command("pkill", "-9", "-P", strings.Join(pids, ",")).Run(); err != nil {
		log.Println(err)
	}

	for _, field := range pids {
		pid, err := strconv.Atoi(field)
		if err != nil {
			log.Println(err)
			continue
		}
		proc, err := os.FindProcess(pid)
		if err != nil {
			log.Println(err)
			continue
		}
		// Process.Kill sends SIGKILL, the same as "kill -9".
		if err := proc.Kill(); err != nil {
			log.Println(err)
		}
	}
}
func GetChromeDriver() *AutoCloseDriver {
port := GetFreePort()
var err error
caps := selenium.Capabilities{"browserName": "chrome"}
@ -88,6 +134,7 @@ func GetChromeDriver(port int) selenium.WebDriver {
for _, epath := range []string{"../../../crx/myblock.crx", "../../crx/myblock.crx"} {
_, err := os.Stat(epath)
if err == nil {
err := chromecaps.AddExtension(epath)
if err != nil {
panic(err)
@ -115,28 +162,27 @@ func GetChromeDriver(port int) selenium.WebDriver {
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
caps.AddChrome(chromecaps)
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", port)
if err != nil {
panic(err)
}
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
if err != nil {
panic(err)
}
runtime.SetFinalizer(wd, func(obj interface{}) {
if err := obj.(selenium.WebDriver).Close(); err != nil {
log.Println(err)
}
if err := obj.(selenium.WebDriver).Quit(); err != nil {
log.Println(err)
}
})
adriver := &AutoCloseDriver{}
adriver.Port = port
adriver.Webdriver = wd
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
if err != nil {
panic(err)
}
return wd
return adriver
}
// PerfectShutdown 完美关闭程序
@ -164,6 +210,18 @@ func (ps *PerfectShutdown) IsClose() bool {
return atomic.LoadInt32(&ps.loop) == 0
}
// Wait pauses for up to tm while watching the shutdown flag.
//
// It returns false as soon as IsClose reports that shutdown was requested, and
// true once tm has elapsed without a shutdown. The flag is polled at most one
// second apart, and the final sleep is shortened to the time remaining, so
// waits shorter than a second (or not a whole number of seconds) no longer
// overshoot. The flag is always checked at least once.
func (ps *PerfectShutdown) Wait(tm time.Duration) bool {
	deadline := time.Now().Add(tm)
	for {
		if ps.IsClose() {
			return false
		}
		remaining := time.Until(deadline)
		if remaining <= 0 {
			return true
		}
		if remaining > time.Second {
			remaining = time.Second
		}
		time.Sleep(remaining)
	}
}
type Counter struct {
dcount int
count int
@ -256,3 +314,50 @@ func (c *Counter) Add(n int) error {
}
return nil
}
// WaitFor waits for elements selected by an XPath expression to appear in the
// wrapped WebDriver.
type WaitFor struct {
	WebDriver selenium.WebDriver
}

// NewWaitFor wraps wd in a WaitFor.
func NewWaitFor(wd selenium.WebDriver) *WaitFor {
	waiter := new(WaitFor)
	waiter.WebDriver = wd
	return waiter
}

// Default is WaitWithTimeout with a 15-second timeout.
func (wf *WaitFor) Default(xpath string, do func(elements ...selenium.WebElement) bool) error {
	const defaultTimeout = 15 * time.Second
	return wf.WaitWithTimeout(xpath, defaultTimeout, do)
}

// WaitWithTimeout polls the driver until xpath matches at least one element
// and do accepts the matches, or until timeout expires.
//
// A nil do accepts any non-empty match. A lookup error is logged and handed
// back to the driver's wait condition. The return value is whatever the
// driver's WaitWithTimeout returns.
func (wf *WaitFor) WaitWithTimeout(xpath string, timeout time.Duration, do func(elements ...selenium.WebElement) bool) error {
	condition := func(driver selenium.WebDriver) (bool, error) {
		found, lookupErr := driver.FindElements(selenium.ByXPATH, xpath)
		if lookupErr != nil {
			log.Println(lookupErr)
			return false, lookupErr
		}
		if len(found) == 0 {
			return false, nil
		}
		if do == nil {
			return true, nil
		}
		return do(found...), nil
	}
	return wf.WebDriver.WaitWithTimeout(condition, timeout)
}
// GetFreePort returns a TCP port number that the operating system reported as
// free on localhost. It binds to port 0, reads back the assigned port and
// closes the listener before returning. It panics if the address cannot be
// resolved or the listener cannot be opened.
func GetFreePort() int {
	tcpAddr, resolveErr := net.ResolveTCPAddr("tcp", "localhost:0")
	if resolveErr != nil {
		panic(resolveErr)
	}

	listener, listenErr := net.ListenTCP("tcp", tcpAddr)
	if listenErr != nil {
		panic(listenErr)
	}

	port := listener.Addr().(*net.TCPAddr).Port
	listener.Close()
	return port
}