Compare commits

..

No commits in common. "master" and "v0.5.2" have entirely different histories.

64 changed files with 1138 additions and 17537 deletions

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
bin
*.log
log
screenlog.*

View File

@ -1,578 +0,0 @@
package intimate
import (
"database/sql"
"encoding/binary"
"fmt"
"log"
"reflect"
"strconv"
"time"
)
// StoreExtractorDB is the package-wide connection to the extractor database.
var StoreExtractorDB *Store
// TStreamer is the package-wide streamer table. Per the original note it is
// initialized when the config init completes.
var TStreamer *Table
// TClog is the package-wide collect-log table.
var TClog *Table
// TStreamerList is the package-wide streamer-list table. Per the original note
// it stores URLs from which the list of streamers can be discovered, which
// makes dynamic updates easier.
var TStreamerList *Table
// Store wraps the database handle shared by every Table created from it.
//
// Structs persisted through a Store must carry struct tags: `field` names the
// database column, and `uid` marks the unique-id column (a `uid` tag must
// exist).
type Store struct {
db *sql.DB // underlying connection pool
}
// Table is a handle on one database table together with the SQL templates
// (fmt format strings) used to build statements against it.
type Table struct {
store *Store // owning store; supplies the *sql.DB
name string // table name, concatenated verbatim into the SQL templates
setting interface{} // NOTE(review): not referenced by the code visible here
updatesql string // UPDATE <name> SET %s WHERE %s = ?
selectsql string // SELECT %s FROM <name> WHERE %s
insertsql string // INSERT INTO <name>(%s) values(%s)
duplicatesql string // INSERT ... ON DUPLICATE KEY UPDATE %s
}
// NewStore opens a database handle for uri with the "mysql" driver and wraps
// it in a Store. It panics if sql.Open reports an error.
func NewStore(uri string) *Store {
	handle, openErr := sql.Open("mysql", uri)
	if openErr != nil {
		panic(openErr)
	}
	return &Store{db: handle}
}
// Table selects the table called name and prepares the SQL format templates
// (insert, insert-or-update, update, select) used by the Table methods.
// The name is concatenated into the SQL text as-is.
func (store *Store) Table(name string) *Table {
	return &Table{
		store:        store,
		name:         name,
		insertsql:    `INSERT INTO ` + name + `(%s) values(%s)`,
		duplicatesql: `INSERT INTO ` + name + `(%s) values(%s) ON DUPLICATE KEY UPDATE %s`,
		updatesql:    `UPDATE ` + name + ` SET %s WHERE %s = ?`,
		selectsql:    `SELECT %s FROM ` + name + ` WHERE %s `,
	}
}
// Queue is a MySQL-backed work queue over a Table.
type Queue struct {
table *Table // table the queue reads from
obj reflect.Type // struct type instantiated for every popped row
fieldIndex []int // struct-field indexes of the `field`-tagged members, in SELECT order
selected string // comma-separated column list used in the SELECT
cond CondWhere // WHERE condition and its arguments
uidname string // column name of the `uid`-tagged field
uididx int // struct-field index (not column position) of the `uid`-tagged field
}
// CondWhere is a WHERE clause fragment plus the arguments bound to its
// `?` placeholders.
type CondWhere struct {
Condition string // SQL text placed after WHERE
CondArgs []interface{} // values for the placeholders in Condition, in order
}
// OperatorType is the type of the values stored in the `operator` flag column.
type OperatorType string
const (
// OpOK means normal.
OpOK OperatorType = "0"
// OpWAIT means waiting to be processed.
OpWAIT OperatorType = "1000"
// OpERROR means error handling.
OpERROR OperatorType = "10000"
)
// ConditionDefault returns the default queue condition for platform: rows of
// that platform whose operator is 0 and whose update_time is at least
// update_interval minutes in the past.
func ConditionDefault(platform Platform) CondWhere {
	var cond CondWhere
	cond.Condition = "platform = ? and operator = 0 and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval"
	cond.CondArgs = []interface{}{string(platform)}
	return cond
}
// Queue builds a work queue over table t. Each Pop yields one task.
//
// obj describes the row struct: it may be a struct value or a pointer to one
// (nested structs are not supported). Every member carrying a `field` tag is
// selected; the member that also carries a `uid` tag identifies the row.
// whereCondition is the caller-supplied WHERE clause used to pick tasks.
//
// Queue panics when obj is not a struct (or pointer to struct) or when it has
// no `field`-tagged member, because no SELECT can be built for it.
func (t *Table) Queue(obj interface{}, whereCondition CondWhere) *Queue {
	typ := reflect.TypeOf(obj)
	// Accept *T as well as T so callers can pass whatever they have at hand.
	for typ != nil && typ.Kind() == reflect.Ptr {
		typ = typ.Elem()
	}
	if typ == nil || typ.Kind() != reflect.Struct {
		panic(fmt.Errorf("queue on table %s: obj must be a struct or a pointer to a struct, got %T", t.name, obj))
	}

	q := &Queue{
		table:      t,
		cond:       whereCondition,
		obj:        typ,
		fieldIndex: []int{}, // struct-field index for every selected column
	}
	columns := ""
	for i := 0; i < typ.NumField(); i++ {
		field := typ.Field(i)
		fname, ok := field.Tag.Lookup("field")
		if !ok {
			continue
		}
		columns += fname + ","
		if _, ok := field.Tag.Lookup("uid"); ok {
			q.uididx = i
			q.uidname = fname
		}
		q.fieldIndex = append(q.fieldIndex, i)
	}
	if len(q.fieldIndex) == 0 {
		panic(fmt.Errorf("queue on table %s: %s has no `field` tagged members", t.name, typ))
	}
	q.selected = columns[:len(columns)-1] // drop the trailing comma
	return q
}
// Pop takes one row (task) off the queue. Nested structs are not supported.
//
// Inside a single transaction it selects one row matching the queue condition
// with FOR UPDATE, marks it as OpWAIT, and returns a pointer to a freshly
// populated struct of the queue's type.
//
// Errors:
//   - sql.ErrNoRows when no row currently matches the condition;
//   - the query, scan, update or commit error otherwise.
//
// On any error the transaction is rolled back and result is nil.
func (queue *Queue) Pop() (result interface{}, err error) {
	if queue.uidname == "" {
		return nil, fmt.Errorf("table: %s queue has no `uid` tagged field", queue.table.name)
	}
	// uididx is an index into the struct's fields, while the scanned values
	// are ordered like the selected columns; translate one into the other.
	uidpos := -1
	for pos, idx := range queue.fieldIndex {
		if idx == queue.uididx {
			uidpos = pos
			break
		}
	}
	if uidpos < 0 {
		return nil, fmt.Errorf("table: %s queue uid column %s is not selected", queue.table.name, queue.uidname)
	}

	tx, err := queue.table.store.db.Begin()
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			if rerr := tx.Rollback(); rerr != nil {
				log.Println(rerr)
			}
			return
		}
		if cerr := tx.Commit(); cerr != nil {
			// The row was not marked as taken, so do not hand it out.
			result, err = nil, cerr
		}
	}()

	selectsql := `SELECT ` + queue.selected + ` FROM ` + queue.table.name + ` WHERE ` + queue.cond.Condition + " limit 1 for update"
	rows, err := tx.Query(selectsql, queue.cond.CondArgs...)
	if err != nil {
		return nil, fmt.Errorf("table: %s queue select failed: %w", queue.table.name, err)
	}

	// Column types must be read while the rows are still open.
	columntypes, err := rows.ColumnTypes()
	if err != nil {
		rows.Close()
		return nil, err
	}

	values := make([]interface{}, len(queue.fieldIndex))
	fields := make([]interface{}, len(values))
	for i := range values {
		fields[i] = &values[i]
	}

	if !rows.Next() {
		nexterr := rows.Err()
		rows.Close()
		if nexterr != nil {
			return nil, nexterr
		}
		// A stable sentinel lets callers compare consecutive errors.
		return nil, sql.ErrNoRows
	}
	if err = rows.Scan(fields...); err != nil {
		rows.Close()
		return nil, err
	}
	// Close before issuing the UPDATE on the same transaction.
	if err = rows.Close(); err != nil {
		return nil, err
	}

	_, err = tx.Exec("UPDATE "+queue.table.name+" SET operator = "+string(OpWAIT)+" WHERE "+queue.uidname+" = ?", values[uidpos])
	if err != nil {
		return nil, err
	}

	obj := reflect.New(queue.obj).Elem()
	for i, idx := range queue.fieldIndex {
		// Best effort, as before: convert reports NULL columns as an error,
		// and those must leave the member at its zero value instead of
		// failing the whole Pop.
		_ = convert(values[i], obj.Field(idx), columntypes[i])
	}
	return obj.Addr().Interface(), nil
}
// Insert writes obj as a new row of the table.
//
// obj must be a non-nil pointer to a struct. Every member with a `field` tag
// becomes a column, except a member tagged `uid:"auto"` (left to the
// database). Nil pointer/interface members are not inserted. Nested structs
// are not supported.
//
// It returns an error when obj has the wrong shape, when there is nothing to
// insert, or when the database rejects the statement.
func (t *Table) Insert(obj interface{}) error {
	ov := reflect.ValueOf(obj)
	if ov.Kind() != reflect.Ptr || ov.IsNil() || ov.Elem().Kind() != reflect.Struct {
		return fmt.Errorf("insert into %s: obj must be a non-nil pointer to a struct, got %T", t.name, obj)
	}
	ov = ov.Elem()
	ot := ov.Type()

	fieldsql, argssql := "", ""
	var args []interface{}
	for i := 0; i < ov.NumField(); i++ {
		ftype := ot.Field(i)
		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}
		if flag, ok := ftype.Tag.Lookup("uid"); ok && flag == "auto" {
			continue
		}
		field := ov.Field(i)
		if k := ftype.Type.Kind(); k == reflect.Ptr || k == reflect.Interface {
			if field.IsNil() {
				continue // nil means "do not insert this column"
			}
			field = field.Elem()
		}
		args = append(args, field.Interface())
		fieldsql += fname + ","
		argssql += "?,"
	}
	if len(args) == 0 {
		// Slicing off the trailing comma below would panic on empty strings.
		return fmt.Errorf("insert into %s: %T has no values to insert", t.name, obj)
	}

	ssql := fmt.Sprintf(t.insertsql, fieldsql[:len(fieldsql)-1], argssql[:len(argssql)-1])
	_, err := t.store.db.Exec(ssql, args...)
	return err
}
// DUpdate describes one assignment of an ON DUPLICATE KEY UPDATE clause.
// Field is the column name as written in the struct's `field` tag.
type DUpdate struct {
Field string // column to update (matches a `field` tag)
Value interface{} // value to set; when nil, InsertOrUpdate fills it from the struct being inserted
}
// InsertOrUpdate inserts obj and, when the row already exists, applies
// updates through ON DUPLICATE KEY UPDATE.
//
// obj must be a non-nil pointer to a struct; every `field`-tagged member is
// inserted, except nil pointer/interface members. Nested structs are not
// supported.
//
// Each DUpdate names a column to set on duplicate. A DUpdate with a nil Value
// takes the value being inserted for the column of the same name; if that
// struct member is nil (not inserted) or no member matches, the column is set
// to NULL. DUpdates with an explicit Value are applied after those.
//
// It returns an error when obj has the wrong shape, when there is nothing to
// insert, when no update is given, or when the database rejects the statement.
func (t *Table) InsertOrUpdate(obj interface{}, updates ...DUpdate) error {
	ov := reflect.ValueOf(obj)
	if ov.Kind() != reflect.Ptr || ov.IsNil() || ov.Elem().Kind() != reflect.Struct {
		return fmt.Errorf("insert or update %s: obj must be a non-nil pointer to a struct, got %T", t.name, obj)
	}
	if len(updates) == 0 {
		return fmt.Errorf("insert or update %s: at least one DUpdate is required", t.name)
	}
	ov = ov.Elem()
	ot := ov.Type()

	// Copy the updates by value: taking the address of the range variable
	// would alias every entry to the same variable before Go 1.22.
	var sourceUpdates, otherUpdates []DUpdate
	for _, u := range updates {
		if u.Value == nil {
			sourceUpdates = append(sourceUpdates, u)
		} else {
			otherUpdates = append(otherUpdates, u)
		}
	}

	fieldsql, argssql := "", ""
	var args []interface{}
	for i := 0; i < ov.NumField(); i++ {
		ftype := ot.Field(i)
		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}
		field := ov.Field(i)
		if k := ftype.Type.Kind(); k == reflect.Ptr || k == reflect.Interface {
			if field.IsNil() {
				continue // nil means "do not insert this column"
			}
			field = field.Elem()
		}
		value := field.Interface()
		args = append(args, value)
		fieldsql += fname + ","
		argssql += "?,"

		// Only a column that was actually inserted may feed a source update.
		for j := range sourceUpdates {
			if sourceUpdates[j].Field == fname {
				sourceUpdates[j].Value = value
				break
			}
		}
	}
	if len(args) == 0 {
		return fmt.Errorf("insert or update %s: %T has no values to insert", t.name, obj)
	}

	duplicateSet := ""
	for _, u := range sourceUpdates {
		duplicateSet += u.Field + " = ?,"
		args = append(args, u.Value)
	}
	for _, u := range otherUpdates {
		duplicateSet += u.Field + " = ?,"
		args = append(args, u.Value)
	}

	ssql := fmt.Sprintf(t.duplicatesql, fieldsql[:len(fieldsql)-1], argssql[:len(argssql)-1], duplicateSet[:len(duplicateSet)-1])
	_, err := t.store.db.Exec(ssql, args...)
	return err
}
// InsertRetAutoID writes obj as a new row and returns the auto-generated id
// reported by the database (sql.Result.LastInsertId).
//
// obj must be a non-nil pointer to a struct. Every member with a `field` tag
// becomes a column, except a member tagged `uid:"auto"`. Nil
// pointer/interface members are not inserted. Nested structs are not
// supported.
//
// It returns an error when obj has the wrong shape, when there is nothing to
// insert, or when the database rejects the statement.
func (t *Table) InsertRetAutoID(obj interface{}) (int64, error) {
	ov := reflect.ValueOf(obj)
	if ov.Kind() != reflect.Ptr || ov.IsNil() || ov.Elem().Kind() != reflect.Struct {
		return 0, fmt.Errorf("insert into %s: obj must be a non-nil pointer to a struct, got %T", t.name, obj)
	}
	ov = ov.Elem()
	ot := ov.Type()

	fieldsql, argssql := "", ""
	var args []interface{}
	for i := 0; i < ov.NumField(); i++ {
		ftype := ot.Field(i)
		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}
		if flag, ok := ftype.Tag.Lookup("uid"); ok && flag == "auto" {
			continue
		}
		field := ov.Field(i)
		if k := ftype.Type.Kind(); k == reflect.Ptr || k == reflect.Interface {
			if field.IsNil() {
				continue // nil means "do not insert this column"
			}
			field = field.Elem()
		}
		args = append(args, field.Interface())
		fieldsql += fname + ","
		argssql += "?,"
	}
	if len(args) == 0 {
		// Slicing off the trailing comma below would panic on empty strings.
		return 0, fmt.Errorf("insert into %s: %T has no values to insert", t.name, obj)
	}

	ssql := fmt.Sprintf(t.insertsql, fieldsql[:len(fieldsql)-1], argssql[:len(argssql)-1])
	result, err := t.store.db.Exec(ssql, args...)
	if err != nil {
		return 0, err
	}
	return result.LastInsertId()
}
// Update writes the `field`-tagged members of obj (a pointer to a struct)
// back to the row identified by its `uid`-tagged member. Nil
// pointer/interface members are left out of the SET list.
//
// It panics when more than one member carries a `uid` tag or when none does,
// and returns the error reported by the database otherwise.
func (t *Table) Update(obj interface{}) error {
	value := reflect.ValueOf(obj).Elem()
	typ := reflect.TypeOf(obj).Elem()

	var (
		setClause string
		keyColumn string
		keyValue  interface{}
		params    []interface{}
	)
	for i, n := 0, value.NumField(); i < n; i++ {
		meta := typ.Field(i)
		column, tagged := meta.Tag.Lookup("field")
		if !tagged {
			continue
		}
		current := value.Field(i)
		if _, isUID := meta.Tag.Lookup("uid"); isUID {
			if keyValue != nil {
				panic(fmt.Errorf("uid must unique, %s and %s", keyColumn, column))
			}
			keyColumn, keyValue = column, current.Interface()
			continue
		}
		switch meta.Type.Kind() {
		case reflect.Ptr, reflect.Interface:
			if current.IsNil() {
				continue
			}
			params = append(params, current.Elem().Interface())
		default:
			params = append(params, current.Interface())
		}
		setClause += column + " = ?,"
	}
	if keyValue == nil {
		panic(fmt.Errorf("update must contain `uid` tag"))
	}

	statement := fmt.Sprintf(t.updatesql, setClause[:len(setClause)-1], keyColumn)
	params = append(params, keyValue)
	_, err := t.store.db.Exec(statement, params...)
	return err
}
// UpdateError records a failure on the row identified by obj's first
// `uid`-tagged member: it sets operator to 10000 and error_msg to err's text.
// obj must be a pointer to a struct. It panics when the database update
// itself fails.
func (t *Table) UpdateError(obj interface{}, err error) {
	value := reflect.ValueOf(obj).Elem()
	typ := reflect.TypeOf(obj).Elem()

	var keyColumn string
	var keyValue interface{}
	for i, n := 0, value.NumField(); i < n; i++ {
		meta := typ.Field(i)
		column, tagged := meta.Tag.Lookup("field")
		if !tagged {
			continue
		}
		if _, isUID := meta.Tag.Lookup("uid"); !isUID {
			continue
		}
		// The first uid member wins; the scan stops here.
		keyColumn, keyValue = column, value.Field(i).Interface()
		break
	}

	statement := "update " + t.name + " set operator = ?, error_msg = ? where " + keyColumn + " = ?"
	message := sql.NullString{String: err.Error(), Valid: true}
	if _, dberr := t.store.db.Exec(statement, 10000, message, keyValue); dberr != nil {
		// email tell owner to deal with
		panic(dberr)
	}
}
// assign stores the driver value src into field according to field's kind.
//
// Integer, unsigned and float kinds are parsed from the string form of src,
// using the field's own bit size so that overflow is reported. String kinds
// receive the string form of src.
//
// It returns (true, nil) when field is an interface and the caller has to
// pick the concrete representation itself, (false, nil) when the value was
// stored or the kind is not handled here, and (false, err) when src cannot be
// parsed into the field.
func assign(field reflect.Value, src interface{}) (bool, error) {
	switch field.Kind() {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		s := asString(src)
		i64, err := strconv.ParseInt(s, 10, field.Type().Bits())
		if err != nil {
			err = strconvErr(err)
			return false, fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
		}
		field.SetInt(i64)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		s := asString(src)
		u64, err := strconv.ParseUint(s, 10, field.Type().Bits())
		if err != nil {
			err = strconvErr(err)
			return false, fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
		}
		field.SetUint(u64)
	case reflect.Float32, reflect.Float64:
		s := asString(src)
		f64, err := strconv.ParseFloat(s, field.Type().Bits())
		if err != nil {
			err = strconvErr(err)
			return false, fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
		}
		field.SetFloat(f64)
	case reflect.String:
		// Drivers may hand back string, []byte or a numeric type; a bare
		// []byte assertion would panic on anything but []byte.
		field.SetString(asString(src))
	case reflect.Interface:
		return true, nil
	}
	return false, nil
}
// convert stores the scanned driver value src into the struct member field,
// choosing the conversion from the column's database type name.
//
// A pointer field is always given a freshly allocated element first, and the
// conversion then targets that element; this happens even for NULL, so
// pointer members are never left nil. A NULL (nil src) is then reported as an
// error and the element keeps its zero value.
//
// Interface fields receive a natural Go value for the column type: int64,
// float64, []byte, string, uint64 for BIT, or time.Time. Fields that
// implement sql.Scanner are additionally given src through Scan.
//
// Unexpected driver value types are reported as errors rather than panics.
func convert(src interface{}, field reflect.Value, columntype *sql.ColumnType) error {
	if field.Kind() == reflect.Ptr {
		field.Set(reflect.New(field.Type().Elem()))
		field = field.Elem()
	}

	if src == nil {
		return fmt.Errorf("converting NULL to %s is unsupported", field.Kind())
	}

	switch columntype.DatabaseTypeName() {
	case "TINYINT", "SMALLINT", "MEDIUMINT", "INT", "BIGINT":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			s := asString(src)
			i64, err := strconv.ParseInt(s, 10, 64)
			if err != nil {
				err = strconvErr(err)
				return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
			}
			field.Set(reflect.ValueOf(i64))
		}
	case "FLOAT", "DOUBLE", "DECIMAL":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			s := asString(src)
			f64, err := strconv.ParseFloat(s, 64)
			if err != nil {
				err = strconvErr(err)
				return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, field.Kind(), err)
			}
			field.Set(reflect.ValueOf(f64))
		}
	case "BINARY", "VARBINARY", "TINYBLOB", "BLOB", "MEDIUMBLOB", "LONGBLOB", "JSON":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			raw, ok := src.([]byte)
			if !ok {
				return fmt.Errorf("converting driver.Value type %T to []byte is unsupported", src)
			}
			field.Set(reflect.ValueOf(raw))
		}
	case "CHAR", "VARCHAR", "TINYTEXT", "TEXT", "MEDIUMTEXT", "LONGTEXT":
		isdefault, err := assign(field, src)
		if err != nil {
			return err
		}
		if isdefault {
			field.Set(reflect.ValueOf(asString(src)))
		}
	case "BIT":
		raw, ok := src.([]byte)
		if !ok {
			return fmt.Errorf("converting driver.Value type %T to a BIT value is unsupported", src)
		}
		if len(raw) > 8 {
			return fmt.Errorf("converting a %d byte BIT value to a %s is unsupported", len(raw), field.Kind())
		}
		// BIT arrives as a big-endian byte string, most significant byte
		// first; right-align it in 8 bytes before decoding.
		var bits [8]byte
		copy(bits[8-len(raw):], raw)
		u64 := binary.BigEndian.Uint64(bits[:])
		switch field.Kind() {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			field.SetInt(int64(u64))
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			field.SetUint(u64)
		case reflect.Interface:
			field.Set(reflect.ValueOf(u64))
		}
	case "YEAR", "TIME", "DATE", "DATETIME", "TIMESTAMP":
		if ts, ok := src.(time.Time); ok {
			if field.Kind() == reflect.Interface {
				field.Set(reflect.ValueOf(ts))
			} else {
				switch field.Interface().(type) {
				case time.Time:
					field.Set(reflect.ValueOf(ts))
				case string:
					field.SetString(ts.Format(time.RFC3339Nano))
				case []byte:
					field.SetBytes([]byte(ts.Format(time.RFC3339Nano)))
				}
			}
		} else {
			// Not every temporal column is delivered as time.Time (the
			// driver may return text or an integer); store those through
			// the generic path.
			isdefault, err := assign(field, src)
			if err != nil {
				return err
			}
			if isdefault {
				field.Set(reflect.ValueOf(src))
			}
		}
	}

	// Types such as sql.NullString or sql.NullTime fill themselves in.
	if iscan, ok := field.Addr().Interface().(sql.Scanner); ok {
		if err := iscan.Scan(src); err != nil {
			return err
		}
	}

	return nil
}

View File

@ -1,57 +0,0 @@
package intimate
import (
"database/sql"
"encoding/json"
"testing"
"time"
)
// TestAutoStore upserts one TSreamer row into the "streamer" table of the
// database at the hard-coded local URI, refreshing userid on duplicates.
func TestAutoStore(t *testing.T) {
	const uri = "root:@tcp(127.0.0.1:4000)/test?parseTime=true&loc=Local&charset=utf8mb4&collation=utf8mb4_unicode_ci"
	store := NewStore(uri)

	// queue := store.Table("streamer").Queue(TSreamer{}, CondWhere{Condition: "operator = 0"})
	// re, _ := queue.Pop()
	// pstreamer := re.(*TSreamer)
	// m := make(map[string]interface{})
	// json.Unmarshal(pstreamer.Iface.([]byte), &m)
	// spew.Println(re.(*TSreamer), m)

	streamer := &TSreamer{
		Uid:      1,
		UserID:   &sql.NullString{String: "xixi", Valid: true},
		Name:     "streamer",
		Operator: 0,
		Bit:      0b11,
		// Ext: &sql.NullString{String: "ext", Valid: true},
	}

	btag, err := json.Marshal(map[string]interface{}{
		"json": true,
		"name": "test",
	})
	if err != nil {
		t.Error(err)
	}
	streamer.Iface = btag

	now := time.Now()
	streamer.UpdateTime = &now

	if err = store.Table("streamer").InsertOrUpdate(streamer, DUpdate{Field: "userid"}); err != nil {
		t.Error(err)
	}
}
// TSreamer is the row struct used by TestAutoStore. Each `field` tag names
// the column the member maps to; Uid carries the `uid:"auto"` tag.
type TSreamer struct {
Uid int `field:"uid" uid:"auto"`
Name interface{} `field:"name"`
UserID *sql.NullString `field:"userid"`
Ext *sql.NullString `field:"ext"`
Iface interface{} `field:"tag"`
Bit uint64 `field:"bit"`
Operator int `field:"operator"`
UpdateTime *time.Time `field:"update_time"`
}

View File

@ -7,17 +7,13 @@ src=`pwd`
for path in `ls -d $source_tasks`
do
echo $path
projectname=${path##*/}
projectworkspace=$src/bin/$projectname
cd $path && mkdir $projectworkspace -p && go build -o $projectworkspace/$projectname
cd $path && go build
cd $src
done
for path in `ls -d $extractor_tasks`
do
echo $path
projectname=${path##*/}
projectworkspace=$src/bin/$projectname
cd $path && mkdir $projectworkspace -p && go build -o $projectworkspace/$projectname
cd $path && go build
cd $src
done
done

View File

@ -18,18 +18,6 @@ func init() {
// storeOpenrec = NewStore()
log.SetFlags(log.Llongfile | log.Ltime)
// StoreExtractorDB 全局的Extractor DB 库链接
StoreExtractorDB = NewStore(InitConfig.Database.ExtractorURI)
// TStreamer 全局的Streamer
TStreamer = StoreExtractorDB.Table("streamer")
// TClog 全局的Clog
TClog = StoreExtractorDB.Table("collect_log")
// TStreamerList 全局的streamer list 这个表存的url. 进去可以找到主播的列表. 便于动态更新
TStreamerList = StoreExtractorDB.Table("streamer_list")
}
// Config 配置

View File

@ -1,376 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Type conversions for Scan.
package intimate
import (
"database/sql/driver"
"errors"
"fmt"
"reflect"
"strconv"
)
// errNilPtr reports that a scan destination pointer is nil.
var errNilPtr = errors.New("destination pointer is nil") // embedded in descriptive error
// convertAssignRows copies to dest the value in src, converting it if possible.
// An error is returned if the copy would result in loss of information.
// dest should be a pointer type. If rows is passed in, the rows will
// be used as the parent for any cursor values converted from a
// driver.Rows to a *Rows.
// func convertAssignRows(dest, src interface{}, rows *sql.Rows) error {
// // Common cases, without reflect.
// switch s := src.(type) {
// case string:
// switch d := dest.(type) {
// case *string:
// if d == nil {
// return errNilPtr
// }
// *d = s
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = []byte(s)
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = append((*d)[:0], s...)
// return nil
// }
// case []byte:
// switch d := dest.(type) {
// case *string:
// if d == nil {
// return errNilPtr
// }
// *d = string(s)
// return nil
// case *interface{}:
// if d == nil {
// return errNilPtr
// }
// *d = cloneBytes(s)
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = cloneBytes(s)
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = s
// return nil
// }
// case time.Time:
// switch d := dest.(type) {
// case *time.Time:
// *d = s
// return nil
// case *string:
// *d = s.Format(time.RFC3339Nano)
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = []byte(s.Format(time.RFC3339Nano))
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = s.AppendFormat((*d)[:0], time.RFC3339Nano)
// return nil
// }
// case decimalDecompose:
// switch d := dest.(type) {
// case decimalCompose:
// return d.Compose(s.Decompose(nil))
// }
// case nil:
// switch d := dest.(type) {
// case *interface{}:
// if d == nil {
// return errNilPtr
// }
// *d = nil
// return nil
// case *[]byte:
// if d == nil {
// return errNilPtr
// }
// *d = nil
// return nil
// case *sql.RawBytes:
// if d == nil {
// return errNilPtr
// }
// *d = nil
// return nil
// }
// // The driver is returning a cursor the client may iterate over.
// }
// var sv reflect.Value
// switch d := dest.(type) {
// case *string:
// sv = reflect.ValueOf(src)
// switch sv.Kind() {
// case reflect.Bool,
// reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
// reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
// reflect.Float32, reflect.Float64:
// *d = asString(src)
// return nil
// }
// case *[]byte:
// sv = reflect.ValueOf(src)
// if b, ok := asBytes(nil, sv); ok {
// *d = b
// return nil
// }
// case *sql.RawBytes:
// sv = reflect.ValueOf(src)
// if b, ok := asBytes([]byte(*d)[:0], sv); ok {
// *d = sql.RawBytes(b)
// return nil
// }
// case *bool:
// bv, err := driver.Bool.ConvertValue(src)
// if err == nil {
// *d = bv.(bool)
// }
// return err
// case *interface{}:
// *d = src
// return nil
// }
// if scanner, ok := dest.(sql.Scanner); ok {
// return scanner.Scan(src)
// }
// dpv := reflect.ValueOf(dest)
// if dpv.Kind() != reflect.Ptr {
// return errors.New("destination not a pointer")
// }
// if dpv.IsNil() {
// return errNilPtr
// }
// if !sv.IsValid() {
// sv = reflect.ValueOf(src)
// }
// dv := reflect.Indirect(dpv)
// if sv.IsValid() && sv.Type().AssignableTo(dv.Type()) {
// switch b := src.(type) {
// case []byte:
// dv.Set(reflect.ValueOf(cloneBytes(b)))
// default:
// dv.Set(sv)
// }
// return nil
// }
// if dv.Kind() == sv.Kind() && sv.Type().ConvertibleTo(dv.Type()) {
// dv.Set(sv.Convert(dv.Type()))
// return nil
// }
// // The following conversions use a string value as an intermediate representation
// // to convert between various numeric types.
// //
// // This also allows scanning into user defined types such as "type Int int64".
// // For symmetry, also check for string destination types.
// switch dv.Kind() {
// case reflect.Ptr:
// if src == nil {
// dv.Set(reflect.Zero(dv.Type()))
// return nil
// }
// dv.Set(reflect.New(dv.Type().Elem()))
// return convertAssignRows(dv.Interface(), src, rows)
// case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// s := asString(src)
// i64, err := strconv.ParseInt(s, 10, dv.Type().Bits())
// if err != nil {
// err = strconvErr(err)
// return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, dv.Kind(), err)
// }
// dv.SetInt(i64)
// return nil
// case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// s := asString(src)
// u64, err := strconv.ParseUint(s, 10, dv.Type().Bits())
// if err != nil {
// err = strconvErr(err)
// return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, dv.Kind(), err)
// }
// dv.SetUint(u64)
// return nil
// case reflect.Float32, reflect.Float64:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// s := asString(src)
// f64, err := strconv.ParseFloat(s, dv.Type().Bits())
// if err != nil {
// err = strconvErr(err)
// return fmt.Errorf("converting driver.Value type %T (%q) to a %s: %v", src, s, dv.Kind(), err)
// }
// dv.SetFloat(f64)
// return nil
// case reflect.String:
// if src == nil {
// return fmt.Errorf("converting NULL to %s is unsupported", dv.Kind())
// }
// switch v := src.(type) {
// case string:
// dv.SetString(v)
// return nil
// case []byte:
// dv.SetString(string(v))
// return nil
// }
// }
// return fmt.Errorf("unsupported Scan, storing driver.Value type %T into type %T", src, dest)
// }
// strconvErr unwraps a *strconv.NumError to its underlying cause so that
// messages do not repeat the function name and input; any other error is
// returned unchanged.
func strconvErr(err error) error {
	numErr, isNumErr := err.(*strconv.NumError)
	if !isNumErr {
		return err
	}
	return numErr.Err
}
// cloneBytes returns an independent copy of b. A nil slice stays nil; an
// empty non-nil slice yields an empty non-nil slice.
func cloneBytes(b []byte) []byte {
	if b == nil {
		return nil
	}
	return append(make([]byte, 0, len(b)), b...)
}
// asString renders a driver value as text. Strings and byte slices are used
// verbatim; integers, unsigned integers, floats and booleans (including named
// types of those kinds) are formatted with strconv; anything else falls back
// to fmt's %v.
func asString(src interface{}) string {
	if text, ok := src.(string); ok {
		return text
	}
	if raw, ok := src.([]byte); ok {
		return string(raw)
	}

	value := reflect.ValueOf(src)
	switch value.Kind() {
	case reflect.Bool:
		return strconv.FormatBool(value.Bool())
	case reflect.Float32, reflect.Float64:
		// Bits() is 32 or 64, matching the precision of the source type.
		return strconv.FormatFloat(value.Float(), 'g', -1, value.Type().Bits())
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return strconv.FormatUint(value.Uint(), 10)
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return strconv.FormatInt(value.Int(), 10)
	default:
		return fmt.Sprintf("%v", src)
	}
}
// asBytes appends the textual form of rv to buf and reports true when rv is
// an integer, unsigned integer, float, bool or string kind. For any other
// kind it returns (nil, false).
func asBytes(buf []byte, rv reflect.Value) (b []byte, ok bool) {
	switch kind := rv.Kind(); kind {
	case reflect.String:
		return append(buf, rv.String()...), true
	case reflect.Bool:
		return strconv.AppendBool(buf, rv.Bool()), true
	case reflect.Float32, reflect.Float64:
		// Bits() is 32 or 64, matching the precision of the source type.
		return strconv.AppendFloat(buf, rv.Float(), 'g', -1, rv.Type().Bits()), true
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return strconv.AppendUint(buf, rv.Uint(), 10), true
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return strconv.AppendInt(buf, rv.Int(), 10), true
	default:
		return nil, false
	}
}
// valuerReflectType is the reflect.Type of the driver.Valuer interface.
var valuerReflectType = reflect.TypeOf((*driver.Valuer)(nil)).Elem()

// callValuerValue returns vr.Value(), with one exception: when vr is a nil
// pointer whose element type implements driver.Valuer with a value receiver,
// calling the auto-generated pointer method would panic at runtime, so the
// nil pointer is treated as NULL and (nil, nil) is returned instead.
// See Go issue 8415.
//
// This lets callers implement Value on value types and still use nil
// pointers to those types to mean NULL, just like string/*string.
//
// This function is mirrored in the database/sql/driver package.
func callValuerValue(vr driver.Valuer) (v driver.Value, err error) {
	rv := reflect.ValueOf(vr)
	if rv.Kind() != reflect.Ptr || !rv.IsNil() {
		return vr.Value()
	}
	if rv.Type().Elem().Implements(valuerReflectType) {
		return nil, nil
	}
	return vr.Value()
}
// decimal composes or decomposes a decimal value to and from individual parts.
// There are four parts: a boolean negative flag, a form byte with three possible states
// (finite=0, infinite=1, NaN=2), a base-2 big-endian integer
// coefficient (also known as a significand) as a []byte, and an int32 exponent.
// These are composed into a final value as "decimal = (neg) (form=finite) coefficient * 10 ^ exponent".
// A zero length coefficient is a zero value.
// The big-endian integer coefficient stores the most significant byte first (at coefficient[0]).
// If the form is not finite the coefficient and exponent should be ignored.
// The negative parameter may be set to true for any form, although implementations are not required
// to respect the negative parameter in the non-finite form.
//
// Implementations may choose to set the negative parameter to true on a zero or NaN value,
// but implementations that do not differentiate between negative and positive
// zero or NaN values should ignore the negative parameter without error.
// If an implementation does not support Infinity it may be converted into a NaN without error.
// If a value is set that is larger than what is supported by an implementation,
// an error must be returned.
// Implementations must return an error if a NaN or Infinity is attempted to be set while neither
// are supported.
//
// NOTE(kardianos): This is an experimental interface. See https://golang.org/issue/30870
type decimal interface {
decimalDecompose
decimalCompose
}
// decimalDecompose is the read half of decimal: it exposes a decimal value
// as its individual parts.
type decimalDecompose interface {
// Decompose returns the internal decimal state in parts.
// If the provided buf has sufficient capacity, buf may be returned as the coefficient with
// the value set and length set as appropriate.
Decompose(buf []byte) (form byte, negative bool, coefficient []byte, exponent int32)
}
// decimalCompose is the write half of decimal: it sets a decimal value from
// its individual parts.
type decimalCompose interface {
// Compose sets the internal decimal value from parts. If the value cannot be
// represented then an error should be returned.
Compose(form byte, negative bool, coefficient []byte, exponent int32) error
}

Binary file not shown.

View File

@ -1,28 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDA6O6owySvzaJ8
iv6aXRMcuk0lnNKquPNVHeraadSkBVhtaJqPAT7saOVnn9g4s/21OBV6wGmWdl5W
5+8h+F4qXrlGO8N1Tl8mmBDetqg6T6RrI9vB2Th9hjlp2PCGCEg/lX+tx25qBiDG
9uCB3pU4KMyfof1Z2jbo7aOfttgxZ1YdssenB//+31QjeZVw7wZACKaSzl6Z+WvY
pastMZ95YgTQvUq1AP93ny8AG+/bKUlE2vZpDygUJAXT8R7PGVD6T0/hUjTZk9HM
hrLShNjsBMxWWsrc7rdfW+R44pSuw5kHtIBrjRdrr8m1W0BCGTlGjq7Sk0OOZbsO
sh7x/1kvAgMBAAECggEAQYFXOTs6g/AB0uSn2+brDIA+nh0IdCxlhGuqSgxRxniC
NHSmtxSnN0PPUka291o3Noga0xnvQMZk9oG/c/uQjjaoFy9ICddY/BP69oVzzoko
vbDpeIO4aPdhG44s7nZ1ghHTz0C/P4VhL+65AaCBbxZ7lWE+VHqC1smLHDrMYsmr
PfOxk5LZwdNE/YmZuOj1zX3nQGX+wtK0EMslRbyYjg3EwXUa9d5WNnhyFxt5MD9m
w9G1rJh5uDUIS60xHUB7ET4BKJMLj+pfy8Ahf3SDFfIzinjuvKg+z1St0nTSNqI7
QP5KFNvfxqcrtYwf0wU6MGmjJ98FFiDoPY2QQfIlwQKBgQDj3eWDHA02Kr9DAde9
CuR0y4LixFhVDp7mYBnYxod3GMkPCGhSfuRcA/Tu7JgpACK/zsLi9gbzPkUAS2Y/
ly8lBFyCYlY96mRF0FgzOUCwP5Xjc9roDgLi1RVM3+Tnv1FcIa0CkoXPuWtmoVYS
W0S/Y9JKOrV2CEJvirvM3hysHwKBgQDYuipzsV+i3qx4STKXa7F0F8jGp+jr4Pgc
GqnMo5ysFePFFd4QZ1UHJ8HNmRlXsD+hAawzljVL8yCns8dZOE99xvSjgFzA/p/N
QToOgCRy6YZ5kZQA1ldZOhoVS5xTJRf7eavLwnV/8VGWxFNXdlvzdZecC8YvwEbh
+gONR9Sw8QKBgQDiBwnSs9CnGl/+J6XIbzWOZRC/v+Oe+5HDEzCQRt241W0a7EwP
CK+cVpBvL2D/ypKtkHrghwr93Za0ZBZYna62GU1qSAEkCYzNc40JwpDYQzp7FEDg
ToWuW491YeG9v5kEuqLTGmSBWHugogiuzLm031+JIhM9sZO08Pi4jtoCIQKBgEKC
XyTB+idxTlxbmjr9wadu5R6RyNkaYQm+5Pz3+MyWqx+AZnWCQIXBnagKo8xVOjGe
4cmy/wgfxJWyhirLLRncRJcofU581W2X5sksagMZxzrnghP/sy6etgJzAbZCW+nA
nQga1RQ+altqRGlC2VUNcUliE5/z0cznfM+Oi6cBAoGBAK68wjkperl8+ARQrqII
Zg41PVrsFv6XmuLWv3bqnsx81BTbnHdqmPT4pScPLZvj+6c3iX8D8wzhzbgZtS0I
h+r1yMEdzyRZMGtvdi/f/+1TuhOTr/ZeoqYHWEduo8iRYLQBN9gGEkkKBQ4d14zi
8elaha7vrG/teCrFoeFcXIPr
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDSG09DSvB03TOe
eOmQwfiCIf0wa2WRB31ewxa6i/PRgEKeJSUvIsIuaECUer2ss+J3rwSS2lDpGuiw
FnsVyZqKI/+Rcuc83YJGYg6OAzVMz6UL8YCWhXu3huTJ+V+a5iNereIC69ZERRJt
nXlWqsq6HKya+6BP9sX9CI4GTHQrnWBysAxsswhdnnnRvu+GxglWafSIzuS6OizT
1M1CmkZxNvDJhTSOR7SJlIYm2kM5/fIL53BdndF2IGAjfV1WV7AjwhTfun5cViEO
i8niQUIMY4L0AiO9grFD1g1xIYkeuVBoLxOUBzPxJwQmb64gseb9Dvt0BKLRGoou
SIOyE+KVAgMBAAECggEAI4b6J2kR0VUBEDwmVHO0K38HUstqNHSVgrNO0dLt8sAz
I44o5DhGqPW4a9L4ZS5SrkWyKonPcic6buISRIwfPVoacjQBfVWAXJnil6lbtyYK
ZMNcqLcgBRfCcpOgEq91DiKta6yIwekDFXVyCdFd78v+9ML1J+hUsLVkXJTLdP88
PGamRWVd6vGy3QMRjyM29GLPgS+/6Vrp1cptSuYNqYhlszohmu8lBvzjH9jbPh9d
GFrrd8Bs7IRCdtKZig/3fbln4JEyyOYE+gcT2jplPksB6mR/5DBIdkVbeuFwGB0+
h1/PKlprNQt7+Ei0HhHnTib7lZP8WGo4HkSi7PsAGQKBgQD1Ptho0wJiI2+6gL1O
iNsEJVKIQ2Sxdx3wI/qudphM99t6xKCpPyVI2Nd9PBf2jbZjGAaz+P/KQYxEqb6i
PRcQ+i99wCQoRfnRvUbKA4goEpKwRXmvn+499dm6D5pEuumOXGQYCmaFXuLTRN/I
BL6GNgLtoZAlLjUXaWtk8TszGQKBgQDbUf3p3HLpCjRvRDW/vA5xj+08t7xtF9uO
NilGK79uOA4VnxE2w3ioYqQ7t3I8J/0rAzGKq3tylg4QX6UpQ4b2koRr2B3cqoAk
dsRdNWAHwCNepz8hTLsZyuihzbNv2nHmoqhzjK/FcrBHx5NAM+T6OBpLzQBnbUzk
3wIcqm223QKBgQDo/IRxyY0pGMtLXoT6ODACF0b6JzRhGG37tuKvngGAlbQQRP7w
6wmL1F2cH1wQon7UU34CupqfVnhgvvZZgToJqfU2PTTcgeYc6Pl4b7SJhWOQTOCX
BZQ7jvYCulHv27aIxaNd53uQVx2cYoFKr58lN+i+QtADUoujq0YYxshb+QKBgQDW
ZOti7kZCeuBRGIu2V56C8uBFp5MBzf2polZsqx1iIFfcWPfZ4fGUIYFMgwKfvbOl
lWSbmxB9LiSnaugoU0OezBG43rYqXV4Qxy0jtKagTPoGcFWtNrX7+7e3XD8Zi6Am
hkFHW3MEAB5EvNq8Oz6OP8Os78SCVn2BimMlJJFF3QKBgQCF+aEAiBv+ivcmHUeP
2eBq9nLltPFAfXJ/p31MMQ6Jgo36DBqUeoLeyq/WfIXvwqbVbP9fANZrKoTPbI97
dilCHUoO33rafXJy6jtaggtpz14tt9soecTop0vM/rU7tGtfBe6NXg9LRl+oDJCU
37I3a9Is+2CLyAUXWCk9mLfFsQ==
-----END PRIVATE KEY-----

View File

@ -1,159 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// main runs the Mirrativ collector loop until a shutdown is requested.
//
// Each iteration pops one streamer from the queue, fetches the profile and,
// when the streamer is live, the live details and the monthly gift ranking,
// stores a collect-log row and finally writes the streamer back.
//
// NOTE(review): the session headers below (CSRF token, cookies) are
// hard-coded credentials; consider loading them from configuration.
func main() {
	sessionstr := `
-H 'authority: www.mirrativ.com'
-H 'accept: application/json'
-H 'x-timezone: Asia/Shanghai'
-H 'x-csrf-token: F3Ojd6RBtApP6YAZzVn-9jWN1of159VxAqOQL1Zn'
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
-H 'content-type: application/json'
-H 'sec-fetch-site: same-origin'
-H 'sec-fetch-mode: cors'
-H 'sec-fetch-dest: empty'
-H 'referer: https://www.mirrativ.com/live/O5Ia4iX9c5CeZj7DFtg52Q'
-H 'accept-language: zh-CN,zh;q=0.9,ja;q=0.8'
-H 'cookie: f=A2D75F0E-D218-11EA-A042-452BF6D21CE8; _ga=GA1.2.689947597.1596081392; mr_id=kxb65LddGMZf5C28jkR_tGCZD_ZFOAepD5gfXO7eNjfPMB8EKYvU1Vg_Y29V1lsa; _gid=GA1.2.2116692650.1600139685; lang=ja'`

	ps := intimate.NewPerfectShutdown()

	gprofile := gcurl.Parse(`curl 'https://www.mirrativ.com/api/user/profile?user_id=103383701'` + sessionstr)
	tpProfile := gprofile.CreateTemporary(nil)
	tpProfileUserID := tpProfile.QueryParam("user_id")

	// The command word was misspelled "culr" here.
	g := gcurl.Parse(`curl 'https://www.mirrativ.com/api/live/live?live_id=O5Ia4iX9c5CeZj7DFtg52Q'` + sessionstr)
	tpLive := g.CreateTemporary(nil)
	tpLiveID := tpLive.QueryParam("live_id")

	// lasterr suppresses repeated logging of the same queue error.
	var lasterr error
	queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PMirrativ))

	for !ps.IsClose() {
		istreamer, err := queue.Pop()
		if err != nil {
			if lasterr != err {
				lasterr = err
				log.Println(err)
			}
			ps.Wait(time.Second * 5)
			continue
		}

		now := &sql.NullTime{Time: time.Now(), Valid: true}
		streamer := istreamer.(*intimate.Streamer)
		streamer.UpdateTime = now
		userid := *streamer.UserId
		log.Println(userid)

		tpProfileUserID.StringSet(userid)
		resp, err := tpProfile.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second)
			continue
		}

		clog := &intimate.CollectLog{}
		clog.Platform = intimate.PMirrativ
		clog.UpdateTime = now
		clog.UserId = userid
		clog.StreamerUid = streamer.Uid

		profilejson := gjson.ParseBytes(resp.Content())
		if result := profilejson.Get("follower_num"); result.Exists() {
			clog.Followers = &sql.NullInt64{Int64: result.Int(), Valid: true}
		}

		if result := profilejson.Get("onlive.live_id"); result.Exists() {
			liveID := result.String()
			tpLiveID.StringSet(liveID)
			resp, err = tpLive.Execute()
			if err != nil {
				log.Println(err)
				continue
			}

			livejson := gjson.ParseBytes(resp.Content())
			if result := livejson.Get("total_viewer_num"); result.Exists() {
				clog.Views = &sql.NullInt64{Int64: result.Int(), Valid: true}
			}
			if result := livejson.Get("title"); result.Exists() {
				clog.LiveTitle = &sql.NullString{String: result.String(), Valid: true}
			}
			if result := livejson.Get("started_at"); result.Exists() {
				clog.LiveStartTime = &sql.NullTime{Time: time.Unix(result.Int(), 0), Valid: true}
			}
			if result := livejson.Get("heartbeated_at"); result.Exists() {
				clog.LiveEndTime = &sql.NullTime{Time: time.Unix(result.Int(), 0), Valid: true}
			}
			if result := livejson.Get("app_id"); result.Exists() {
				streamer.Channel = &sql.NullString{String: result.String(), Valid: true}
			}

			if result := livejson.Get("timeline.#.app.short_title"); result.Exists() {
				// Only the first timeline entry is used as the tag.
				for _, tl := range result.Array() {
					var tags []string = []string{tl.String()}
					jtags, _ := json.Marshal(tags) // marshalling a []string cannot fail
					streamer.Tags = jtags
					clog.Tags = jtags
					break
				}
			} else {
				// NOTE(review): this stops the whole collector when a live
				// response has no timeline; kept as in the original.
				log.Println(string(resp.Content()))
				return
			}

			if result := livejson.Get("gift_ranking_url"); result.Exists() {
				// streamer.Channel = &sql.NullString{String: result.String(), Valid: true}
				gifturl := "curl '" + result.String() + "&type=monthly&cursor='" + sessionstr
				ggift := gcurl.Parse(gifturl)
				tp := ggift.CreateTemporary(nil)
				tp.SetURLRawPath("/api/gift/ranking")
				pcursor := tp.QueryParam("cursor")

				var gratuity int64 = 0
				for {
					giftdata, err := tp.Execute()
					if err != nil {
						// The response must not be touched after a failed
						// request; stop paging and keep what was summed.
						log.Println(err)
						break
					}
					giftjson := gjson.ParseBytes(giftdata.Content())
					for _, rpoint := range giftjson.Get("ranking.#.point").Array() {
						gratuity += rpoint.Int()
					}

					ncursor := giftjson.Get("next_cursor").String()
					if ncursor == "" {
						break
					}
					pcursor.StringSet(ncursor)
				}

				// https://www.mirrativ.com/gift/ranking?live_id=O5Ia4iX9c5CeZj7DFtg52Q&obfuscated_user_id=PgIBEgc6jVc
				clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
			}

			cid, err := intimate.TClog.InsertRetAutoID(clog)
			if err != nil {
				log.Println(err)
			} else {
				// Only point at the log row when it was actually written.
				streamer.LatestLogUid = cid
			}
		}

		if err := intimate.TStreamer.Update(streamer); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Second * 2)
	}
}

View File

@ -1,9 +0,0 @@
package main
import (
"testing"
)
// TestDo runs the program's main function under "go test".
// It makes no assertions of its own.
func TestDo(t *testing.T) {
main()
}

View File

@ -1,125 +0,0 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/extractor"
"github.com/tebeka/selenium"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STNimo))
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// main is the program entry point; all work is done by Execute.
func main() {
Execute()
}
// LiveInfo is the set of values extracted from a Nimo live page.
// Execute obtains it by passing LiveInfo{} to the extractor's GetObjectByTag,
// so each `exp` tag is the XPath that selects the field's value.
// NOTE(review): `mth:"r:ExtractNumber"` presumably converts the matched text
// into a number — confirm against the extractor package.
type LiveInfo struct {
// Followers is copied into CollectLog.Followers and also selects the
// streamer's update interval.
Followers int64 `exp:"//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2']" mth:"r:ExtractNumber"`
// Views is copied into CollectLog.Views.
Views int64 `exp:"//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']" mth:"r:ExtractNumber"`
// Channel is stored on the streamer as its channel.
Channel string `exp:"//div[contains(@class,'nimo-rm_type')]//span"`
// Gratuity holds the values matched by the XPath; Execute sums them
// into CollectLog.Gratuity.
Gratuity []int64 `exp:"//div[contains(@class,'rank-item-after3')]//span[contains(@class,'nimo-currency__count')]"`
}
// Execute runs the Nimo collection loop until a shutdown is requested.
//
// Each iteration pops a streamer from the queue, loads its live page in
// Chrome, extracts the LiveInfo fields from the page source, stores them as a
// CollectLog row and updates the streamer (channel, latest log uid, update
// time and an update interval derived from the follower count).
//
// It panics if the collect log cannot be inserted or the streamer cannot be
// updated; the Chrome driver is still closed in that case.
func Execute() {
	// Number of processed pages after which Chrome is restarted.
	const countlimit = 200

	adriver := intimate.GetChromeDriver()
	// Deferred through a closure so that the driver which is current at exit
	// is the one that gets closed, not the first one created.
	defer func() { adriver.Close() }()

	wd := adriver.Webdriver
	waitfor := intimate.NewWaitFor(wd)
	count := 0

	ps := intimate.NewPerfectShutdown()
	queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PNimo))

	var lasterr error
	for !ps.IsClose() {
		istreamer, err := queue.Pop()
		if err != nil {
			// Nothing was popped, so there is no streamer row to flag.
			// Log a repeated error only once and back off instead of
			// spinning on the queue.
			if lasterr != err {
				lasterr = err
				log.Println(err)
			}
			ps.Wait(time.Second * 5)
			continue
		}

		streamer := istreamer.(*intimate.Streamer)

		// A failed navigation would leave the previous streamer's page in
		// the browser, so do not go on to extract from it.
		if err = wd.Get(streamer.LiveUrl.String); err != nil {
			log.Println(streamer.Uid, err)
			intimate.TStreamer.UpdateError(streamer, err)
			continue
		}
		// wd.Get("https://www.nimo.tv/live/1253835677")

		waitfor.Default("//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2' and text() != '']", nil)
		waitfor.WaitWithTimeout("//div[contains(@class,'rank-item-top3')]", 7*time.Second, nil)

		// Hover over and click the top-3 rank item before the page source is
		// read. A missing element is not fatal.
		element, err := wd.FindElement(selenium.ByXPATH, "//div[contains(@class,'rank-item-top3')]")
		if err != nil {
			log.Println(streamer.Uid, err)
		} else {
			if err = element.MoveTo(50, 50); err != nil {
				log.Println(streamer.Uid, err)
			}
			if err = element.Click(); err != nil {
				log.Println(streamer.Uid, err)
			}
		}

		waitfor.Default("//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']", nil)

		pagesource, err := wd.PageSource()
		if err != nil {
			log.Println(streamer.Uid, err)
			intimate.TStreamer.UpdateError(streamer, err)
			continue
		}

		etor := extractor.ExtractHtmlString(pagesource)
		li, ok := etor.GetObjectByTag(LiveInfo{}).(*LiveInfo)
		if !ok || li == nil {
			// Nothing could be extracted from this page; skip it rather
			// than panic on the type assertion.
			log.Println(streamer.Uid, streamer.LiveUrl.String)
			continue
		}
		// log.Printf("%#v", li)

		utime := sql.NullTime{Time: time.Now(), Valid: true}

		clog := &intimate.CollectLog{}
		clog.Platform = intimate.PNimo
		clog.Followers = &sql.NullInt64{Int64: li.Followers, Valid: true}
		clog.Views = &sql.NullInt64{Int64: li.Views, Valid: true}
		clog.UpdateTime = &utime
		clog.StreamerUid = streamer.Uid

		var sum int64
		for _, v := range li.Gratuity {
			sum += v
		}
		clog.Gratuity = &sql.NullInt64{Int64: sum, Valid: true}

		cuid, err := intimate.TClog.InsertRetAutoID(clog)
		if err != nil {
			panic(err)
		}

		streamer.Channel = &sql.NullString{String: li.Channel, Valid: true}
		streamer.LatestLogUid = cuid
		streamer.UpdateTime = &utime
		streamer.Operator = 0

		// The more followers a streamer has, the more often it is refreshed.
		switch {
		case li.Followers <= 1000:
			streamer.UpdateInterval = 720
		case li.Followers <= 10000:
			streamer.UpdateInterval = 360
		case li.Followers <= 100000:
			streamer.UpdateInterval = 180
		case li.Followers <= 1000000:
			streamer.UpdateInterval = 90
		default:
			streamer.UpdateInterval = 60
		}

		// estore.Update(streamer, "update_interval", streamer.UpdateInterval, "operator", streamer.Operator, "channel", streamer.Channel, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime)
		err = intimate.TStreamer.Update(streamer)
		if err != nil {
			panic(err)
		}

		count++
		if count >= countlimit {
			count = 0
			adriver.Close()
			adriver = intimate.GetChromeDriver()
			// Rebind the handles to the new driver; the old ones refer to
			// the browser that was just closed.
			wd = adriver.Webdriver
			waitfor = intimate.NewWaitFor(wd)
		}
	}
}

View File

@ -1,9 +0,0 @@
package main
import (
"testing"
)
// TestMain runs the Execute collection loop under "go test".
// It makes no assertions of its own.
func TestMain(t *testing.T) {
Execute()
}

View File

@ -15,5 +15,6 @@ import (
*/
func main() {
Execute()
oe := &OpenrecExtractor{}
oe.Execute()
}

View File

@ -3,260 +3,236 @@ package main
import (
"database/sql"
"encoding/json"
"errors"
"intimate"
"log"
"os"
"os/signal"
"regexp"
"strconv"
"strings"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/extractor"
"github.com/474420502/gcurl"
"github.com/474420502/requests"
"github.com/tidwall/gjson"
)
//UserInfo 提取信息的结构体
type UserInfo struct {
UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"`
Followers int64 `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"`
Views int64 `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"`
var estore = intimate.NewStoreExtractor()
var sstore = intimate.NewStoreSource(string(intimate.STOpenrec))
// OpenrecExtractor holds the sources that the extract* methods read from
// while processing one Openrec source record.
type OpenrecExtractor struct {
// user is built from the "html_user" entry of the source data.
user *intimate.ExtractorSource
// userLive is built from the "html_live" entry of the source data.
userLive *intimate.ExtractorSource
// supporters is built from the "json_supporters" entry of the source data.
supporters *intimate.ExtractorSource
}
// UserLive is the set of values extracted from an Openrec live page.
// Each `exp` tag is the XPath that selects the field's value.
type UserLive struct {
// Title is the text of the MovieTitle heading.
Title string `exp:"//h1[contains(@class,'MovieTitle__Title')]"`
// LiveStartTime is the content of the uploadDate meta tag.
LiveStartTime string `exp:"//meta[@itemprop='uploadDate']/@content"`
// LiveEndTime is the content of the duration meta tag, i.e. a duration
// rather than a timestamp; Extractor parses it with intimate.ParseDuration
// and adds it to the start time.
LiveEndTime string `exp:"//meta[@itemprop='duration']/@content"`
// Tags are the texts of the tag buttons on the page.
Tags []string `exp:"//div[contains(@class,'MovieMetaContent__TagContainer')]//a[@role ='button']"`
}
func (oe *OpenrecExtractor) Execute() {
// Execute 执行
func Execute() {
var loop int32 = 1
ps := intimate.NewPerfectShutdown()
ses := requests.NewSession()
squeue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Popenrec))
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
var lasterr error = nil
for !ps.IsClose() {
istreamer, err := squeue.Pop()
for atomic.LoadInt32(&loop) > 0 {
var err error
// streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
if istreamer == nil || err != nil {
source, err := sstore.Pop(intimate.TOpenrecUser, 0)
if err != nil {
if err != lasterr {
log.Println(err, lasterr)
lasterr = err
}
time.Sleep(time.Second * 2)
continue
}
streamer := istreamer.(*intimate.Streamer)
userId := *streamer.UserId
var updateUrl map[string]string
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) // 反序列化update_url, 里面存了需要采集的url
if err != nil {
log.Println(err)
continue
}
// Check Userid
userUrl := updateUrl["user"]
log.Println(userUrl)
tp := ses.Get(userUrl) // 获取user url页面数据
resp, err := tp.Execute()
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
if err != nil {
log.Println(err)
intimate.TStreamer.UpdateError(streamer, err)
time.Sleep(time.Second * 5)
continue
}
cookies := ses.GetCookies(tp.GetParsedURL())
sdata := source.Ext.([]byte)
datamap := gjson.ParseBytes(sdata).Map()
scurl := updateUrl["supporters"] //获取打赏者的数据
curl := gcurl.Parse(scurl)
supportersSession := curl.CreateSession()
source.Operator = int32(intimate.OperatorError)
userId := datamap["var_user_id"].String()
temporary := curl.CreateTemporary(supportersSession)
supportersSession.SetCookies(temporary.GetParsedURL(), cookies)
var supporters []string
for { // supporters 数据需要登录信息. 下面为赋值 supporters链接获取的uid token random码
streamer := &intimate.Streamer{}
streamer.UserId = userId
streamer.Platform = intimate.Popenrec
supportersQuery := temporary.GetQuery()
htmlUser := datamap["html_user"]
oe.user = intimate.NewExtractorSource(&htmlUser)
oe.user.CreateExtractor()
for _, cookie := range cookies {
if cookie.Name == "uuid" {
supportersQuery.Set("Uuid", cookie.Value)
continue
}
htmlLive := datamap["html_live"]
oe.userLive = intimate.NewExtractorSource(&htmlLive)
oe.userLive.CreateExtractor()
if cookie.Name == "token" {
supportersQuery.Set("Token", cookie.Value)
continue
}
jsonSupporters := datamap["json_supporters"]
oe.supporters = intimate.NewExtractorSource(&jsonSupporters)
clog := &intimate.CollectLog{}
if cookie.Name == "random" {
supportersQuery.Set("Random", cookie.Value)
continue
}
}
// log.Println(anchorId)
supportersQuery.Set("identify_id", userId)
temporary.SetQuery(supportersQuery)
oe.extractFollowers(clog)
oe.extractUserName(streamer)
oe.extractViewsAndLiveStreaming(clog)
oe.extractGiversAndGratuity(clog)
oe.extractLive(clog)
oe.extractTags(clog)
resp, err := temporary.Execute()
if err != nil {
log.Println(err)
}
supporterjson := gjson.ParseBytes(resp.Content())
supporterdata := supporterjson.Get("data") //解析supporters获取的json数据
if supporterdata.Type == gjson.Null {
break
}
supporters = append(supporters, string(resp.Content()))
streamer.Uid = source.StreamerId.Int64
streamer.UpdateTime = source.UpdateTime
streamer.Tags = clog.Tags
temporary.QueryParam("page_number").IntAdd(1)
}
clog.Platform = intimate.Popenrec
clog.UserId = userId
clog.UpdateTime = source.UpdateTime
// cookies := cxt.Session().GetCookies(wf.GetParsedURL())
// ext := make(map[string]interface{})
logUid := estore.InsertClog(clog)
jsonSupporters := supporters
htmlUser := string(resp.Content())
liveUrl := updateUrl["live"]
tp = ses.Get(liveUrl)
resp, err = tp.Execute()
if err != nil {
log.Println(err)
intimate.TStreamer.UpdateError(streamer, err)
continue
}
htmlLive := string(resp.Content())
// ext["var_user_id"] = userId
// streamer.Platform = intimate.Popenrec
streamer.UpdateInterval = 120
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
LiveUrl := "https://www.openrec.tv/live/" + userId
streamer.LiveUrl = sql.NullString{String: LiveUrl, Valid: true}
streamer.LatestLogUid = logUid
streamer.Operator = 0
Extractor(streamer, userId, htmlUser, htmlLive, jsonSupporters)
estore.UpdateStreamer(streamer)
source.Operator = int32(intimate.OperatorExtractorOK)
sstore.UpdateOperator(source)
}
}
func Extractor(streamer *intimate.Streamer, userId string, htmlUser, htmlLive string, jsonSupporters []string) {
// sdata := source.Ext.([]byte)
// datamap := gjson.ParseBytes(sdata).Map()
// userId := datamap["var_user_id"].String()
// streamer := &intimate.Streamer{}
// streamer.UserId = &userId
// streamer.Platform = intimate.Popenrec 不需要更新字段
// htmlUser := datamap["html_user"]
userEtor := extractor.ExtractHtmlString(htmlUser)
ui, ok1 := userEtor.GetObjectByTag(UserInfo{}).(*UserInfo)
// htmlLive := datamap["html_live"]
liveEtor := extractor.ExtractHtmlString(htmlLive)
ul, ok2 := liveEtor.GetObjectByTag(UserLive{}).(*UserLive)
// jsonSupporters := datamap["json_supporters"]
clog := &intimate.CollectLog{}
if ok1 {
clog.Followers = &sql.NullInt64{Int64: ui.Followers, Valid: true}
clog.Views = &sql.NullInt64{Int64: ui.Views, Valid: true}
if ui.Views != 0 {
clog.IsLiveStreaming = true
}
streamer.UserName = &sql.NullString{String: ui.UserName, Valid: true}
// giverjson := jsonSupporters
var givers []interface{}
var gratuity int64 = 0
for _, v := range jsonSupporters {
giverSource := gjson.Parse(v)
for _, item := range giverSource.Get("data.items").Array() {
givers = append(givers, item.Map())
gratuity += item.Get("total_yells").Int()
}
}
giversbytes, err := json.Marshal(givers)
if err != nil {
log.Println(err)
clog.ErrorMsg = &sql.NullString{String: err.Error(), Valid: true}
} else {
clog.Giver = giversbytes
}
clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
} else {
log.Println("UserInfo may be not exists")
intimate.TStreamer.UpdateError(streamer, errors.New("UserInfo may be not exists"))
return
}
//log.Println(ul)
if ok2 {
clog.LiveTitle = &sql.NullString{String: ul.Title, Valid: true}
startTime, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", ul.LiveStartTime, time.Local)
if err != nil {
log.Println(err)
} else {
clog.LiveStartTime = &sql.NullTime{Time: startTime.Local(), Valid: true}
duration, err := intimate.ParseDuration(ul.LiveEndTime)
if err != nil {
log.Println(err)
} else {
endTime := startTime.Add(duration)
clog.LiveEndTime = &sql.NullTime{Time: endTime.Local(), Valid: true}
}
}
if tags, err := json.Marshal(ul.Tags); err == nil {
clog.Tags = tags
} else {
log.Println("json error", ul.Tags, clog.Tags)
}
}
// streamer.Uid = source.StreamerId.Int64
// streamer.UpdateTime = &source.UpdateTime
if clog.Tags != nil {
streamer.Tags = clog.Tags
}
clog.Platform = intimate.Popenrec
clog.UserId = userId
clog.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
clog.StreamerUid = streamer.Uid
logUid, err := intimate.TClog.InsertRetAutoID(clog)
func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
extractor := oe.user.GetExtractor()
xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
if err != nil {
log.Println(err)
return
}
if !xp.NodeIter().Next() {
log.Println("不存在粉丝数")
}
LiveUrl := "https://www.openrec.tv/live/" + userId
streamer.LiveUrl = &sql.NullString{String: LiveUrl, Valid: true}
streamer.LatestLogUid = logUid
// streamer.Operator = 0
// log.Println(*streamer.UserId)
intimate.TStreamer.Update(streamer)
// source.Operator = int32(intimate.OperatorExtractorOK)
// sstore.UpdateOperator(source)
followers := strings.ReplaceAll(xp.String(), ",", "")
followersInt, err := strconv.ParseInt(followers, 10, 64)
if err != nil {
log.Println(err)
}
clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true})
}
// extractUserName looks up the profile name on the user page and stores it on
// streamer as the "UserName" field. Nothing is set when the XPath query fails
// (the error is logged) or when it matches no node.
func (oe *OpenrecExtractor) extractUserName(streamer intimate.ISet) {
	result, err := oe.user.GetExtractor().XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
	if err != nil {
		log.Println(err)
		return
	}
	if !result.NodeIter().Next() {
		return
	}
	streamer.Set("UserName", sql.NullString{String: result.String(), Valid: true})
}
// extractViewsAndLiveStreaming reads the live viewer count from the user page.
//
// When the live-count element is present it sets "Views" to the parsed number
// and "IsLiveStreaming" to true on clog. When the XPath query fails the error
// is logged and clog is left untouched; when the element is absent nothing is
// set either.
func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
	extractor := oe.user.GetExtractor()
	// c-contents
	xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
	if err != nil {
		// Do not touch xp after a failed query.
		log.Println(err)
		return
	}
	if !xp.NodeIter().Next() {
		return
	}

	// Keep only the digits (and thousands separators) of the count text.
	views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
	views = strings.ReplaceAll(views, ",", "")
	viewsint, err := strconv.Atoi(views)
	if err != nil {
		// The element exists, so the stream is still reported as live;
		// the count falls back to 0.
		log.Println(err)
	}

	clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true})
	clog.Set("IsLiveStreaming", true)
}
// extractGiversAndGratuity walks every supporters page held in oe.supporters,
// collects each entry of "data.items" as a giver and adds up their
// "total_yells".
//
// The givers are stored on clog as JSON under "Giver" (or the marshal error
// under "ErrorMsg"), and the total is always stored under "Gratuity".
func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) {
	var (
		givers []interface{}
		total  int64
	)

	for _, page := range oe.supporters.GetSource().Array() {
		items := gjson.Parse(page.String()).Get("data.items").Array()
		for _, item := range items {
			givers = append(givers, item.Map())
			total += item.Get("total_yells").Int()
		}
	}

	if encoded, err := json.Marshal(givers); err != nil {
		log.Println(err)
		clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
	} else {
		clog.Set("Giver", encoded)
	}

	clog.Set("Gratuity", sql.NullInt64{Int64: total, Valid: true})
}
// extractLive fills the live-stream fields of clog from the live page.
//
// If the page has no recognisable title nothing is set. Otherwise
// "LiveTitle" is set, then "LiveStartTime" from the uploadDate meta tag, then
// "LiveEndTime" as start time plus the duration meta tag. Each later field is
// only set when everything it depends on could be read and parsed; failures
// are logged and stop the extraction at that point.
func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) {
	extractor := oe.userLive.GetExtractor()

	mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(oe.userLive.GetSource().Str)
	if len(mathes) != 2 {
		return
	}
	clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true})

	content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
	if err != nil {
		log.Println(err)
		return
	}
	iter := content.NodeIter()
	if !iter.Next() {
		return
	}

	tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
	if err != nil {
		// Without a valid start time neither start nor end can be stored.
		log.Println(err)
		return
	}
	// log.Println(iter.Node().NodeValue(), tm.Local())
	clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true})

	duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
	if err != nil {
		log.Println(err)
		return
	}
	diter := duration.NodeIter()
	if !diter.Next() {
		return
	}

	dt, err := intimate.ParseDuration(diter.Node().NodeValue())
	if err != nil {
		// An unparsable duration must not be recorded as an end time.
		log.Println(err)
		return
	}
	endtm := tm.Add(dt)
	clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true})
}
// extractTags scans the raw live page for tag buttons and stores their texts
// on clog as a JSON array under "Tags". A marshal error is logged and the
// (then empty) bytes are stored regardless.
func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) {
	tagPattern := regexp.MustCompile(`<[^>]+TagButton[^>]+>([^<]{1,100})<`)
	found := tagPattern.FindAllStringSubmatch(oe.userLive.GetSource().Str, -1)

	var tags []string
	for _, match := range found {
		tags = append(tags, match[1])
	}

	encoded, err := json.Marshal(tags)
	if err != nil {
		log.Println(err)
	}
	clog.Set("Tags", encoded)
}

View File

@ -7,6 +7,7 @@ import (
"testing"
"time"
"github.com/474420502/hunter"
"github.com/lestrrat-go/libxml2"
)
@ -90,6 +91,29 @@ func TestCase(t *testing.T) {
t.Error(xr)
}
func TestExtractor(t *testing.T) {
Execute()
// TestUserName runs the user-name XPath against the saved page in test.html
// and logs the extracted name. It fails if the fixture cannot be read or the
// XPath query returns an error.
func TestUserName(t *testing.T) {
	f, err := os.Open("test.html")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	extractor := hunter.NewExtractor(data)
	xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
	if err != nil {
		t.Fatal(err)
	}
	if xp.NodeIter().Next() {
		// Informational output only; finding a name is the success case.
		t.Log(xp.String())
	}
}
// TestExtractor runs the OpenrecExtractor's Execute loop under "go test".
// It makes no assertions of its own.
func TestExtractor(t *testing.T) {
oe := &OpenrecExtractor{}
oe.Execute()
}

View File

@ -2,9 +2,9 @@ package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"strconv"
"strings"
"time"
@ -21,8 +21,8 @@ var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
type LiveData struct {
UserName string `exp:"//span[@class='tw-live-author__info-username']" method:"Text"`
Follower int64 `exp:"(//span[@class='tw-user-nav-list-count'])[2]" method:"r:ExtractNumber"`
MaxViews int64 `exp:"//span[@id='max_viewer_count']/text()" method:"r:ExtractNumber"`
Follower string `exp:"(//span[@class='tw-user-nav-list-count'])[2]" method:"Text"`
MaxViews string `exp:"//span[@id='max_viewer_count']" method:"Text"`
LiveTitle string `exp:"//meta[@property='og:title']" method:"AttributeValue,content"`
LiveStart string `exp:"//time[@data-kind='relative']" method:"AttributeValue,datetime"`
LiveDuration string `exp:"//span[@id='updatetimer']" method:"AttributeValue,data-duration"`
@ -33,40 +33,25 @@ func main() {
ps := intimate.NewPerfectShutdown()
ses := requests.NewSession()
streamerQueue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitcasting))
var lasterr error
for !ps.IsClose() {
// streamer, err := estore.Pop(intimate.Ptwitcasting)
isteamer, err := streamerQueue.Pop()
streamer, err := estore.Pop(intimate.Ptwitcasting)
if err != nil {
if lasterr != err {
lasterr = err
log.Println(err)
}
time.Sleep(time.Minute)
continue
log.Println(err, streamer.UserId)
}
streamer := isteamer.(*intimate.Streamer)
streamer.LiveUrl = &sql.NullString{String: "https://twitcasting.tv/" + *streamer.UserId, Valid: true}
streamer.LiveUrl = sql.NullString{String: "https://twitcasting.tv/" + streamer.UserId, Valid: true}
resp, err := ses.Get(streamer.LiveUrl.String).Execute()
if err != nil {
intimate.TStreamer.UpdateError(streamer, err)
log.Println(err, *streamer.UserId)
estore.UpdateError(streamer, err)
log.Println(err, streamer.UserId)
continue
}
var ldata *LiveData
// f, _ := os.OpenFile("./twistcasting.html", os.O_CREATE|os.O_RDWR|os.O_TRUNC, os.ModePerm)
// f.Write(resp.Content())
etor := extractor.ExtractHtml(resp.Content())
ildata := etor.GetObjectByTag(LiveData{})
if ildata == nil {
log.Println(streamer.LiveUrl.String)
continue
}
ldata = ildata.(*LiveData)
// ldata.MaxViews = regexp.MustCompile("\\d+").FindString(ldata.MaxViews)
ldata = etor.GetObjectByTag(LiveData{}).(*LiveData)
ldata.MaxViews = regexp.MustCompile("\\d+").FindString(ldata.MaxViews)
coincount := 0
for i := 0; ; i++ {
@ -74,14 +59,14 @@ func main() {
giverurl := streamer.LiveUrl.String + "/backers/" + strconv.Itoa(i)
resp, err = ses.Get(giverurl).Execute()
if err != nil {
intimate.TStreamer.UpdateError(streamer, err)
estore.UpdateError(streamer, err)
log.Panic(err)
}
etor := extractor.ExtractHtml(resp.Content())
xp, err := etor.XPaths("//td[@class='tw-memorial-table-recent-point']")
if err != nil {
intimate.TStreamer.UpdateError(streamer, err)
estore.UpdateError(streamer, err)
log.Panic(err)
}
@ -105,81 +90,63 @@ func main() {
}
}
var tags []byte
tags, err = json.Marshal(ldata.Tags)
if err != nil {
log.Println(err, streamer.UserId)
}
streamer.Platform = intimate.Ptwitcasting
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
streamer.UserName = &sql.NullString{String: ldata.UserName, Valid: true}
streamer.Operator = 0
streamer.Tags = tags
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
streamer.UserName = sql.NullString{String: ldata.UserName, Valid: true}
streamer.Operator = 10
// streamer.UpdateInterval = 60
clog := &intimate.CollectLog{}
clog.UserId = *streamer.UserId
clog.Gratuity = &sql.NullInt64{Int64: int64(coincount), Valid: true}
clog.UserId = streamer.UserId
clog.Gratuity = sql.NullInt64{Int64: int64(coincount), Valid: true}
clog.Platform = streamer.Platform
clog.UpdateTime = streamer.UpdateTime
clog.LiveTitle = &sql.NullString{String: ldata.LiveTitle, Valid: true}
clog.Tags = tags
clog.Followers = &sql.NullInt64{Int64: int64(ldata.Follower), Valid: true}
switch {
case ldata.Follower <= 100:
streamer.UpdateInterval = 720
case ldata.Follower <= 1000:
streamer.UpdateInterval = 320
case ldata.Follower <= 10000:
streamer.UpdateInterval = 240
default:
streamer.UpdateInterval = 120
clog.LiveTitle = sql.NullString{String: ldata.LiveTitle, Valid: true}
fl, err := intimate.ParseNumberEx(ldata.Follower)
if err == nil {
clog.Followers = sql.NullInt64{Int64: int64(fl), Valid: true}
switch {
case fl <= 100:
streamer.UpdateInterval = 360
case fl <= 1000:
streamer.UpdateInterval = 240
case fl <= 100:
streamer.UpdateInterval = 120
default:
streamer.UpdateInterval = 60
}
} else {
log.Println(err)
}
clog.Views = &sql.NullInt64{Int64: ldata.MaxViews, Valid: true}
if ldata.LiveStart != "" {
st, err := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", ldata.LiveStart)
views, err := strconv.Atoi(ldata.MaxViews)
if err == nil {
clog.Views = sql.NullInt64{Int64: int64(views), Valid: true}
} else {
clog.Views = sql.NullInt64{Int64: int64(0), Valid: true}
// log.Println(err, streamer.UserId)
}
// st, err := strconv.Atoi(ldata.LiveStart)
st, err := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", ldata.LiveStart)
if err == nil {
startTime := st
clog.LiveStartTime = sql.NullTime{Time: startTime, Valid: true}
dt, err := strconv.Atoi(ldata.LiveDuration)
if time.Now().Sub(startTime) >= time.Hour*24*90 {
streamer.Operator = 5
}
if err == nil {
startTime := st
clog.LiveStartTime = &sql.NullTime{Time: startTime, Valid: true}
dt, err := strconv.Atoi(ldata.LiveDuration)
liveduration := time.Now().Sub(startTime)
switch {
case liveduration >= time.Hour*24*240:
streamer.Operator = 5
case liveduration >= time.Hour*24*60:
streamer.UpdateInterval = 60 * 24 * 30
case liveduration >= time.Hour*24*30:
streamer.UpdateInterval = 60 * 24 * 15
case liveduration >= time.Hour*24*15:
streamer.UpdateInterval = 60 * 24 * 7
case liveduration >= time.Hour*24*7:
streamer.UpdateInterval = 60 * 24 * 3
}
if err == nil {
endTime := startTime.Add((time.Duration)(dt) * time.Millisecond)
clog.LiveEndTime = &sql.NullTime{Time: endTime, Valid: true}
} else {
log.Println(err, streamer.UserId)
}
endTime := startTime.Add((time.Duration)(dt) * time.Millisecond)
clog.LiveEndTime = sql.NullTime{Time: endTime, Valid: true}
} else {
log.Println(err, streamer.UserId)
}
} else {
log.Println(err, streamer.UserId)
}
clog.StreamerUid = streamer.Uid
uid, err := intimate.TClog.InsertRetAutoID(clog)
if err != nil {
log.Println(err)
continue
}
streamer.LatestLogUid = uid
intimate.TStreamer.Update(streamer)
// estore.UpdateStreamer(streamer)
log.Println(*streamer.UserId)
streamer.LatestLogUid = estore.InsertClog(clog)
estore.UpdateStreamer(streamer)
}
}

View File

@ -10,7 +10,7 @@ import (
// Follower string `exp:".//span[@class='tw-user-nav-list-count']" method:"Text"`
// }
func TestMain(t *testing.T) {
func estMain(t *testing.T) {
main()
}

View File

@ -3,326 +3,136 @@ package main
import (
"database/sql"
"encoding/json"
"fmt"
"intimate"
"log"
"regexp"
"strings"
"time"
"github.com/tebeka/selenium"
)
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func main() {
adriver := intimate.GetChromeDriver()
wd := intimate.GetChromeDriver(3030)
ps := intimate.NewPerfectShutdown()
slqueue := intimate.TStreamerList.Queue(intimate.StreamerList{}, intimate.ConditionDefault(intimate.Ptwitch))
squeue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitch))
var count = 0
var countlimt = 200
var recreate = time.Now()
counter := intimate.NewCounter()
counter.SetMaxLimit(200)
counter.SetMaxToDo(func(olist ...interface{}) error {
owd := olist[0].(*selenium.WebDriver)
(*owd).Close()
(*owd).Quit()
*owd = intimate.GetChromeDriver(3030)
return nil
}, &wd)
var lasterr error = nil
// var err error
for !ps.IsClose() {
wd := adriver.Webdriver
// sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
isl, err := slqueue.Pop()
if err != nil {
if lasterr != err {
streamer, err := estore.Pop(intimate.Ptwitch, 0)
if streamer == nil || err != nil {
if err != lasterr {
log.Println(err, lasterr)
lasterr = err
log.Println(err)
}
istreamer, err := squeue.Pop()
if err != nil {
if lasterr != err {
lasterr = err
log.Println(err)
ps.Wait(time.Minute)
continue
}
}
streamer := istreamer.(*intimate.Streamer)
Extractor(wd, streamer)
if err = intimate.TStreamer.Update(streamer); err != nil {
log.Println(err)
}
count++
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
}
time.Sleep(time.Second * 2)
continue
}
streamerlist := isl.(*intimate.StreamerList)
var updateUrl map[string]string
json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
liveUrl := updateUrl["live"]
liveUrl = strings.Replace(liveUrl, "/watchparty", "", -1)
log.Println(liveUrl)
weburl := streamerlist.Url + "?sort=VIEWER_COUNT"
err = wd.Get(weburl)
// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
err = wd.Get(liveUrl + "/about")
if err != nil {
log.Println(err)
// sstore.UpdateError(sourceChannel, err)
intimate.TStreamerList.UpdateError(streamerlist, err)
time.Sleep(time.Second * 10)
estore.UpdateError(streamer, err)
time.Sleep(time.Second * 5)
continue
}
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
return false, err
}
return true, nil
}, time.Second*10)
streamer.LiveUrl = sql.NullString{String: liveUrl, Valid: true}
clog := &intimate.CollectLog{}
clog.UserId = streamer.UserId
clog.Gratuity = sql.NullInt64{Int64: 0, Valid: false}
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
time.Sleep(time.Millisecond * 500)
err = extractUserName(wd, streamer)
if err != nil {
log.Println(err)
_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
if err == nil {
log.Println(streamer.UserId, "may be cancell")
streamer.Operator = 5
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
estore.UpdateStreamer(streamer)
}
continue
}
btn.Click()
var elements []selenium.WebElement
var liveurls = 0
var delayerror = 2
for i := 0; i < 200 && !ps.IsClose(); i++ {
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
log.Println(err)
break
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2000)
if len(elements) == liveurls {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 2
}
liveurls = len(elements)
}
articles, err := wd.FindElements(selenium.ByXPATH, "//article")
err = extractFollowers(wd, clog)
if err != nil {
log.Println(err)
continue
}
var streamers []*intimate.Streamer
for _, article := range articles {
e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
if err != nil {
log.Println(err)
continue
}
href, err := e.GetAttribute("href")
if err != nil {
log.Println(err)
continue
}
btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
if err != nil {
log.Println(err)
continue
}
var tags []string
for _, btn := range btns {
tag, err := btn.GetAttribute("data-a-target")
if err == nil {
tags = append(tags, tag)
err = extractViews(wd, clog) // views + tags + gratuity
if err != nil {
// 不直播时提取礼物 gratuity
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
if (err == nil && channelchat != nil) || btn != nil {
if channelchat != nil {
channelchat.Click()
}
time.Sleep(time.Second)
extractGratuity(wd, clog)
return true, nil
}
}
return false, nil
streamer := &intimate.Streamer{}
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(href)
if len(matches) == 2 {
mc := matches[1]
streamer.UserId = &mc
} else {
log.Println(href)
continue
}
jtags, err := json.Marshal(tags)
if err != nil {
log.Println(err)
} else {
streamer.Tags = jtags
}
streamer.Platform = intimate.Ptwitch
streamer.LiveUrl = &sql.NullString{String: href, Valid: true}
streamer.Operator = 0
streamers = append(streamers, streamer)
// if estore.InsertStreamer(streamer) {
// // log.Println("streamer update tags", streamer.Uid, tags)
// if streamer.Tags != nil {
// estore.Update(streamer, "Tags", streamer.Tags)
// }
// }
}, time.Second*4)
}
for _, streamer := range streamers {
Extractor(wd, streamer)
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
if err = intimate.TStreamer.InsertOrUpdate(streamer,
intimate.DUpdate{Field: "tags"},
intimate.DUpdate{Field: "update_time"},
intimate.DUpdate{Field: "update_interval"},
); err != nil {
log.Println(err)
}
streamer.Platform = intimate.Ptwitch
clog.Platform = streamer.Platform
clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
lastClogId := estore.InsertClog(clog)
streamer.Operator = 10
streamer.LatestLogUid = lastClogId
if clog.Tags != nil {
streamer.Tags = clog.Tags
}
log.Println("streamer find", len(articles))
if len(articles) == 0 {
intimate.TStreamerList.UpdateError(streamerlist, fmt.Errorf(""))
}
count++
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
switch fl := clog.Followers.Int64; {
case fl > 100000:
streamer.UpdateInterval = 120
case fl > 10000:
streamer.UpdateInterval = 240
case fl > 1000:
streamer.UpdateInterval = 360
case fl > 100:
streamer.UpdateInterval = 720
case fl > 0:
streamer.UpdateInterval = 1440
}
streamer.UpdateTime = clog.UpdateTime
estore.UpdateStreamer(streamer)
counter.AddWithReset(1)
}
adriver.Close()
}
// Extractor loads the Twitch "about" page of streamer in wd, gathers a
// CollectLog snapshot (user name, followers, views/tags/gratuity) and inserts
// it through intimate.TClog. On success it updates the in-memory streamer
// (LiveUrl, Platform, LatestLogUid, Tags, UpdateInterval, UpdateTime); the
// streamer itself is not written to the database here.
//
// Failure handling:
//   - page load failure: the error is recorded with TStreamer.UpdateError;
//   - user name not found: if the "browse channels" button is present the
//     account is treated as cancelled (Operator = 5) and the error recorded;
//   - followers not found: UpdateInterval is pushed back by 30 and nothing is
//     stored.
func Extractor(wd selenium.WebDriver, streamer *intimate.Streamer) {
	liveUrl := "https://www.twitch.tv/" + (*streamer.UserId)
	log.Println(liveUrl)

	err := wd.Get(liveUrl + "/about")
	if err != nil {
		errstr := fmt.Errorf("%s: %s", err.Error(), liveUrl+"/about")
		log.Println(errstr)
		intimate.TStreamer.UpdateError(streamer, errstr)
		time.Sleep(time.Second * 5)
		return
	}

	streamer.LiveUrl = &sql.NullString{String: liveUrl, Valid: true}
	clog := &intimate.CollectLog{}
	clog.UserId = *streamer.UserId
	clog.Gratuity = &sql.NullInt64{Int64: 0, Valid: false}

	// Give the page a moment to render before querying elements.
	time.Sleep(time.Millisecond * 500)

	err = extractUserName(wd, streamer)
	if err != nil {
		_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
		if err == nil {
			log.Println(*streamer.UserId, "may be cancell")
			streamer.Operator = 5
			// The user id is passed as an argument, never as the format
			// string: ids must not be interpreted as formatting verbs.
			intimate.TStreamer.UpdateError(streamer, fmt.Errorf("%s may be cancell", *streamer.UserId))
		}
		return
	}

	err = extractFollowers(wd, clog)
	if err != nil {
		streamer.UpdateInterval += 30
		return
	}

	err = extractViews(wd, clog) // views + tags + gratuity
	if err != nil {
		// Not live: extract the gratuity from the channel chat tab instead.
		wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
			channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
			btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
			if (err == nil && channelchat != nil) || btn != nil {
				if channelchat != nil {
					channelchat.Click()
				}
				time.Sleep(time.Second)
				extractGratuity(wd, clog)
				return true, nil
			}
			return false, nil
		}, time.Second*4)
	}

	streamer.Platform = intimate.Ptwitch
	clog.Platform = streamer.Platform
	clog.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}

	lastClogId, err := intimate.TClog.InsertRetAutoID(clog)
	if err != nil {
		log.Println(err)
		return
	}

	streamer.LatestLogUid = lastClogId
	if clog.Tags != nil {
		streamer.Tags = clog.Tags
	}

	// Popular streamers are refreshed more often; zero followers keeps the
	// current interval.
	switch fl := clog.Followers.Int64; {
	case fl > 100000:
		streamer.UpdateInterval = 120
	case fl > 10000:
		streamer.UpdateInterval = 240 * 2
	case fl > 1000:
		streamer.UpdateInterval = 360 * 2
	case fl > 100:
		streamer.UpdateInterval = 720 * 2
	case fl > 0:
		streamer.UpdateInterval = 1440 * 4
	}

	streamer.UpdateTime = clog.UpdateTime

	// NOTE(review): this closes and quits the driver supplied by the caller,
	// and only on the success path — confirm the caller does not reuse wd.
	wd.Close()
	wd.Quit()
}
func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
@ -331,7 +141,7 @@ func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
if err == nil {
if ltxt, err := label.Text(); err == nil && ltxt != "" {
// log.Println("label:", ltxt)
streamer.UserName = &sql.NullString{String: ltxt, Valid: true}
streamer.UserName = sql.NullString{String: ltxt, Valid: true}
return true, nil
}
}
@ -351,7 +161,7 @@ func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
}
followers = regexp.MustCompile(`[\d,]+`).FindString(followers)
fint, _ := intimate.ParseNumber(followers)
clog.Followers = &sql.NullInt64{Int64: int64(fint), Valid: true}
clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
// log.Println("followers: ", followers, fint)
return true, nil
}, 4*time.Second)
@ -364,7 +174,7 @@ func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
if txt, err := views.Text(); err == nil {
vint, _ := intimate.ParseNumber(txt)
clog.Views = &sql.NullInt64{Int64: vint, Valid: true}
clog.Views = sql.NullInt64{Int64: vint, Valid: true}
// log.Println("views:", txt)
views.Click()
@ -384,7 +194,7 @@ func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
title, err := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
if err == nil {
if txt, err := title.Text(); err == nil {
clog.LiveTitle = &sql.NullString{String: txt, Valid: true}
clog.LiveTitle = sql.NullString{String: txt, Valid: true}
return true, nil
}
}
@ -436,7 +246,7 @@ func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
log.Println(err)
}
}
clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
}
return true, nil
}

View File

@ -3,25 +3,29 @@ package intimate
import (
"database/sql"
"reflect"
"time"
"github.com/474420502/hunter"
"github.com/tidwall/gjson"
)
type GetSet struct {
}
type StreamerList struct {
UrlHash string `field:"urlhash" uid:"true"` //
Platform string `field:"platform" ` //
Url string `field:"url" ` //
UrlHash []byte //
Platform Platform //
Url string //
Label *sql.NullString `field:"label" ` //
Label sql.NullString //
Serialize interface{} `field:"serialize" `
Serialize interface{}
UpdateInterval int32 `field:"update_interval" `
UpdateTime *sql.NullTime `field:"update_time" ` //
UpdateInterval int32
UpdateTime time.Time //
ErrorMsg *sql.NullString `field:"error_msg" ` //
Operator int32 `field:"operator" `
ErrorMsg sql.NullString
Operator int32
LastOperator int32
}
@ -37,26 +41,25 @@ func (sl *StreamerList) Set(field string, value interface{}) {
}
type Streamer struct {
Uid int64 `field:"uid" uid:"auto"` //
Platform Platform `field:"platform"` //
UserId *string `field:"user_id"` //
Uid int64 //
Platform Platform //
UserId string //
UserName *sql.NullString `field:"user_name"` //
LiveUrl *sql.NullString `field:"live_url"` //
Channel *sql.NullString `field:"channel"` //
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
// Comments interface{} `field:"comments"`
UserName sql.NullString //
LiveUrl sql.NullString //
Channel sql.NullString //
Tags interface{}
Ext interface{} //
IsUpdateStreamer bool // 更新上面的内容
IsUpdateUrl bool
UpdateInterval int32 `field:"update_interval"`
UpdateUrl interface{} `field:"update_url"` // TODO: nil
LatestLogUid int64 `field:"latest_log_uid"`
UpdateTime *sql.NullTime `field:"update_time"` //
UpdateInterval int32
UpdateUrl interface{}
LatestLogUid int64
UpdateTime sql.NullTime //
ErrorMsg *sql.NullString `field:"error_msg"`
Operator int32 `field:"operator"`
ErrorMsg sql.NullString
Operator int32
LastOperator int32
}
@ -72,25 +75,24 @@ func (ai *Streamer) Set(field string, value interface{}) {
}
type CollectLog struct {
LogUid int64 `field:"log_uid"` // 日志id
StreamerUid int64 `field:"streamer_uid"` // StreamerId 表id与
LogUid int64 // 日志id
StreamerUid int64 // StreamerId 表id与
Platform Platform `field:"platform"` //
UserId string `field:"user_id"` // 平台的UserId
IsLiveStreaming bool `field:"is_live_streaming"` //
IsError bool `field:"is_error"` //
Followers *sql.NullInt64 `field:"followers"` //
Views *sql.NullInt64 `field:"views"` //
Giver interface{} `field:"giver"` //
Gratuity *sql.NullInt64 `field:"gratuity"` //
LiveTitle *sql.NullString `field:"live_title"` //
LiveStartTime *sql.NullTime `field:"live_start_time"` //
LiveEndTime *sql.NullTime `field:"live_end_time"` //
UpdateTime *sql.NullTime `field:"update_time"` //
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
ErrorMsg *sql.NullString `field:"error_msg"` //
Comments interface{} `field:"comments"` //
Platform Platform //
UserId string // 平台的UserId
IsLiveStreaming bool //
IsError bool //
Followers sql.NullInt64 //
Views sql.NullInt64 //
Giver interface{} //
Gratuity sql.NullInt64 //
LiveTitle sql.NullString //
LiveStartTime sql.NullTime //
LiveEndTime sql.NullTime //
UpdateTime sql.NullTime //
Tags interface{}
Ext interface{} //
ErrorMsg sql.NullString //
}
// Get Simple Value
@ -102,3 +104,36 @@ func (cl *CollectLog) Get(field string) interface{} {
// Set assigns value to the struct field called field via reflection.
func (cl *CollectLog) Set(field string, value interface{}) {
	target := reflect.ValueOf(cl).Elem().FieldByName(field)
	target.Set(reflect.ValueOf(value))
}
// ExtractorSource pairs a gjson result with a hunter extractor that is
// created from it on demand.
type ExtractorSource struct {
// source is the gjson result the extractor is built from.
source *gjson.Result
// extractor is nil until CreateExtractor is called; SetSource and Clear reset it.
extractor *hunter.Extractor
}
// NewExtractorSource returns an ExtractorSource whose source is gr and whose
// extractor has not been created yet.
func NewExtractorSource(gr *gjson.Result) *ExtractorSource {
es := &ExtractorSource{}
es.SetSource(gr)
return es
}
// SetSource replaces the source with gr and discards any existing extractor.
func (es *ExtractorSource) SetSource(gr *gjson.Result) {
es.source = gr
es.extractor = nil
}
// Clear drops both the source and the extractor.
func (es *ExtractorSource) Clear() {
es.source = nil
es.extractor = nil
}
// CreateExtractor builds the extractor from the string form of the source.
// NOTE(review): this dereferences es.source, so it presumably must not be
// called with a nil source (e.g. right after Clear) — confirm against callers.
func (es *ExtractorSource) CreateExtractor() {
es.extractor = hunter.NewExtractor([]byte(es.source.String()))
}
// GetSource returns the current source (nil after Clear).
func (es *ExtractorSource) GetSource() *gjson.Result {
return es.source
}
// GetExtractor returns the current extractor; it is nil unless
// CreateExtractor has been called since the last SetSource or Clear.
func (es *ExtractorSource) GetExtractor() *hunter.Extractor {
return es.extractor
}

12
go.mod
View File

@ -1,18 +1,18 @@
module intimate
go 1.15
go 1.14
require (
github.com/474420502/extractor v0.9.6
github.com/474420502/extractor v0.7.2
github.com/474420502/focus v0.12.0
github.com/474420502/gcurl v0.4.5
github.com/474420502/requests v1.9.1
github.com/davecgh/go-spew v1.1.1
github.com/474420502/gcurl v0.1.2
github.com/474420502/hunter v0.3.4
github.com/474420502/requests v1.6.0
github.com/go-sql-driver/mysql v1.5.0
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
github.com/tebeka/selenium v0.9.9
github.com/tidwall/gjson v1.6.0
github.com/tidwall/pretty v1.0.1 // indirect
golang.org/x/net v0.0.0-20200822124328-c89045814202 // indirect
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
gopkg.in/yaml.v2 v2.3.0
)

24
go.sum
View File

@ -2,20 +2,18 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
github.com/474420502/extractor v0.9.6 h1:mwwzwVeu/sZ4FV190Bl32ta4iVjERBWL6eGLXRDlAtg=
github.com/474420502/extractor v0.9.6/go.mod h1:vJnXWmvO5bJDW4Yag0GoE2GxtHRg03TAxp2oXN1DcSY=
github.com/474420502/extractor v0.7.2 h1:idZnsekOKRV8fpJwsRcr6Ol7KSphKXe9tc+JJXgGqQ4=
github.com/474420502/extractor v0.7.2/go.mod h1:92J6QZKstpAKGhv+DibemhQbR/d6lJ+ftyR/ZHmeJ0w=
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
github.com/474420502/gcurl v0.4.4 h1:ZILu7RRjDBGHpTGmuWGKf1NZZbZsC7AHPlI8RHqs9As=
github.com/474420502/gcurl v0.4.4/go.mod h1:7w4knyVJa1ia4I1xd0krG51fKLGwMmNn5sfG2zPWbqM=
github.com/474420502/gcurl v0.4.5 h1:4y+NbbBGRIWDpfe/iojdSUzlShcZmnkHV4T4etiWQsw=
github.com/474420502/gcurl v0.4.5/go.mod h1:7w4knyVJa1ia4I1xd0krG51fKLGwMmNn5sfG2zPWbqM=
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a h1:E1T6CYQKsUn7fMvNbeKfISjBLfOJjZX4KpWwStT20Kc=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a/go.mod h1:AoSN890esHwNKecV0tCs+W0ele1xgFL1Jqk6UcrdxgU=
github.com/474420502/requests v1.7.0 h1:oaBwVrxZ7yZ+hDOKwHm2NflYib2y1geIUxBxQ2U48mw=
github.com/474420502/requests v1.7.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/474420502/requests v1.9.1 h1:gCDmBCW1ym8zOvKKBGjEG9wafMY7imYk2u28+Oy1WEc=
github.com/474420502/requests v1.9.1/go.mod h1:x2T9l+e40R6kxxMvNm+YSZ9D6BHAXUDak4kQElIPJ9A=
github.com/474420502/hunter v0.3.4 h1:fyLAgI84jWe3IcqsISC53j1w3CXI1FERxX//Potns0M=
github.com/474420502/hunter v0.3.4/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA=
github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY=
github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
@ -24,6 +22,8 @@ github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnRO
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP3VrhEcx8=
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg=
github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
@ -114,8 +114,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=

View File

@ -12,10 +12,4 @@ const (
// Ptwitcasting twitcasting 平台
Ptwitcasting Platform = "twitcasting"
// PNimo PNimo 平台
PNimo Platform = "nimo"
// PMirrativ Mirrativ 平台
PMirrativ Platform = "mirrativ"
)

View File

@ -228,50 +228,6 @@ func NewStoreExtractor() *StoreExtractor {
return &StoreExtractor{db: db}
}
// PopNoWait pops one streamer of the given platform that matches condition,
// without applying the update-interval check. Mainly intended for testing.
//
// The selected row is locked with "for update" inside a transaction and its
// operator column is set to OperatorWait before the transaction is committed.
// When no operators are given, only rows with operator = 0 are considered.
//
// condition is concatenated verbatim into the SQL statement, so it must never
// be built from untrusted input.
//
// It returns an error when the transaction cannot be started, when no row can
// be scanned, or when marking the row as waiting fails.
func (store *StoreExtractor) PopNoWait(platform Platform, condition string, operators ...int32) (*Streamer, error) {
	tx, err := store.db.Begin()
	if err != nil {
		return nil, err
	}

	var args = []interface{}{string(platform)}
	selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and ` + condition
	if len(operators) == 0 {
		selectSQL += " and operator = ?"
		args = append(args, 0)
	} else {
		// NOTE(review): the operator filters are joined with AND, so more
		// than one distinct operator can never match a row — confirm whether
		// OR / IN was intended.
		for _, operator := range operators {
			selectSQL += " and operator = ?"
			args = append(args, operator)
		}
	}

	// The transaction is always finished on return; a failed commit is
	// logged and followed by a rollback attempt.
	defer func() {
		err := tx.Commit()
		if err != nil {
			log.Println(err)
			err = tx.Rollback()
			if err != nil {
				log.Println(err)
			}
		}
		store.popCount++
	}()

	row := tx.QueryRow(selectSQL+` limit 1 for update`, args...)

	s := &Streamer{}
	err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
	if err != nil {
		return nil, err
	}
	s.Set("LastOperator", s.Operator)

	// Report a failure to mark the row instead of silently handing out a
	// streamer that other workers can still pop.
	if _, err = tx.Exec("update "+StreamerTable+" set operator = ? where uid = ?", OperatorWait, s.Uid); err != nil {
		return nil, err
	}
	return s, nil
}
// Pop 弹出一条未处理的数据
func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Streamer, error) {
@ -280,7 +236,7 @@ func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Stream
return nil, err
}
var args = []interface{}{string(platform)}
selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
selectSQL := `select uid, update_time, user_id, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
if len(operators) == 0 {
selectSQL += " and operator = ?"
args = append(args, 0)
@ -308,7 +264,7 @@ func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Stream
s := &Streamer{}
// uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
if err != nil {
return nil, err
}
@ -360,9 +316,9 @@ func (store *StoreExtractor) InsertStreamerList(streamerlist IGet) (isExists boo
}
// InsertStreamer Streamer表, 插入数据
func (store *StoreExtractor) InsertStreamer(streamer *Streamer) (isExists bool) {
func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) {
// select uid from table where platform = ? and user_id = ?
// selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
tx, err := store.db.Begin()
if err != nil {
panic(err)
@ -379,17 +335,18 @@ func (store *StoreExtractor) InsertStreamer(streamer *Streamer) (isExists bool)
}
}()
streamer.UpdateTime = &sql.NullTime{Time: time.Now().Add(-time.Hour * 100000), Valid: true}
_, err = tx.Exec("INSERT IGNORE INTO "+StreamerTable+"(platform, user_id, user_name, live_url, update_url, tags, update_time) VALUES(?,?,?,?,?,?,?);",
streamer.Platform,
streamer.UserId,
streamer.UserName,
streamer.LiveUrl,
streamer.UpdateUrl,
streamer.Tags,
streamer.UpdateTime,
)
row := tx.QueryRow(selectSQL+` LIMIT 1 FOR UPDATE`, streamer.Get("Platform"), streamer.Get("UserId"))
var isUpdateUrl bool
var Uid int64
if err = row.Scan(&isUpdateUrl, &Uid); err == nil {
if isUpdateUrl {
tx.Exec("UPDATE "+StreamerTable+" SET update_url = ?", streamer.Get("UpdateUrl"))
}
streamer.(ISet).Set("Uid", Uid)
return true
}
_, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, tags, update_time) VALUES(?,?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), streamer.Get("Tags"), time.Now().Add(-time.Hour*100000))
if err != nil {
panic(err)
}
@ -425,8 +382,7 @@ func (store *StoreExtractor) UpdateOperator(isource IGet) {
// UpdateStreamer Streamer表, 插入数据
func (store *StoreExtractor) UpdateStreamer(streamer IGet) {
// log.Printf("UPDATE "+StreamerTable+" SET user_name = %v, live_url = %v, channel = %v, latest_log_uid = %v, tags = %v, ext = %v, operator = %v, update_time = %v, update_interval = %v WHERE uid = %v", streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?",
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?;",
streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
if err != nil {
panic(err)
@ -440,14 +396,12 @@ func (store *StoreExtractor) Update(streamer IGet, fieldvalues ...interface{}) {
for i := 0; i < len(fieldvalues); i += 2 {
field := fieldvalues[i]
values = append(values, fieldvalues[i+1])
updateSQL += field.(string) + " = ?,"
updateSQL += field.(string) + " = ? "
}
updateSQL = updateSQL[0 : len(updateSQL)-1]
updateSQL += "WHERE uid = ?"
values = append(values, streamer.Get("Uid"))
_, err := store.db.Exec(updateSQL, values...)
if err != nil {
log.Println(updateSQL)
panic(err)
}
}

View File

@ -5,7 +5,8 @@ import (
)
func TestStoreInsert(t *testing.T) {
// ht := hunter.NewHunter(openrecRanking)
// ht.Execute()
}
func TestStoreInsertCase1(t *testing.T) {

View File

@ -1 +0,0 @@
conf.d

View File

@ -1,7 +0,0 @@
# Empty every file named "log" under the bin directory that sits next to the
# current directory's parent (<parent of cwd>/bin).
CURPATH=$(pwd)
BINPATH=$(dirname "$CURPATH")/bin
# Quoted so that a path containing spaces is passed to find as one argument.
find "$BINPATH" -type f -name 'log' -exec truncate -s 0 {} +

View File

@ -1,8 +0,0 @@
# Copy the *.conf templates of the current directory into ./conf.d, replace
# the MYPATH placeholder with the parent directory, and link the results into
# /etc/supervisor/conf.d/.
CURPATH=$(pwd)
SUPPATH=$(dirname "$CURPATH")
# Variable expansions are quoted so paths with spaces survive word splitting;
# the *.conf globs stay outside the quotes so they still expand.
mkdir -p "$CURPATH"/conf.d
cp *.conf "$CURPATH"/conf.d/
sed -i "s#MYPATH#$SUPPATH#g" "$CURPATH"/conf.d/*.conf
ln -sf "$CURPATH"/conf.d/*.conf /etc/supervisor/conf.d/

View File

@ -1,10 +0,0 @@
[supervisord]
nodaemon=true
[program:mirrativ_extractor]
directory = MYPATH/bin/mirrativ_extractor/
command= MYPATH/bin/mirrativ_extractor/mirrativ_extractor
autorestart=true
stderr_logfile=MYPATH/bin/mirrativ_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=false
[program:mirrativ_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/mirrativ_task1/
command= MYPATH/bin/mirrativ_task1/mirrativ_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/mirrativ_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,10 +0,0 @@
[supervisord]
nodaemon=true
[program:nimo_extractor]
directory = MYPATH/bin/nimo_extractor/
command= MYPATH/bin/nimo_extractor/nimo_extractor
autorestart=true
stderr_logfile=MYPATH/bin/nimo_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=false
[program:nimo_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/nimo_task1/
command= MYPATH/bin/nimo_task1/nimo_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/nimo_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,10 +0,0 @@
[supervisord]
nodaemon=true
[program:openrec_extractor]
directory = MYPATH/bin/openrec_extractor/
command= MYPATH/bin/openrec_extractor/openrec_extractor
autorestart=true
stderr_logfile=MYPATH/bin/openrec_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,10 +0,0 @@
[supervisord]
nodaemon=true
[program:openrec_ranking]
directory = MYPATH/bin/openrec_task1
command= MYPATH/bin/openrec_task1/openrec_task1
autorestart=true
stderr_logfile=MYPATH/bin/openrec_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,14 +0,0 @@
[supervisord]
nodaemon=false
[program:twitcasting_extractor]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitcasting_extractor/
command= MYPATH/bin/twitcasting_extractor/twitcasting_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=3 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitcasting_extractor/log
# stderr_logfile=%(supervisorctl.var.directory)s/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=false
[program:twitcasting_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitcasting_task1/
command= MYPATH/bin/twitcasting_task1/twitcasting_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitcasting_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=true
[program:twitch_extractor]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=5 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=true
[program:twitch_extractor_p1]
environment=DISPLAY=":99",pac_proxy=http://localhost:1090/pac
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=2 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=false
[program:twitch_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitch_task1
command= MYPATH/bin/twitch_task1/twitch_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,8 +0,0 @@
[supervisord]
nodaemon=true
[program:xvfb-99]
command=/usr/bin/Xvfb :99 -screen 0 1280x720x24 -ac -nolisten tcp -dpi 96 +extension RANDR -nolisten tcp
autorestart=true

View File

@ -12,7 +12,4 @@ const (
// STTwitcasting STTwitcasting源table名称
STTwitcasting SourceTable = "source_twitcasting"
// STNimo nimo源table名称
STNimo SourceTable = "source_nimo"
)

View File

@ -1,83 +0,0 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// main polls the Mirrativ live catalog API page by page (following
// next_cursor) and upserts every listed owner into intimate.TStreamer until a
// shutdown is requested. When the catalog is exhausted it waits ten minutes
// before starting again; between pages it waits two seconds.
func main() {
	bcurl := `curl 'https://www.mirrativ.com/api/live/catalog?id=2&cursor=%s' \
-H 'authority: www.mirrativ.com' \
-H 'accept: application/json' \
-H 'x-timezone: Asia/Shanghai' \
-H 'x-csrf-token: F3Ojd6RBtApP6YAZzVn-9jWN1of159VxAqOQL1Zn' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: application/json' \
-H 'sec-fetch-site: same-origin' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.mirrativ.com/' \
-H 'accept-language: zh-CN,zh;q=0.9,ja;q=0.8' \
-H 'cookie: f=A2D75F0E-D218-11EA-A042-452BF6D21CE8; _ga=GA1.2.689947597.1596081392; mr_id=kxb65LddGMZf5C28jkR_tGCZD_ZFOAepD5gfXO7eNjfPMB8EKYvU1Vg_Y29V1lsa; _gid=GA1.2.2116692650.1600139685; lang=ja' \
--compressed`

	curl := gcurl.Parse(bcurl)
	tp := curl.CreateTemporary(nil)

	// The cursor query parameter starts empty and is advanced after each page.
	cursor := tp.QueryParam(`cursor`)
	cursor.StringSet("")

	ps := intimate.NewPerfectShutdown()

	for !ps.IsClose() {
		log.Println(tp.ParsedURL.String())
		resp, err := tp.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second * 2)
			continue
		}

		apijson := gjson.ParseBytes(resp.Content())
		next := apijson.Get("next_cursor").String()

		for _, liveinfo := range apijson.Get("list").Array() {
			// The owner lives under "live_preview" when present, else "live".
			var prekey string
			if liveinfo.Get("live_preview").Exists() {
				prekey = "live_preview"
			} else {
				prekey = "live"
			}

			owner := liveinfo.Get(prekey + ".owner")

			// Store the same value that was checked: String() also covers
			// non-string JSON values (e.g. numeric ids), for which the Str
			// field would be empty.
			if userID := owner.Get("user_id").String(); userID != "" {
				streamer := &intimate.Streamer{}
				streamer.Platform = intimate.PMirrativ
				streamer.Operator = 0
				streamer.UserId = &userID
				streamer.UserName = &sql.NullString{String: owner.Get("name").String(), Valid: true}
				streamer.UpdateInterval = 600
				streamer.UpdateTime = intimate.GetUpdateTimeNow()
				err = intimate.TStreamer.InsertOrUpdate(
					streamer,
					intimate.DUpdate{Field: "update_time"},
				)
				if err != nil {
					log.Println(err)
					panic(err)
				}
			}
		}

		if next == "" {
			ps.Wait(time.Minute * 10)
		} else {
			ps.Wait(time.Second * 2)
		}
		cursor.StringSet(next)
	}
}

View File

@ -1,5 +0,0 @@
package main
// main is the program entry point; it only calls Execute.
func main() {
Execute()
}

View File

@ -1,109 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// Execute pages through the Nimo live-room list API and inserts one streamer
// per listed room into intimate.TStreamer. It stops when a shutdown is
// requested or when the response does not contain a room array; an empty page
// resets paging to page 1 after a 15 minute pause.
func Execute() {
	nimoapi := `curl 'https://api.nimo.tv/oversea/nimo/api/v2/liveRoom/liveRoomPage-1-100-/HK/1028/1000' \
-H 'authority: api.nimo.tv' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: multipart/form-data; boundary=----WebKitFormBoundary3bCA1lzvhj4kBR4Q' \
-H 'accept: */*' \
-H 'origin: https://www.nimo.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.nimo.tv/lives' \
-H 'accept-language: zh-CN,zh;q=0.9' \
--data-binary $'------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="keyType"\r\n\r\n0\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="body"\r\n\r\n{"deviceType":7,"requestSource":"WEB","iNetType":5}\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q--\r\n' \
--compressed`

	parsed := gcurl.Parse(nimoapi)
	session := parsed.CreateSession()
	request := parsed.CreateTemporary(session)

	// Page number embedded in the URL path.
	page := request.PathParam("liveRoomPage-(1)-")

	shutdown := intimate.NewPerfectShutdown()

	for !shutdown.IsClose() {
		resp, err := request.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second)
			continue
		}

		roomlist := gjson.ParseBytes(resp.Content()).Get("data.result.liveRoomViewList")
		if !roomlist.IsArray() {
			log.Println("json is error")
			log.Println(string(resp.Content()))
			break
		}

		rooms := roomlist.Array()
		log.Println(request.GetURLRawPath(), "rooms:", len(rooms))

		if len(rooms) == 0 {
			page.IntSet(1)
			time.Sleep(time.Minute * 15)
			continue
		}

		for _, room := range rooms {
			streamer := &intimate.Streamer{}
			streamer.Platform = intimate.PNimo

			userid := room.Get("id").String()
			if userid == "" {
				log.Println("userid is null.", room.String())
				continue
			}

			streamer.UserId = &userid
			streamer.LiveUrl = &sql.NullString{String: "https://www.nimo.tv/live/" + userid, Valid: true}

			channel := room.Get("roomTypeName").String()
			streamer.Channel = &sql.NullString{String: channel, Valid: channel != ""}

			username := room.Get("anchorName").String()
			streamer.UserName = &sql.NullString{String: username, Valid: username != ""}

			if labels := room.Get("anchorLabels"); labels.IsArray() {
				var tags []string
				for _, label := range labels.Array() {
					tags = append(tags, label.Get("labelName").String())
				}

				data, err := json.Marshal(tags)
				if err != nil {
					panic(err)
				}
				streamer.Tags = data
			}

			streamer.UpdateInterval = 120

			if err = intimate.TStreamer.Insert(streamer); err != nil {
				panic(err)
			}
		}

		page.IntAdd(1)
	}
}

View File

@ -1,55 +0,0 @@
package main
import (
"fmt"
"io/ioutil"
"os"
"testing"
"github.com/474420502/gcurl"
)
// CrawlContent requests page 1 of the Nimo live-room list API and returns the
// raw response body. It panics when the request fails. args is accepted but
// not used.
func CrawlContent(args ...interface{}) []byte {
	const firstPage = 1

	nimoapi := `curl 'https://api.nimo.tv/oversea/nimo/api/v2/liveRoom/liveRoomPage-%d-30-/HK/1028/1000' \
-H 'authority: api.nimo.tv' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: multipart/form-data; boundary=----WebKitFormBoundary3bCA1lzvhj4kBR4Q' \
-H 'accept: */*' \
-H 'origin: https://www.nimo.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.nimo.tv/lives' \
-H 'accept-language: zh-CN,zh;q=0.9' \
--data-binary $'------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="keyType"\r\n\r\n0\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="body"\r\n\r\n{"deviceType":7,"requestSource":"WEB","iNetType":5}\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q--\r\n' \
--compressed`

	request := gcurl.Parse(fmt.Sprintf(nimoapi, firstPage)).CreateTemporary(nil)
	resp, err := request.Execute()
	if err != nil {
		panic(err)
	}
	return resp.Content()
}
// openTestFile returns the contents of the nimo1.json fixture. It panics when
// the file cannot be opened or read. The variadic arguments are ignored.
func openTestFile(...interface{}) []byte {
	f, err := os.Open("../../../testfile/nimo1.json")
	if err != nil {
		panic(err)
	}
	// Release the file handle once the contents have been read.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		panic(err)
	}
	return data
}
var Crawl func(...interface{}) []byte
// Test calls Execute directly and makes no assertions.
// NOTE(review): Execute presumably performs live network and database work —
// confirm before running this in automated test runs.
func Test(t *testing.T) {
Execute()
}

View File

@ -1,5 +1,8 @@
package main
import "github.com/474420502/hunter"
func main() {
Execute()
ht := hunter.NewHunter(openrecRanking)
ht.Execute()
}

View File

@ -4,24 +4,29 @@ import (
"encoding/json"
"intimate"
"log"
"os"
"os/signal"
"strconv"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/gcurl"
"github.com/474420502/hunter"
"github.com/tidwall/gjson"
)
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
var openrecRanking *OpenrecRanking
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
// Execute 执行方法
func Execute() {
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
ps := intimate.NewPerfectShutdown()
turl := `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
func init() {
openrecRanking = &OpenrecRanking{}
openrecRanking.PreCurlUrl = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
@ -34,25 +39,43 @@ func Execute() {
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`
g := gcurl.Parse(turl)
tp := g.Temporary()
}
for !ps.IsClose() {
// OpenrecRanking 获取排名任务
type OpenrecRanking struct {
hunter.PreCurlUrl
}
resp, err := tp.Execute()
// Execute 执行方法
func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
var loop int32 = 1
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
for atomic.LoadInt32(&loop) > 0 {
resp, err := cxt.Hunt()
if err != nil {
log.Println(err)
time.Sleep(time.Second * 2)
continue
}
tp := cxt.Temporary()
content := resp.Content()
if len(content) <= 200 { //末页时没有内容返回, 末页退出
finishpoint := time.Now()
log.Println("任务Ranking UserId结束休眠, 下次启动时间:", finishpoint.Add(time.Minute*120))
for time.Now().Sub(finishpoint) < time.Minute*120 {
time.Sleep(time.Second)
if ps.IsClose() {
if atomic.LoadInt32(&loop) <= 0 {
return
}
}
@ -70,8 +93,15 @@ func Execute() {
for _, User := range result.Array() {
userid := User.Get("channel.id").String()
// data := &intimate.Source{}
// data.Source = sql.NullString{String: userid, Valid: len(userid) > 0}
// data.Url = tp.GetRawURL()
// data.TargetType = string(intimate.TTOpenrecUser)
// sstore.Insert(data)
streamer := &intimate.Streamer{}
streamer.UserId = &userid
streamer.UserId = userid
streamer.Platform = intimate.Popenrec
updateUrl := make(map[string]interface{})
@ -83,18 +113,27 @@ func Execute() {
updateUrlBytes, err := json.Marshal(updateUrl)
if err != nil {
intimate.TStreamer.UpdateError(streamer, err)
estore.UpdateError(streamer, err)
continue
}
streamer.UpdateUrl = updateUrlBytes
intimate.TStreamer.Insert(streamer)
estore.InsertStreamer(streamer)
}
}
log.Println("streamer count:", len(result.Array()), tp.ParsedURL.String())
// 修改url query 参数的page递增. 遍历所有页面
tp.QueryParam("page").IntAdd(1)
querys := tp.GetQuery()
page, err := strconv.Atoi(querys.Get("page"))
if err != nil {
log.Println(err)
return
}
page++
querys.Set("page", strconv.Itoa(page))
tp.SetQuery(querys)
time.Sleep(time.Second * 1)
}
}

View File

@ -3,14 +3,67 @@ package main
import (
"testing"
"time"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
// OpenrecRankingTest is the hunter task that TestRanking runs to check the
// openrec ranking response. It embeds hunter.PreCurlUrl, which TestRanking
// fills with the curl command to execute.
type OpenrecRankingTest struct {
	hunter.PreCurlUrl
}
// Execute performs the request via cxt.Hunt and reports problems on the
// *testing.T stored under the "test" share key: the body must be valid JSON,
// the "0.rank" value must be 1, and the request's "page" query parameter must
// be "1". It panics if Hunt returns an error.
func (or *OpenrecRankingTest) Execute(cxt *hunter.TaskContext) {
	resp, err := cxt.Hunt()
	if err != nil {
		panic(err)
	}
	// The test handle is injected by TestRanking through SetShare("test", t).
	t := cxt.GetShare("test").(*testing.T)
	if !gjson.ValidBytes(resp.Content()) {
		t.Error("source is not json format.")
	}
	// The first entry of the returned array is expected to carry rank 1.
	result := gjson.GetBytes(resp.Content(), "0.rank")
	if result.Int() != 1 {
		t.Error("rank is error. result raw is ", result.Raw)
	}
	if cxt.Temporary().GetQuery().Get("page") != "1" {
		t.Error("Temporary page error")
	}
	// t.Error(string(resp.Content()))
}
// TestRanking runs an OpenrecRankingTest task against the openrec monthly
// channel-ranking endpoint. The *testing.T is shared with the task under the
// "test" key so the task can report failures.
func TestRanking(t *testing.T) {
	const rankingCurl = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
-H 'origin: https://www.openrec.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.openrec.tv/ranking' \
-H 'accept-language: zh-CN,zh;q=0.9' \
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`

	task := &OpenrecRankingTest{PreCurlUrl: hunter.PreCurlUrl(rankingCurl)}

	runner := hunter.NewHunter(task)
	runner.SetShare("test", t)
	runner.Execute()
}
// TestTimeAdd checks the elapsed-time comparison used by the ranking task's
// sleep loop: after sleeping, the time since the start point must exceed a
// shorter threshold.
//
// The original passed the comparison result straight to t.Error, which marks
// the test as failed on every run; the test now fails only when the
// comparison is false. The durations are scaled down so the test does not
// stall the suite for two seconds.
func TestTimeAdd(t *testing.T) {
	finishpoint := time.Now()
	time.Sleep(time.Millisecond * 20)
	if elapsed := time.Since(finishpoint); elapsed <= time.Millisecond*10 {
		t.Errorf("elapsed %v, want more than %v", elapsed, time.Millisecond*10)
	}
}
func TestMain(t *testing.T) {
main()
func TestRankingInsert(t *testing.T) {
ht := hunter.NewHunter(openrecRanking)
ht.Execute()
}

View File

@ -0,0 +1,2 @@
openrec_task2
log

View File

@ -0,0 +1,8 @@
package main
import "github.com/474420502/hunter"
// main runs the package-level oer task under a hunter instance.
func main() {
	hunter.NewHunter(oer).Execute()
}

View File

@ -0,0 +1,178 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"os"
"os/signal"
"strconv"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
// oer is the task instance handed to hunter; it is created in init.
var oer *OpenrecExtratorRanking

// sstore is the source store, which holds raw crawled data. See
// sql/intimate_source.sql for the table layout.
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))

// estore is the extractor store. See sql/intimate_extractor.sql for the
// table layout.
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func init() {
oer = &OpenrecExtratorRanking{}
}
// OpenrecExtratorRanking is the task that fetches openrec user information.
// It carries no state; the stores it uses are package-level variables.
type OpenrecExtratorRanking struct {
	// Store *intimate.Store
}
// Execute pops openrec streamers from the extractor store one at a time,
// downloads the pages named in the streamer's update_url (user page,
// paginated supporters JSON, live page) and stores the raw payloads as one
// source row for later parsing. The loop stops after SIGQUIT or SIGTERM has
// been received.
func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
	var loop int32 = 1

	go func() {
		// signal.Notify does not block when delivering, so the channel is
		// buffered to avoid dropping a signal that arrives before the receive
		// below is ready. SIGKILL and SIGSTOP were removed from the list
		// because they cannot be caught.
		signalchan := make(chan os.Signal, 1)
		signal.Notify(signalchan, syscall.SIGQUIT, syscall.SIGTERM)
		log.Println("accept stop command:", <-signalchan)
		atomic.StoreInt32(&loop, 0)
	}()

	var lasterr error

	for atomic.LoadInt32(&loop) > 0 {
		// Pop one streamer row off the queue to process.
		streamer, err := estore.Pop(intimate.Popenrec)
		if streamer == nil || err != nil {
			// Log only when the error changes, so an idle queue does not
			// flood the log.
			if err != lasterr {
				log.Println(err, lasterr)
				lasterr = err
			}
			time.Sleep(time.Second * 2)
			continue
		}

		userId := streamer.UserId

		// update_url is a JSON object holding the URLs to crawl.
		var updateUrl map[string]string
		err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
		if err != nil {
			log.Println(err)
			continue
		}

		// Fetch the user page.
		userUrl := updateUrl["user"]
		tp := cxt.Session().Get(userUrl)
		resp, err := tp.Execute()
		streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
		if err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}

		cookies := cxt.Session().GetCookies(tp.GetParsedURL())

		// Supporters data is requested in its own session, seeded with the
		// cookies obtained from the user page.
		scurl := updateUrl["supporters"]
		curl := gcurl.ParseRawCURL(scurl)
		supportersSession := curl.CreateSession()

		temporary := curl.CreateTemporary(supportersSession)
		supportersSession.SetCookies(temporary.GetParsedURL(), cookies)

		var supporters []string
		for {
			// The supporters endpoint needs login information: copy the
			// uuid, token and random cookie values into the query string.
			supportersQuery := temporary.GetQuery()
			for _, cookie := range cookies {
				switch cookie.Name {
				case "uuid":
					supportersQuery.Set("Uuid", cookie.Value)
				case "token":
					supportersQuery.Set("Token", cookie.Value)
				case "random":
					supportersQuery.Set("Random", cookie.Value)
				}
			}

			supportersQuery.Set("identify_id", userId)
			temporary.SetQuery(supportersQuery)

			supportersResp, err := temporary.Execute()
			if err != nil {
				// There is no response to read after a failed request; the
				// original went on to call Content() on it. Stop paging and
				// keep the pages collected so far.
				log.Println(err)
				break
			}

			// A page without a "data" member marks the end of the list.
			supporterjson := gjson.ParseBytes(supportersResp.Content())
			supporterdata := supporterjson.Get("data")
			if supporterdata.Type == gjson.Null {
				break
			}
			supporters = append(supporters, string(supportersResp.Content()))

			// Advance page_number by one for the next request.
			page := supportersQuery.Get("page_number")
			pageint, err := strconv.Atoi(page)
			if err != nil {
				log.Println(err)
				break
			}
			pageint++
			page = strconv.Itoa(pageint)
			supportersQuery.Set("page_number", page)
			temporary.SetQuery(supportersQuery)
		}

		// cookies := cxt.Session().GetCookies(wf.GetParsedURL())

		ext := make(map[string]interface{})

		ext["json_supporters"] = supporters
		ext["html_user"] = string(resp.Content())

		// Fetch the live page.
		liveUrl := updateUrl["live"]
		tp = cxt.Session().Get(liveUrl)
		resp, err = tp.Execute()
		if err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}
		ext["html_live"] = string(resp.Content())
		ext["var_user_id"] = userId

		extJsonBytes, err := json.Marshal(ext)
		if err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}

		streamer.Operator = int32(intimate.OperatorOK)

		source := &intimate.Source{}
		source.Target = intimate.TOpenrecUser
		source.Ext = string(extJsonBytes)
		source.StreamerId = sql.NullInt64{Int64: streamer.Uid, Valid: true}
		sstore.Insert(source)

		estore.UpdateOperator(streamer)
	}
}

View File

@ -0,0 +1,12 @@
package main
import (
"testing"
"github.com/474420502/hunter"
)
// TestOpenrecUser runs the package-level oer task through hunter.
func TestOpenrecUser(t *testing.T) {
	hunter.NewHunter(oer).Execute()
}

View File

@ -21,5 +21,5 @@ func TestUpdateTime(t *testing.T) {
}
// TestMain runs the program's main function under "go test".
func TestMain(t *testing.T) {
	main()
}

View File

@ -70,15 +70,13 @@ func Execute() {
if ok := queuedict[wurl]; !ok {
log.Println(wurl)
sl := &intimate.StreamerList{}
sl.Platform = string(intimate.Ptwitcasting)
sl.Platform = intimate.Ptwitcasting
sl.Url = wurl
sl.Operator = 0
sl.UpdateInterval = 120
sl.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
sl.UrlHash = intimate.GetUrlHash(sl.Url)
sl.UpdateTime = time.Now()
intimate.TStreamerList.Insert(sl)
// estore.InsertStreamerList(sl)
estore.InsertStreamerList(sl)
queue.Put(wurl)
queuedict[wurl] = true
@ -91,7 +89,7 @@ func Execute() {
continue
}
var splist = xps.ForEachObjectByTag(SearchProfile{})
var splist = xps.ForEachTag(SearchProfile{})
for _, isp := range splist {
sp := isp.(*SearchProfile)
if sp.LiveUrl == "" {
@ -104,13 +102,12 @@ func Execute() {
sp.TagUrl[i] = wurl
if ok := queuedict[wurl]; !ok {
sl := &intimate.StreamerList{}
sl.Platform = string(intimate.Ptwitcasting)
sl.Platform = intimate.Ptwitcasting
sl.Url = wurl
sl.Operator = 0
sl.UpdateInterval = 120
sl.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
sl.UrlHash = intimate.GetUrlHash(sl.Url)
intimate.TStreamerList.Insert(sl)
sl.UpdateTime = time.Now()
estore.InsertStreamerList(sl)
queue.Put(wurl)
queuedict[wurl] = true
@ -125,19 +122,17 @@ func Execute() {
// log.Println(sp)
streamer := &intimate.Streamer{}
streamer.Platform = intimate.Ptwitcasting
streamer.LiveUrl = &sql.NullString{String: sp.LiveUrl, Valid: true}
streamer.LiveUrl = sql.NullString{String: sp.LiveUrl, Valid: true}
if btags, err := json.Marshal(sp.Tag); err != nil {
log.Println(err)
} else {
streamer.Tags = btags
}
streamer.UpdateInterval = 120
streamer.UpdateTime = intimate.GetUpdateTimeNow()
streamer.UserName = &sql.NullString{String: sp.UserName, Valid: true}
streamer.UserId = &sp.UserId
streamer.Operator = 0
// estore.InsertStreamer(streamer)
intimate.TStreamer.Insert(streamer)
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
streamer.UserName = sql.NullString{String: sp.UserName, Valid: true}
streamer.UserId = sp.UserId
estore.InsertStreamer(streamer)
}
log.Println("finish remain", queue.Size())

View File

@ -1,5 +1,6 @@
package main
func main() {
Execute()
e := ChannelLink{}
e.Execute()
}

View File

@ -1,6 +1,7 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
@ -8,113 +9,104 @@ import (
"github.com/tebeka/selenium"
)
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// ChannelLink 频道链接
type ChannelLink struct {
}
// Execute 执行任务
func Execute() {
func (cl *ChannelLink) Execute() {
var err error
wd := intimate.GetChromeDriver(3030)
ps := intimate.NewPerfectShutdown()
var adriver *intimate.AutoCloseDriver
for !ps.IsClose() {
var err error
adriver = intimate.GetChromeDriver()
wd := adriver.Webdriver
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
panic(err)
}
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*15)
time.Sleep(time.Second)
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
panic(err)
}
e.Click()
var lasthreflen = 0
var hrefs map[string]bool = make(map[string]bool)
var delayerror = 5
for i := 0; i <= 200; i++ {
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
log.Println(err)
break
}
if len(hrefs) == lasthreflen {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 7
}
lasthreflen = len(hrefs)
for ii := 0; ii < 10; ii++ {
for _, card := range cards {
href, err := card.GetAttribute("href")
if err != nil {
log.Println(href, err)
continue
} else {
hrefs[href] = true
}
}
break
}
if ps.IsClose() {
break
}
if len(cards) > 10 {
log.Println(len(cards))
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2500)
}
for href := range hrefs {
sl := &intimate.StreamerList{}
sl.Url = href
sl.UrlHash = intimate.GetUrlHash(sl.Url)
sl.Platform = string(intimate.Ptwitch)
sl.UpdateTime = intimate.GetUpdateTimeNow()
err := intimate.TStreamerList.Insert(sl)
if err != nil {
log.Println(err)
}
}
log.Println("hrefs len:", len(hrefs))
adriver.Close()
ps.Wait(time.Minute * 5)
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
panic(err)
}
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*15)
time.Sleep(time.Second)
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
panic(err)
}
e.Click()
var hrefs map[string]bool = make(map[string]bool)
var delayerror = 5
for i := 0; i <= 200; i++ {
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
log.Println(err)
break
}
if len(hrefs) == 0 {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 5
}
for ii := 0; ii < 10; ii++ {
for _, card := range cards {
href, err := card.GetAttribute("href")
if err != nil {
log.Println(href, err)
continue
} else {
hrefs[href] = true
}
}
break
}
if ps.IsClose() {
break
}
if len(cards) > 10 {
log.Println(len(cards))
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2500)
}
for href := range hrefs {
// TODO: Save href
source := &intimate.Source{}
source.Source = sql.NullString{String: href, Valid: true}
source.Operator = 0
source.Target = intimate.TTwitchChannel
source.Url = weburl
sstore.Insert(source)
}
log.Println("hrefs len:", len(hrefs))
sstore.Deduplicate(intimate.TTwitchChannel, "source")
}

View File

@ -5,7 +5,8 @@ import (
)
func TestCase1(t *testing.T) {
Execute()
e := ChannelLink{}
e.Execute()
}
func TestLiveUrl(t *testing.T) {

2
tasks/twitch/twitch_task2/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
twitch_task2
log

View File

@ -0,0 +1,6 @@
package main
// main runs the UserList task.
func main() {
	(&UserList{}).Execute()
}

View File

@ -0,0 +1,180 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"time"
"github.com/tebeka/selenium"
)
// sstore is the source store, which holds raw crawled data. See
// sql/intimate_source.sql for the table layout.
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

// estore is the extractor store connection.
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// UserList is the task that collects the streamers listed on each Twitch
// channel page taken from the source store. It carries no state.
type UserList struct {
}
// Execute pops Twitch channel URLs from the source store, opens each one in
// Chrome sorted by viewer count, scrolls to load more cards, and stores every
// streamer found (user id, live URL, tags) in the extractor store. It runs
// until a shutdown is requested and panics if the source store cannot be
// popped.
func (cl *UserList) Execute() {
	// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
	//article//a[@data-a-target='preview-card-title-link']

	wd := intimate.GetChromeDriver(3030)
	// The counter callback below swaps wd for a fresh driver, so the cleanup
	// reads the variable when it runs. The original deferred Close/Quit on
	// the first driver (possibly already quit by the callback) and then
	// closed the current one again at the end of the function.
	defer func() {
		if err := wd.Close(); err != nil {
			log.Println(err)
		}
		if err := wd.Quit(); err != nil {
			log.Println(err)
		}
	}()

	ps := intimate.NewPerfectShutdown()

	// After 100 processed channels the callback quits the browser and starts
	// a new one.
	counter := intimate.NewCounter()
	counter.SetMaxLimit(100)
	counter.SetMaxToDo(func(olist ...interface{}) error {
		owd := olist[0].(*selenium.WebDriver)
		if err := (*owd).Quit(); err != nil {
			log.Println(err)
		}
		*owd = intimate.GetChromeDriver(3030)
		return nil
	}, &wd)

	// Compiled once instead of once per article.
	userIDPattern := regexp.MustCompile(`https://www.twitch.tv/(\w+)`)

	for !ps.IsClose() {
		var err error
		sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
		if err != nil {
			panic(err)
		}

		weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
		err = wd.Get(weburl)
		if err != nil {
			log.Println(err)
			sstore.UpdateError(sourceChannel, err)
			time.Sleep(time.Second * 10)
			continue
		}

		// Wait up to 10 seconds for the page to render its last card
		// paragraph; a timeout is tolerated and handled by the lookups below.
		wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
			_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			if err != nil {
				return false, err
			}
			return true, nil
		}, time.Second*10)

		btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
		if err != nil {
			log.Println(err)
			continue
		}
		btn.Click()

		var elements []selenium.WebElement
		var liveurls = 0
		var delayerror = 2
		// Press End repeatedly to trigger lazy loading; stop once the element
		// count has been unchanged for two consecutive rounds.
		for i := 0; i < 200 && !ps.IsClose(); i++ {
			elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			if err != nil {
				log.Println(err)
				break
			}
			time.Sleep(time.Millisecond * 200)
			wd.KeyDown(selenium.EndKey)
			time.Sleep(time.Millisecond * 200)
			wd.KeyUp(selenium.EndKey)
			time.Sleep(time.Millisecond * 2000)
			if len(elements) == liveurls {
				delayerror--
				if delayerror <= 0 {
					break
				}
			} else {
				delayerror = 2
			}
			liveurls = len(elements)
		}

		articles, err := wd.FindElements(selenium.ByXPATH, "//article")
		if err != nil {
			log.Println(err)
			continue
		}

		for _, article := range articles {
			e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
			if err != nil {
				log.Println(err)
				continue
			}
			href, err := e.GetAttribute("href")
			if err != nil {
				log.Println(err)
				continue
			}

			btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
			if err != nil {
				log.Println(err)
				continue
			}

			var tags []string
			for _, tagBtn := range btns {
				tag, err := tagBtn.GetAttribute("data-a-target")
				if err == nil {
					tags = append(tags, tag)
				}
			}

			streamer := &intimate.Streamer{}

			// The first path segment of the live URL is used as the user id.
			matches := userIDPattern.FindStringSubmatch(href)
			if len(matches) == 2 {
				streamer.UserId = matches[1]
			} else {
				log.Println(href)
				continue
			}

			jtags, err := json.Marshal(tags)
			if err != nil {
				log.Println(err)
			} else {
				streamer.Tags = jtags
			}

			streamer.Platform = intimate.Ptwitch

			updateUrl := make(map[string]string)
			updateUrl["live"] = href
			streamer.LiveUrl = sql.NullString{String: href, Valid: true}
			data, err := json.Marshal(updateUrl)
			if err != nil {
				log.Println(err)
				continue
			}
			streamer.UpdateUrl = data
			streamer.Operator = 0

			if estore.InsertStreamer(streamer) {
				// log.Println("streamer update tags", streamer.Uid, tags)
				estore.Update(streamer, "Tags", streamer.Tags)
			}
		}

		log.Println("streamer find", len(articles))
		if len(articles) == 0 {
			sourceChannel.Operator = 5
			sstore.UpdateOperator(sourceChannel)
		}

		counter.AddWithReset(1)
	}
}

View File

@ -1,8 +1,6 @@
package main
import (
"testing"
)
import "testing"
func TestMain(t *testing.T) {
main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

133
utils.go
View File

@ -1,14 +1,11 @@
package intimate
import (
"crypto/md5"
"database/sql"
"fmt"
"log"
"net"
"os"
"os/exec"
"os/signal"
"runtime"
"strconv"
"strings"
"sync/atomic"
@ -31,26 +28,15 @@ func init() {
}
// GetUpdateTimeNow returns a valid sql.NullTime set 100000 hours before the
// current time. It is meant for a row's first insert, so that the row is
// treated as due for an update right away.
func GetUpdateTimeNow() *sql.NullTime {
	stamp := time.Now().Add(-100000 * time.Hour)
	return &sql.NullTime{Valid: true, Time: stamp}
}
// GetUrlHash returns the MD5 digest of urlstr as a lowercase hex string.
func GetUrlHash(urlstr string) string {
	digest := md5.Sum([]byte(urlstr))
	return fmt.Sprintf("%x", digest)
}
// ParseNumber 去逗号解析数字
func ParseNumber(num string) (int64, error) {
num = strings.Trim(num, " ")
num = strings.ReplaceAll(num, ",", "")
return strconv.ParseInt(num, 10, 64)
func ParseNumber(number string) (int64, error) {
number = strings.ReplaceAll(number, ",", "")
return strconv.ParseInt(number, 10, 64)
}
// ParseNumberEx 解析带字符的数字
func ParseNumberEx(num string) (float64, error) {
num = strings.Trim(num, " ")
num = strings.ReplaceAll(num, ",", "")
last := num[len(num)-1]
factor := 1.0
switch {
@ -94,39 +80,7 @@ func ParseDuration(dt string) (time.Duration, error) {
return tdt.Sub(zeroTime), nil
}
// AutoCloseDriver pairs a selenium WebDriver with the port it was started
// on, so that the processes behind it can be found and killed by Close.
type AutoCloseDriver struct {
	Webdriver selenium.WebDriver
	Port      int
}

// Close force-kills every process whose command line contains
// "port=<Port>", together with its child processes. Failures are logged and
// never returned.
func (adriver *AutoCloseDriver) Close() {
	data, err := exec.Command("/bin/bash", "-c", fmt.Sprintf(`pgrep -f "port=%d"`, adriver.Port)).Output()
	if err != nil {
		log.Println(err)
		log.Println(string(data))
		return
	}

	// pgrep prints one PID per line with a trailing newline. The original
	// spliced that raw output into a single shell command, which breaks as
	// soon as more than one PID is reported. Handle each PID on its own.
	for _, pid := range strings.Fields(string(data)) {
		// Kill the children first. pkill exits non-zero when nothing
		// matched, so an error here must not stop the parent from being
		// killed (the original returned at this point).
		if err := exec.Command("/bin/bash", "-c", "pkill -9 -P "+pid).Run(); err != nil {
			log.Println(err)
		}

		if err := exec.Command("/bin/bash", "-c", "kill -9 "+pid).Run(); err != nil {
			log.Println(err)
		}
	}
}
func GetChromeDriver() *AutoCloseDriver {
port := GetFreePort()
func GetChromeDriver(port int) selenium.WebDriver {
var err error
caps := selenium.Capabilities{"browserName": "chrome"}
@ -134,7 +88,6 @@ func GetChromeDriver() *AutoCloseDriver {
for _, epath := range []string{"../../../crx/myblock.crx", "../../crx/myblock.crx"} {
_, err := os.Stat(epath)
if err == nil {
err := chromecaps.AddExtension(epath)
if err != nil {
panic(err)
@ -162,27 +115,28 @@ func GetChromeDriver() *AutoCloseDriver {
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
caps.AddChrome(chromecaps)
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", port)
if err != nil {
panic(err)
}
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
if err != nil {
panic(err)
}
adriver := &AutoCloseDriver{}
adriver.Port = port
adriver.Webdriver = wd
runtime.SetFinalizer(wd, func(obj interface{}) {
if err := obj.(selenium.WebDriver).Close(); err != nil {
log.Println(err)
}
if err := obj.(selenium.WebDriver).Quit(); err != nil {
log.Println(err)
}
})
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
if err != nil {
panic(err)
}
return adriver
return wd
}
// PerfectShutdown 完美关闭程序
@ -210,18 +164,6 @@ func (ps *PerfectShutdown) IsClose() bool {
return atomic.LoadInt32(&ps.loop) == 0
}
// Wait pauses for up to tm, checking once per second whether a shutdown has
// been requested. It returns false as soon as IsClose reports true, and true
// once more than tm has elapsed without a shutdown.
func (ps *PerfectShutdown) Wait(tm time.Duration) bool {
	start := time.Now()
	for {
		if time.Since(start) > tm {
			return true
		}
		if ps.IsClose() {
			return false
		}
		time.Sleep(time.Second)
	}
}
type Counter struct {
dcount int
count int
@ -314,50 +256,3 @@ func (c *Counter) Add(n int) error {
}
return nil
}
// WaitFor wraps a selenium WebDriver and offers helpers that wait for an
// XPath to match elements on the page.
type WaitFor struct {
	WebDriver selenium.WebDriver
}
// NewWaitFor returns a WaitFor bound to wd.
func NewWaitFor(wd selenium.WebDriver) *WaitFor {
	waiter := new(WaitFor)
	waiter.WebDriver = wd
	return waiter
}
// Default is WaitWithTimeout with a fixed 15 second timeout.
func (wf *WaitFor) Default(xpath string, do func(elements ...selenium.WebElement) bool) error {
	timeout := 15 * time.Second
	return wf.WaitWithTimeout(xpath, timeout, do)
}
// WaitWithTimeout waits, for at most timeout, until xpath matches at least
// one element and do accepts the matches. A nil do accepts any non-empty
// match. A lookup error is logged and ends the wait with that error.
func (wf *WaitFor) WaitWithTimeout(xpath string, timeout time.Duration, do func(elements ...selenium.WebElement) bool) error {
	condition := func(driver selenium.WebDriver) (bool, error) {
		found, err := driver.FindElements(selenium.ByXPATH, xpath)
		if err != nil {
			log.Println(err)
			return false, err
		}
		if len(found) == 0 {
			return false, nil
		}
		// With matches present, succeed when there is no callback or the
		// callback approves them.
		return do == nil || do(found...), nil
	}
	return wf.WebDriver.WaitWithTimeout(condition, timeout)
}
// GetFreePort asks the OS for an unused TCP port on localhost by listening
// on port 0, and returns the port number that was assigned. The listener is
// closed before returning. It panics if the address cannot be resolved or
// the listener cannot be opened.
func GetFreePort() int {
	tcpAddr, resolveErr := net.ResolveTCPAddr("tcp", "localhost:0")
	if resolveErr != nil {
		panic(resolveErr)
	}

	listener, listenErr := net.ListenTCP("tcp", tcpAddr)
	if listenErr != nil {
		panic(listenErr)
	}
	defer listener.Close()

	bound := listener.Addr().(*net.TCPAddr)
	return bound.Port
}