package moneymoney import ( "bytes" "context" "encoding/csv" "encoding/gob" "encoding/json" "fmt" "io/ioutil" "log" "net" "os" "regexp" "strconv" "time" "github.com/474420502/gcurl" "github.com/474420502/structure/tree/treelist" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/devices" "github.com/go-rod/rod/lib/launcher" "github.com/go-rod/rod/lib/proto" "github.com/tidwall/gjson" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "go.mongodb.org/mongo-driver/mongo/readpref" "golang.org/x/text/encoding/simplifiedchinese" "golang.org/x/text/transform" ) var DefaultPage *rod.Page func GetDefaultPage() *rod.Page { if DefaultPage != nil { return DefaultPage } screen := devices.Device{ Title: "Laptop with MDPI screen", Capabilities: []string{"touch", "mobile"}, UserAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36", Screen: devices.Screen{ DevicePixelRatio: 1, Horizontal: devices.ScreenSize{ Width: 1920, Height: 1080, }, }, } port := GetPort() log.Println("get port:", port) rodlauncher := launcher.New(). Bin(`google-chrome`). RemoteDebuggingPort(port). Set("user-data-dir", fmt.Sprintf("/tmp/%s_rod", "money-money")). Delete("headless") //debug url launchers := rodlauncher.MustLaunch() fmt.Printf("debug url: %s\n", launchers) //连接浏览器 browser := rod.New().ControlURL(launchers).MustConnect() page := browser.DefaultDevice(screen).MustPage() // p := page.Timeout(time.Second * 15) DefaultPage = page return DefaultPage } var client *mongo.Client var cstock *mongo.Collection var DateStocks *treelist.Tree[int64, *treelist.Tree[int64, *Stock]] var CountedDays map[int]bool var err error var cxt context.Context var cancel context.CancelFunc func init() { log.SetFlags(log.Llongfile | log.LstdFlags) cxt, cancel = context.WithTimeout(context.TODO(), time.Second*10) defer cancel() client, err = mongo.Connect(cxt, options.Client().ApplyURI("mongodb://localhost:27017")) if err != nil { panic(err) } client.Ping(cxt, &readpref.ReadPref{}) if err != nil { panic(err) } cstock = client.Database("money").Collection("stock") DateStocks = GetAll() CountedDays = map[int]bool{} for i := 1; i < 7; i++ { CountedDays[1<= 50000000000 { DownloadDataFromCode(code) } } } func GetStocks() []*StockBase { // client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://localhost:27017")) cur, err := client.Database("money").Collection("stock").Distinct(context.TODO(), "股票数字代码", bson.M{}) if err != nil { panic(err) } var skipMap map[int64]bool = make(map[int64]bool) for _, idoc := range cur { var scode = idoc.(int64) // err = cur.Decode(&doc) if err == nil { skipMap[scode] = true } else { log.Panic(err) } } var stockCodesFile = "./stock_codes.gob" var stockCodes []*StockBase f, err := os.Open(stockCodesFile) if err == nil { err = gob.NewDecoder(f).Decode(&stockCodes) if err != nil { panic(err) } } else { murl := `curl 'http://quotes.money.163.com/hs/service/diyrank.php?page=0&query=STYPE%3AEQA&fields=NO%2CSYMBOL%2CNAME%2CPRICE%2CPERCENT%2CUPDOWN%2CFIVE_MINUTE%2COPEN%2CYESTCLOSE%2CHIGH%2CLOW%2CVOLUME%2CTURNOVER%2CHS%2CLB%2CWB%2CZF%2CPE%2CMCAP%2CTCAP%2CMFSUM%2CMFRATIO.MFRATIO2%2CMFRATIO.MFRATIO10%2CSNAME%2CCODE%2CANNOUNMT%2CUVSNEWS&sort=PERCENT&order=asc&count=6000&type=query'` tp := gcurl.Parse(murl).Temporary() resp, err := tp.Execute() if err != nil { panic(err) } jr := gjson.ParseBytes(resp.Content()) log.Println(len(jr.Get("list").Array())) for _, s := range jr.Get("list").Array() { var stockCode StockBase err = json.Unmarshal([]byte(s.String()), &stockCode) if err != nil { panic(err) } stockCodes = append(stockCodes, &stockCode) } // log.Println(jr.String()) } f, err = os.OpenFile(stockCodesFile, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0664) if err != nil { panic(err) } err = gob.NewEncoder(f).Encode(&stockCodes) if err != nil { panic(err) } if f != nil { err = f.Close() if err != nil { panic(err) } } re, _ := regexp.Compile(`\d+`) log.Println("stocks", len(stockCodes)) var result []*StockBase for _, code := range stockCodes { scode, err := strconv.ParseInt(re.FindString(code.SYMBOL), 10, 64) if err != nil { panic(err) } if _, ok := skipMap[scode]; ok { continue } result = append(result, code) // if code.MCAP >= 50000000000 { // DownloadDataFromCode(client, code) // } } return result } func DownloadDataFromCode(code *StockBase) { // 300731 // durl := `curl 'http://quotes.money.163.com/service/chddata.html?code=${CODE}&start=20170101&end=20220621&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP' \ // -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \ // -H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8' \ // -H 'Connection: keep-alive' \ // -H 'Cookie: _ntes_nnid=07a59ac6cc3c3873093db99e3419a5c7,1652972918736; _ntes_nuid=07a59ac6cc3c3873093db99e3419a5c7; _antanalysis_s_id=1655737843219; ne_analysis_trace_id=1655740348110; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; s_n_f_l_n3=90474b666b6678eb1655828892281; pgr_n_f_l_n3=90474b666b6678eb16558271774556486; vinfo_n_f_l_n3=90474b666b6678eb.1.6.1655737842842.1655828160869.1655828914287' \ // -H 'Referer: http://quotes.money.163.com/trade/lsjysj_${SYMBOL}.html' \ // -H 'Upgrade-Insecure-Requests: 1' \ // -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36' ` // http://quotes.money.163.com/0601988.html page := GetDefaultPage() stockurl := fmt.Sprintf("http://quotes.money.163.com/%s.html", code.CODE) log.Println(stockurl) page.Navigate(stockurl) page.WaitNavigation(proto.PageLifecycleEventNameFirstContentfulPaint)() time.Sleep(time.Millisecond * 100) ele, err := page.ElementsX(fmt.Sprintf("//a[contains(@href,'/trade/lsjysj_%s')]/@href", code.SYMBOL)) if err != nil { panic(err) } if iter := ele.First(); iter != nil { iter.WaitEnabled() urlpath, err := iter.HTML() log.Println("click", urlpath) if err != nil { panic(err) } page.Navigate("http://quotes.money.163.com" + urlpath) ahref := page.MustElementX("//a[@id='downloadData']") log.Println("wait downloadData") ahref.WaitEnabled() time.Sleep(time.Millisecond * 500) ahref.Click(proto.InputMouseButtonLeft) e := page.MustElementX("//a[@class='blue_btn submit']") log.Println("wait blue_btn submit") e.WaitEnabled() time.Sleep(time.Millisecond * 500) w := page.Browser().MustWaitDownload() e.Click(proto.InputMouseButtonLeft) downloaddata := w() log.Println(len(downloaddata)) // durl = strings.ReplaceAll(durl, `${SYMBOL}`, code.SYMBOL) // durl = strings.ReplaceAll(durl, `${CODE}`, code.CODE) // resp, err := gcurl.Parse(durl).Temporary().Execute() // if err != nil { // panic(err) // } reader := csv.NewReader(bytes.NewBuffer(downloaddata)) alls, err := reader.ReadAll() if err != nil { panic(err) } var jfield []string for _, field := range alls[0] { v, _ := GbkToUtf8([]byte(field)) jfield = append(jfield, string(v)) // log.Printf("%#v", string(v)) } re, _ := regexp.Compile(`\d+`) var stocks []mongo.WriteModel for _, line := range alls[1:] { var fields []string for _, field := range line { v, _ := GbkToUtf8([]byte(field)) fields = append(fields, string(v)) } code, err := strconv.ParseInt(re.FindString(fields[1]), 10, 64) if err != nil { panic(err) } ts, err := time.ParseInLocation("2006-01-02", fields[0], time.Local) if err != nil { panic(err) } s := &Stock{ Date: ts, CodeStr: fields[1], Name: fields[2], ClosingPrice: ToFloat(fields[3]), MaxPrice: ToFloat(fields[4]), MinPrice: ToFloat(fields[5]), OpeningPrice: ToFloat(fields[6]), PreviousClosingPrice: ToFloat(fields[7]), UpsDowns: ToFloat(fields[8]), UpsDownsRatio: ToFloat(fields[9]), TurnoverRate: ToFloat(fields[10]), Volume: ToFloat(fields[11]), Turnover: ToFloat(fields[12]), MarketValue: ToFloat(fields[13]), CirculatingMarketValue: ToFloat(fields[14]), Code: code, } stocks = append(stocks, &mongo.InsertOneModel{Document: s}) } cstock := client.Database("money").Collection("stock") r, err := cstock.BulkWrite(context.TODO(), stocks) if err != nil { log.Println(err) } log.Println(code.SYMBOL, r) time.Sleep(time.Second * 1) } } func SaveFromCSV(downloaddata []byte) { reader := csv.NewReader(bytes.NewBuffer(downloaddata)) alls, err := reader.ReadAll() if err != nil { panic(err) } var jfield []string for _, field := range alls[0] { v, _ := GbkToUtf8([]byte(field)) jfield = append(jfield, string(v)) // log.Printf("%#v", string(v)) } re, _ := regexp.Compile(`\d+`) var stocks []mongo.WriteModel for _, line := range alls[1:] { var fields []string for _, field := range line { v, _ := GbkToUtf8([]byte(field)) fields = append(fields, string(v)) } code, err := strconv.ParseInt(re.FindString(fields[1]), 10, 64) if err != nil { panic(err) } ts, err := time.ParseInLocation("2006-01-02", fields[0], time.Local) if err != nil { panic(err) } s := &Stock{ Date: ts, CodeStr: fields[1], Name: fields[2], ClosingPrice: ToFloat(fields[3]), MaxPrice: ToFloat(fields[4]), MinPrice: ToFloat(fields[5]), OpeningPrice: ToFloat(fields[6]), PreviousClosingPrice: ToFloat(fields[7]), UpsDowns: ToFloat(fields[8]), UpsDownsRatio: ToFloat(fields[9]), TurnoverRate: ToFloat(fields[10]), Volume: ToFloat(fields[11]), Turnover: ToFloat(fields[12]), MarketValue: ToFloat(fields[13]), CirculatingMarketValue: ToFloat(fields[14]), Code: code, } stocks = append(stocks, &mongo.InsertOneModel{Document: s}) } cstock := client.Database("money").Collection("stock") r, err := cstock.BulkWrite(context.TODO(), stocks) if err != nil { log.Println(err) } log.Println(r) } func ToFloat(s string) float64 { if s == "None" { return 0 } v, err := strconv.ParseFloat(s, 64) if err != nil { panic(err) } return v } func GbkToUtf8(s []byte) ([]byte, error) { reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder()) d, e := ioutil.ReadAll(reader) if e != nil { return nil, e } return d, nil } func GetPort() int { l, _ := net.Listen("tcp", ":0") // listen on localhost port := l.Addr().(*net.TCPAddr).Port err := l.Close() if err != nil { panic(err) } // ip := l.Addr().(*net.TCPAddr).IP return port }