// Package moneymoney fetches a stock ranking list from quotes.money.163.com,
// downloads the historical-quote CSV of selected stocks through a
// browser driven by rod, and stores the parsed rows in MongoDB.
package moneymoney

import (
	"bytes"
	"context"
	"encoding/csv"
	"encoding/gob"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net"
	"os"
	"regexp"
	"strconv"
	"time"

	"github.com/474420502/gcurl"
	"github.com/go-rod/rod"
	"github.com/go-rod/rod/lib/devices"
	"github.com/go-rod/rod/lib/launcher"
	"github.com/go-rod/rod/lib/proto"
	"github.com/tidwall/gjson"
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)

// Stock is one row of the downloaded historical-quote CSV, as stored in the
// "stock" collection. Fields are filled from the CSV columns in order.
type Stock struct {
	// Date is the trading date (CSV column 0).
	Date string `json:"Date" bson:"Date"`
	// CodeStr is the stock code exactly as it appears in the CSV (column 1).
	CodeStr string `json:"CodeStr" bson:"CodeStr"`
	// Name is the stock name.
	Name string `json:"Name" bson:"Name"`
	// ClosingPrice is the closing price.
	ClosingPrice float64 `json:"ClosingPrice" bson:"ClosingPrice"`
	// MaxPrice is the highest price.
	MaxPrice float64 `json:"MaxPrice" bson:"MaxPrice"`
	// MinPrice is the lowest price.
	MinPrice float64 `json:"MinPrice" bson:"MinPrice"`
	// OpeningPrice is the opening price.
	OpeningPrice float64 `json:"OpeningPrice" bson:"OpeningPrice"`
	// PreviousClosingPrice is the previous close.
	PreviousClosingPrice float64 `json:"PreviousClosingPrice" bson:"PreviousClosingPrice"`
	// UpsDowns is the price change amount.
	UpsDowns float64 `json:"UpsDowns" bson:"UpsDowns"`
	// UpsDownsRatio is the price change ratio.
	UpsDownsRatio float64 `json:"UpsDownsRatio" bson:"UpsDownsRatio"`
	// TurnoverRate is the turnover rate.
	TurnoverRate float64 `json:"TurnoverRate" bson:"TurnoverRate"`
	// Volume is the traded volume.
	Volume float64 `json:"Volume" bson:"Volume"`
	// Turnover is the traded amount.
	Turnover float64 `json:"Turnover" bson:"Turnover"`
	// MarketValue is the total market value.
	MarketValue float64 `json:"MarketValue" bson:"MarketValue"`
	// CirculatingMarketValue is the circulating market value.
	CirculatingMarketValue float64 `json:"CirculatingMarketValue" bson:"CirculatingMarketValue"`
	// Code is the first run of digits found in CodeStr.
	Code string `json:"Code" bson:"Code"`
}

// StockBase is one entry of the ranking list returned by the diyrank.php
// service. The field names mirror the JSON keys of that response and are also
// what the gob cache file is encoded with, so they must not be renamed.
//
// NOTE(review): judging by the URLs built from them, CODE appears to carry a
// leading market/region digit (e.g. 0601988) while SYMBOL is the bare code
// (e.g. 601988) — confirm against a live response.
type StockBase struct {
	CODE        string  `json:"CODE"`
	FIVE_MINUTE float64 `json:"FIVE_MINUTE"`
	HIGH        float64 `json:"HIGH"`
	HS          float64 `json:"HS"`
	LB          float64 `json:"LB"`
	LOW         float64 `json:"LOW"`
	MCAP        float64 `json:"MCAP"`
	MFSUM       float64 `json:"MFSUM"`
	NAME        string  `json:"NAME"`
	OPEN        float64 `json:"OPEN"`
	PE          float64 `json:"PE"`
	PERCENT     float64 `json:"PERCENT"`
	PRICE       float64 `json:"PRICE"`
	SNAME       string  `json:"SNAME"`
	SYMBOL      string  `json:"SYMBOL"`
	TCAP        float64 `json:"TCAP"`
	TURNOVER    float64 `json:"TURNOVER"`
	UPDOWN      float64 `json:"UPDOWN"`
	VOLUME      float64 `json:"VOLUME"`
	WB          float64 `json:"WB"`
	YESTCLOSE   float64 `json:"YESTCLOSE"`
	ZF          float64 `json:"ZF"`
	NO          float64 `json:"NO"`
}

const (
	// stockCodesFile is the gob cache of the ranking list.
	stockCodesFile = "./stock_codes.gob"

	// minMCAP is the smallest StockBase.MCAP for which history is downloaded.
	minMCAP = 50000000000

	// stockCSVColumns is the number of CSV columns a Stock is built from.
	stockCSVColumns = 15

	// rankingCurl is the "copy as cURL" request for the ranking list; the
	// page=N query parameter is rewritten for every page that is fetched.
	rankingCurl = `curl 'http://quotes.money.163.com/hs/service/diyrank.php?host=http%3A%2F%2Fquotes.money.163.com%2Fhs%2Fservice%2Fdiyrank.php&page=1&query=STYPE%3AEQA&fields=NO%2CSYMBOL%2CNAME%2CPRICE%2CPERCENT%2CUPDOWN%2CFIVE_MINUTE%2COPEN%2CYESTCLOSE%2CHIGH%2CLOW%2CVOLUME%2CTURNOVER%2CHS%2CLB%2CWB%2CZF%2CPE%2CMCAP%2CTCAP%2CMFSUM%2CMFRATIO.MFRATIO2%2CMFRATIO.MFRATIO10%2CSNAME%2CCODE%2CANNOUNMT%2CUVSNEWS&sort=PERCENT&order=desc&count=1000&type=query' \
  -H 'Accept: application/json, text/javascript, */*; q=0.01' \
  -H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8' \
  -H 'Connection: keep-alive' \
  -H 'Cookie: _ntes_nnid=07a59ac6cc3c3873093db99e3419a5c7,1652972918736; _ntes_nuid=07a59ac6cc3c3873093db99e3419a5c7; _antanalysis_s_id=1655737843219; s_n_f_l_n3=90474b666b6678eb1655739716131; ne_analysis_trace_id=1655740348110; _ntes_stock_recent_=0601857%7C0601808; _ntes_stock_recent_=0601857%7C0601808; _ntes_stock_recent_=0601857%7C0601808; pgr_n_f_l_n3=90474b666b6678eb165574055174140; vinfo_n_f_l_n3=90474b666b6678eb.1.1.1655737842842.1655738334425.1655740555481' \
  -H 'Referer: http://quotes.money.163.com/old/' \
  -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36' \
  -H 'X-Requested-With: XMLHttpRequest'`
)

// digitsRe extracts the numeric part of a stock code; compiled once instead
// of on every download.
var digitsRe = regexp.MustCompile(`\d+`)

// DefaultPage is the lazily created browser page shared by all downloads.
var DefaultPage *rod.Page

// GetDefaultPage returns the shared browser page, launching a non-headless
// google-chrome instance on a free debugging port on first use. It panics if
// the browser cannot be launched or connected to.
func GetDefaultPage() *rod.Page {
	if DefaultPage != nil {
		return DefaultPage
	}

	screen := devices.Device{
		Title:        "Laptop with MDPI screen",
		Capabilities: []string{"touch", "mobile"},
		UserAgent:    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
		Screen: devices.Screen{
			DevicePixelRatio: 1,
			Horizontal: devices.ScreenSize{
				Width:  1920,
				Height: 1080,
			},
		},
	}

	port := GetPort()
	log.Println("get port:", port)

	rodlauncher := launcher.New().
		Bin(`google-chrome`).
		RemoteDebuggingPort(port).
		Set("user-data-dir", fmt.Sprintf("/tmp/%s_rod", "money-money")).
		Delete("headless") // show the browser window

	// Debug (control) URL of the launched browser.
	controlURL := rodlauncher.MustLaunch()
	fmt.Printf("debug url: %s\n", controlURL)

	// Connect to the browser.
	browser := rod.New().ControlURL(controlURL).MustConnect()
	DefaultPage = browser.DefaultDevice(screen).MustPage()
	return DefaultPage
}

// main connects to MongoDB, loads the ranking list (from the gob cache when
// present, otherwise from the network) and downloads the history of every
// stock that is not stored yet and whose MCAP is at least minMCAP.
func main() {
	log.SetFlags(log.Llongfile | log.LstdFlags)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// NOTE(review): credentials are hard-coded in the URI; consider reading
	// them from the environment.
	// client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://localhost:27017"))
	client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://root:6601502@localhost:27017"))
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := client.Disconnect(context.TODO()); err != nil {
			log.Println("mongo disconnect:", err)
		}
	}()

	skipMap := loadStoredCodes(client.Database("money").Collection("stock"))
	stockCodes := loadStockList(stockCodesFile)

	for _, code := range stockCodes {
		if skipMap[code.SYMBOL] {
			continue
		}
		if code.MCAP >= minMCAP {
			DownloadDataFromCode(client, code)
		}
	}
}

// loadStoredCodes returns the set of stock codes that already have rows in
// coll. It panics on query or decode errors.
//
// Three document layouts are recognised: the current one (field "Code"),
// documents written by earlier revisions of this file that wrapped the record
// in mongo.InsertOneModel (stored under "document" with the driver's default
// lowercased field naming), and the legacy layout keyed by "股票数字代码".
func loadStoredCodes(coll *mongo.Collection) map[string]bool {
	codeExpr := bson.M{"$ifNull": bson.A{
		"$Code",
		bson.M{"$ifNull": bson.A{"$document.Code", "$股票数字代码"}},
	}}
	cur, err := coll.Aggregate(context.TODO(), bson.A{
		bson.M{"$group": bson.M{"_id": codeExpr}},
	})
	if err != nil {
		panic(err)
	}
	defer cur.Close(context.TODO())

	stored := make(map[string]bool)
	for cur.Next(context.TODO()) {
		var doc bson.M
		if err := cur.Decode(&doc); err != nil {
			log.Panic(err)
		}
		// The group key may be a string (current layout), an integer (legacy
		// layout) or nil when a document has none of the fields; a bare type
		// assertion would panic on anything but int32.
		switch id := doc["_id"].(type) {
		case string:
			stored[id] = true
		case int32:
			// NOTE(review): an integer code loses leading zeros and so may not
			// match StockBase.SYMBOL — confirm how legacy data was written.
			stored[strconv.Itoa(int(id))] = true
		case int64:
			stored[strconv.FormatInt(id, 10)] = true
		default:
			log.Printf("ignoring stock group with unexpected _id %#v", doc["_id"])
		}
	}
	if err := cur.Err(); err != nil {
		panic(err)
	}
	return stored
}

// loadStockList returns the ranking list from the gob cache at cachePath. If
// the cache cannot be opened, the list is fetched from the network and the
// cache is (re)written. It panics on decode, fetch or write errors.
func loadStockList(cachePath string) []*StockBase {
	if f, err := os.Open(cachePath); err == nil {
		defer f.Close()
		var cached []*StockBase
		if err := gob.NewDecoder(f).Decode(&cached); err != nil {
			panic(err)
		}
		return cached
	}

	stocks := fetchStockList()

	f, err := os.OpenFile(cachePath, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0664)
	if err != nil {
		panic(err)
	}
	if err := gob.NewEncoder(f).Encode(&stocks); err != nil {
		f.Close()
		panic(err)
	}
	// Close explicitly so a failed flush of the cache is not missed.
	if err := f.Close(); err != nil {
		panic(err)
	}
	return stocks
}

// fetchStockList pages through the ranking service, starting at page 0, until
// the page count reported by the service is reached. It panics on request or
// JSON errors.
func fetchStockList() []*StockBase {
	tp := gcurl.Parse(rankingCurl).Temporary()
	page := tp.QueryParam(`page=\d+`)

	var stocks []*StockBase
	// The real page count is unknown until the first response arrives.
	var pagecount int64 = 100
	for i := int64(0); i < pagecount; i++ {
		page.IntSet(i)
		resp, err := tp.Execute()
		if err != nil {
			panic(err)
		}

		jr := gjson.ParseBytes(resp.Content())
		pagecount = jr.Get("pagecount").Int()
		for _, item := range jr.Get("list").Array() {
			var stock StockBase
			if err := json.Unmarshal([]byte(item.String()), &stock); err != nil {
				panic(err)
			}
			stocks = append(stocks, &stock)
		}
	}
	return stocks
}

// DownloadDataFromCode opens the quote page of code in the shared browser,
// follows the historical-data link, triggers the CSV download and inserts
// every parsed row into the money.stock collection.
//
// If the quote page has no historical-data link the stock is skipped. The
// function panics on navigation, click, CSV or database errors, in line with
// the rest of this file.
//
// The same CSV can presumably also be fetched directly from
// http://quotes.money.163.com/service/chddata.html?code=${CODE}&start=...&end=...
// (an earlier, commented-out variant of this function did so); the browser
// flow is what is used now.
func DownloadDataFromCode(client *mongo.Client, code *StockBase) {
	cstock := client.Database("money").Collection("stock")
	page := GetDefaultPage()

	// e.g. http://quotes.money.163.com/0601988.html
	stockurl := fmt.Sprintf("http://quotes.money.163.com/%s.html", code.CODE)
	log.Println(stockurl)

	// Register the lifecycle wait before navigating so the event cannot fire
	// before anyone is listening for it.
	waitPaint := page.WaitNavigation(proto.PageLifecycleEventNameFirstContentfulPaint)
	if err := page.Navigate(stockurl); err != nil {
		panic(err)
	}
	waitPaint()
	time.Sleep(time.Millisecond * 100)

	links, err := page.ElementsX(fmt.Sprintf("//a[contains(@href,'/trade/lsjysj_%s')]/@href", code.SYMBOL))
	if err != nil {
		panic(err)
	}
	link := links.First()
	if link == nil {
		log.Println("no history link for", code.SYMBOL)
		return
	}

	if err := link.WaitEnabled(); err != nil {
		log.Println("wait history link:", err)
	}
	urlpath, err := link.HTML()
	if err != nil {
		panic(err)
	}
	log.Println("click", urlpath)
	if err := page.Navigate("http://quotes.money.163.com" + urlpath); err != nil {
		panic(err)
	}

	// Open the download dialog.
	ahref := page.MustElementX("//a[@id='downloadData']")
	log.Println("wait downloadData")
	if err := ahref.WaitEnabled(); err != nil {
		log.Println("wait downloadData:", err)
	}
	time.Sleep(time.Millisecond * 500)
	if err := ahref.Click(proto.InputMouseButtonLeft); err != nil {
		panic(err)
	}

	// Confirm the dialog; the download wait must be armed before the click.
	submit := page.MustElementX("//a[@class='blue_btn submit']")
	log.Println("wait blue_btn submit")
	if err := submit.WaitEnabled(); err != nil {
		log.Println("wait blue_btn submit:", err)
	}
	time.Sleep(time.Millisecond * 500)
	waitDownload := page.Browser().MustWaitDownload()
	if err := submit.Click(proto.InputMouseButtonLeft); err != nil {
		// Without the click the download wait below would block forever.
		panic(err)
	}
	downloaddata := waitDownload()
	log.Println(len(downloaddata))

	// Parse everything first so a malformed row cannot leave a partially
	// inserted stock behind, then write all rows in one request.
	rows := parseStockCSV(downloaddata)
	if len(rows) > 0 {
		docs := make([]interface{}, 0, len(rows))
		for _, row := range rows {
			docs = append(docs, row)
		}
		if _, err := cstock.InsertMany(context.TODO(), docs); err != nil {
			panic(err)
		}
	}

	log.Println(code.SYMBOL)
	time.Sleep(time.Second * 1)
}

// parseStockCSV converts the downloaded GBK-encoded CSV into Stock rows. The
// first record is treated as the header and skipped; records with fewer than
// stockCSVColumns fields are logged and skipped. It panics on CSV syntax
// errors and, via ToFloat, on non-numeric price fields.
func parseStockCSV(data []byte) []Stock {
	records, err := csv.NewReader(bytes.NewReader(data)).ReadAll()
	if err != nil {
		panic(err)
	}
	if len(records) < 2 {
		// Empty download or header only: nothing to store.
		return nil
	}

	rows := make([]Stock, 0, len(records)-1)
	for _, record := range records[1:] {
		if len(record) < stockCSVColumns {
			log.Printf("skipping short csv record (%d fields): %q", len(record), record)
			continue
		}

		fields := make([]string, len(record))
		for i, raw := range record {
			decoded, err := GbkToUtf8([]byte(raw))
			if err != nil {
				// Keep the raw text rather than silently blanking the field.
				log.Printf("gbk decode %q: %v", raw, err)
				fields[i] = raw
				continue
			}
			fields[i] = string(decoded)
		}

		rows = append(rows, Stock{
			Date:                   fields[0],
			CodeStr:                fields[1],
			Name:                   fields[2],
			ClosingPrice:           ToFloat(fields[3]),
			MaxPrice:               ToFloat(fields[4]),
			MinPrice:               ToFloat(fields[5]),
			OpeningPrice:           ToFloat(fields[6]),
			PreviousClosingPrice:   ToFloat(fields[7]),
			UpsDowns:               ToFloat(fields[8]),
			UpsDownsRatio:          ToFloat(fields[9]),
			TurnoverRate:           ToFloat(fields[10]),
			Volume:                 ToFloat(fields[11]),
			Turnover:               ToFloat(fields[12]),
			MarketValue:            ToFloat(fields[13]),
			CirculatingMarketValue: ToFloat(fields[14]),
			Code:                   digitsRe.FindString(fields[1]),
		})
	}
	return rows
}

// ToFloat parses s as a float64. The literal "None" yields 0; any other text
// that is not a valid float causes a panic.
func ToFloat(s string) float64 {
	if s == "None" {
		return 0
	}
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		panic(fmt.Errorf("parsing %q as float: %w", s, err))
	}
	return v
}

// GbkToUtf8 decodes GBK-encoded bytes into UTF-8. It returns a nil slice and
// the error if decoding fails.
func GbkToUtf8(s []byte) ([]byte, error) {
	reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
	decoded, err := ioutil.ReadAll(reader)
	if err != nil {
		return nil, err
	}
	return decoded, nil
}

// GetPort asks the operating system for a currently free TCP port by
// listening on ":0" (all interfaces) and closing the listener again. It
// panics if the listener cannot be opened or closed.
func GetPort() int {
	l, err := net.Listen("tcp", ":0")
	if err != nil {
		// Previously ignored, which turned a failed listen into a nil
		// pointer dereference on the next line.
		panic(err)
	}
	port := l.Addr().(*net.TCPAddr).Port
	if err := l.Close(); err != nil {
		panic(err)
	}
	return port
}