package test import ( "fmt" "github.com/gocolly/colly" "github.com/gocolly/colly/extensions" "github.com/tidwall/gjson" "net/http" "regexp" "strings" ) /* 目前可用接口 [商品查询]https://www.showdoc.com.cn/59349170678610?page_id=339616554551473 [商品详情]https://www.showdoc.com.cn/59349170678610?page_id=339687047645094 */ // Response is SDK Response type Response struct { Msg string `json:"msg"` Success int `json:"success"` Data interface{} `json:"data"` } func main() { // // JD // postData := map[string]string{"keyword": "联想", "p": "1", "size": "10"} // fmt.Println(postData["time"]) // res, _ := zm.Send("jd", "getgoods", postData) // fmt.Println(string(res)) // p := Response{} // json.Unmarshal(res, &p) // fmt.Println(p) // // VIP // postData = map[string]string{"keyword": "联想", "p": "1", "size": "10", "order": "0"} // fmt.Println(postData["time"]) // res, _ = zm.Send("wph", "seach_goods", postData) // fmt.Println(string(res)) // p = Response{} // json.Unmarshal(res, &p) // fmt.Println(p) // // PDD // postData = map[string]string{"keyword": "联想", "p": "1", "size": "10", "sort": "goods_price asc"} // res, _ = zm.Send("pdd", "getgoods", postData) // fmt.Println(string(res)) // p = Response{} // json.Unmarshal(res, &p) // fmt.Println(p) for i := 0; i < 1000; i++ { fmt.Println(i) scrapPDD() } } func scrapJD() { c := colly.NewCollector(func(collector *colly.Collector) { extensions.RandomUserAgent(collector) }) c.OnResponse(func(r *colly.Response) { re, _ := regexp.Compile(`[(]//[^\s]*[)]`) body := r.Body fmt.Println(string(body)) urls := re.FindAllString(string(body), -1) fmt.Println(urls) for _, url := range urls { url = strip(url, "()") url = "https:" + url fmt.Println(url) } }) c.Visit("https://wqsitem.jd.com/detail/100008309360_d100008309360_normal.html") } func scrapPDD() { var cookies = []*http.Cookie{} var mapcookies = make(map[string]string) url := fmt.Sprintf("https://mobile.yangkeduo.com/goods.html?goods_id=%s", "156632692649") cs := "api_uid=CiHUKl9DZKpL6QBVK4qWAg==; _nano_fp=Xpdbl0PyX5Pxn0TynT_DTGXbst0kz5cjzGAQDnBR; ua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F84.0.4147.135%20Safari%2F537.36; webp=1; quick_entrance_click_record=20200824%2C1; PDDAccessToken=XRC6FNX7FRBL6AJRMRBRN4CDG2PZXO3YJZYHFUA4O2PLDAWVYXHA1125821; pdd_user_id=9622705741400; pdd_user_uin=F27EAZ4V5S7EGEVMCJI2P7RFLE_GEXDA; chat_config={'host_whitelist':['.yangkeduo.com','.pinduoduo.com','.10010.com/queen/tencent/pinduoduo-fill.html','.ha.10086.cn/pay/card-sale!toforward.action','wap.ha.10086.cn','m.10010.com']}; pdd_vds=gaLMNqmfGfyYEpyYiZGWopaCicNHbXGWtDNcOZnWLqiDNfLHOXnZaqtCLDiX" csList := strings.Split(cs, ";") for _, c := range csList { s := strings.Trim(c, " ") sList := strings.SplitN(s, "=", 2) mapcookies[sList[len(sList)-len(sList)]] = sList[(len(sList) - len(sList) + 1)] } fmt.Println(mapcookies) for key, value := range mapcookies { if key == "ua" { continue } cookies = append(cookies, &http.Cookie{Name: key, Value: value}) } c := colly.NewCollector( colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"), ) c.OnResponse(func(r *colly.Response) { re, _ := regexp.Compile(`window.rawData=.*}`) body := r.Body fmt.Println(string(body)) result := re.FindString(string(body)) // fmt.Println(result) result = strings.SplitN(result, "=", 2)[1] // fmt.Println(result) value := gjson.Get(result, "store.initDataObj.goods.detailGallery") // fmt.Println(value) list := value.Array() imageList := []string{} for _, v := range list { nv := gjson.Get(v.String(), "url") imageList = append(imageList, nv.String()) } fmt.Println(imageList) ck := c.Cookies("https://mobile.yangkeduo.com") fmt.Println(ck) cookies = ck }) c.SetCookies("https://mobile.yangkeduo.com", cookies) c.Visit(url) } func strip(ss string, charss string) string { s, chars := []rune(ss), []rune(charss) length := len(s) max := len(s) - 1 l, r := true, true //标记当左端或者右端找到正常字符后就停止继续寻找 start, end := 0, max tmpEnd := 0 charset := make(map[rune]bool) //创建字符集,也就是唯一的字符,方便后面判断是否存在 for i := 0; i < len(chars); i++ { charset[chars[i]] = true } for i := 0; i < length; i++ { if _, exist := charset[s[i]]; l && !exist { start = i l = false } tmpEnd = max - i if _, exist := charset[s[tmpEnd]]; r && !exist { end = tmpEnd r = false } if !l && !r { break } } if l && r { // 如果左端和右端都没找到正常字符,那么表示该字符串没有正常字符 return "" } return string(s[start : end+1]) }