|
- package test
-
- import (
- "fmt"
- "github.com/gocolly/colly"
- "github.com/gocolly/colly/extensions"
- "github.com/tidwall/gjson"
- "net/http"
- "regexp"
- "strings"
- )
-
- /*
- 目前可用接口
- [商品查询]https://www.showdoc.com.cn/59349170678610?page_id=339616554551473
- [商品详情]https://www.showdoc.com.cn/59349170678610?page_id=339687047645094
-
- */
-
// Response is the JSON envelope of an SDK reply.
//
// NOTE(review): presumably this is the shape returned by zhimeng.Send (see
// the commented-out experiments in main) — confirm against the SDK.
type Response struct {
	// Msg is read from the "msg" key.
	Msg string `json:"msg"`
	// Success is read from the "success" key.
	Success int `json:"success"`
	// Data carries whatever value is present under the "data" key; its
	// concrete type depends on the payload.
	Data interface{} `json:"data"`
}
-
- func main() {
- // // JD
- // postData := map[string]string{"keyword": "联想", "p": "1", "size": "10"}
- // fmt.Println(postData["time"])
- // res, _ := zhimeng.Send("jd", "getgoods", postData)
- // fmt.Println(string(res))
- // p := Response{}
- // json.Unmarshal(res, &p)
- // fmt.Println(p)
- // // VIP
- // postData = map[string]string{"keyword": "联想", "p": "1", "size": "10", "order": "0"}
- // fmt.Println(postData["time"])
- // res, _ = zhimeng.Send("wph", "seach_goods", postData)
- // fmt.Println(string(res))
- // p = Response{}
- // json.Unmarshal(res, &p)
- // fmt.Println(p)
- // // PDD
- // postData = map[string]string{"keyword": "联想", "p": "1", "size": "10", "sort": "goods_price asc"}
- // res, _ = zhimeng.Send("pdd", "getgoods", postData)
- // fmt.Println(string(res))
- // p = Response{}
- // json.Unmarshal(res, &p)
- // fmt.Println(p)
- for i := 0; i < 1000; i++ {
- fmt.Println(i)
- scrapPDD()
- }
- }
-
- func scrapJD() {
- c := colly.NewCollector(func(collector *colly.Collector) {
- extensions.RandomUserAgent(collector)
- })
- c.OnResponse(func(r *colly.Response) {
- re, _ := regexp.Compile(`[(]//[^\s]*[)]`)
- body := r.Body
- fmt.Println(string(body))
- urls := re.FindAllString(string(body), -1)
- fmt.Println(urls)
- for _, url := range urls {
- url = strip(url, "()")
- url = "https:" + url
- fmt.Println(url)
- }
- })
- c.Visit("https://wqsitem.jd.com/detail/100008309360_d100008309360_normal.html")
- }
-
- func scrapPDD() {
- var cookies = []*http.Cookie{}
- var mapcookies = make(map[string]string)
- url := fmt.Sprintf("https://mobile.yangkeduo.com/goods.html?goods_id=%s", "156632692649")
- cs := "api_uid=CiHUKl9DZKpL6QBVK4qWAg==; _nano_fp=Xpdbl0PyX5Pxn0TynT_DTGXbst0kz5cjzGAQDnBR; ua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F84.0.4147.135%20Safari%2F537.36; webp=1; quick_entrance_click_record=20200824%2C1; PDDAccessToken=XRC6FNX7FRBL6AJRMRBRN4CDG2PZXO3YJZYHFUA4O2PLDAWVYXHA1125821; pdd_user_id=9622705741400; pdd_user_uin=F27EAZ4V5S7EGEVMCJI2P7RFLE_GEXDA; chat_config={'host_whitelist':['.yangkeduo.com','.pinduoduo.com','.10010.com/queen/tencent/pinduoduo-fill.html','.ha.10086.cn/pay/card-sale!toforward.action','wap.ha.10086.cn','m.10010.com']}; pdd_vds=gaLMNqmfGfyYEpyYiZGWopaCicNHbXGWtDNcOZnWLqiDNfLHOXnZaqtCLDiX"
- csList := strings.Split(cs, ";")
- for _, c := range csList {
- s := strings.Trim(c, " ")
- sList := strings.SplitN(s, "=", 2)
-
- mapcookies[sList[len(sList)-len(sList)]] = sList[(len(sList) - len(sList) + 1)]
-
- }
- fmt.Println(mapcookies)
- for key, value := range mapcookies {
- if key == "ua" {
- continue
- }
- cookies = append(cookies, &http.Cookie{Name: key, Value: value})
- }
- c := colly.NewCollector(
- colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"),
- )
-
- c.OnResponse(func(r *colly.Response) {
- re, _ := regexp.Compile(`window.rawData=.*}`)
- body := r.Body
- fmt.Println(string(body))
- result := re.FindString(string(body))
- // fmt.Println(result)
- result = strings.SplitN(result, "=", 2)[1]
- // fmt.Println(result)
- value := gjson.Get(result, "store.initDataObj.goods.detailGallery")
- // fmt.Println(value)
- list := value.Array()
- imageList := []string{}
- for _, v := range list {
- nv := gjson.Get(v.String(), "url")
- imageList = append(imageList, nv.String())
- }
- fmt.Println(imageList)
- ck := c.Cookies("https://mobile.yangkeduo.com")
- fmt.Println(ck)
- cookies = ck
- })
-
- c.SetCookies("https://mobile.yangkeduo.com", cookies)
-
- c.Visit(url)
- }
-
// strip returns ss with every leading and trailing character that appears in
// charss removed. Characters in the interior of ss are never touched. If ss
// consists solely of characters from charss, or is empty, the result is "".
// An empty charss leaves ss unchanged.
//
// It delegates to strings.Trim, which performs the same Unicode-aware
// trimming without the per-call rune slice and lookup map, and leaves the
// bytes of the retained portion exactly as they were in ss.
func strip(ss string, charss string) string {
	return strings.Trim(ss, charss)
}
|