package zhimeng

import (
	"crypto/tls"
	"fmt"
	"net/http"
	"regexp"
	"strings"

	"github.com/gocolly/colly"
)

// Patterns used to pull image URLs out of a JD detail page. They are compiled
// once at package initialisation instead of on every response.
var (
	// jdURLPattern matches any "scheme://..." run up to the next whitespace.
	jdURLPattern = regexp.MustCompile(`[a-zA-Z]+://[^\s]*`)
	// jdJPGURLPattern matches a URL up to and including its last ".jpg".
	jdJPGURLPattern = regexp.MustCompile(`[a-zA-Z]+://[^\s]*\.jpg`)
)

// ScrapJDDetailImageList fetches the JD detail page for the goods id gid and
// returns the .jpg image URLs found in the response body, in order of
// appearance.
//
// It returns a nil slice and the error reported by the collector when the
// page cannot be visited. The returned slice is nil (not empty) when the page
// was fetched but contained no matching URLs.
func ScrapJDDetailImageList(gid string) ([]string, error) {
	const (
		detailURLFormat = "https://wqsitem.jd.com/detail/%s_d%s_normal.html"
		userAgent       = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
	)

	c := colly.NewCollector(colly.UserAgent(userAgent))

	// NOTE(review): certificate verification is disabled here, which exposes
	// the request to man-in-the-middle tampering. It is kept only to preserve
	// existing behaviour; confirm whether it is still required.
	c.WithTransport(&http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	})

	var list []string
	c.OnResponse(func(r *colly.Response) {
		list = append(list, extractJPGURLs(string(r.Body))...)
	})

	if err := c.Visit(fmt.Sprintf(detailURLFormat, gid, gid)); err != nil {
		return nil, err
	}
	return list, nil
}

// extractJPGURLs returns every URL in body that is immediately followed by a
// backslash after its ".jpg" extension, truncated to end at ".jpg".
func extractJPGURLs(body string) []string {
	var images []string
	for _, candidate := range jdURLPattern.FindAllString(body, -1) {
		// Only URLs where ".jpg" is directly followed by a backslash are kept.
		// Presumably the page embeds backslash-escaped markup, so the image
		// URL ends right before an escaped quote; verify against a live page.
		if !strings.Contains(candidate, `.jpg\`) {
			continue
		}
		if img := jdJPGURLPattern.FindString(candidate); img != "" {
			images = append(images, img)
		}
	}
	return images
}

// strip removes every leading and trailing rune of ss that appears in charss
// and returns the remainder. It returns "" when ss is empty or consists only
// of runes from charss, and returns ss unchanged when charss is empty.
func strip(ss string, charss string) string {
	return strings.Trim(ss, charss)
}