|
- package zhimeng
-
- import (
- "crypto/tls"
- "fmt"
- "net/http"
- "regexp"
- "strings"
-
- "github.com/gocolly/colly"
- )
-
- // ScrapJDDetailImageList is scarp detail images list
- func ScrapJDDetailImageList(gid string) ([]string, error) {
- var list []string
- rurl := "https://wqsitem.jd.com/detail/%s_d%s_normal.html"
-
- c := colly.NewCollector(
- colly.UserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"),
- )
- c.WithTransport(&http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
- })
- c.OnResponse(func(r *colly.Response) {
- re, _ := regexp.Compile(`[a-zA-z]+://[^\s]*`)
- body := r.Body
- // debug
- // fmt.Println(string(body))
- urls := re.FindAllString(string(body), -1)
- // fmt.Println(urls)
- ree, _ := regexp.Compile(`[a-zA-z]+://[^\s]*.jpg`)
- for _, url := range urls {
- if strings.Contains(url, `.jpg\`) {
- img := ree.FindString(url)
- list = append(list, img)
- }
-
- }
- })
- c.OnRequest(func(r *colly.Request) {
- })
-
- if err := c.Visit(fmt.Sprintf(rurl, gid, gid)); err != nil {
- return nil, err
- }
- // debug
- // fmt.Println(gid)
- // fmt.Println(list)
- return list, nil
- }
-
// strip returns ss with every leading and trailing Unicode code point that
// appears in charss removed.
//
// Characters from charss that occur in the interior of ss are left in place.
// An empty ss, or an ss made up solely of characters from charss, yields "".
// An empty charss returns ss unchanged.
func strip(ss string, charss string) string {
	// strings.Trim implements exactly this contract without converting the
	// input to []rune, so the bytes that are kept are returned untouched.
	return strings.Trim(ss, charss)
}
|