golang 的 rabbitmq 消费项目
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

jd.go 2.0 KiB

6 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. package zhimeng
  2. import (
  3. "crypto/tls"
  4. "fmt"
  5. "net/http"
  6. "regexp"
  7. "strings"
  8. "github.com/gocolly/colly"
  9. )
  10. // ScrapJDDetailImageList is scarp detail images list
  11. func ScrapJDDetailImageList(gid string) ([]string, error) {
  12. var list []string
  13. rurl := "https://wqsitem.jd.com/detail/%s_d%s_normal.html"
  14. c := colly.NewCollector(
  15. colly.UserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"),
  16. )
  17. c.WithTransport(&http.Transport{
  18. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  19. })
  20. c.OnResponse(func(r *colly.Response) {
  21. re, _ := regexp.Compile(`[a-zA-z]+://[^\s]*`)
  22. body := r.Body
  23. // debug
  24. // fmt.Println(string(body))
  25. urls := re.FindAllString(string(body), -1)
  26. // fmt.Println(urls)
  27. ree, _ := regexp.Compile(`[a-zA-z]+://[^\s]*.jpg`)
  28. for _, url := range urls {
  29. if strings.Contains(url, `.jpg\`) {
  30. img := ree.FindString(url)
  31. list = append(list, img)
  32. }
  33. }
  34. })
  35. c.OnRequest(func(r *colly.Request) {
  36. })
  37. if err := c.Visit(fmt.Sprintf(rurl, gid, gid)); err != nil {
  38. return nil, err
  39. }
  40. // debug
  41. // fmt.Println(gid)
  42. // fmt.Println(list)
  43. return list, nil
  44. }
  // strip returns ss with every leading and trailing character that appears in
  // charss removed. Characters inside the remaining text are left untouched,
  // even if they appear in charss. If ss consists solely of characters from
  // charss (or is empty) the result is "". An empty charss returns ss unchanged.
  //
  // This is exactly strings.Trim; using it avoids the rune-slice and map
  // allocations of a hand-rolled scan and keeps the bytes of the retained text
  // unmodified.
  func strip(ss string, charss string) string {
  	return strings.Trim(ss, charss)
  }