Browse Source

更新蜜源数据采集

master
huangjiajun 11 months ago
parent
commit
2c96fe8516
6 changed files with 173 additions and 1 deletions
  1. +3
    -0
      app/task/init.go
  2. +3
    -0
      app/task/md/cron_key.go
  3. +124
    -0
      app/task/svc/svc_data_collect_cron_miyuan_real_news.go
  4. +21
    -0
      app/task/task_data_collect_cron_miyuan_real_news.go
  5. +21
    -0
      app/task/task_data_collect_cron_miyuan_real_news10.go
  6. +1
    -1
      go.mod

+ 3
- 0
app/task/init.go View File

@@ -87,4 +87,7 @@ func initTasks() {
jobs[taskMd.DataCollectCronHaodankuRealNews] = taskDataCollectCronHaodankuRealNews //
jobs[taskMd.DataCollectCronHaodankuRealNews10] = taskDataCollectCronHaodankuRealNews10 //

jobs[taskMd.DataCollectCronMiyuanRealNews] = taskDataCollectCronMiyuanRealNews //
jobs[taskMd.DataCollectCronMiyuanRealNews10] = taskDataCollectCronMiyuanRealNews10 //

}

+ 3
- 0
app/task/md/cron_key.go View File

@@ -3,4 +3,7 @@ package md
// Keys under which the data-collection cron tasks are registered in the
// task job table.
const (
// Haodanku real-news collection tasks.
DataCollectCronHaodankuRealNews = "data_collect_cron_haodanku_real_news"
DataCollectCronHaodankuRealNews10 = "data_collect_cron_haodanku_real_news10"

// Miyuan real-news collection tasks.
DataCollectCronMiyuanRealNews = "data_collect_cron_miyuan_real_news"
DataCollectCronMiyuanRealNews10 = "data_collect_cron_miyuan_real_news10"
)

+ 124
- 0
app/task/svc/svc_data_collect_cron_miyuan_real_news.go View File

@@ -0,0 +1,124 @@
package svc

import (
"applet/app/es/md"
md2 "applet/app/task/md"
"applet/app/utils"
"code.fnuoos.com/go_rely_warehouse/zyos_go_es.git/es"
"code.fnuoos.com/go_rely_warehouse/zyos_go_third_party_api.git/gzmiyuan"
"fmt"
"github.com/syyongx/php2go"
"strings"
"time"
)

// DataCollectCronMiyuanRealNews pulls pages of real-time news items from the
// gzmiyuan API and stores every item that is not yet present in the
// md.ZhiosRealNewsData index.
//
// types selects the crawl scope: 0 requests 3 pages with an empty channel id;
// any other value requests 10 pages for each configured channel id. Pages
// that return an error or no data are skipped. The function returns
// immediately, without reporting the error, as soon as one document fails to
// be created.
func DataCollectCronMiyuanRealNews(types int) {
	index := md.ZhiosRealNewsData
	//es.CreateIndexIfNotExists(index, md.ZhiosRealNewsDataField)

	// NOTE(review): the full channel list is overridden by the very next
	// statement, so only "JD" is crawled when types != 0 — confirm intended.
	channels := []string{"MC", "TM", "JD", "PDD", "CD", "ZD", "MS", "QYH"}
	channels = []string{"JD"}
	pageCount := 10
	if types == 0 {
		channels, pageCount = []string{""}, 3
	}
	pageSize := 10

	// Translates an item's OpenType into the category id stored with it.
	cateIDs := map[string]string{
		"MC":  "10001",
		"TM":  "10003",
		"JD":  "10004",
		"PDD": "10005",
		"CD":  "10006",
		"ZD":  "10002",
		"MS":  "10007",
		"QYH": "10000",
	}

	started := time.Now()
	fmt.Println("============", time.Since(started))
	for _, channel := range channels {
		for page := 1; page <= pageCount; page++ {
			items, err := gzmiyuan.GetGzmiRealData(utils.IntToStr(pageSize), utils.IntToStr(page), channel)
			if err != nil || items == nil {
				continue
			}
			for _, item := range items {
				record := md2.RealNewsData{
					Id:         item.Id,
					CateId:     utils.SerializeStr([]string{cateIDs[item.OpenType]}),
					Images:     utils.SerializeStr(strings.Split(item.XianbaoImageUrl, ",")),
					Platform:   "my",
					StartTime:  utils.TimeStdParseUnix(item.CreateTime),
					OldContent: item.XianbaoContent,
				}
				record.Key = php2go.Md5("hdk_real_news_my_" + utils.IntToStr(item.Id))
				docID := record.Key

				// Only items without an existing document are parsed and stored.
				existing, _ := es.FirstDoc(index, docID)
				if existing != nil {
					continue
				}

				record.Content = utils.SerializeStr(ToContent(item.XianbaoContent))
				fmt.Println(record)
				created, createErr := es.CreateDoc(index, docID, record)
				if createErr != nil {
					return
				}
				fmt.Printf("CreateDoc ==> %+v \n\n", created)
			}
		}
	}
	fmt.Println("==========================end", time.Since(started))
}
// To extracts the text that follows the first occurrence of str1 in str,
// cut off at the first occurrence of str2 (or at the next occurrence of str1,
// whichever comes first).
//
// It returns "" when str1 does not occur in str. Previously that case
// indexed past the end of the split result and panicked.
func To(str, str1, str2 string) string {
	parts := strings.Split(str, str1)
	// Split yields a single element when the separator is absent, so a
	// second element is required before indexing it.
	if len(parts) < 2 {
		return ""
	}
	tail := strings.Split(parts[1], str2)
	if len(tail) == 0 {
		return ""
	}
	return tail[0]
}
// ToContent converts the HTML-like body of a news item into a list of content
// entries, one per "<div " delimited segment of str (after every "<br>" has
// been replaced by a newline).
//
// A segment without an "href=" attribute becomes a plain, non-clickable entry
// whose "content" is the segment text. Otherwise the entry is built from the
// last "</div>"-delimited piece of the segment that contains "href=": it is a
// clickable "去购买>" entry carrying the piece's item_id and mapped open_type,
// except for JD links with an empty item_id, which become a non-clickable
// entry whose "content" is the link itself. The "url" field is always empty.
func ToContent(str string) []map[string]string {
	platformNames := map[string]string{
		"JD":  "jd",
		"PDD": "pdd",
		"TB":  "taobao",
		"TM":  "tmall",
	}

	segments := strings.Split(strings.ReplaceAll(str, "<br>", "\n"), "<div ")
	fmt.Println(segments)

	entries := make([]map[string]string, 0)
	for _, segment := range segments {
		entry := map[string]string{
			"is_click":      "0",
			"content":       segment,
			"gid":           "",
			"url":           "",
			"platform_type": "",
		}
		for _, piece := range strings.Split(segment, "</div>") {
			if !strings.Contains(piece, "href=") {
				continue
			}
			link := To(piece, "href=\"", "\"")
			itemID := To(piece, "item_id=\"", "\"")
			openType := To(piece, "open_type=\"", "\"")

			clickable, text := "1", "去购买>\n"
			if openType == "JD" && itemID == "" {
				// JD links without an item id are shown as the raw link text.
				clickable, text = "0", link
			}
			entry = map[string]string{
				"is_click":      clickable,
				"content":       text,
				"gid":           itemID,
				"url":           "",
				"platform_type": platformNames[openType],
			}
		}
		entries = append(entries, entry)
	}
	return entries
}

+ 21
- 0
app/task/task_data_collect_cron_miyuan_real_news.go View File

@@ -0,0 +1,21 @@
package task

import (
"applet/app/task/svc"
"math/rand"
"time"
)

// taskDataCollectCronMiyuanRealNews runs svc.DataCollectCronMiyuanRealNews
// with types = 1 while holding one slot of the shared ch channel.
//
// Before taking a slot it backs off for a random interval of up to one second
// for as long as ch holds more than workerNum entries.
func taskDataCollectCronMiyuanRealNews() {
	for len(ch) > workerNum {
		time.Sleep(time.Millisecond * time.Duration(rand.Intn(1000)))
	}
	ch <- 1
	// Release the slot via defer so that a panic inside the collector cannot
	// leave the entry in ch.
	defer func() { <-ch }()
	svc.DataCollectCronMiyuanRealNews(1)
}

+ 21
- 0
app/task/task_data_collect_cron_miyuan_real_news10.go View File

@@ -0,0 +1,21 @@
package task

import (
"applet/app/task/svc"
"math/rand"
"time"
)

// taskDataCollectCronMiyuanRealNews10 runs svc.DataCollectCronMiyuanRealNews
// with types = 0 while holding one slot of the shared ch channel.
//
// Before taking a slot it backs off for a random interval of up to one second
// for as long as ch holds more than workerNum entries.
func taskDataCollectCronMiyuanRealNews10() {
	for len(ch) > workerNum {
		time.Sleep(time.Millisecond * time.Duration(rand.Intn(1000)))
	}
	ch <- 1
	// Release the slot via defer so that a panic inside the collector cannot
	// leave the entry in ch.
	defer func() { <-ch }()
	svc.DataCollectCronMiyuanRealNews(0)
}

+ 1
- 1
go.mod View File

@@ -4,7 +4,7 @@ go 1.15

require (
code.fnuoos.com/go_rely_warehouse/zyos_go_es.git v1.0.1-0.20230707081910-52e70aa52998
code.fnuoos.com/go_rely_warehouse/zyos_go_third_party_api.git v1.1.21-0.20231221064048-3937c0824e59
code.fnuoos.com/go_rely_warehouse/zyos_go_third_party_api.git v1.1.21-0.20240103061829-86ef8b1a29c1
github.com/360EntSecGroup-Skylar/excelize v1.4.1
github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5
github.com/boombuler/barcode v1.0.1


Loading…
Cancel
Save