当前位置:   article > 正文

python爬取微信小程序数据,python爬取小程序数据_微信小程序爬虫

微信小程序爬虫

本文介绍如何用 Python 爬取微信小程序的数据,下面一起来看看具体实现。

Python爬虫系列之微信小程序实战

基于Scrapy爬虫框架实现对微信小程序数据的爬取

首先,你需要安装抓包工具,这里推荐使用 Charles,至于具体怎么使用,后期有时间我会出一个示例
  • 最重要的步骤之一就是分析接口,理清楚每一个接口功能,然后连接起来形成接口串思路,再通过Spider的回调函数一次次去分析数据
  • 抓包分析接口过程不做演示了,主要是分析请求头和query参数
  • 以下为代码部分,代码未写详细注释,但是流程写的还是挺清晰的
  • 如需测试请自行抓包更换请求头的token与session,以下测试头已做修改,不能直接使用

代码仅供学习交流,请勿用于非法用途

小程序爬虫接单、app爬虫接单、网页爬虫接单、接口定制、网站开发、小程序开发 > 点击这里联系我们 <

微信请扫描下方二维码

在这里插入图片描述

# -*- coding:utf-8 -*-
import json

import scrapy
  3. '''
  4. @Author :王磊
  5. @Date :2018/12/3
  6. @Deion:美家优享微信小程序全国商品数据爬取
  7. '''
  8. class MeiJiaSpider(scrapy.spiders.Spider):
  9. name = "MeiJiaSpider"
  10. def __init__(self):
  11. self.headers = {
  12. "x-bell-token": "ef4d705aabf4909db847b6de6068605c-4",
  13. "x-session-key": "ab7f2b8673429d5e779c7f5c8b4a8524",
  14. "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
  15. }
  16. def start_requests(self):
  17. '''
  18. 获取城市列表
  19. :return:
  20. '''
  21. url = 'https://bell-mall.yunshanmeicai.com/mall/gis/get-city-list'
  22. yield scrapy.FormRequest(
  23. url=url,
  24. headers=self.headers,
  25. dont_filter=True,
  26. callback=self.getCityChild
  27. )
  28. def getCityChild(self, response):
  29. '''
  30. 通过城市列表获取城市子列表,获取子列表经纬度数据
  31. :param response:
  32. :return:
  33. '''
  34. datas = eval(response.text)
  35. if datas['ret']:
  36. url = 'https://bell-mall.yunshanmeicai.com/mall/gis/address-search'
  37. for _ in datas['data']:
  38. name = _['name']
  39. data = {
  40. "key_words": name,
  41. "city": name
  42. }
  43. yield scrapy.FormRequest(
  44. url=url,
  45. headers=self.headers,
  46. formdata=data,
  47. dont_filter=True,
  48. callback=self.sellerParse
  49. )
  50. def sellerParse(self, response):
  51. '''
  52. 通过经纬度获取该位置附近商家列表
  53. :param response:
  54. :return:
  55. '''
  56. res = eval(response.text)
  57. if res['ret']:
  58. datas = res['data']
  59. for _ in datas:
  60. locationData = {"lat": str(_['location']['lat']), "lng": str(_['location']['lng'])}
  61. urlNearby = 'https://bell-mall.yunshanmeicai.com/mall/gis/get-nearby-team'
  62. yield scrapy.FormRequest(
  63. url=urlNearby,
  64. headers=self.headers,
  65. formdata=locationData,
  66. dont_filter=True,
  67. callback=self.sellerInfoParse
  68. )
  69. def sellerInfoParse(self, response):
  70. '''
  71. 获取商家详细信息,包含店铺id,手机号,地区等等(若不需要店铺id以外的其他数据,此过程可省略,因为店铺id在商家列表中以id的形式展示了)
  72. :param response:
  73. :return:
  74. '''
  75. res = eval(response.text)
  76. if res['ret']:
  77. datas = res['data']
  78. urlClass = 'https://bell-mall.yunshanmeicai.com/cart/cart/get-list'
  79. for _ in datas:
  80. query = {}
  81. headers = {
  82. "x-bell-token": "0b5e5bcf70c973b080f39cb7b4ec2306-4",
  83. "x-session-key": "3e76463e81d9551826fc132b10c27794",
  84. "x-group-token": _['id'],
  85. "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
  86. }
  87. yield scrapy.FormRequest(
  88. url=urlClass,
  89. headers=headers,
  90. formdata=query,
  91. dont_filter=True,
  92. callback=self.storeClassParse
  93. )
  94. def storeClassParse(self, response):
  95. '''
  96. 通过店铺id获取店铺类目
  97. :param response:
  98. :return:
  99. '''
  100. res = eval(response.text)
  101. if res['ret']:
  102. urlClass = 'https://bell-mall.yunshanmeicai.com/mall/home/get-home-class'
  103. version = {"version": "1.0.0"}
  104. headers = {
  105. "x-bell-token": "0b5e5bcf70c973b080f39cb7b4ec2306-4",
  106. "x-session-key": "3e76463e81d9551826fc132b10c27794",
  107. "x-group-token": str(res['data']['store_id']),
  108. "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
  109. }
  110. yield scrapy.FormRequest(
  111. url=urlClass,
  112. headers=headers,
  113. formdata=version,
  114. dont_filter=True,
  115. callback=self.goodsListParse,
  116. meta={"store_id": str(res['data']['store_id'])}
  117. )
  118. def goodsListParse(self, response):
  119. '''
  120. 通过店铺类目id获取商品列表
  121. :param response:
  122. :return:
  123. '''
  124. res = eval(str(response.text).replace('null', 'None'))
  125. if res['ret']:
  126. if res['data']['list']:
  127. data = res['data']['list']
  128. goodsUrl = 'https://bell-mall.yunshanmeicai.com/mall/home/index'
  129. headers = {
  130. "x-bell-token": "0b5e5bcf70c973b080f39cb7b4ec2306-4",
  131. "x-session-key": "3e76463e81d9551826fc132b10c27794",
  132. "x-group-token": str(response.meta['store_id']),
  133. "User-Agent": "Mozilla/5.0 (Linux; Android 8.0.0; MI 5 Build/OPR1.170623.032; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/appbrand0"
  134. }
  135. for _ in data:
  136. query = {"page": "1", "class_id": str(_['id']), "version": "1.0.2"}
  137. yield scrapy.FormRequest(
  138. url=goodsUrl,
  139. headers=headers,
  140. formdata=query,
  141. dont_filter=True,
  142. callback=self.goodsParse
  143. )
  144. def goodsParse(self, response):
  145. '''
  146. 解析最终商品数据
  147. :param response:
  148. :return:
  149. '''
  150. goodsList = eval(response.text)
  151. if goodsList['ret']:
  152. if goodsList['data']['list']:
  153. lists = goodsList['data']['list']
  154. for _ in lists:
  155. start_time = str(_['start_time'])
  156. end_time = str(_['end_time'])
  157. product_id = str(_['product_id'])
  158. product_name = _['product_name']
  159. group_product_name = _['group_product_name']
  160. group_id = str(_['group_id'])
  161. group_type = str(_['group_type'])
  162. product_short_desc = _['product_short_desc']
  163. product_desc = _['product_desc']
  164. product_format_id = str(_['product_format_id'])
  165. already_txt = _['already_txt']
  166. already_nums = str(_['already_nums'])
  167. left_txt = _['left_txt']
  168. left_num = str(_['left_num'])
  169. real_left_num = str(_['real_left_num'])
  170. group_price = str(_['group_price'])
  171. line_price = str(_['line_price'])
  172. product_sales_num = str(_['product_sales_num'])
  173. identify = _['identify']
  174. print(
  175. "start_time: %s ,end_time: %s ,product_id: %s ,product_name: %s ,group_product_name: %s ,group_id: %s ,group_type: %s ,product_short_desc: %s ,product_format_id: %s ,already_txt: %s ,already_nums: %s ,real_left_num: %s ,group_price: %s ,line_price: %s ,product_sales_num: %s ,identify: %s " % (
  176. start_time, end_time, product_id, product_name, group_product_name, group_id, group_type,
  177. product_short_desc, product_format_id, already_txt, already_nums, real_left_num, group_price,
  178. line_price, product_sales_num, identify)
  179. )
  180. '''
  181. "text_label_list": [
  182. {
  183. "label_content": "#fe3113",
  184. "label_name": "热销",
  185. "label_id": 10
  186. }
  187. ],
  188. "pic_label_list": [
  189. {
  190. "label_content": "https:\\/\\/img-oss.yunshanmeicai.com\\/xfresh\\/product\\/69cf3401b000504ea33d9e8b80bfc467.png",
  191. "label_name": "美家福利",
  192. "label_id": 52
  193. }
  194. ],
  195. "loop_pics": [
  196. "https:\\/\\/img-oss.yunshanmeicai.com\\/xfresh\\/product\\/03df45319b36070f67edf4562d6ec74f.jpg"
  197. ],
  198. "new_loop_pics": "https:\\/\\/img-oss.yunshanmeicai.com\\/xfresh\\/product\\/03df45319b36070f67edf4562d6ec74f.jpg?x-oss-process=image\\/resize,w_360",
  199. '''

☞点击这里与我探讨☚

♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪
♪♪后续会更新系列基于Python的爬虫小例子,欢迎关注。♪♪
♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪

文章知识点与官方知识档案匹配,可进一步学习相关知识
Python 入门技能树首页概览:432,898 人正在系统学习中
本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号