当前位置:   article > 正文

一个比较完善的httpWebRequest 封装,适合网络爬取及暴力破解

httpwebrequest 封装

大家在模拟http请求的时候,往往对保持长连接、cookies、http头部信息等了解得不是那么深入,因此在各种网络请求过程中会发生N种问题。

可能问题如下:

1)登录成功后session保持

2)保证所有cookies回传到服务器

3)http头这么多,少一个,请求可能会失败

4)各种编码问题,gzip等压缩问题

为了解决这些问题,本人花了一天时间写了以下一个类,专门做http请求

复制代码

  1. 1 using System;
  2. 2 using System.Collections.Generic;
  3. 3 using System.IO;
  4. 4 using System.IO.Compression;
  5. 5 using System.Linq;
  6. 6 using System.Net;
  7. 7 using System.Text;
  8. 8 using System.Threading.Tasks;
  9. 9
  10. 10 namespace ScanWeb
  11. 11 {
  /// <summary>
  /// Small helper around <see cref="HttpWebRequest"/> that replays a browser-style header block,
  /// optionally remembers cookies between calls, and decodes gzip/deflate response bodies.
  /// </summary>
  /// <remarks>
  /// Original author tag: zetee.
  /// Headers such as Host, Connection, User-Agent, Referer, Range, Content-Type, Content-Length,
  /// Expect, Proxy-Connection and If-Modified-Since cannot be added to
  /// <see cref="HttpWebRequest.Headers"/> directly; they are applied through the matching
  /// request properties instead.
  /// Cookie state lives in a plain dictionary with no locking, so use one instance per thread.
  /// </remarks>
  public class HttpRequestClient
  {
      /// <summary>
      /// Header names that must be applied through <see cref="HttpWebRequest"/> properties.
      /// HTTP header names are case-insensitive, so the set is too.
      /// </summary>
      static readonly HashSet<string> RestrictedHeaders = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
      {
          "Host",
          "Connection",
          "User-Agent",
          "Referer",
          "Range",
          "Content-Type",
          "Content-Length",
          "Expect",
          "Proxy-Connection",
          "If-Modified-Since",
          "Keep-alive",
          "Accept"
      };

      static HttpRequestClient()
      {
          // Maximum number of concurrent connections.
          ServicePointManager.DefaultConnectionLimit = 1000;
      }

      /// <summary>
      /// Default header block, one "Name:Value" pair per line.
      /// </summary>
      public static string defaultHeaders = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Accept-Encoding:gzip, deflate, sdch
Accept-Language:zh-CN,zh;q=0.8
Cache-Control:no-cache
Connection:keep-alive
Pragma:no-cache
Upgrade-Insecure-Requests:1
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36";

      /// <summary>
      /// Whether cookies are remembered and replayed on later requests.
      /// </summary>
      bool isTrackCookies = false;

      /// <summary>
      /// Remembered cookies, keyed by cookie name.
      /// </summary>
      Dictionary<String, Cookie> cookieDic = new Dictionary<string, Cookie>();

      /// <summary>
      /// Running response-time figure in milliseconds; -1 means "no sample yet".
      /// </summary>
      long avgResponseMilliseconds = -1;

      /// <summary>
      /// Running average response time in milliseconds (-1 until the first sample).
      /// Assigning a value does not overwrite the figure: the first assignment stores the sample,
      /// every later assignment stores the mean of the previous figure and the new sample.
      /// </summary>
      public long AvgResponseMilliseconds
      {
          get
          {
              return avgResponseMilliseconds;
          }

          set
          {
              if (avgResponseMilliseconds != -1)
              {
                  // Parenthesised on purpose: the unparenthesised form divides only the old figure.
                  avgResponseMilliseconds = (value + avgResponseMilliseconds) / 2;
              }
              else
              {
                  avgResponseMilliseconds = value;
              }
          }
      }

      /// <summary>
      /// Creates a client.
      /// </summary>
      /// <param name="isTrackCookies">True to remember cookies and send them on later requests.</param>
      public HttpRequestClient(bool isTrackCookies = false)
      {
          this.isTrackCookies = isTrackCookies;
      }

      /// <summary>
      /// Sends an HTTP request and returns the decoded response body.
      /// </summary>
      /// <param name="url">Absolute request URL.</param>
      /// <param name="method">HTTP verb, e.g. POST or GET.</param>
      /// <param name="headers">Header block, one "Name:Value" per line (can be pasted from browser dev tools).</param>
      /// <param name="content">Request body; every key and value must already be URL-encoded.</param>
      /// <param name="contentEncode">Encoding used to turn <paramref name="content"/> into bytes; UTF-8 when null.</param>
      /// <param name="proxyUrl">Proxy address, or null/blank for none.</param>
      /// <returns>
      /// The response body, or an empty string when obtaining the response fails
      /// (including non-success status codes reported as <see cref="WebException"/>).
      /// </returns>
      public string http(string url, string method, string headers, string content, Encoding contentEncode, string proxyUrl)
      {
          HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
          request.Method = method;
          if (string.Equals(method, "GET", StringComparison.OrdinalIgnoreCase))
          {
              // NOTE(review): redirects are disabled for GET, so the limit below has no effect.
              // Kept as-is because callers may rely on seeing the redirect response itself.
              request.MaximumAutomaticRedirections = 100;
              request.AllowAutoRedirect = false;
          }

          fillHeaders(request, headers);
          fillProxy(request, proxyUrl);

          // Write the request body, if any. Content-Length always comes from the real byte count.
          if (!string.IsNullOrWhiteSpace(content))
          {
              byte[] data = (contentEncode ?? Encoding.UTF8).GetBytes(content);
              request.ContentLength = data.Length;
              using (Stream reqStream = request.GetRequestStream())
              {
                  reqStream.Write(data, 0, data.Length);
              }
          }

          HttpWebResponse response = null;
          System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
          try
          {
              sw.Start();
              response = (HttpWebResponse)request.GetResponse();
          }
          catch (Exception ex)
          {
              sw.Stop();
              AvgResponseMilliseconds = sw.ElapsedMilliseconds;

              // Best-effort contract: failures yield "". Still release the error response,
              // otherwise its connection stays checked out of the pool.
              WebException webEx = ex as WebException;
              if (webEx != null && webEx.Response != null)
              {
                  webEx.Response.Close();
              }
              return "";
          }
          sw.Stop();
          AvgResponseMilliseconds = sw.ElapsedMilliseconds;

          // Dispose the response so the underlying connection is returned to the pool.
          using (response)
          {
              trackCookies(readSetCookies(response));
              return getResponseBody(response);
          }
      }

      /// <summary>
      /// Sends a POST request.
      /// </summary>
      /// <param name="url">Absolute request URL.</param>
      /// <param name="headers">Header block, one "Name:Value" per line.</param>
      /// <param name="content">URL-encoded request body.</param>
      /// <param name="contentEncode">Encoding of the body; UTF-8 when null.</param>
      /// <param name="proxyUrl">Proxy address, or null for none.</param>
      /// <returns>The response body, or an empty string on failure.</returns>
      public string httpPost(string url, string headers, string content, Encoding contentEncode, string proxyUrl = null)
      {
          return http(url, "POST", headers, content, contentEncode, proxyUrl);
      }

      /// <summary>
      /// Sends a GET request.
      /// </summary>
      /// <param name="url">Absolute request URL.</param>
      /// <param name="headers">Header block, one "Name:Value" per line.</param>
      /// <param name="content">Not used: a GET request is sent without a body.</param>
      /// <param name="proxyUrl">Proxy address, or null for none.</param>
      /// <returns>The response body, or an empty string on failure.</returns>
      public string httpGet(string url, string headers, string content = null, string proxyUrl = null)
      {
          return http(url, "GET", headers, null, null, proxyUrl);
      }

      /// <summary>
      /// Attaches a proxy to the request when a proxy address is supplied.
      /// </summary>
      /// <param name="request">Request to configure.</param>
      /// <param name="proxyUri">Proxy address; null or blank leaves the request untouched.</param>
      private void fillProxy(HttpWebRequest request, string proxyUri)
      {
          if (string.IsNullOrWhiteSpace(proxyUri))
          {
              return;
          }

          WebProxy proxy = new WebProxy();
          proxy.Address = new Uri(proxyUri);
          request.Proxy = proxy;
      }

      /// <summary>
      /// Stores the given cookies by name (later values replace earlier ones).
      /// Does nothing when cookie tracking is disabled.
      /// </summary>
      /// <param name="cookies">Cookies to remember; may be null.</param>
      private void trackCookies(CookieCollection cookies)
      {
          if (!isTrackCookies) return;
          if (cookies == null) return;

          foreach (Cookie c in cookies)
          {
              cookieDic[c.Name] = c;
          }
      }

      /// <summary>
      /// Formats all non-expired remembered cookies as a Cookie header value ("a=1; b=2").
      /// </summary>
      /// <returns>The header value, or an empty string when there is nothing to send.</returns>
      private string getCookieStr()
      {
          StringBuilder sb = new StringBuilder();
          foreach (KeyValuePair<string, Cookie> item in cookieDic)
          {
              if (item.Value.Expired)
              {
                  continue;
              }

              if (sb.Length > 0)
              {
                  sb.Append("; ");
              }
              // No spaces around '=': they would become part of the cookie name/value.
              sb.Append(item.Key).Append("=").Append(item.Value.Value);
          }
          return sb.ToString();
      }

      /// <summary>
      /// Builds a cookie from a single "name=value" pair.
      /// Only the first '=' separates name from value, so values containing '=' survive.
      /// </summary>
      /// <param name="pair">Text of the form "name=value".</param>
      /// <returns>The cookie, or null when the pair has no usable name.</returns>
      private static Cookie tryCreateCookie(string pair)
      {
          if (string.IsNullOrWhiteSpace(pair)) return null;

          int eq = pair.IndexOf('=');
          if (eq <= 0) return null;

          string name = pair.Substring(0, eq).Trim();
          string value = pair.Substring(eq + 1).Trim();
          if (name.Length == 0) return null;

          try
          {
              return new Cookie(name, value);
          }
          catch (CookieException)
          {
              // The Cookie type rejects some names; skip those instead of failing the request.
              return null;
          }
      }

      /// <summary>
      /// Parses a request "Cookie" header value ("a=1; b=2") into cookies.
      /// </summary>
      /// <param name="cookieHeader">Header value; may be null or blank.</param>
      /// <returns>The parsed cookies (possibly empty).</returns>
      private static CookieCollection parseCookieHeader(string cookieHeader)
      {
          CookieCollection parsed = new CookieCollection();
          if (string.IsNullOrWhiteSpace(cookieHeader)) return parsed;

          foreach (string pair in cookieHeader.Split(';'))
          {
              Cookie c = tryCreateCookie(pair);
              if (c != null)
              {
                  parsed.Add(c);
              }
          }
          return parsed;
      }

      /// <summary>
      /// Extracts the cookies set by a response.
      /// Each Set-Cookie header is read separately and only its leading "name=value" segment is used;
      /// attributes such as Path, Domain or Expires are not cookies and are skipped.
      /// </summary>
      /// <param name="response">Response to inspect.</param>
      /// <returns>The cookies found (possibly empty).</returns>
      private static CookieCollection readSetCookies(HttpWebResponse response)
      {
          CookieCollection found = new CookieCollection();
          string[] headerValues = response.Headers.GetValues("Set-Cookie");
          if (headerValues == null) return found;

          foreach (string headerValue in headerValues)
          {
              if (string.IsNullOrWhiteSpace(headerValue)) continue;

              int semi = headerValue.IndexOf(';');
              string pair = semi >= 0 ? headerValue.Substring(0, semi) : headerValue;
              Cookie c = tryCreateCookie(pair);
              if (c != null)
              {
                  found.Add(c);
              }
          }
          return found;
      }

      /// <summary>
      /// Applies one header that must go through an <see cref="HttpWebRequest"/> property.
      /// </summary>
      /// <param name="request">Request to configure.</param>
      /// <param name="key">Header name (any casing).</param>
      /// <param name="value">Header value.</param>
      private static void applyRestrictedHeader(HttpWebRequest request, string key, string value)
      {
          switch (key.ToLowerInvariant())
          {
              case "accept":
                  request.Accept = value;
                  break;
              case "host":
                  request.Host = value;
                  break;
              case "connection":
                  request.KeepAlive = string.Equals(value, "keep-alive", StringComparison.OrdinalIgnoreCase);
                  break;
              case "content-type":
                  request.ContentType = value;
                  break;
              case "user-agent":
                  request.UserAgent = value;
                  break;
              case "referer":
                  request.Referer = value;
                  break;
              case "content-length":
                  // Ignored: a pasted length rarely matches the body actually sent, and a
                  // mismatch makes the request fail. http() sets the real length itself.
                  break;
              case "expect":
                  // HttpWebRequest.Expect rejects values containing "100-continue".
                  if (value.IndexOf("100-continue", StringComparison.OrdinalIgnoreCase) < 0)
                  {
                      request.Expect = value;
                  }
                  break;
              case "if-modified-since":
                  request.IfModifiedSince = Convert.ToDateTime(value, System.Globalization.CultureInfo.InvariantCulture);
                  break;
              default:
                  // Range, Proxy-Connection and Keep-Alive have no mapping here and are dropped.
                  break;
          }
      }

      /// <summary>
      /// Copies a textual header block onto the request and then sets its Cookie header.
      /// </summary>
      /// <param name="request">Request to configure; null is ignored.</param>
      /// <param name="headers">Header block, one "Name:Value" per line; CRLF, LF or CR line endings.</param>
      /// <param name="isPrint">True to write the resulting request headers to the console.</param>
      private void fillHeaders(HttpWebRequest request, string headers, bool isPrint = false)
      {
          if (request == null) return;

          if (!string.IsNullOrWhiteSpace(headers))
          {
              // Accept every line-ending style: a verbatim string literal takes whatever
              // the source file was saved with.
              string[] lines = headers.Split(new String[] { "\r\n", "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries);
              foreach (string line in lines)
              {
                  // Split on the first ':' only; values such as URLs contain more of them.
                  // Lines without a name (blank, or HTTP/2 pseudo-headers like ":authority") are skipped.
                  int colon = line.IndexOf(':');
                  if (colon <= 0) continue;

                  string key = line.Substring(0, colon).Trim();
                  string value = line.Substring(colon + 1).Trim();
                  if (key.Length == 0) continue;

                  if (RestrictedHeaders.Contains(key))
                  {
                      applyRestrictedHeader(request, key, value);
                  }
                  else
                  {
                      request.Headers.Add(key, value);
                  }
              }
          }

          // Merge caller-supplied cookies into the remembered set, then send the merged set.
          // With tracking disabled no cookies are sent at all.
          trackCookies(parseCookieHeader(request.Headers[HttpRequestHeader.Cookie]));
          string outgoing = isTrackCookies ? getCookieStr() : string.Empty;
          if (outgoing.Length == 0)
          {
              // Drop the header rather than sending an empty "Cookie:" line.
              request.Headers.Remove(HttpRequestHeader.Cookie);
          }
          else
          {
              request.Headers[HttpRequestHeader.Cookie] = outgoing;
          }

          if (isPrint)
          {
              for (int i = 0; i < request.Headers.AllKeys.Length; i++)
              {
                  string key = request.Headers.AllKeys[i];
                  System.Console.WriteLine(key + ":" + request.Headers[key]);
              }
          }
      }

      /// <summary>
      /// Writes every response header to the console.
      /// </summary>
      /// <param name="response">Response to dump; null is ignored.</param>
      private void printResponseHeaders(HttpWebResponse response)
      {
          if (response == null) return;

          for (int i = 0; i < response.Headers.AllKeys.Length; i++)
          {
              string key = response.Headers.AllKeys[i];
              System.Console.WriteLine(key + ":" + response.Headers[key]);
          }
      }

      /// <summary>
      /// Picks the text encoding named by the "charset=" parameter of a Content-Type value.
      /// </summary>
      /// <param name="contentType">Content-Type header value; may be null.</param>
      /// <returns>The named encoding, or UTF-8 when none is named or the name is not recognised.</returns>
      private static Encoding resolveEncoding(string contentType)
      {
          if (string.IsNullOrWhiteSpace(contentType)) return Encoding.UTF8;

          const string marker = "charset=";
          int start = contentType.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
          if (start < 0) return Encoding.UTF8;

          string charset = contentType.Substring(start + marker.Length);
          int end = charset.IndexOf(';');
          if (end >= 0)
          {
              charset = charset.Substring(0, end);
          }
          charset = charset.Trim().Trim('"', '\'');
          if (charset.Length == 0) return Encoding.UTF8;

          try
          {
              return Encoding.GetEncoding(charset);
          }
          catch (ArgumentException)
          {
              return Encoding.UTF8;
          }
          catch (NotSupportedException)
          {
              return Encoding.UTF8;
          }
      }

      /// <summary>
      /// Reads the response body as text, undoing gzip/deflate compression when the
      /// Content-Encoding header announces it.
      /// </summary>
      /// <param name="response">Response whose body is read.</param>
      /// <returns>The decoded body text; empty when the response has no stream.</returns>
      private string getResponseBody(HttpWebResponse response)
      {
          Encoding textEncoding = resolveEncoding(response.ContentType);
          string compression = (response.ContentEncoding ?? string.Empty).ToLowerInvariant();

          Stream raw = response.GetResponseStream();
          if (raw == null) return string.Empty;

          Stream body = raw;
          if (compression.Contains("gzip"))
          {
              body = new GZipStream(raw, CompressionMode.Decompress);
          }
          else if (compression.Contains("deflate"))
          {
              body = new DeflateStream(raw, CompressionMode.Decompress);
          }

          // The same charset applies to every branch, compressed or not.
          using (body)
          using (StreamReader reader = new StreamReader(body, textEncoding))
          {
              return reader.ReadToEnd();
          }
      }

      /// <summary>
      /// URL-encodes a string after trimming tabs and surrounding whitespace.
      /// </summary>
      /// <param name="item">Text to encode; null yields an empty string.</param>
      /// <param name="code">Encoding used for the percent-escaped bytes; gb2312 when null.</param>
      /// <returns>The URL-encoded text.</returns>
      public static string UrlEncode(string item, Encoding code)
      {
          if (item == null) return string.Empty;
          return System.Web.HttpUtility.UrlEncode(item.Trim('\t').Trim(), code ?? Encoding.GetEncoding("gb2312"));
      }

      /// <summary>
      /// URL-encodes a string using gb2312 bytes.
      /// </summary>
      /// <param name="item">Text to encode.</param>
      /// <returns>The URL-encoded text.</returns>
      public static string UrlEncodeByGB2312(string item)
      {
          return UrlEncode(item, Encoding.GetEncoding("gb2312"));
      }

      /// <summary>
      /// URL-encodes a string using UTF-8 bytes.
      /// </summary>
      /// <param name="item">Text to encode.</param>
      /// <returns>The URL-encoded text.</returns>
      public static string UrlEncodeByUTF8(string item)
      {
          return UrlEncode(item, Encoding.UTF8);
      }

      /// <summary>
      /// HTML-decodes a string after trimming tabs and surrounding whitespace.
      /// </summary>
      /// <param name="item">Text to decode; null yields an empty string.</param>
      /// <returns>The decoded text.</returns>
      public static string HtmlDecode(string item)
      {
          if (item == null) return string.Empty;
          return WebUtility.HtmlDecode(item.Trim('\t').Trim());
      }
  }
  473. 473 }

复制代码

 

使用方式:

1)打开谷歌浏览器,按F12打开开发者工具

复制Request Headers 里面的所有内容,然后执行代码:

复制代码

  // Header block copied from the browser's "Request Headers" panel, one "Name:Value" per line.
  string requestHeaders = @"Accept:text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01
Accept-Encoding:gzip, deflate
Accept-Language:zh-CN,zh;q=0.8
Cache-Control:no-cache
Content-Length:251
Content-Type:application/x-www-form-urlencoded; charset=UTF-8
Cookie:JSESSIONID=B1716F5DAC2F78D1E592F5421D859CFA; Hm_lvt_f44f38cf69626ed8bcfe92d72ed55922=1498099203; Hm_lpvt_f44f38cf69626ed8bcfe92d72ed55922=1498099203; cache_cars=152%7C152%7CBDL212%7C111111%7C111111%2C152%7C152%7CBy769x%7C111111%7C111111%2C152%7C152%7Cd12881%7C111111%7C111111
Host:www.xxxxxxxx.com
Origin:http://www.xxxxxxxx.com
Pragma:no-cache
Proxy-Connection:keep-alive
Referer:http://www.cheshouye.com/api/weizhang/
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36
X-Requested-With:XMLHttpRequest";

  // Form body; every key and value is already URL-encoded.
  string formBody = "chepai_no=b21451&chejia_no=111111&engine_no=111111&city_id=152&car_province_id=12&input_cost=0&vcode=%7B%22cookie_str%22%3A%22%22%2C%22verify_code%22%3A%22%22%2C%22vcode_para%22%3A%7B%22vcode_key%22%3A%22%22%7D%7D&td_key=qja5rbl2d97n&car_type=02&uid=0";
  string endpoint = "http://www.xxxxxxxxxxxx.com/api/weizhang/open_task?callback=jQuery1910816327";

  // Passing true makes the client remember cookies across requests.
  HttpRequestClient client = new HttpRequestClient(true);
  string responseText = client.httpPost(endpoint, requestHeaders, formBody, Encoding.UTF8);

复制代码

 

就这样,你会惊喜地发现,卧槽!返回来的值和谷歌浏览器上显示的值一模一样。

只要域名没变化,HttpRequestClient 对象就不要去改变, 多线程请使用ThreadLocal<HttpRequestClient > 

配合我很久之前写的多线程类 QueueThreadBase 让你起飞.

你想暴力破解网站登录密码吗?基本思路如下:

1)强大的用户名+密码字典

2)多线程Http+代理(代理可以不用,如果服务器做了ip限制,那么代理就非常有用了,最好是透明的http代理,并且有规则剔除慢的代理)

3)验证码破解.(只要验证码不复杂,在某宝就能买的dll 可用,1000块钱上下)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小丑西瓜9/article/detail/259396
推荐阅读
相关标签
  

闽ICP备14008679号