赞
踩
项目中经常使用抓取,有些接口返回的是html,我解析是使用NSoup。
但大部分接口返回的是Json。
下面我针对json 编写了三个方法,都是用于提取属性结果的
方法1.使用的是JsonPath,配置规则详见:
newtonsoft官网文档
https://www.newtonsoft.com/json/help/html/QueryJsonSelectToken.htm
List<string> resLS = NSoupHelper.GetJsonListStrBySelector(jsonStr, "$..rows[*].newsTitle");
方法2.使用的是正则表达式提取,遍历后返回命中结果
按照json路径传参即可
List<string> regLS = NSoupHelper.GetJsonListStrByRegex(jsonStr, "ROOT rows newsTitle");
方法3.使用的是JToken判断,遍历返回命中结果
按照json路径传参即可
List<string> resLS2 = NSoupHelper.GetJsonListStrBySelector2(jsonStr, "ROOT rows newsTitle");
三种结果都是一致的:
源码
-
- #region Json
- /// <summary>
- /// Extracts values from a JSON string with a JSONPath expression
- /// (Json.NET <c>SelectTokens</c>).
- /// </summary>
- /// <param name="_JsonResult">JSON text to query. The root may be an object or an array.</param>
- /// <param name="_getSelector">JSONPath expression, e.g. <c>$..Products[?(@.Price >= 50)].Name</c> or <c>$..rows[*].newsTitle</c>.</param>
- /// <returns>
- /// The <c>ToString()</c> form of every matched token, in match order.
- /// Empty when either argument is null or empty. If parsing or path evaluation
- /// fails, whatever was collected before the failure is returned (possibly nothing).
- /// </returns>
- public static List<string> GetJsonListStrBySelector(string _JsonResult, string _getSelector)
- {
-     List<string> _listS = new List<string>();
-     if (string.IsNullOrEmpty(_JsonResult) || string.IsNullOrEmpty(_getSelector)) { return _listS; }
-
-     try
-     {
-         // Parse as JToken rather than JObject so that array-rooted documents
-         // (e.g. "[{...},{...}]") can be queried as well.
-         JToken root = JsonConvert.DeserializeObject<JToken>(_JsonResult);
-         if (root == null) { return _listS; }
-
-         // JSONPath syntax: https://www.newtonsoft.com/json/help/html/QueryJsonSelectToken.htm
-         foreach (JToken matched in root.SelectTokens(_getSelector))
-         {
-             _listS.Add(matched.ToString());
-         }
-     }
-     catch (System.Exception)
-     {
-         // Deliberate best-effort: malformed JSON or an invalid JSONPath must not
-         // propagate to the caller; the matches gathered so far are returned.
-     }
-     return _listS;
- }
-
- /// <summary>
- /// Extracts values from a JSON string by following a space-separated key path,
- /// using regular expressions on the JSON text instead of an object model.
- /// </summary>
- /// <param name="_JsonResult">
- /// JSON text. When it contains a single quote it is first round-tripped through
- /// Json.NET so that keys and strings become double-quoted; if that round trip
- /// fails, the raw text is searched as-is.
- /// </param>
- /// <param name="_getSelector">Key names from outermost to innermost, separated by spaces, e.g. <c>ROOT rows newsTitle</c>.</param>
- /// <returns>
- /// For every occurrence of the last key found inside the matches of the preceding
- /// keys: the text captured between the value's delimiters (brackets, braces or
- /// quotes; for other values the text between the colon and the following comma or
- /// closing brace). Empty when an argument is null/empty or nothing matches.
- /// </returns>
- public static List<string> GetJsonListStrByRegex(string _JsonResult, string _getSelector)
- {
-     List<string> _listS = new List<string>();
-     if (string.IsNullOrEmpty(_JsonResult) || string.IsNullOrEmpty(_getSelector)) { return _listS; }
-
-     // Ignore repeated/leading/trailing spaces so they do not become empty key names.
-     string[] jPaths = _getSelector.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);
-     if (jPaths.Length == 0) { return _listS; }
-
-     // The patterns below only recognise double-quoted keys, so single-quoted
-     // JSON is re-serialised first.
-     if (_JsonResult.IndexOf('\'') >= 0)
-     {
-         try
-         {
-             _JsonResult = JsonConvert.SerializeObject(JsonConvert.DeserializeObject<JToken>(_JsonResult));
-         }
-         catch (JsonException)
-         {
-             // Not parseable (e.g. an apostrophe inside truncated JSON):
-             // keep the original text and let the regex search do what it can.
-         }
-     }
-
-     // Start from the whole document and narrow the candidate fragments one key at a time.
-     List<string> fragments = new List<string> { _JsonResult };
-     foreach (string key in jPaths)
-     {
-         // Built once per level instead of once per fragment.
-         Regex[] extractors = BuildValueExtractors(key);
-         List<string> narrowed = new List<string>();
-
-         foreach (string fragment in fragments)
-         {
-             foreach (Regex extractor in extractors)
-             {
-                 MatchCollection hits = extractor.Matches(fragment);
-                 foreach (Match hit in hits)
-                 {
-                     narrowed.Add(hit.Groups["json"].Value);
-                 }
-                 // The first delimiter kind that matches decides the value's shape for this fragment.
-                 if (hits.Count > 0)
-                 {
-                     break;
-                 }
-             }
-         }
-         fragments = narrowed;
-     }
-
-     _listS = fragments;
-     return _listS;
- }
-
- /// <summary>
- /// Builds the value-capturing patterns for one key, in the order they are tried:
- /// array <c>[ ]</c>, object <c>{ }</c>, string <c>" "</c>, then any other value
- /// (from <c>:</c> up to <c>,</c> or <c>}</c>). The value is captured in group <c>json</c>.
- /// </summary>
- private static Regex[] BuildValueExtractors(string key)
- {
-     string[] openers = new string[] { "[", "{", "\"", ":" };
-     string[] closers = new string[] { "]", "}", "\"", ",}" };
-
-     // The key is matched literally; escape it so characters such as '.' or '('
-     // in a property name cannot alter or break the pattern.
-     string literalKey = Regex.Escape(key);
-
-     Regex[] extractors = new Regex[openers.Length];
-     for (int i = 0; i < openers.Length; i++)
-     {
-         // Balancing groups ('Open' / '-Open') keep nested delimiters paired inside the captured value.
-         extractors[i] = new Regex(string.Format("\"{2}\"[^\\{0}\\{1},]*\\{0}(?<json>[^\\{0}\\{1}]*(((?'Open'\\{0})[^\\{0}\\{1}]*)+((?'-Open'\\{1})[^\\{0}\\{1}]*)+)*?(?(Open)(?!))*[^\\{0}\\{1}]*)[\\{1}]", openers[i], closers[i], literalKey));
-     }
-     return extractors;
- }
- /// <summary>
- /// Extracts values from a JSON string by following a space-separated key path,
- /// walking the parsed Json.NET token tree level by level.
- /// </summary>
- /// <param name="_JsonResult">JSON text; single-quoted strings are accepted by the parser. The root may be an object or an array of objects.</param>
- /// <param name="_getSelector">Key names from outermost to innermost, separated by spaces, e.g. <c>ROOT rows newsTitle</c>.</param>
- /// <returns>
- /// The <c>ToString()</c> form of every value reached by the path. Array values are
- /// expanded into their elements at every level (including the last). Branches where
- /// the key is missing, null, or the current token is not an object are skipped.
- /// Empty when an argument is null/empty or the JSON cannot be parsed.
- /// </returns>
- public static List<string> GetJsonListStrBySelector2(string _JsonResult, string _getSelector)
- {
-     List<string> _listS = new List<string>();
-     if (string.IsNullOrEmpty(_JsonResult) || string.IsNullOrEmpty(_getSelector)) { return _listS; }
-
-     // Ignore repeated/leading/trailing spaces so they do not become empty key names.
-     string[] jPaths = _getSelector.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);
-     if (jPaths.Length == 0) { return _listS; }
-
-     JToken root;
-     try
-     {
-         // No quote normalisation is needed here: the text is parsed directly,
-         // and the parser already handles single-quoted JSON.
-         root = JsonConvert.DeserializeObject<JToken>(_JsonResult);
-     }
-     catch (JsonException)
-     {
-         // Malformed JSON: best-effort contract is an empty result, not an exception.
-         return _listS;
-     }
-     if (root == null) { return _listS; }
-
-     // Tokens reached so far; arrays are always replaced by their elements.
-     List<JToken> reached = new List<JToken>();
-     AppendFlattened(reached, root);
-
-     foreach (string key in jPaths)
-     {
-         List<JToken> deeper = new List<JToken>();
-         foreach (JToken token in reached)
-         {
-             // Only objects have named children; nested arrays such as
-             // [[1,2,3],[4,5,6]] and scalar values are not supported and are skipped.
-             JObject owner = token as JObject;
-             if (owner == null) { continue; }
-
-             JToken child = owner[key];
-             // A missing or null value on one branch must not discard the other branches.
-             if (child == null || child.Type == JTokenType.Null) { continue; }
-
-             AppendFlattened(deeper, child);
-         }
-         reached = deeper;
-     }
-
-     foreach (JToken token in reached)
-     {
-         _listS.Add(token.ToString());
-     }
-     return _listS;
- }
-
- /// <summary>
- /// Adds <paramref name="token"/> to <paramref name="target"/>; when it is an array,
- /// adds each of its elements instead.
- /// </summary>
- private static void AppendFlattened(List<JToken> target, JToken token)
- {
-     JArray array = token as JArray;
-     if (array == null)
-     {
-         target.Add(token);
-         return;
-     }
-     foreach (JToken element in array)
-     {
-         target.Add(element);
-     }
- }
- #endregion
参考json
-
- // Sample single-quoted JSON used as input for the example calls: a ROOT array of two
- // objects, each with TOKEN/SERVICE/DATAPARAM fields and a rows array of three news records.
- string jsonStr = @"{ 'ROOT':[{
- 'TOKEN': 'aa1',
- 'SERVICE': 'bb',
- 'DATAPARAM': 'cc',
- 'rows': [
- {
- 'searchValue': null,
- 'createBy': '用户N',
- 'createTime': '2021-02-25 11:06:03',
- 'updateBy': null,
- 'updateTime': null,
- 'remark': null,
- 'params': {},
- 'newsId': 241,
- 'newsTitle': 'IPASON × GUNDAM攀升高达联名独角兽电竞主机达抵达战场',
- 'newsSubtitle': '#光芒闪耀,攀升AMD 3A配置独角兽登场# IPASON × GUNDAM攀升高达联名独角兽电竞主机达抵达战场!2月24日 20:00,预售开启。搭载AMD 锐龙 5000系列处理器和AMD Radeon RX 6000系列显卡,旗舰硬件核心让你体验热血电竞与高达情怀的双重满足!上天猫、京东搜索高达攀升,了解#高达主机# 一起去唤醒并激发心中少年的勇气,男人的浪漫。',
- 'newsUrl': null,
- 'releaseTime': '2021-02-24',
- 'newsSource': '官方',
- 'showOrder': '43',
- 'showStatus': '0',
- 'newsImg': 'http://ipasoncnwebsite.oss-cn-shanghai.aliyuncs.com/images/79049291-16f3-4407-a34f-80fbc3193920.jpg',
- 'newsCategory': 'category_qiyexinwen'
- },
- {
- 'searchValue': null,
- 'createBy': '用户N',
- 'createTime': '2021-01-19 16:55:27',
- 'updateBy': null,
- 'updateTime': null,
- 'remark': null,
- 'params': {},
- 'newsId': 239,
- 'newsTitle': '高性能定制电脑攀升',
- 'newsSubtitle': '高性能定制电脑攀升',
- 'newsUrl': null,
- 'releaseTime': '2021-02-24',
- 'newsSource': '官方',
- 'showOrder': '40',
- 'showStatus': '0',
- 'newsImg': 'http://ipasoncnwebsite.oss-cn-shanghai.aliyuncs.com/images/6b96c7ee-5f15-490c-a140-727da068a62b.png',
- 'newsCategory': 'category_qiyexinwen'
- },
- {
- 'searchValue': null,
- 'createBy': '用户N',
- 'createTime': '2021-01-15 10:33:47',
- 'updateBy': null,
- 'updateTime': null,
- 'remark': null,
- 'params': {},
- 'newsId': 237,
- 'newsTitle': '攀升电脑2020年度荣誉时刻',
- 'newsSubtitle': '攀升电脑2020年度荣誉时刻',
- 'newsUrl': 'https://tieba.baidu.com/p/7190803603',
- 'releaseTime': '2021-01-15',
- 'newsSource': '官方',
- 'showOrder': '37',
- 'showStatus': '0',
- 'newsImg': 'http://ipasoncnwebsite.oss-cn-shanghai.aliyuncs.com/images/01b99cf5-e51f-43a1-8478-72ebfb38de9c.jpg',
- 'newsCategory': 'category_qiyexinwen'
- }
- ]
- }, {
- 'TOKEN': 'c2',
- 'SERVICE': 'bb',
- 'DATAPARAM': 'cc',
- 'rows': [
- {
- 'searchValue': null,
- 'createBy': '用户N',
- 'createTime': '2021-02-25 11:06:03',
- 'updateBy': null,
- 'updateTime': null,
- 'remark': null,
- 'params': {},
- 'newsId': 241,
- 'newsTitle': 'IPASON × GUNDAM攀升高达联名独角兽电竞主机达抵达战场',
- 'newsSubtitle': '#光芒闪耀,攀升AMD 3A配置独角兽登场# IPASON × GUNDAM攀升高达联名独角兽电竞主机达抵达战场!2月24日 20:00,预售开启。搭载AMD 锐龙 5000系列处理器和AMD Radeon RX 6000系列显卡,旗舰硬件核心让你体验热血电竞与高达情怀的双重满足!上天猫、京东搜索高达攀升,了解#高达主机# 一起去唤醒并激发心中少年的勇气,男人的浪漫。',
- 'newsUrl': null,
- 'releaseTime': '2021-02-24',
- 'newsSource': '官方',
- 'showOrder': '43',
- 'showStatus': '0',
- 'newsImg': 'http://ipasoncnwebsite.oss-cn-shanghai.aliyuncs.com/images/79049291-16f3-4407-a34f-80fbc3193920.jpg',
- 'newsCategory': 'category_qiyexinwen'
- },
- {
- 'searchValue': null,
- 'createBy': '用户N',
- 'createTime': '2021-01-19 16:55:27',
- 'updateBy': null,
- 'updateTime': null,
- 'remark': null,
- 'params': {},
- 'newsId': 239,
- 'newsTitle': '高性能制电脑攀升',
- 'newsSubtitle': '高性能定制电脑攀升',
- 'newsUrl': null,
- 'releaseTime': '2021-02-24',
- 'newsSource': '官方',
- 'showOrder': '40',
- 'showStatus': '0',
- 'newsImg': 'http://ipasoncnwebsite.oss-cn-shanghai.aliyuncs.com/images/6b96c7ee-5f15-490c-a140-727da068a62b.png',
- 'newsCategory': 'category_qiyexinwen'
- },
- {
- 'searchValue': null,
- 'createBy': '用户N',
- 'createTime': '2021-01-15 10:33:47',
- 'updateBy': null,
- 'updateTime': null,
- 'remark': null,
- 'params': {},
- 'newsId': 237,
- 'newsTitle': '攀升电脑2020年度荣誉时刻',
- 'newsSubtitle': '攀升电脑2020年度荣誉时刻',
- 'newsUrl': 'https://tieba.baidu.com/p/7190803603',
- 'releaseTime': '2021-01-15',
- 'newsSource': '官方',
- 'showOrder': '37',
- 'showStatus': '0',
- 'newsImg': 'http://ipasoncnwebsite.oss-cn-shanghai.aliyuncs.com/images/01b99cf5-e51f-43a1-8478-72ebfb38de9c.jpg',
- 'newsCategory': 'category_qiyexinwen'
- }
- ]
- }]
- }";
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。