赞
踩
基本用法:
正则的语法: 使⽤元字符进⾏排列组合⽤来匹配字符串 在线测试正则表达式https://tool.oschina.net/regex/
案例:
综上我们可以发现.表示尽可能多的匹配,.?表示尽可能少的匹配
import re
#findall 查找所有. 返回list
lst = re.findall(r"m", "mai le fo len, mai nimei!")
print(lst) # ['m', 'm', 'm']
lst = re.findall(r"\d+", "5点之前. 你要给我5000万")
print(lst) # ['5', '5000']
#['m', 'm', 'm']
#['5', '5000']
#finditer:匹配字符串中的所有的内容,返回的是迭代器,
#从迭代器中拿到内容需要.group
it = re.finditer(r"\d+","我的电话是:10086,我女朋友的电话是:10010")
print(it)
for i in it:
print(i)
print(i.group())
#<callable_iterator object at 0x000002715852C208>
<re.Match object; span=(6, 11), match='10086'>
10086
<re.Match object; span=(21, 26), match='10010'>
10010
#search 会进⾏全文匹配. 但是如果匹配到了第⼀个结果. 就会返回这
#个结果. 如果匹配不上search返回的则是None
ret = re.search(r'\d', '5点之前. 你要给我5000万')
print(ret.group()) # 5
#match 只能从字符串的开头进⾏匹配
ret = re.match('a', 'abc').group()
print(ret) # a
#compile() 可以将⼀个⻓⻓的正则进⾏预加载. ⽅便后⾯的使⽤
obj = re.compile(r'\d{3}') # 将正则表达式编译成为⼀个正则表达式对象, 规则要匹配的是3个数字
ret = obj.search('abc123eeee') # 正则表达式对象调⽤search, 参数为待匹配的字符串
print(ret.group()) # 结果: 123
#re.S让.能匹配换行符 r = """ <div class='wyf'><span id='1'>吴亦凡</span></div> <div class='hb'><span id='2'>黄渤</span></div> <div class='hg'><span id='3'>胡歌</span></div> <div class='hx'><span id='4'>黄训</span></div> <div class='zyx'><span id='5'>张艺兴</span></div> """ obj = re.compile(r"<div class='.*?'><span id='.*?'>.*?</span></div>",re.S) result = obj.finditer(r) for it in result: print(it.group()) #<div class='wyf'><span id='1'>吴亦凡</span></div> <div class='hb'><span id='2'>黄渤</span></div> <div class='hg'><span id='3'>胡歌</span></div> <div class='hx'><span id='4'>黄训</span></div> <div class='zyx'><span id='5'>张艺兴</span></div>
#匹配的内容再进行筛选(?P<name>.*?) r = """ <div class='wyf'><span id='1'>吴亦凡</span></div> <div class='hb'><span id='2'>黄渤</span></div> <div class='hg'><span id='3'>胡歌</span></div> <div class='hx'><span id='4'>黄训</span></div> <div class='zyx'><span id='5'>张艺兴</span></div> """ obj = re.compile(r"<div class='(?P<suoxie>.*?)'><span id='(?P<id>.*?)'>(?P<star>.*?)</span></div>",re.S) result = obj.finditer(r) for it in result: print(it.group()) print(it.group("suoxie")) print(it.group("id")) print(it.group("star")) #<div class='wyf'><span id='1'>吴亦凡</span></div> wyf 1 吴亦凡 <div class='hb'><span id='2'>黄渤</span></div> hb 2 黄渤 <div class='hg'><span id='3'>胡歌</span></div> hg 3 胡歌 <div class='hx'><span id='4'>黄训</span></div> hx 4 黄训 <div class='zyx'><span id='5'>张艺兴</span></div> zyx 5 张艺兴
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。