>> for word in set(word_list):... print(wor">
赞
踩
collections 是 Python 内建的一个集合模块,提供了许多有用的集合类,其中包括许多常见的强化数据结构类。之所以会出现这些强化数据结构,是因为一般的元组、字典等可能无法满足某些特定的需要。
普通实现
- >>> word_list = ["a", "b", "c", "c", "a", "a"]
- >>> cnt = {}
- >>> for word in set(word_list):
- ... cnt[word] = word_list.count(word)
- ...
- >>> cnt
- {'b': 1, 'c': 2, 'a': 3}
- >>> cnt['d']
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- KeyError: 'd'
Counter 实现
- >>> from collections import Counter
- >>> cnt = Counter()
- >>> word_list = ['a', 'b', 'c', 'c', 'a', 'a']
- >>> for word in word_list:
- ... cnt[word] += 1
- ...
- >>> cnt
- Counter({'a': 3, 'c': 2, 'b': 1})
- >>> cnt['a']
- 3
- >>> cnt['d'] # 即使没有 key,也不会报 KeyError 哟,这点和 defaultdict(int) 比较像。
- 0
普通实现
- >>> word_str = 'hello world'
- >>> word_list = list(word_str)
- >>> cnt = {}
- >>> for word in set(word_list):
- ... cnt[word] = word_list.count(word)
- ...
- >>> cnt
- {'e': 1, 'd': 1, 'h': 1, 'o': 2, 'l': 3, ' ': 1, 'r': 1, 'w': 1}
Counter 实现
- >>> from collections import Counter
- >>> word_str = 'hello world'
- >>> cnt = Counter(word_str)
- >>> cnt
- Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})
-
- >>> Counter({'red': 4, 'blue': 2})
- Counter({'red': 4, 'blue': 2})
- >>> Counter(red=4, blue=2)
- Counter({'red': 4, 'blue': 2})
- >>> cnt = Counter(red=4, blue=2)
- >>> cnt
- Counter({'red': 4, 'blue': 2})
- >>> list(cnt.elements())
- ['red', 'red', 'red', 'red', 'blue', 'blue']
- >>> cnt = Counter('hello world')
- >>> cnt
- Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})
- >>> cnt.most_common()
- [('l', 3), ('o', 2), ('h', 1), ('e', 1), (' ', 1), ('w', 1), ('r', 1), ('d', 1)]
- >>> cnt.most_common(3)
- [('l', 3), ('o', 2), ('h', 1)]
- >>> a = Counter(a=4, b=2, c=0, d=-2)
- >>> a
- Counter({'a': 4, 'b': 2, 'c': 0, 'd': -2})
- >>> b = Counter(a=1, b=2, c=-3, d=4)
- >>> b
- Counter({'d': 4, 'b': 2, 'a': 1, 'c': -3})
- >>> a.subtract(b)
- >>> a
- Counter({'a': 3, 'c': 3, 'b': 0, 'd': -6})
其实转换成 Counter 类型以后,操作和字典差不多。
- >>> from collections import Counter
- >>> cnt = Counter('hello world')
- >>> cnt
- Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})
- >>> cnt.keys()
- dict_keys(['h', 'e', 'l', 'o', ' ', 'w', 'r', 'd'])
- >>> cnt.values()
- dict_values([1, 1, 3, 2, 1, 1, 1, 1])
- >>> sum(cnt.values())
- 11
- >>> dict(cnt)
- {'h': 1, 'e': 1, 'l': 3, 'o': 2, ' ': 1, 'w': 1, 'r': 1, 'd': 1}
- >>> cnt.items()
- dict_items([('h', 1), ('e', 1), ('l', 3), ('o', 2), (' ', 1), ('w', 1), ('r', 1), ('d', 1)])
- >>> Counter(dict([('a', 1), ('b', 2), ('c', 3)]))
- Counter({'c': 3, 'b': 2, 'a': 1})
- >>> cnt.clear()
- >>> cnt
- Counter()

deque 是栈和队列的一种广义实现,俗称双端队列。它能够内存高效地以近似 O(1) 的性能在 deque 的两端插入和删除元素;尽管 list 也支持相似的操作,但是 list 的 pop(0) 和 insert(0, v)(会移动其余所有元素的位置)具有 O(n) 的时间复杂度。如果抛开这些细节不谈的话,你把它当成加强版的 list 好像也没啥毛病。
- >>> from collections import deque
- >>>
- >>> d = deque(['a', 'b', 'c'])
- >>> d
- deque(['a', 'b', 'c'])
- >>> d.append('d')
- >>> d
- deque(['a', 'b', 'c', 'd'])
- >>> d.count('b')
- 1
- >>> d.extend(['e', 'f', 'g'])
- >>> d
- deque(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
- >>> d.pop()
- 'g'
- >>> d
- deque(['a', 'b', 'c', 'd', 'e', 'f'])
- >>> d.remove('d')
- >>> d
- deque(['a', 'b', 'c', 'e', 'f'])
- >>> d.reverse()
- >>> d
- deque(['f', 'e', 'c', 'b', 'a'])
-
- # 队列左端操作
- >>> d
- deque(['f', 'e', 'c', 'b', 'a'])
- >>> d.popleft()
- 'f'
- >>> d
- deque(['e', 'c', 'b', 'a'])
- >>> d.appendleft('h')
- >>> d
- deque(['h', 'e', 'c', 'b', 'a'])
- >>> d.extendleft(['i', 'j', 'k'])
- >>> d
- deque(['k', 'j', 'i', 'h', 'e', 'c', 'b', 'a'])
- # 想想挖掘机的履带,rotate 就不难理解了
- >>> d.rotate(1)
- >>> d
- deque(['a', 'k', 'j', 'i', 'h', 'e', 'c', 'b'])
- >>> d.rotate(2)
- >>> d
- deque(['c', 'b', 'a', 'k', 'j', 'i', 'h', 'e'])

defaultdict 对我来说最大的特点就是访问不存在的键时不会出现 KeyError 错误了,我们可以再回到列表元素统计那块来看看。
普通实现
- >>> word_list = ["a", "b", "c", "c", "a", "a"]
- >>> cnt = {}
- >>> for word in word_list:
- ... if word not in cnt:
- ... cnt[word] = 1
- ... else:
- ... cnt[word] += 1
- ...
- >>> cnt
- {'a': 3, 'b': 1, 'c': 2}
-
- >>> cnt['d']
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- KeyError: 'd'
defaultdict 实现(没有用 if else 语句去判断哟)
- >>> from collections import defaultdict
- >>> word_list = ["a", "b", "c", "c", "a", "a"]
- >>> cnt = defaultdict(int)
- >>> for word in word_list:
- ... cnt[word] += 1
- ...
- >>> cnt
- defaultdict(<class 'int'>, {'a': 3, 'b': 1, 'c': 2})
OrderedDict 见名知意,就是有顺序的字典,好像没啥特别好解释的了。关于 OrderedDict 的实际应用,csv.DictReader 有部分涉及。
- >>> from collections import OrderedDict
- >>> d = {"banana":3,"apple":2,"pear":1,"orange":4}
- >>> order_dict = OrderedDict(d)
- >>> order_dict
- OrderedDict([('banana', 3), ('apple', 2), ('pear', 1), ('orange', 4)])
- >>> order_dict.keys()
- odict_keys(['banana', 'apple', 'pear', 'orange'])
- >>> order_dict.values()
- odict_values([3, 2, 1, 4])
- >>> order_dict.items()
- odict_items([('banana', 3), ('apple', 2), ('pear', 1), ('orange', 4)])
-
- # 从后(前)删除元素
- >>> order_dict.popitem(last=True)
- ('orange', 4)
- >>> order_dict
- OrderedDict([('banana', 3), ('apple', 2), ('pear', 1)])
- >>> order_dict.popitem(last=False)
- ('banana', 3)
- >>> order_dict
- OrderedDict([('apple', 2), ('pear', 1)])
-
- # 重新添加 orange,然后移动元素到末尾
- >>> order_dict['orange'] = 4
- >>> order_dict
- OrderedDict([('apple', 2), ('pear', 1), ('orange', 4)])
- >>> order_dict.move_to_end('apple')
- >>> order_dict
- OrderedDict([('pear', 1), ('orange', 4), ('apple', 2)])

命名元组,其实用数据库中数据表的思想理解比较容易一些:定义后的命名元组就相当于一个数据表,_fields 就相当于数据表的字段,实例化的对象就相当于生成了一条数据记录。
- >>> from collections import namedtuple
- >>> User = namedtuple('User', ['name', 'age', 'id'])
- >>> User._fields
- ('name', 'age', 'id')
-
- >>> User = namedtuple('User', 'name age id')
- >>> User._fields
- ('name', 'age', 'id')
- >>> user = User('tester', '22', '12345678')
- >>> user
- User(name='tester', age='22', id='12345678')
-
- >>> user = User._make(['looking', 25, '12345678'])
- >>> user
- User(name='looking', age=25, id='12345678')
- >>> user = User._make(['looking', 25, '12345678'])
- >>> user.name
- 'looking'
- >>> user.age
- 25
- >>> user.id
- '12345678'
- >>> user._asdict()
- OrderedDict([('name', 'looking'), ('age', 25), ('id', '12345678')])
- >>> user._replace(age=22)
- User(name='looking', age=22, id='12345678')
-
- >>> dt = {'name':'looking', 'age':25, 'id':'12345678'}
- >>> User(**dt)
- User(name='looking', age=25, id='12345678')
- >>> user
- User(name='looking', age=25, id='12345678')
- >>> list(user)
- ['looking', 25, '12345678']
- >>> User._make(['looking', 25, '12345678'])
- User(name='looking', age=25, id='12345678')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。