当前位置:   article > 正文

[Applied Data Science with Python] week1-part2_applied data science using pyspark pdf

applied data science using pyspark pdf

[Applied Data Science with Python] week1-part2

string 格式化输出

# A single sale: unit price, quantity purchased, and the buyer's name.
sales_record = {'price': 3.24, 'num_items': 4, 'person': 'Chris'}

# Template with four positional placeholders: buyer, quantity, unit price, total.
sales_statement = '{} bought {} item(s) at a price of {} each for a total of {}'

# Pull the fields out once so the format call reads in the same order as the
# sentence; the total is quantity times unit price.
buyer = sales_record['person']
quantity = sales_record['num_items']
unit_price = sales_record['price']
print(sales_statement.format(buyer, quantity, unit_price, quantity * unit_price))
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11

读写CSV文件

import csv
%precision 2
with open('mpg.csv') as csvfile:
    mpg = list(csv.DictReader(csvfile))   
mpg[:3] # The first three dictionaries in our list.
  • 1
  • 2
  • 3
  • 4
  • 5
# Number of rows loaded into the list.
len(mpg)
# Column names of the CSV, taken from the keys of the first row.
mpg[0].keys()
# Average cty fuel economy across all cars; the CSV values are strings, so
# each one is converted to float before summing.
sum(float(car['cty']) for car in mpg) / len(mpg)
  • 1
  • 2
  • 3
  • 4
  • 5
# Collect the distinct values of the 'cyl' column; a set keeps each value once.
cylinders = {car['cyl'] for car in mpg}
  • 1
  • 2
# Average city fuel economy (cty, miles per gallon) grouped by cylinder count.
CtyMpgByCyl = []

for cyl_level in cylinders:  # one pass over the data per cylinder level
    # City mpg of every car whose cylinder value matches this level.
    matching_cty = [float(car['cty']) for car in mpg if car['cyl'] == cyl_level]
    # Store the pair (cylinder level, average city mpg).
    CtyMpgByCyl.append((cyl_level, sum(matching_cty) / len(matching_cty)))

# Order the results by cylinder level (the first element of each pair).
CtyMpgByCyl.sort(key=lambda pair: pair[0])
CtyMpgByCyl
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14

时间

import datetime as dt
import time as tm
# Seconds elapsed since 1 January 1970 (the epoch).
tm.time()
# Convert the current timestamp into a datetime object.
dtnow = dt.datetime.fromtimestamp(tm.time())
dtnow
# Individual components of a datetime: year, month, day, hour, minute, second.
(dtnow.year, dtnow.month, dtnow.day,
 dtnow.hour, dtnow.minute, dtnow.second)

# A timedelta is a duration expressing the difference between two dates.
delta = dt.timedelta(100)  # first positional argument is days: 100 days
delta

# Today's date, e.g. datetime.date(2019, 4, 24).
today = dt.date.today()
# The date 100 days ago, e.g. datetime.date(2019, 1, 14).
today - delta
# Dates support ordering comparisons.
today - delta < today
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/151975
推荐阅读
相关标签
  

闽ICP备14008679号