当前位置:   article > 正文

python做项目看板_Adventure电商项目-Python 数据仓库实现可视化看板

用python做一个数据可视化网页看板

一、项目介绍

1.1 项目需求

基于第一部分业务背景,为满足业务自主分析提高运营决策时效,现使用Python、数据仓库实现数据自动更新式可视化面板。

1.2 项目操作流程

二、数据处理

2.1 代码处理分析

2.1.1 聚合生成dw_order_by_day表

从mysql数据库中获取ods_sales_orders订单明细表并按日期分组聚合,得到每日销量和每日销售额。读取日期维度表,与上表合并,得到dw_order_by_day每日环比表。 关键代码如下(部分):

#导入相关模块

import pandas as pd

import pymysql

import random

from sqlalchemy import create_engine

pymysql.install_as_MySQLdb()

import datetime

import warnings

warnings.filterwarnings("ignore")

# 连接adventure_ods_new库

adventure_ods = create_engine('mysql://frogdatanew:Frogdata@844@106.12.180.221:3306/adventure_ods_new?charset=gbk')

# 连接datafrog05_adventure_new库

adventure_dw = create_engine('mysql://frognew:Frogdata@0921@106.15.121.232:3306/datafrog05_adventure_new?charset=gbk')

"""利用空列表及循环生成对应随机值,与销量订单聚合表合并形成sum_amount_order_goal(销量订单聚合目标表)"""

sum_amount_goal_list = []

sum_order_goal_list = []

# 转为list类型,遍历每个日期

create_date_list = list(sum_amount_order['create_date'])

for i in create_date_list:

# 生成一个在[0.85,1.1]随机数

a = random.uniform(0.85, 1.1)

b = random.uniform(0.85, 1.1)

# 对应日期下生成总金额(sum_amount)*a的列

amount_goal = list(sum_amount_order[sum_amount_order['create_date'] == i]['sum_amount'])[0] * a

# 对应日期下生成总订单数(sum_order)*b的列

order_goal = list(sum_amount_order[sum_amount_order['create_date'] == i]['sum_order'])[0] * b

# 将生成的目标值加入空列表

sum_amount_goal_list.append(amount_goal)

sum_order_goal_list.append(order_goal)

# 合并sum_amount_order表与刚生成的目标值列,形成sum_amount_order_goal表

sum_amount_order_goal = pd.concat([sum_amount_order, pd.DataFrame(

{'sum_amount_goal': sum_amount_goal_list, 'sum_order_goal':

sum_order_goal_list})], axis=1)

sum_amount_order_goal.head()

"""存储新的dw_order_by_day """

#增添环比值amount_diff

# pct_change()表示当前元素与先前元素的相差百分比,默认竖向,例:前面元素x,当前元素y,公式 result = (y-x)/x

dw_order_by_day['amount_diff'] = dw_order_by_day['sum_amount'].pct_change().fillna(0)

# 追加数据至dw_order_by_day

dw_order_by_day.to_sql('dw_order_by_day_jrh', con=adventure_dw,

if_exists='replace', index=False)

2.1.2 聚合生成dw_order_diff表

上一步中得到dw_order_by_day每日环比表,将当前时间维度和去年同期维度下相同字段进行聚合,在计算销售额,销售量,和客单价的同比,关键代码如下(部分):

"""求取各阶段的总金额"""

"""当天"""

# 当天的总金额

today_amount = dw_order_by_day[dw_order_by_day['is_today'] == 1]['sum_amount'].sum()

# 去年同期的日期维度

before_year_today = list(dw_order_by_day[dw_order_by_day['is_today'] == 1]

['create_date'] + datetime.timedelta(days=-365))

# 去年同期总金额

before_year_today_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(before_year_today)]['sum_amount'].sum()

"""昨天"""

# 昨天的总金额

yesterday_amount = dw_order_by_day[dw_order_by_day['is_yesterday'] == 1]['sum_amount'].sum()

# 去年同期的日期维度

before_year_yesterday = list(dw_order_by_day[dw_order_by_day['is_yesterday'] == 1]

['create_date'] + datetime.timedelta(days=-365))

# 去年同期总金额

before_year_yesterday_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(

before_year_yesterday)]['sum_amount'].sum()

"""当前月份"""

# 当月的总金额

month_amount = dw_order_by_day[dw_order_by_day['is_current_month'] == 1]['sum_amount'].sum()

# 去年同期的日期维度

before_year_month = list(dw_order_by_day[dw_order_by_day['is_current_month'] == 1]

['create_date'] + datetime.timedelta(days=-365))

# 去年同期总金额

before_year_month_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(

before_year_month)]['sum_amount'].sum()

"""当前季度"""

# 当前季度的总金额

quarter_amount = dw_order_by_day[dw_order_by_day['is_current_quarter'] == 1]['sum_amount'].sum()

# 去年同期的日期维度

before_year_quarter = list(dw_order_by_day[dw_order_by_day['is_current_quarter'] == 1]

['create_date'] + datetime.timedelta(days=-365))

# 去年同期总金额

before_year_quarter_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(

before_year_quarter)]['sum_amount'].sum()

"""当前年份"""

# 当年的总金额

year_amount = dw_order_by_day[dw_order_by_day['is_current_year'] == 1]['sum_amount'].sum()

# 去年同期的日期维度

before_year_year = list(dw_order_by_day[dw_order_by_day['is_current_year'] == 1]

['create_date'] + datetime.timedelta(days=-365))

# 去年同期总金额

before_year_year_amount = dw_order_by_day[dw_order_by_day['create_date'].isin(

before_year_year)]['sum_amount'].sum()

"""求取各阶段的总金额、订单数的同期对比数据"""

# 做符号简称,横向提取数据方便

amount_dict = {'today_diff': [today_amount / before_year_today_amount - 1,

today_order / before_year_today_order - 1,

(today_amount / today_order) / (before_year_today_amount /

before_year_today_order) - 1],

'yesterday_diff': [yesterday_amount / before_year_yesterday_amount - 1,

yesterday_order / before_year_yesterday_order - 1,

(yesterday_amount / yesterday_order) / (before_year_yesterday_amount /

before_year_yesterday_order) - 1],

'month_diff': [month_amount / before_year_month_amount - 1,

month_order / before_year_month_order - 1,

(month_amount / month_order) / (before_year_month_amount /

before_year_month_order) - 1],

'quarter_diff': [quarter_amount / before_year_quarter_amount - 1,

quarter_order / before_year_quarter_order - 1,

(quarter_amount / quarter_order) / (before_year_quarter_amount /

before_year_quarter_order) - 1],

'year_diff': [year_amount / before_year_year_amount - 1,

year_order / before_year_year_order - 1,

(year_amount / year_order) / (before_year_year_amount /

before_year_year_order) - 1],

'flag': ['amount', 'order', 'avg']}

dw_amount_diff = pd.DataFrame(amount_dict)

dw_amount_diff

"""存储新的dw_amount_diff表 """

dw_amount_diff.to_sql('dw_amount_diff_jrh', con=adventure_dw, if_exists='replace', index=False)

2.1.3 聚合生成update_sum_data表

将ods_sales_orders订单明细表和ods_customer连接,按照订单日期/产品名/产品子类/产品类别/所在区域/所在省份/所在城市分组聚合,再与日期维度表连接,得到sum_customer_order 关键代码如下(部分):

"""进行数据的聚合"""

sales_customer_order=pd.merge(ods_sales_orders,ods_customer,on='customer_key',how='left')

# 提取订单主键/订单日期/客户编号/产品名/产品子类/产品类别/产品单价/所在区域/所在省份/所在城市

sales_customer_order=sales_customer_order.groupby(['create_date','english_product_name','cpzl_zw',"cplb_zw",

'chinese_territory','chinese_province','chinese_city'],as_index=False).\

agg({'sales_order_key':pd.Series.nunique,'customer_key':pd.Series.nunique,

'unit_price':'sum'}).rename(columns={'sales_order_key':'order_num','customer_key':'customer_num','unit_price':'sum_amount',

'english_product_name':'product_name'})

# 转化订单日期为字符型格式

sales_customer_order['create_date'] = sales_customer_order['create_date'].apply(lambda x: x.strftime('%Y-%m-%d'))

# 获取当日日期维度

dw_customer_order = pd.merge(sales_customer_order, dim_date_df, on='create_date', how='inner')

dw_customer_order.head()

"""step5:进行数据的存储"""

dw_customer_order.to_sql('dw_customer_order_jrh', con=adventure_dw,

if_exists='replace', index=False)

2.2 在MySQL追加索引

在mysql追加索引优化查询速度,类似于让搜索结果不需要遍历整个表,而是根据前缀索引一步步搜寻。

create index index_date on dw_order_by_day_hjf(create_date(8));

select * from dw_order_by_day_hjf where create_date='2019-02-11';

EXPLAIN select * from dw_order_by_day_hjf where create_date='2019-02-11';

2.3 在Linux上做定时部署

在服务器上部署代码,让其每日自动更新,实现方法分三步:

1.定时执行命令:import schedule

2.能够跳出命令窗口输入 python xxx.py 文件使其运行:import os 的os.system()

3.将这个文件挂在后台,等时间一到,自动执行:利用Linux系统自带的&挂后台

import schedule

import time

import datetime

import os

import requests

def job1():

"""

dw_order_by_day 每日环比表

"""

print('Job1:每天6:00执行一次')

print('Job1-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

os.system(

"/home/anaconda3/bin/python3 /home/frog005/adventure/dw_order_by_day_jrh.py >> /home/frog005/adventure_jrh/jrh_logs/dw_order_by_day_schedule.log 2>&1 &")

time.sleep(20)

print('Job1-endTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

print('------------------------------------------------------------------------')

def job2():

"""

dw_order_diff 同比数据表

"""

print('Job2:每天6:20执行一次')

print('Job2-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

os.system(

"/home/anaconda3/bin/python3 /home/frog005/adventure_jrh/dw_order_diff_jrh.py >> /home/frog005/adventure_jrh/jrh_logs/dw_order_diff_schedule.log 2>&1 &")

time.sleep(20)

print('Job3-endTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

print('------------------------------------------------------------------------')

def job3():

"""

update_sum_data 生成时间地区产品聚合表

"""

print('Job3:每天6:40执行一次')

print('Job3-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

os.system(

"/home/anaconda3/bin/python3 /home/frog005/adventure_jrh/update_sum_data_jrh.py >> /home/frog005/adventure_jrh/jrh_logs/update_sum_data_schedule.log 2>&1 &")

time.sleep(20)

print('Job4-endTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

print('------------------------------------------------------------------------')

if __name__ == '__main__':

schedule.every().day.at('06:00').do(job1)

schedule.every().day.at('06:20').do(job2)

schedule.every().day.at('06:40').do(job3)

while True:

schedule.run_pending()

time.sleep(10)

print("wait", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

将schedule程序挂在后台执行,并输出日志:

nohup python3 schedule_job_jrh.py > schedule_job_jrh.log 2>&1 &

查看该schedule程序运行情况(验证是否顺利挂在后台)

ps aux| grep schedule_job_jrh.py

三、可视化看板搭建

3.1 整体销售情况

3.2 最近21天销售趋势图

3.3 区域销售详情表

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小蓝xlanll/article/detail/681840
推荐阅读
相关标签
  

闽ICP备14008679号