赞
踩
# 定义标题与窗口大小 self.setWindowTitle("数据清洗") self.setGeometry(100, 100, 1280, 720) self.stacked_widget = QStackedWidget(self) self.setCentralWidget(self.stacked_widget) self.label = QLabel(datetime.now().strftime("%Y-%m-%d"), self) self.label.setGeometry(10, 5, 130, 50) self.label.setFont(QFont("等线", 18)) # 设置字体和字号 self.clean_data = QPushButton('数据清洗', self) self.clean_data.setFont(QFont('等线', 11)) self.clean_data.setGeometry(155, 10, 130, 30) self.clean_data.clicked.connect(self.show_data_cleaning) self.zip_data = QPushButton('压缩/解压', self) self.zip_data.setFont(QFont('等线', 11)) self.zip_data.setGeometry(300, 10, 130, 30) self.zip_data.clicked.connect(self.show_zip_page) self.zip_data = QPushButton('合并excel', self) self.zip_data.setFont(QFont('等线', 11)) self.zip_data.setGeometry(445, 10, 130, 30) self.zip_data.clicked.connect(self.select_directory) self.line = SeparatorLine(self) self.line.setGeometry(150, 0, 2, 10000) # 添加垂直分割线 self.line_horizontal = SeparatorLine1(self) # noinspection PyTypeChecker self.line_horizontal.setGeometry(150, self.height() / 8, 10000, 2) # 添加水平分割线 def show_data_cleaning(self): if not self.data_cleaning: self.data_cleaning = DataCleaning() self.stacked_widget.addWidget(self.data_cleaning) self.stacked_widget.setCurrentWidget(self.data_cleaning) def show_zip_page(self): if not self.Compression: self.Compression = CompressionDecompression() self.stacked_widget.addWidget(self.Compression) self.stacked_widget.setCurrentWidget(self.Compression) def select_directory(self): if not self.file_directory: self.file_directory = SelectDirectory() self.stacked_widget.addWidget(self.file_directory) self.stacked_widget.setCurrentWidget(self.file_directory)
class DataCleaning(Layout):
    """Page that cleans the text cells of a loaded table and saves the result.

    The user picks an input file, optionally a 1-based column to clean and a
    1-based column to de-duplicate on, then starts the run. Cleaning a cell
    means: drop blank lines, strip leading punctuation and surrounding
    whitespace from every remaining line, and replace emoji with the literal
    text ``<emoji>``.
    """

    # Characters stripped from the start of every text line.
    _LEADING_PUNCTUATION = ',.?!;:,。?;:'

    def __init__(self):
        """Build all widgets of the page; optional inputs start hidden."""
        super().__init__()
        self.save_path = None
        self.file_path = None
        # NOTE(review): this attribute appears to shadow the Qt ``layout()``
        # accessor of the widget; kept as-is because other code may read it.
        self.layout = QVBoxLayout()

        self.progress_bar = QProgressBar(self)
        self.progress_bar.setGeometry(155, 235, 668, 28)
        self.progress_bar.setVisible(False)

        self.button_select = self._add_button(
            "打开", (10, 60, 130, 30), self.select_file)
        self.button_save = self._add_button(
            "另存为", (10, 95, 130, 30), self.save_file)
        self.button_run = self._add_button(
            "开始清洗", (10, 130, 130, 30), self.run)
        self.button = self._add_button(
            "清洗附加选项", (155, 60, 130, 30), self.button_clicked)

        # Hint shown next to the optional inputs.
        self.label_1_1 = QLabel(self)
        self.label_1_1.setGeometry(290, 50, 700, 50)
        self.label_1_1.setFont(QFont("等线", 14))
        self.label_1_1.setVisible(False)

        # Optional input: 1-based index of the only column to clean.
        self.input_text = QLineEdit(self)
        self.input_text.setGeometry(155, 125, 130, 30)
        self.input_text.setVisible(False)
        self.input_text.setValidator(QIntValidator())

        self.label_info = QLabel("清洗指定列", self)
        self.label_info.setFont(QFont('等线', 11))
        self.label_info.setGeometry(155, 95, 130, 30)
        self.label_info.setVisible(False)

        self.label_info_1 = QLabel("根据指定列去重", self)
        self.label_info_1.setFont(QFont('等线', 11))
        self.label_info_1.setGeometry(155, 155, 130, 30)
        self.label_info_1.setVisible(False)

        # Optional input: 1-based index of the column to de-duplicate on.
        self.input_text_1 = QLineEdit(self)
        self.input_text_1.setGeometry(155, 185, 130, 30)
        self.input_text_1.setVisible(False)
        self.input_text_1.setValidator(QIntValidator())

        # Status indicators: red while cleaning, green while saving.
        self.red_light = QLabel(self)
        self.red_light.setGeometry(10, 170, 20, 20)
        self.red_light.setStyleSheet("background-color: red")
        self.red_light.setVisible(False)

        self.green_light = QLabel(self)
        self.green_light.setGeometry(35, 170, 20, 20)
        self.green_light.setStyleSheet("background-color: green")
        self.green_light.setVisible(False)

        self.line_horizontal_1 = SeparatorLine1(self)
        self.line_horizontal_1.setGeometry(150, 225, 10000, 2)
        self.line_horizontal_1.setVisible(False)

    def _add_button(self, text, geometry, slot):
        """Create a push button with the page font, place it and wire *slot*."""
        button = QPushButton(text, self)
        button.setFont(QFont('等线', 11))
        button.setGeometry(*geometry)
        button.clicked.connect(slot)
        return button

    def button_clicked(self):
        """Toggle visibility of the optional inputs, their labels and the hint."""
        self.label_1_1.setText('(温馨提示,下方的输入框只能输入数字,以下参数非必须参数,可不填)')
        self.input_text.setVisible(not self.input_text.isVisible())
        self.label_info.setVisible(self.input_text.isVisible())
        self.input_text_1.setVisible(not self.input_text_1.isVisible())
        self.label_info_1.setVisible(self.input_text_1.isVisible())
        self.label_1_1.setVisible(not self.label_1_1.isVisible())

    def select_file(self):
        """Ask for the input file; keep the previous choice on cancel."""
        file_dialog = QFileDialog()
        file_path, _ = file_dialog.getOpenFileName(
            self, "选择文件", "", "All Files (*)")
        if file_path:
            self.file_path = file_path

    def save_file(self):
        """Ask for the output path and create an empty workbook there.

        If the chosen file exists the user must confirm overwriting it;
        declining resets ``self.save_path`` to ``''``. Cancelling the dialog
        leaves ``self.save_path`` untouched.
        """
        file_dialog = QFileDialog()
        file_dialog.setWindowTitle("另存为")
        file_dialog.setNameFilters([
            "Text Files (*.txt)",
            "CSV Files (*.csv)",
            "Excel Files (*.xlsx *.xls)",
            "JSON Files (*.json *.jsonl)",
        ])
        # Must match one of the filters above exactly, otherwise the call
        # selects nothing.
        file_dialog.selectNameFilter("Excel Files (*.xlsx *.xls)")
        if not file_dialog.exec():
            return

        self.save_path = file_dialog.selectedFiles()[0]
        if os.path.exists(self.save_path):
            # noinspection PyUnresolvedReferences
            answer = QMessageBox.warning(
                self, "警告", "文件已存在,是否覆盖?",
                QMessageBox.Yes | QMessageBox.No)
            # noinspection PyUnresolvedReferences
            if answer != QMessageBox.Yes:
                self.save_path = ''
                return
        bag.Bag.save_excel([], self.save_path)

    def run(self):
        """Clean the selected file according to the optional inputs and save it.

        Both inputs are 1-based column numbers and may be left empty:
        an empty "clean column" cleans every cell of every row, an empty
        "de-duplicate column" keeps all rows. Any error is reported in a
        message box instead of propagating.
        """
        if not self.file_path:
            # Same fallback the save path uses: prompt instead of failing.
            self.select_file()
            if not self.file_path:
                return

        column_text = self.input_text.text()
        dedup_text = self.input_text_1.text()

        self.progress_bar.setValue(0)
        self.progress_bar.setVisible(True)
        self.line_horizontal_1.setVisible(True)

        try:
            rows = judge(self.file_path)
            clean_col = int(column_text) - 1 if column_text else None
            dedup_col = int(dedup_text) - 1 if dedup_text else None
            self._clean_and_save(rows, clean_col, dedup_col)
            QMessageBox.information(self, "完成", "数据清洗完成")
        except Exception as e:  # GUI boundary: surface every failure to the user
            QMessageBox.critical(self, "错误", str(e))

    def _clean_and_save(self, rows, clean_col, dedup_col):
        """Clean *rows*, optionally de-duplicate them, then write them out.

        :param rows: sequence of rows, each an indexable sequence of cells.
        :param clean_col: 0-based column to clean in place, or ``None`` to
            clean every cell (which builds new row lists).
        :param dedup_col: 0-based column whose first occurrence wins, or
            ``None`` to keep all rows.
        """
        total = len(rows)
        self.red_light.setVisible(True)
        try:
            cleaned = []
            for index, row in enumerate(tqdm(rows)):
                if clean_col is None:
                    row = [self._clean_cell(cell) for cell in row]
                else:
                    row[clean_col] = self._clean_cell(row[clean_col])
                cleaned.append(row)
                self.progress_bar.setValue(int((index + 1) / total * 100))
                QApplication.processEvents()  # keep the UI responsive
            if dedup_col is not None:
                cleaned = self._drop_duplicates(cleaned, dedup_col)
        finally:
            # Never leave the "busy" indicator on after a failure.
            self.red_light.setVisible(False)

        self.green_light.setVisible(True)
        try:
            QApplication.processEvents()  # let the indicator actually paint
            if not self.save_path:
                self.save_file()
            bag.Bag.save_excel(cleaned, self.save_path)
            time.sleep(1)  # keep the "done" indicator visible briefly
        finally:
            self.green_light.setVisible(False)

    @classmethod
    def _clean_cell(cls, cell):
        """Return *cell* with blank lines, leading punctuation and emoji removed.

        Non-string cells are returned unchanged so that tables with numeric
        or empty cells can be cleaned without raising.
        """
        if not isinstance(cell, str):
            return cell
        kept = []
        for line in cell.split('\n'):
            if not re.sub(r'\s', '', line):
                continue  # whitespace-only line
            text = line.lstrip(cls._LEADING_PUNCTUATION).strip()
            kept.append(emoji.replace_emoji(text, replace='<emoji>'))
        return '\n'.join(kept)

    @staticmethod
    def _drop_duplicates(rows, column):
        """Keep the first row for every distinct value found in *column*."""
        seen = set()
        seen_unhashable = []
        unique = []
        for row in rows:
            key = row[column]
            try:
                if key in seen:
                    continue
                seen.add(key)
            except TypeError:
                # Unhashable value: fall back to a linear equality search.
                if key in seen_unhashable:
                    continue
                seen_unhashable.append(key)
            unique.append(row)
        return unique
class CompressionDecompression(Layout):
def init(self):
super().init()
self.layout = QVBoxLayout()
self.file_path = None self.button_compress = QPushButton("压缩文件", self) self.button_compress.setFont(QFont('等线', 11)) self.button_compress.setGeometry(10, 60, 130, 30) self.button_compress.clicked.connect(self.on_button_clicked) self.button_extract = QPushButton("解压文件", self) self.button_extract.setFont(QFont('等线', 11)) self.button_extract.setGeometry(10, 95, 130, 30) self.button_extract.clicked.connect(self.on_button_clicked1) def on_button_clicked(self): selected_file = bool(self.file_path) if selected_file: extract_path = QFileDialog.getExistingDirectory(self, "另存为") if bool(extract_path): unzip_file(selected_file, extract_path) QMessageBox.information(self, 'success', '解压成功!') else: QMessageBox.critical(self, 'error', '解压失败') else: self.select_file() extract_path = QFileDialog.getExistingDirectory(self, "另存为") if bool(extract_path): unzip_file(self.file_path, extract_path) QMessageBox.information(self, 'success', '解压成功!') else: QMessageBox.critical(self, 'error', '解压失败') def on_button_clicked1(self): selected_file = bool(self.file_path) if selected_file: extract_path = QFileDialog.getExistingDirectory(window, "选择文件夹", options=QFileDialog.ShowDirsOnly) if bool(extract_path): self.compress_to_zip(selected_file, extract_path) QMessageBox.information(self, 'success', '压缩成功!') else: QMessageBox.critical(self, 'error', '压缩失败') else: self.select_folder() extract_path = QFileDialog.getExistingDirectory(window, "选择文件夹", options=QFileDialog.ShowDirsOnly) if bool(extract_path): self.compress_to_zip(self.file_path, extract_path) QMessageBox.critical(self, 'success', '压缩成功!') else: QMessageBox.critical(self, 'error', '压缩失败') def select_file(self): # 选择文件 file_dialog = QFileDialog() file_path, _ = file_dialog.getOpenFileName(self, "选择文件", "", "All Files (*)") if file_path:
一、Python所有方向的学习路线
这里对Python所有方向的技术点做了整理,形成各个领域的知识点汇总。它的用处在于,你可以按照下面的知识点去找对应的学习资源,保证自己学得较为全面。
二、Python必备开发工具
工具都帮大家整理好了,安装就可直接上手!
三、最新Python学习笔记
当我学到一定基础,有自己的理解能力的时候,会去阅读一些前辈整理的书籍或者手写的笔记资料,这些笔记详细记载了他们对一些技术点的理解,这些理解是比较独到,可以学到不一样的思路。
四、Python视频合集
观看全面零基础学习视频,看视频学习是最快捷也是最有效果的方式,跟着视频中老师的思路,从基础到深入,还是很容易入门的。
五、实战案例
纸上得来终觉浅,要学会跟着视频一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。
六、面试宝典
网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。
一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。