确保 Python 已安装依赖项:pandas、openpyxl、xlrd。可使用以下命令安装:
pip install pandas openpyxl xlrd
把以下代码保存为 merge_csv_files.py 文件:
import os
import pandas as pd
from pathlib import Path
from datetime import datetime

# Encodings tried, in order, when parsing a file as CSV.
# NOTE(review): 'latin1' maps every byte value, so decoding cannot fail once
# it is reached; the entries after it (and the Excel fallback) are only tried
# when parsing itself raises. The order is kept as originally written.
_CSV_ENCODINGS = ['utf-8', 'gbk', 'gb2312', 'latin1', 'cp1252', 'utf-16', 'big5', 'shift_jis']

# Excel engines tried, in order, when no CSV encoding worked.
_EXCEL_ENGINES = ['openpyxl', 'xlrd']

# File-name prefix of the merged output; also used to recognise outputs of
# earlier runs so that they are not merged into the next result.
_OUTPUT_PREFIX = 'merged_output_'


def _load(file_path, nrows=None, report_errors=False):
    """Parse *file_path* as CSV, falling back to Excel.

    Args:
        file_path: Path of the file to read.
        nrows: Maximum number of data rows to read; ``None`` reads all rows.
        report_errors: When true, print a message for each Excel engine that
            fails.

    Returns:
        The parsed ``DataFrame``, or ``None`` if every attempt raised.
    """
    # Try CSV parsing first, one encoding after another.
    for encoding in _CSV_ENCODINGS:
        try:
            return pd.read_csv(file_path, nrows=nrows, encoding=encoding)
        except Exception:
            # Best-effort probing: decode errors, parser errors and empty
            # files all just mean "try the next candidate".
            continue

    # Fall back to Excel parsing with the available engines.
    for engine in _EXCEL_ENGINES:
        try:
            return pd.read_excel(file_path, nrows=nrows, engine=engine)
        except Exception as e:
            if report_errors:
                print(f"使用 {engine} 引擎读取 Excel 文件 {file_path} 失败: {e}")

    return None


def is_valid_file(file_path) -> bool:
    """Return True if the first row of *file_path* parses as CSV or Excel."""
    return _load(file_path, nrows=1) is not None


def read_file(file_path):
    """Read *file_path* fully as CSV, falling back to Excel.

    Prints a diagnostic for each failing Excel engine and a final error
    message when nothing could read the file.

    Returns:
        The parsed ``DataFrame``, or ``None`` if the file could not be read.
    """
    df = _load(file_path, report_errors=True)
    if df is None:
        print(f"读取文件 {file_path} 错误: 无法使用可用编码或 Excel 引擎解码")
    return df


def merge_csv_files() -> None:
    """Merge every readable ``*.csv`` file in the current directory.

    The frames are concatenated row-wise and written to
    ``merged_output_<timestamp>.csv`` (UTF-8, no index column) in the current
    working directory. Progress and errors are reported with ``print``.
    """
    current_dir = Path.cwd()
    dfs = []

    # Sort so the row order of the merged file is reproducible; glob order
    # depends on the file system.
    for file_path in sorted(current_dir.glob('*.[cC][sS][vV]')):
        # Outputs of earlier runs live in the same directory and match the
        # glob; merging them again would duplicate rows on every re-run.
        if file_path.name.startswith(_OUTPUT_PREFIX):
            print(f"跳过先前生成的合并文件: {file_path}")
            continue

        if not is_valid_file(file_path):
            print(f"跳过无效文件: {file_path}")
            continue

        df = read_file(file_path)
        if df is not None:
            dfs.append(df)
            print(f"成功处理: {file_path}")

    if not dfs:
        print("未找到可合并的有效 CSV 文件")
        return

    try:
        merged_df = pd.concat(dfs, ignore_index=True)

        # Build a timestamped file name so earlier outputs are not overwritten.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = current_dir / f'{_OUTPUT_PREFIX}{timestamp}.csv'

        merged_df.to_csv(output_path, index=False, encoding='utf-8')
        print(f"合并文件已保存为: {output_path}")
        print(f"总行数: {len(merged_df)}")
    except Exception as e:
        # Top-level boundary of the script: report the failure, no traceback.
        print(f"合并文件时出错: {e}")


if __name__ == "__main__":
    merge_csv_files()