确保Python
已安装依赖项:pandas
和openpyxl
pip install pandas openpyxl
把以下代码保存为replace_words.py文件
:
import pandas as pd import re from openpyxl import load_workbook from openpyxl.utils import get_column_letter def replace_words(html_text, replace_text): # Function to replace whole words, preserving case and punctuation def replacement(match): word = match.group(0) # Preserve the punctuation punctuation = '' if word and word[-1] in ',.!?': punctuation = word[-1] word = word[:-1] # Only replace if it's an exact word match (case sensitive) if word.lower() == replace_text.lower(): return f'it{punctuation}' return match.group(0) # Use word boundaries to match whole words only pattern = r'\b' + re.escape(replace_text) + r'\b[,.\!\?]?|\b' + re.escape(replace_text) + r'\b' return re.sub(pattern, replacement, html_text, flags=re.IGNORECASE) def process_excel(input_file, output_file): try: # Read Excel sheets sheet1 = pd.read_excel(input_file, sheet_name='Sheet1') sheet2 = pd.read_excel(input_file, sheet_name='Sheet2') # Ensure required columns exist if not all(col in sheet1.columns for col in ['Variant SKU', 'Body (HTML)']): raise ValueError("Sheet1 must contain 'Variant SKU' and 'Body (HTML)' columns") if not all(col in sheet2.columns for col in ['Variant SKU', 'Replace Text']): raise ValueError("Sheet2 must contain 'Variant SKU' and 'Replace Text' columns") # Create a dictionary for SKU to Replace Text mapping replace_dict = dict(zip(sheet2['Variant SKU'], sheet2['Replace Text'])) # Process replacements sheet1['Body (HTML)'] = sheet1.apply( lambda row: replace_words(row['Body (HTML)'], replace_dict.get(row['Variant SKU'], '')) if row['Variant SKU'] in replace_dict else row['Body (HTML)'], axis=1 ) # Save to new Excel file while preserving formatting with pd.ExcelWriter(output_file, engine='openpyxl') as writer: sheet1.to_excel(writer, sheet_name='Sheet1', index=False) sheet2.to_excel(writer, sheet_name='Sheet2', index=False) # Adjust column widths wb = writer.book for sheet_name in ['Sheet1', 'Sheet2']: ws = wb[sheet_name] for col in ws.columns: max_length = 0 column = col[0].column_letter for cell in col: try: if len(str(cell.value)) > max_length: max_length = len(str(cell.value)) except: pass adjusted_width = max_length + 2 ws.column_dimensions[column].width = adjusted_width except Exception as e: print(f"Error processing Excel file: {str(e)}") if __name__ == "__main__": input_file = "title.xlsx" # Replace with your input file path output_file = "output.xlsx" # Replace with your output file path process_excel(input_file, output_file) print("Excel processing completed. Check the output file.")