当前位置:迷你笔记 » 技术 » Python替换筛选替换特定单词

Python替换筛选替换特定单词

确保Python已安装依赖项:pandasopenpyxl

pip install pandas openpyxl

把以下代码保存为replace_words.py文件

import pandas as pd
import re
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter

def replace_words(html_text, replace_text):
    # Function to replace whole words, preserving case and punctuation
    def replacement(match):
        word = match.group(0)
        # Preserve the punctuation
        punctuation = ''
        if word and word[-1] in ',.!?':
            punctuation = word[-1]
            word = word[:-1]
        # Only replace if it's an exact word match (case sensitive)
        if word.lower() == replace_text.lower():
            return f'it{punctuation}'
        return match.group(0)
    
    # Use word boundaries to match whole words only
    pattern = r'\b' + re.escape(replace_text) + r'\b[,.\!\?]?|\b' + re.escape(replace_text) + r'\b'
    return re.sub(pattern, replacement, html_text, flags=re.IGNORECASE)

def process_excel(input_file, output_file):
    try:
        # Read Excel sheets
        sheet1 = pd.read_excel(input_file, sheet_name='Sheet1')
        sheet2 = pd.read_excel(input_file, sheet_name='Sheet2')
        
        # Ensure required columns exist
        if not all(col in sheet1.columns for col in ['Variant SKU', 'Body (HTML)']):
            raise ValueError("Sheet1 must contain 'Variant SKU' and 'Body (HTML)' columns")
        if not all(col in sheet2.columns for col in ['Variant SKU', 'Replace Text']):
            raise ValueError("Sheet2 must contain 'Variant SKU' and 'Replace Text' columns")
        
        # Create a dictionary for SKU to Replace Text mapping
        replace_dict = dict(zip(sheet2['Variant SKU'], sheet2['Replace Text']))
        
        # Process replacements
        sheet1['Body (HTML)'] = sheet1.apply(
            lambda row: replace_words(row['Body (HTML)'], replace_dict.get(row['Variant SKU'], '')) 
            if row['Variant SKU'] in replace_dict else row['Body (HTML)'],
            axis=1
        )
        
        # Save to new Excel file while preserving formatting
        with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
            sheet1.to_excel(writer, sheet_name='Sheet1', index=False)
            sheet2.to_excel(writer, sheet_name='Sheet2', index=False)
            
            # Adjust column widths
            wb = writer.book
            for sheet_name in ['Sheet1', 'Sheet2']:
                ws = wb[sheet_name]
                for col in ws.columns:
                    max_length = 0
                    column = col[0].column_letter
                    for cell in col:
                        try:
                            if len(str(cell.value)) > max_length:
                                max_length = len(str(cell.value))
                        except:
                            pass
                    adjusted_width = max_length + 2
                    ws.column_dimensions[column].width = adjusted_width
                    
    except Exception as e:
        print(f"Error processing Excel file: {str(e)}")

if __name__ == "__main__":
    input_file = "title.xlsx"  # Replace with your input file path
    output_file = "output.xlsx"  # Replace with your output file path
    process_excel(input_file, output_file)
    print("Excel processing completed. Check the output file.")
未经允许不得转载:迷你笔记 » Python替换筛选替换特定单词

相关文章

评论 (0)

4 + 3 =