青岛市文章资讯

Python办公自动化之openpyxl使用与避坑全面指南

2026-04-03 15:04:01 浏览次数:2
详细信息
Python办公自动化之openpyxl使用与避坑全面指南

openpyxl是Python中最流行的Excel操作库之一,支持读写.xlsx格式文件。本指南将全面介绍openpyxl的使用方法及常见坑点。

一、基础环境配置

安装openpyxl

pip install openpyxl

基础导入

from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter, column_index_from_string
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side

二、基础操作

1. 创建新工作簿

# 创建工作簿
wb = Workbook()

# 获取活动工作表
ws = wb.active
ws.title = "我的工作表"

# 创建新工作表
ws1 = wb.create_sheet("第二个工作表")  # 插入到最后
ws2 = wb.create_sheet("第三个工作表", 0)  # 插入到第一个位置

# 保存工作簿
wb.save("示例.xlsx")

2. 加载现有工作簿

# 加载工作簿
wb = load_workbook("示例.xlsx")

# 查看所有工作表名称
print(wb.sheetnames)  # ['第三个工作表', '我的工作表', '第二个工作表']

# 选择工作表
ws = wb["我的工作表"]  # 按名称选择
ws = wb.worksheets[1]  # 按索引选择

3. 基本数据读写

# 写入数据
ws["A1"] = "姓名"
ws["B1"] = "年龄"
ws["C1"] = "部门"

# 使用行列号写入
ws.cell(row=2, column=1, value="张三")
ws.cell(row=2, column=2, value=25)
ws.cell(row=2, column=3, value="技术部")

# 批量写入
data = [
    ["李四", 28, "市场部"],
    ["王五", 32, "销售部"],
    ["赵六", 26, "人力资源部"]
]

for row in data:
    ws.append(row)

# 读取数据
print(ws["A2"].value)  # 张三
print(ws.cell(row=2, column=2).value)  # 25

# 读取范围数据
for row in ws.iter_rows(min_row=1, max_row=5, min_col=1, max_col=3, values_only=True):
    print(row)

三、高级功能

1. 样式设置

from openpyxl.styles import Font, PatternFill, Alignment, Border, Side

# 创建样式对象
header_font = Font(name="微软雅黑", size=12, bold=True, color="FFFFFF")
header_fill = PatternFill(fill_type="solid", fgColor="366092")
center_alignment = Alignment(horizontal="center", vertical="center")
thin_border = Border(
    left=Side(style="thin"),
    right=Side(style="thin"),
    top=Side(style="thin"),
    bottom=Side(style="thin")
)

# 应用样式
for cell in ws[1]:  # 第一行
    cell.font = header_font
    cell.fill = header_fill
    cell.alignment = center_alignment
    cell.border = thin_border

# 设置列宽和行高
ws.column_dimensions["A"].width = 20
ws.column_dimensions["B"].width = 10
ws.column_dimensions["C"].width = 15
ws.row_dimensions[1].height = 25

# 设置数字格式
ws["B2"].number_format = "#,##0"
ws["B3"].number_format = "0.00%"

2. 公式使用

# 写入公式
ws["D1"] = "合计"
ws["D2"] = "=SUM(B2:B5)"

# 注意:openpyxl默认不计算公式,需要设置data_only=True读取计算值
ws["D2"].value = "=SUM(B2:B5)"

# 保存后重新加载
wb.save("示例.xlsx")
wb_calc = load_workbook("示例.xlsx", data_only=True)
ws_calc = wb_calc.active
print(ws_calc["D2"].value)  # 如果Excel已经计算过,这里会显示计算值

3. 合并单元格

# 合并单元格
ws.merge_cells("A1:C1")
ws["A1"] = "员工信息表"
ws["A1"].alignment = Alignment(horizontal="center", vertical="center")

# 取消合并
# ws.unmerge_cells("A1:C1")

4. 插入图片

from openpyxl.drawing.image import Image

# 插入图片
img = Image("logo.png")
img.width = 100
img.height = 100
ws.add_image(img, "E1")

5. 筛选和排序

# 添加筛选器
ws.auto_filter.ref = "A1:C5"

# 注意:openpyxl只设置筛选区域,不执行实际筛选

6. 图表创建

from openpyxl.chart import BarChart, Reference, Series

# 创建柱状图
chart = BarChart()
chart.title = "年龄分布"
chart.x_axis.title = "姓名"
chart.y_axis.title = "年龄"

# 指定数据范围
data = Reference(ws, min_col=2, min_row=1, max_row=5, max_col=2)
categories = Reference(ws, min_col=1, min_row=2, max_row=5)

# 添加数据到图表
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)

# 将图表插入工作表
ws.add_chart(chart, "E10")

四、性能优化技巧

1. 批量写入数据

# 低效方式(不推荐)
for i in range(1, 1001):
    for j in range(1, 51):
        ws.cell(row=i, column=j, value=f"数据{i}-{j}")

# 高效方式(推荐)
data = []
for i in range(1, 1001):
    row_data = [f"数据{i}-{j}" for j in range(1, 51)]
    data.append(row_data)

for row in data:
    ws.append(row)

2. 使用只读模式读取大文件

from openpyxl import load_workbook

# 只读模式,适用于大文件
wb = load_workbook("大数据文件.xlsx", read_only=True)
ws = wb.active

# 逐行读取,节省内存
for row in ws.iter_rows(values_only=True):
    print(row)

# 注意:在只读模式下,不能修改工作簿

3. 使用只写模式写入大文件

from openpyxl import Workbook
from openpyxl.writer.excel import save_workbook

# 只写模式,适用于生成大文件
wb = Workbook(write_only=True)
ws = wb.create_sheet()

# 使用append添加行(必须是可迭代对象)
for row in range(1, 10001):
    ws.append([f"数据{row}-{col}" for col in range(1, 51)])

wb.save("大文件.xlsx")

五、常见坑点及解决方案

坑点1:公式不计算

问题:openpyxl保存的公式不会自动计算 解决方案

# 方案1:使用data_only=True读取已计算的值(需要Excel先计算过)
wb = load_workbook("文件.xlsx", data_only=True)

# 方案2:使用Python计算并写入结果
import pandas as pd

def calculate_formula(ws, cell_address):
    """简单公式计算示例"""
    formula = ws[cell_address].value
    if formula and formula.startswith("="):
        if "SUM" in formula:
            # 提取SUM范围,如SUM(B2:B10)
            range_str = formula.split("(")[1].split(")")[0]
            # 实际应用中需要更完整的公式解析
    return None

坑点2:日期时间处理

问题:Excel和Python的日期格式不同 解决方案

from datetime import datetime
from openpyxl.utils import datetime

# 正确写入日期
ws["A1"] = datetime.datetime.now()

# 设置日期格式
ws["A1"].number_format = "YYYY-MM-DD HH:MM:SS"

# 读取日期
excel_date = ws["A1"].value
if isinstance(excel_date, datetime.datetime):
    print(excel_date.strftime("%Y-%m-%d"))

坑点3:样式继承问题

问题:修改单元格样式可能影响其他单元格 解决方案

# 错误方式:直接修改样式会影响其他单元格
bad_font = ws["A1"].font
bad_font.bold = True  # 这会修改所有使用此字体的单元格

# 正确方式:创建新样式对象
from copy import copy

# 复制样式并修改
new_font = copy(ws["A1"].font)
new_font.bold = True
ws["A1"].font = new_font

坑点4:内存泄漏

问题:处理大文件时内存占用高 解决方案

# 使用iter_rows分批处理
batch_size = 1000
rows_processed = 0

for row in ws.iter_rows(min_row=2, values_only=True):
    # 处理数据
    process_row(row)

    rows_processed += 1
    if rows_processed % batch_size == 0:
        print(f"已处理 {rows_processed} 行")

# 及时关闭工作簿
del wb

坑点5:文件名和路径问题

问题:特殊字符和路径错误 解决方案

import os
from pathlib import Path

def safe_save(wb, filename):
    """安全保存函数"""
    # 处理路径
    path = Path(filename)

    # 确保目录存在
    path.parent.mkdir(parents=True, exist_ok=True)

    # 清理文件名中的特殊字符
    safe_name = "".join(c for c in path.name if c.isalnum() or c in "._- ")

    # 构建安全路径
    safe_path = path.parent / safe_name

    try:
        wb.save(safe_path)
        return safe_path
    except Exception as e:
        print(f"保存失败: {e}")
        return None

坑点6:中文乱码

问题:中文字符显示异常 解决方案

# 设置中文字体
from openpyxl.styles import Font

chinese_font = Font(name="微软雅黑", size=11)
ws["A1"].font = chinese_font

# 或者使用系统字体
import platform
system = platform.system()
if system == "Windows":
    font_name = "微软雅黑"
elif system == "Darwin":  # macOS
    font_name = "PingFang SC"
else:  # Linux
    font_name = "WenQuanYi Micro Hei"

chinese_font = Font(name=font_name, size=11)

六、实用工具函数

1. 批量设置样式

def apply_style_to_range(ws, cell_range, style_dict):
    """为指定范围应用样式"""
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side

    for row in ws[cell_range]:
        for cell in row:
            if "font" in style_dict:
                cell.font = style_dict["font"]
            if "fill" in style_dict:
                cell.fill = style_dict["fill"]
            if "alignment" in style_dict:
                cell.alignment = style_dict["alignment"]
            if "border" in style_dict:
                cell.border = style_dict["border"]

2. 数据验证

from openpyxl.worksheet.datavalidation import DataValidation

def add_data_validation(ws, cell, options):
    """添加数据验证"""
    dv = DataValidation(
        type="list",
        formula1=f'"{",".join(options)}"',
        allow_blank=True
    )
    dv.add(cell)
    ws.add_data_validation(dv)

3. Excel报告生成器

class ExcelReport:
    """Excel报告生成器"""

    def __init__(self, filename):
        self.wb = Workbook()
        self.ws = self.wb.active
        self.filename = filename

    def add_title(self, title, cell="A1", merge_range=None):
        """添加标题"""
        if merge_range:
            self.ws.merge_cells(merge_range)
            self.ws[cell] = title
            title_cell = self.ws[cell]
        else:
            title_cell = self.ws[cell]
            title_cell.value = title

        title_cell.font = Font(size=16, bold=True)
        title_cell.alignment = Alignment(horizontal="center")

    def add_table(self, headers, data, start_cell="A3"):
        """添加表格"""
        # 添加表头
        start_col = column_index_from_string(start_cell[0])
        start_row = int(start_cell[1:])

        for i, header in enumerate(headers):
            cell = self.ws.cell(row=start_row, column=start_col+i)
            cell.value = header
            cell.font = Font(bold=True)
            cell.fill = PatternFill(fill_type="solid", fgColor="E0E0E0")

        # 添加数据
        for i, row_data in enumerate(data):
            for j, value in enumerate(row_data):
                cell = self.ws.cell(row=start_row+i+1, column=start_col+j)
                cell.value = value

    def save(self):
        """保存文件"""
        self.wb.save(self.filename)

七、完整示例

员工信息表生成

from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from datetime import datetime

def create_employee_report():
    # 创建Workbook
    wb = Workbook()
    ws = wb.active
    ws.title = "员工信息"

    # 数据
    headers = ["工号", "姓名", "部门", "入职日期", "薪资", "绩效评级"]
    employees = [
        ["001", "张三", "技术部", datetime(2020, 3, 15), 15000, "A"],
        ["002", "李四", "市场部", datetime(2019, 7, 22), 12000, "B"],
        ["003", "王五", "销售部", datetime(2021, 1, 10), 13000, "A"],
        ["004", "赵六", "人力资源部", datetime(2018, 11, 5), 11000, "C"],
        ["005", "钱七", "技术部", datetime(2022, 5, 30), 14000, "B"]
    ]

    # 添加标题
    ws.merge_cells("A1:F1")
    title_cell = ws["A1"]
    title_cell.value = "员工信息表"
    title_cell.font = Font(name="微软雅黑", size=18, bold=True, color="366092")
    title_cell.alignment = Alignment(horizontal="center", vertical="center")

    # 添加表头
    header_font = Font(bold=True, color="FFFFFF")
    header_fill = PatternFill(fill_type="solid", fgColor="366092")
    center_alignment = Alignment(horizontal="center", vertical="center")

    for col_idx, header in enumerate(headers, start=1):
        cell = ws.cell(row=2, column=col_idx)
        cell.value = header
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = center_alignment

    # 添加数据
    for row_idx, employee in enumerate(employees, start=3):
        for col_idx, value in enumerate(employee, start=1):
            cell = ws.cell(row=row_idx, column=col_idx)
            cell.value = value

            # 设置日期格式
            if isinstance(value, datetime):
                cell.number_format = "YYYY-MM-DD"

            # 设置数字格式
            if col_idx == 5:  # 薪资列
                cell.number_format = "#,##0"

            # 设置对齐
            cell.alignment = Alignment(horizontal="center")

    # 设置列宽
    column_widths = [10, 15, 15, 15, 12, 12]
    for i, width in enumerate(column_widths, start=1):
        column_letter = get_column_letter(i)
        ws.column_dimensions[column_letter].width = width

    # 添加边框
    thin_border = Border(
        left=Side(style="thin"),
        right=Side(style="thin"),
        top=Side(style="thin"),
        bottom=Side(style="thin")
    )

    for row in ws.iter_rows(min_row=2, max_row=len(employees)+2, 
                           min_col=1, max_col=len(headers)):
        for cell in row:
            cell.border = thin_border

    # 保存文件
    filename = f"员工信息表_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    wb.save(filename)
    print(f"文件已保存: {filename}")

    return filename

# 执行生成
create_employee_report()

八、最佳实践总结

文件操作:始终使用with语句或确保正确关闭文件 性能优化:对于大数据量使用read_onlywrite_only模式 样式管理:创建样式对象并复用,避免重复创建 错误处理:添加适当的异常处理机制 代码组织:将常用功能封装为函数或类 兼容性:注意不同Excel版本的兼容性问题 资源清理:及时删除不需要的对象释放内存

通过掌握以上内容,您将能够高效地使用openpyxl进行Excel自动化操作,并避免常见的陷阱。

相关推荐