On this page

Python File 方法

Python 文件对象方法详解

Python 文件对象提供了多种方法来读写和操作文件。以下是文件对象的主要方法及其使用方式的全面说明:

1. 文件打开与关闭

open() 函数

file = open('filename', mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)

close() 方法

file = open('example.txt')
# 文件操作...
file.close()  # 关闭文件

# Preferred: a with statement closes the file automatically, even on error.
with open('example.txt') as file:
    # File operations go here...
    pass  # placeholder: a block that holds only a comment is a syntax error

2. 文件读取方法

read(size=-1)

# 读取整个文件
with open('example.txt', 'r') as f:
    content = f.read()  # 读取全部内容
    
# 读取指定数量的字符(文本模式下 size 按字符计;二进制模式下按字节计)
with open('example.txt', 'r') as f:
    chunk = f.read(100)  # 读取前100个字符

readline(size=-1)

# 逐行读取
with open('example.txt', 'r') as f:
    line = f.readline()  # 读取一行
    while line:
        print(line.strip())
        line = f.readline()

readlines(hint=-1)

# 读取所有行到列表
with open('example.txt', 'r') as f:
    lines = f.readlines()  # 返回包含所有行的列表
    for line in lines:
        print(line.strip())

3. 文件写入方法

write(s)

# 写入字符串
with open('output.txt', 'w') as f:
    f.write("Hello, World!\n")
    f.write("Another line\n")

writelines(lines)

# 写入多行
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open('output.txt', 'w') as f:
    f.writelines(lines)

4. 文件位置方法

tell()

# Report the current position in the file
with open('example.txt', 'r') as f:
    print(f.tell())  # 0 (start of the file)
    f.read(10)
    # In text mode tell() returns an opaque position, not a character count.
    print(f.tell())  # 10 for single-byte content; typically larger when the 10 characters read occupy more bytes

seek(offset, whence=0)

# 移动文件指针
with open('example.txt', 'rb') as f:  # 二进制模式确保准确seek
    f.seek(10)  # 移动到第10字节
    f.seek(-5, 2)  # 从文件末尾向前移动5字节
    
    # whence参数:
    # 0 - 从文件开头(默认)
    # 1 - 从当前位置
    # 2 - 从文件末尾

5. 文件刷新与截断

flush()

# Force Python's write buffer to be emptied
with open('output.txt', 'w') as f:
    f.write("Important data")
    f.flush()  # hands the data to the OS; follow with os.fsync(f.fileno()) if it must reach the disk

truncate(size=None)

# 截断文件到指定大小
with open('example.txt', 'r+') as f:
    f.truncate(100)  # 截断到前100字节
    # 不指定size则截断到当前位置

6. 文件属性检查

fileno()

# 返回文件描述符
with open('example.txt') as f:
    print(f.fileno())  # 如: 3

isatty()

# 检查文件是否连接到终端设备
with open('example.txt') as f:
    print(f.isatty())  # False

7. 文件迭代方法

文件对象是可迭代的

# 逐行迭代(内存高效)
with open('large_file.txt') as f:
    for line in f:
        print(line.strip())

next()

# 手动迭代
with open('example.txt') as f:
    try:
        while True:
            line = next(f)
            print(line.strip())
    except StopIteration:
        pass

8. 二进制文件操作

读取和写入二进制数据

# 读取二进制文件
with open('image.jpg', 'rb') as f:
    chunk = f.read(1024)  # 读取1KB数据

# Write raw bytes to a binary file
with open('copy.jpg', 'wb') as f:
    # JPEG start-of-image + APP0 marker bytes, so the content matches the
    # .jpg extension.
    f.write(b'\xff\xd8\xff\xe0')

9. 实际应用示例

文件复制函数

def copy_file(src, dst, buffer_size=1024*1024):
    """Copy the file at ``src`` to ``dst`` in fixed-size binary chunks.

    Args:
        src: Path of the file to read.
        dst: Path of the file to create or overwrite.
        buffer_size: Maximum number of bytes requested per read
            (1 MiB by default).
    """
    with open(src, 'rb') as reader, open(dst, 'wb') as writer:
        # iter() with a sentinel keeps calling read() until it returns b'',
        # which is how a binary file signals end of file.
        for block in iter(lambda: reader.read(buffer_size), b''):
            writer.write(block)

文件搜索功能

def search_in_file(filename, keyword):
    """Print every line of a UTF-8 text file that contains ``keyword``.

    Each hit is printed with its 1-based line number followed by the line
    with surrounding whitespace stripped.

    Args:
        filename: Path of the file to scan.
        keyword: Substring to look for in each line.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        numbered = enumerate(f, start=1)
        # Filter lazily so the file is still streamed one line at a time.
        hits = ((num, text) for num, text in numbered if keyword in text)
        for line_num, line in hits:
            print(f"第{line_num}行: {line.strip()}")

日志文件分析

def analyze_log(logfile):
    """Count the lines of a log file by severity level.

    A line is attributed to the first of INFO, WARNING, ERROR (checked in
    that order) that occurs anywhere in it, so each line is counted at most
    once.

    Args:
        logfile: Path of the log file to read.

    Returns:
        A dict mapping 'INFO', 'WARNING' and 'ERROR' to their line counts.
    """
    levels = ('INFO', 'WARNING', 'ERROR')
    stats = dict.fromkeys(levels, 0)
    with open(logfile) as f:
        for entry in f:
            # First level found in the line wins; None means no level matched.
            matched = next((level for level in levels if level in entry), None)
            if matched is not None:
                stats[matched] += 1
    return stats

10. 高级技巧

内存映射文件

import mmap

with open('large_file.bin', 'r+b') as f:
    # Map the whole file (a length of 0 means "entire file"). Using the
    # mapping as a context manager unmaps it even if an operation raises.
    with mmap.mmap(f.fileno(), 0) as mm:
        # The mapping can be searched and sliced like a bytes object.
        print(mm.find(b'some pattern'))  # index of the first match, or -1

文件锁

import fcntl

with open('shared.txt', 'a') as f:
    # Take an exclusive advisory lock (Unix only); blocks until granted.
    fcntl.flock(f, fcntl.LOCK_EX)
    try:
        f.write("Exclusive access\n")
        # write() only fills Python's buffer. Flush while the lock is still
        # held so the bytes reach the file before another process can
        # acquire the lock.
        f.flush()
    finally:
        # Release the lock even if the write fails.
        fcntl.flock(f, fcntl.LOCK_UN)

11. 常见错误与解决方案

编码问题

# 错误: UnicodeDecodeError
# with open('file.txt') as f: content = f.read()

# 解决方案: 明确指定编码
# Try each likely encoding in turn; the first one that decodes cleanly wins.
for encoding in ('utf-8', 'gbk'):
    try:
        with open('file.txt', encoding=encoding) as f:
            content = f.read()
        break
    except UnicodeDecodeError:
        continue
else:
    # No candidate decoded the file. Rather than silently leaving `content`
    # undefined, read it anyway and substitute U+FFFD for undecodable bytes.
    with open('file.txt', encoding='utf-8', errors='replace') as f:
        content = f.read()

文件路径问题

import os

# 跨平台路径构建
file_path = os.path.join('folder', 'subfolder', 'file.txt')

# 检查路径是否存在
if os.path.exists(file_path):
    with open(file_path) as f:
        pass

大文件处理

# 错误: 一次性读取大文件导致内存不足
# with open('huge_file.txt') as f: lines = f.readlines()

# 解决方案: 逐行处理或分块读取
with open('huge_file.txt') as f:
    for line in f:
        process_line(line)  # 逐行处理

# 或分块读取
def read_in_chunks(file_object, chunk_size=1024):
    """Yield successive pieces of ``file_object`` until it is exhausted.

    Args:
        file_object: Object exposing a ``read(size)`` method.
        chunk_size: Size passed to each ``read`` call.

    Yields:
        Each non-empty result of ``file_object.read(chunk_size)``, in order.
    """
    piece = file_object.read(chunk_size)
    while piece:  # an empty read result marks the end of the data
        yield piece
        piece = file_object.read(chunk_size)

with open('huge_file.txt') as f:
    for chunk in read_in_chunks(f):
        process_chunk(chunk)

12. 性能优化建议

  1. 使用缓冲区:默认情况下Python会自动缓冲文件I/O
  2. 批量写入:多次小写入合并为一次大写入
  3. 适当选择模式:处理非文本数据时使用二进制模式,它省去了解码和换行符转换,通常比文本模式更快
  4. 考虑内存映射:对于超大文件使用mmap
  5. 减少系统调用:使用read()一次读取较大的数据块,而不是频繁地进行小块读取

总结

Python文件对象提供了丰富的方法来处理文件I/O操作:

| 方法类别 | 主要方法 |
| --- | --- |
| 打开/关闭 | open(), close() |
| 读取 | read(), readline(), readlines() |
| 写入 | write(), writelines() |
| 位置 | seek(), tell() |
| 控制 | flush(), truncate() |
| 属性 | fileno(), isatty() |
| 迭代 | __iter__(), __next__() |

掌握这些文件操作方法能够高效地处理各种文件读写需求,从小型配置文件到大型数据文件。合理选择方法和模式可以显著提高程序性能和可靠性。