多进程适合于计算密集型任务和需要大量计算资源的场景(各进程拥有独立的解释器,不受 GIL 限制,可以并行利用多核 CPU),而多线程适合于 I/O 密集型任务和需要快速上下文切换的场景(线程在等待 I/O 时会释放 GIL,且创建与切换的开销小于进程)。
多线程版本
import time
import concurrent.futures
from tqdm import tqdm
# Demo workload: three placeholder file names repeated ten times (30 tasks).
file_list = [
    'file1.txt',
    'file2.txt',
    'file3.txt',
] * 10
def process_file(file: str, i: int, delay: float = 2) -> str:
    """Simulate processing one file and return its name.

    Args:
        file: Name of the file to process.
        i: Index of the task; only used in the progress printout.
        delay: Seconds to sleep in place of real work. Defaults to 2, the
            value that was previously hard-coded.

    Returns:
        The ``file`` argument, unchanged.

    Raises:
        ValueError: If ``delay`` is negative (raised by ``time.sleep``).
    """
    print(file, i)
    # Put the real file-processing code here; the sleep stands in for it.
    time.sleep(delay)
    return file
# Fan the files out over a pool of 10 worker threads and advance a progress
# bar as each task finishes (completion order, not submission order).
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Submit every (file, index) pair up front; each submit returns a Future.
    futures = [
        executor.submit(process_file, file, i)
        for i, file in enumerate(file_list)
    ]
    finished = concurrent.futures.as_completed(futures)
    for future in tqdm(finished, total=len(futures)):
        # result() yields process_file's return value and re-raises any
        # exception raised inside the worker.
        file = future.result()
多进程版本
import time
import concurrent.futures
from tqdm import tqdm
import os
# Demo workload: three placeholder file names repeated ten times (30 tasks).
file_list = [
    'file1.txt',
    'file2.txt',
    'file3.txt',
] * 10
def process_file(file: str, i: int, delay: float = 2) -> str:
    """Simulate processing one file and return its name.

    Args:
        file: Name of the file to process.
        i: Index of the task; only used in the progress printout.
        delay: Seconds to sleep in place of real work. Defaults to 2, the
            value that was previously hard-coded.

    Returns:
        The ``file`` argument, unchanged.

    Raises:
        ValueError: If ``delay`` is negative (raised by ``time.sleep``).
    """
    print(f"Processing {file} - {i}")
    # Put the real file-processing code here; the sleep stands in for it.
    time.sleep(delay)
    return file
# Cap the number of tasks at the machine's CPU count so the demo does not
# queue too much work. os.cpu_count() may return None, in which case the
# slice keeps the whole list.
file_list = file_list[:os.cpu_count()]

# Worker processes do not share this module's globals with the parent;
# arguments and return values are pickled between processes.
#
# The __main__ guard is required for process pools: under the "spawn" start
# method (the default on Windows and macOS) every worker re-imports this
# module, and without the guard each worker would try to build its own pool.
if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor(max_workers=10) as executor:
        # Submit every (file, index) pair up front; each submit returns a Future.
        futures = [
            executor.submit(process_file, file, i)
            for i, file in enumerate(file_list)
        ]
        # Advance the progress bar in completion order, not submission order.
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            # result() yields process_file's return value and re-raises any
            # exception raised inside the worker process.
            file = future.result()