import requests
import urllib.parse
from tqdm import tqdm
import os
import concurrent.futures


def check_local_file(url, destination, count, total_count):
    # If the local file already exists, compare its size with the remote file
    if os.path.exists(destination):
        response = requests.head(url)  # HEAD request: fetch headers only (content length etc.)
        remote_file_size = int(response.headers.get('content-length', 0))
        local_file_size = os.path.getsize(destination)  # Size of the local copy
        # If local and remote sizes match, treat the file as already downloaded
        if local_file_size == remote_file_size:
            print("no." + str(count) + "/" + str(total_count) + " " + destination
                  + " already exists with matching size, skipping download")
            return True
    return False


def download_file(url, destination, count, total_count):
    if not check_local_file(url, destination, count, total_count):
        response = requests.get(url, stream=True)
        total_size = int(response.headers.get('content-length', 0))
        block_size = 1024  # Bytes read per chunk
        # Log every URL that is actually downloaded
        with open("down.txt", "a") as file:
            file.write(url + '\n')
        with open(destination, 'wb') as file, tqdm(
            desc="Downloading no." + str(count) + "/" + str(total_count),
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024
        ) as bar:
            for data in response.iter_content(block_size):
                file.write(data)
                bar.update(len(data))


# Links are stored one per line in the text file 'full-links.txt'
with open('full-links.txt', 'r') as file:
    links = file.readlines()

# Collect the cleaned URLs before downloading
urls = []
for link in links:
    link = link.strip()  # Drop the trailing newline and surrounding whitespace
    urls.append(link)

num_threads = 32  # Tune this to your machine's performance and bandwidth
total_len = len(urls)
# Make sure the output directory exists before the workers start writing into it
os.makedirs("./opennueroQTAB", exist_ok=True)
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
    for index, url in enumerate(urls):
        file_name = urllib.parse.urlparse(url).path.split('/')[-1]  # Take the file name from the URL path
        file_path = "./opennueroQTAB/" + file_name
        executor.submit(download_file, url, file_path, index, total_len)