I use code to transfer hundreds of large files to a remote server, and sometimes, after all transfers are done, the size of a remote file does not match the size of the local original. I wrote the following code to compare the sizes of the local and remote files, delete the remote file if the sizes don't match, and re-upload a fresh copy.
Note that some of this code is specific to what I am doing for myself. Happy to answer any questions.
import re
import paramiko
import os
from scp import SCPClient
import sys
import logging
from string import ascii_uppercase
# Create a logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s', handlers=[logging.FileHandler("output.log"), logging.StreamHandler()])
# Create the SSH Connection
def createSSHClient(server, user, pem_file):
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(server, username=user, key_filename=pem_file)
    return client

# Print transfer progress, including the remote peer address reported by scp
def progress4(filename, size, sent, peername):
    sys.stdout.write("(%s:%s) %s's progress: %.2f%% \r" % (peername[0], peername[1], filename, float(sent)/float(size)*100))
if __name__ == '__main__':
    user = 'username'
    server = 'IP address'
    pem_file = r'\path\to\pem\file.pem'
    # Find the drive letter of the local path. Optional, but while I know the local
    # path, I do not know the drive letter since it's mapped differently every time
    # I start my computer.
    for drive in ascii_uppercase:
        if os.path.exists(os.path.join(drive + ':\\', 'Shared drives', 'Data')):
            data_dir = os.path.join(drive + ':\\', 'Shared drives', 'Data')
            break
    remote_dir = '/remote/path/'
    logging.info(f"Local directory: {data_dir}")
    logging.info(f"Remote directory: {remote_dir}")
    remote_files_dict = {}
    logging.info("Creating SSH Connection")
    ssh_client = createSSHClient(server, user, pem_file)
    scp = SCPClient(ssh_client.get_transport(), progress4=progress4)
    sftp = ssh_client.open_sftp()  # used later to delete mismatched remote files
    # This gets the filesizes of the files in the remote dir
    logging.info("Retrieving files and filesizes from remote directory")
    stdin, stdout, stderr = ssh_client.exec_command('cd /remote/path/; ls -l')
    lines = stdout.readlines()
    lines = lines[1:]  # drop the "total N" summary line that ls -l prints first
    # Create a dictionary with the filename as key and the filesize as the value
    if len(lines) > 0:
        logging.info("Creating dictionary")
        for line in lines:
            temp = line.split()
            temp_key = temp[8].strip()
            remote_files_dict[temp_key] = int(temp[4])
        for key in remote_files_dict:
            if len(key) > 0:
                # This regex is specific to the filenames I am looking for.
                benchmark_folder = re.findall(r'a\d\d\d', key)[0].upper()
                local_file = os.path.join(data_dir, benchmark_folder, key)
                local_file_size = os.stat(local_file).st_size
                if remote_files_dict[key] == local_file_size:
                    print(key, remote_files_dict[key], local_file_size, True)
                elif remote_files_dict[key] < local_file_size:
                    print(key, remote_files_dict[key], local_file_size, False)
                    logging.warning("Deleting file from remote path")
                    sftp.remove(remote_dir + key)
                    logging.info("Copying local file to remote path")
                    scp.put(local_file, remote_path=remote_dir)
    else:
        logging.info(f"There are no files in {remote_dir}")