aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorKASR <karim.asrih@gmail.com>2023-05-03 17:31:28 +0200
committerGitHub <noreply@github.com>2023-05-03 18:31:28 +0300
commitb0c71c7b6dc0c0adb507d78f401e95e7ab0f5a38 (patch)
treeeb622de196bed36a2fd48f888cdb64199b0e7df9 /scripts
parenta8a2efdc8161d4f69a0dd863e741c11fbd5df85c (diff)
scripts : platform independent script to verify sha256 checksums (#1203)
* Python script to verify the checksum of the llama models. Added a Python script for verifying SHA256 checksums of files in a directory, which can run on multiple platforms. Improved the formatting of the output results for better readability. * Update README.md: update to the README for improved readability and to explain the usage of the Python checksum verification script. * Update the verification script: I've extended the script based on suggestions by @prusnak. The script now checks the available RAM; if there is enough to read the file at once, it will do so. If not, the file is read in chunks. * Minor improvement: small change so that the available RAM is checked and not the total RAM. * Remove the part of the code that reads the file at once if enough RAM is available: based on suggestions from @prusnak, I removed the part of the code that checked whether the user had enough RAM to read the entire model at once. The file is now always read in chunks. * Update verify-checksum-models.py: quick fix to pass the git check.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/verify-checksum-models.py78
1 files changed, 78 insertions, 0 deletions
diff --git a/scripts/verify-checksum-models.py b/scripts/verify-checksum-models.py
new file mode 100644
index 0000000..811372e
--- /dev/null
+++ b/scripts/verify-checksum-models.py
@@ -0,0 +1,78 @@
+import os
+import hashlib
+
def sha256sum(file, block_size=16 * 1024 * 1024):
    """Return the hex-encoded SHA256 digest of the file at path *file*.

    The file is read in fixed-size chunks into one reusable buffer, so memory
    use is bounded by *block_size* regardless of how large the file is.

    Args:
        file: Path of the file to hash.
        block_size: Size in bytes of the read buffer (default 16 MB).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If *block_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    # A zero-length buffer would make the first read return 0 and silently
    # yield the hash of empty input, so reject it up front.
    if block_size <= 0:
        raise ValueError("block_size must be a positive number of bytes")

    file_hash = hashlib.sha256()
    # One preallocated buffer wrapped in a memoryview: slicing the view below
    # does not copy, and no new bytes object is created per chunk.
    mv = memoryview(bytearray(block_size))
    # buffering=0 yields a raw file object whose readinto() fills our buffer
    # directly, without an intermediate buffered layer.
    with open(file, 'rb', buffering=0) as f:
        while True:
            n = f.readinto(mv)
            if not n:
                break
            file_hash.update(mv[:n])

    return file_hash.hexdigest()
+
# Define the path to the llama directory (parent folder of script directory)
llama_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))

# Define the file with the list of hashes and filenames
hash_list_file = os.path.join(llama_path, "SHA256SUMS")


def parse_checksum_line(line):
    """Split one checksum-list line into ``(hash, filename)``.

    Accepts the ``sha256sum`` layouts "<hash>  <name>" (text mode) and
    "<hash> *<name>" (binary mode), as well as a plain single-space
    "<hash> <name>". Only the first space separates the fields, so filenames
    that contain spaces are preserved.

    Args:
        line: A single line from the hash list file.

    Returns:
        A ``(hash_value, filename)`` tuple with the hash lower-cased, or
        ``None`` if the line does not contain both fields.
    """
    hash_value, _, rest = line.rstrip("\r\n").partition(" ")
    # The character after the separating space is the mode marker:
    # " " for text mode, "*" for binary mode. Drop it when present.
    if rest[:1] in (" ", "*"):
        rest = rest[1:]
    if not hash_value or not rest:
        return None
    # hexdigest() is lowercase; normalise so the comparison is case-insensitive.
    return hash_value.lower(), rest


def verify_checksums(lines, base_dir):
    """Verify every entry of a checksum list against files under *base_dir*.

    Blank lines and lines starting with "#" are skipped silently; other lines
    that cannot be parsed are reported and skipped instead of aborting the
    whole run.

    Args:
        lines: Iterable of lines from the hash list file.
        base_dir: Directory the listed filenames are relative to.

    Returns:
        A list of dicts with the keys "filename", "valid checksum"
        ("V" or "") and "file missing" ("X" or ""), one per verified entry.
    """
    results = []

    for line in lines:
        if not line.strip() or line.startswith("#"):
            continue

        entry = parse_checksum_line(line)
        if entry is None:
            print(f"Skipping malformed line: {line!r}")
            continue
        hash_value, filename = entry

        # Get the full path of the file by joining the base path and the filename
        file_path = os.path.join(base_dir, filename)

        # Informing user of the progress of the integrity check
        print(f"Verifying the checksum of {file_path}")

        valid_checksum = ""
        file_missing = ""
        # isfile() rather than exists(): a directory cannot be hashed.
        if os.path.isfile(file_path):
            if sha256sum(file_path) == hash_value:
                valid_checksum = "V"
        else:
            file_missing = "X"

        results.append({
            "filename": filename,
            "valid checksum": valid_checksum,
            "file missing": file_missing,
        })

    return results


def print_results_table(results):
    """Print the verification results as a fixed-width table."""
    # Print column headers for results table
    print("\n" + "filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20))
    print("-" * 80)

    # Output the results as a table
    for r in results:
        print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}")


def main():
    """Verify the files listed in SHA256SUMS and print a summary table.

    Exits with status 1 if the hash list file does not exist.
    """
    # Check if the hash list file exists
    if not os.path.exists(hash_list_file):
        print(f"Hash list file not found: {hash_list_file}")
        # SystemExit instead of the exit() helper, which the site module
        # injects and which is absent when Python runs without it.
        raise SystemExit(1)

    # Read the hash file content and split it into an array of lines.
    # An explicit encoding keeps behaviour identical across platforms.
    with open(hash_list_file, "r", encoding="utf-8") as f:
        hash_list = f.read().splitlines()

    print_results_table(verify_checksums(hash_list, llama_path))


if __name__ == "__main__":
    main()
+