• Join our Telegram Channel to receive notifications when new databases are released!

Python Script to split files

cybercrow

Newbie
3
1
Code For Medium Size Files:

Python:
# Split a single text file into a user-chosen number of output files, each holding a roughly equal share of the lines.

import tkinter as tk
from tkinter import filedialog
import os

def split_file(input_file, output_folder, num_files):
    """Split a line-oriented file into ``num_files`` files of near-equal line count.

    The input is read twice: once to count its lines and once to copy them.
    Each output file receives ``total_lines // num_files`` lines, and the
    first ``total_lines % num_files`` files receive one extra line, so the
    concatenation of the outputs (in order) reproduces the input exactly.
    Output files are named ``split_file_1.txt`` ... ``split_file_<num_files>.txt``
    and are always created, even if some end up empty because the input has
    fewer lines than ``num_files``.

    Args:
        input_file: Path of the file to split.
        output_folder: Directory that receives the output files; created if
            it does not exist.
        num_files: Number of output files to produce; must be positive.

    Raises:
        ValueError: If ``num_files`` is not positive.
        OSError: If the input cannot be read or an output cannot be written.
    """
    # Function-scope import so this block does not depend on the file header.
    from itertools import islice

    if num_files <= 0:
        raise ValueError("num_files must be a positive integer")

    # Binary mode: lines are delimited by b"\n", bytes are copied verbatim
    # (no newline translation) and undecodable input cannot raise.
    with open(input_file, 'rb') as f:
        total_lines = sum(1 for _ in f)

    lines_per_file, remainder = divmod(total_lines, num_files)

    os.makedirs(output_folder, exist_ok=True)

    with open(input_file, 'rb') as f:
        for i in range(num_files):
            output_file = os.path.join(output_folder, f'split_file_{i + 1}.txt')
            # The first `remainder` files absorb the lines left over by the
            # integer division.
            quota = lines_per_file + (1 if i < remainder else 0)

            with open(output_file, 'wb') as output_f:
                # islice consumes exactly `quota` lines from the shared file
                # iterator, so the next output resumes where this one stopped.
                output_f.writelines(islice(f, quota))

def get_user_input():
    """Ask the user for the input file, the output folder and the file count.

    The two paths are chosen through Tk file dialogs; the number of output
    files is read from standard input.

    Returns:
        A tuple ``(input_file, output_folder, num_files)``. A cancelled
        dialog yields an empty string for that path. If the count is not a
        positive integer, a message is printed and ``(None, None, None)`` is
        returned.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window

    try:
        input_file = filedialog.askopenfilename(title="Select the input file")
        output_folder = filedialog.askdirectory(title="Select the output folder")
    finally:
        # The hidden root is only needed to host the dialogs; release it so
        # no invisible Tk window outlives this function.
        root.destroy()

    try:
        num_files = int(input("Enter the number of files to split into: "))
    except ValueError:
        print("Invalid input. Please enter a valid number.")
        return None, None, None

    # Zero or a negative count cannot describe a split; treat it like any
    # other invalid entry instead of passing it downstream.
    if num_files <= 0:
        print("Invalid input. Please enter a valid number.")
        return None, None, None

    return input_file, output_folder, num_files

def main():
    """Collect the user's choices and run the split when all were provided."""
    source_path, target_dir, num_files = get_user_input()

    # Any missing or falsy answer (cancelled dialog, invalid count) means
    # there is nothing to do.
    if not (source_path and target_dir and num_files):
        return

    split_file(source_path, target_dir, num_files)
    print(f"File successfully split into {num_files} files.")


if __name__ == "__main__":
    main()




Code For Large Size Files:

Python:
import os
import shutil
from tkinter import filedialog
from tqdm import tqdm

def split_large_file(input_file, output_folder, chunk_size_mb):
    """Split a file into consecutive binary chunks of at most ``chunk_size_mb`` MiB.

    Chunks are written to ``output_folder`` as ``chunk_1.bin``, ``chunk_2.bin``,
    ... in input order; concatenating them reproduces the input. Every chunk
    except possibly the last is exactly the requested size, and no empty
    trailing chunk is written when the input size is an exact multiple of
    the chunk size. An empty input produces no chunk files.

    Args:
        input_file: Path of the file to split.
        output_folder: Directory that receives the chunks; created if missing.
        chunk_size_mb: Chunk size in mebibytes. May be fractional (e.g. ``0.5``)
            but must amount to at least one byte.

    Raises:
        ValueError: If ``chunk_size_mb`` does not amount to a positive number
            of bytes.
        OSError: If the input cannot be read or a chunk cannot be written.
    """
    # Truncate to whole bytes so fractional megabyte sizes are usable with
    # read() and range(), which both require integers.
    chunk_size_bytes = int(chunk_size_mb * 1024 * 1024)
    if chunk_size_bytes <= 0:
        raise ValueError("chunk_size_mb must be a positive number")

    # Get the total size of the input file
    total_size = os.path.getsize(input_file)

    # Ceiling division: a plain `// ... + 1` would add an empty extra chunk
    # whenever the size is an exact multiple of the chunk size.
    num_chunks = -(-total_size // chunk_size_bytes)

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # The progress bar is purely cosmetic, so fall back to a plain loop when
    # tqdm cannot be imported instead of failing the split.
    chunk_indices = range(num_chunks)
    try:
        from tqdm import tqdm as progress_bar
    except ImportError:
        pass
    else:
        chunk_indices = progress_bar(chunk_indices, desc="Splitting", unit="chunk")

    # Open the input file for reading in binary mode
    with open(input_file, 'rb') as infile:
        for i in chunk_indices:
            # Read a chunk from the input file
            chunk_data = infile.read(chunk_size_bytes)

            # Create output chunk file
            chunk_filename = os.path.join(output_folder, f"chunk_{i+1}.bin")

            # Write the chunk data to the output file
            with open(chunk_filename, 'wb') as chunk_file:
                chunk_file.write(chunk_data)

    print(f"Splitting complete. {num_chunks} chunks created in {output_folder}")

if __name__ == "__main__":
    from tkinter import Tk

    # Open a hidden Tkinter root window to host the selection dialogs
    root = Tk()
    root.withdraw()  # Hide the main window

    # Get the input file and output folder from the user
    try:
        input_file = filedialog.askopenfilename(title="Select the input file")
        output_folder = filedialog.askdirectory(title="Select the output folder")
    finally:
        # The root is only needed while the dialogs are open.
        root.destroy()

    # A cancelled dialog returns an empty string; stop here rather than
    # letting the split fail on an empty path.
    if not input_file or not output_folder:
        raise SystemExit("No input file or output folder selected; nothing to do.")

    # Specify the chunk size in megabytes
    chunk_size_mb = 10  # You can adjust this based on your requirements

    # Split the large file into chunks
    split_large_file(input_file, output_folder, chunk_size_mb)
 
  • Like
Reactions: Archimed