Code For Medium Size Files:
Python:
# This script splits a single text file into multiple files; the user chooses how many files to create.
import tkinter as tk
from tkinter import filedialog
import os
def split_file(input_file, output_folder, num_files):
    """Split a text file into ``num_files`` files with near-equal line counts.

    The input is read twice: once to count its lines and once to copy them.
    Output files are named ``split_file_1.txt`` .. ``split_file_<num_files>.txt``
    inside ``output_folder``. When the line count is not evenly divisible, the
    first ``total % num_files`` files each receive one extra line, so every
    input line is written exactly once. If there are fewer lines than files,
    the trailing files are created empty.

    Args:
        input_file: Path of the text file to split.
        output_folder: Directory that receives the pieces; created if missing.
        num_files: Number of output files to produce; must be at least 1.

    Raises:
        ValueError: If ``num_files`` is less than 1.
    """
    # Function-scope import so this block does not depend on file-level edits.
    from itertools import islice

    if num_files < 1:
        raise ValueError("num_files must be at least 1")

    # First pass: count lines without holding the file in memory.
    with open(input_file, 'r') as f:
        total_lines = sum(1 for _ in f)

    lines_per_file, remainder = divmod(total_lines, num_files)
    os.makedirs(output_folder, exist_ok=True)

    # Second pass: a single handle is shared by all pieces, so each piece
    # continues exactly where the previous one stopped.
    with open(input_file, 'r') as f:
        for i in range(num_files):
            quota = lines_per_file + (1 if i < remainder else 0)
            output_file = os.path.join(output_folder, f'split_file_{i + 1}.txt')
            with open(output_file, 'w') as output_f:
                # islice consumes exactly `quota` lines. readlines(hint) would
                # return a whole ~1 MB batch and overshoot the quota.
                output_f.writelines(islice(f, quota))
def get_user_input():
    """Ask the user for the input file, the output folder and the file count.

    The two paths are picked through Tk file dialogs; the number of files is
    read from the console.

    Returns:
        A tuple ``(input_file, output_folder, num_files)``. A path is falsy
        when its dialog was cancelled. ``(None, None, None)`` is returned
        when the entered count is not a positive integer.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window
    try:
        input_file = filedialog.askopenfilename(title="Select the input file")
        output_folder = filedialog.askdirectory(title="Select the output folder")
    finally:
        # Release the hidden root window once the dialogs are done.
        root.destroy()

    try:
        num_files = int(input("Enter the number of files to split into: "))
    except ValueError:
        num_files = 0  # Fall through to the shared rejection below.

    # Zero and negative counts cannot be split into, so treat them as invalid.
    if num_files < 1:
        print("Invalid input. Please enter a valid number.")
        return None, None, None
    return input_file, output_folder, num_files
def main():
    """Collect the user's choices and split the selected file.

    Does nothing when any of the three values from ``get_user_input`` is
    falsy (cancelled dialog or rejected count).
    """
    source_path, target_dir, parts = get_user_input()
    if not (source_path and target_dir and parts):
        return
    split_file(source_path, target_dir, parts)
    print(f"File successfully split into {parts} files.")


# Run the interactive splitter only when executed as a script.
if __name__ == "__main__":
    main()
Code For Large Size Files:
Python:
import os
import shutil
from tkinter import filedialog
from tqdm import tqdm
def split_large_file(input_file, output_folder, chunk_size_mb):
    """Split a file into consecutive binary chunks of a fixed maximum size.

    Chunks are written to ``output_folder`` as ``chunk_1.bin``,
    ``chunk_2.bin``, ... in file order. Every chunk holds
    ``chunk_size_mb`` megabytes except possibly the last, which holds the
    remaining bytes. An empty input produces no chunk files.

    Args:
        input_file: Path of the file to split.
        output_folder: Directory that receives the chunks; created if missing.
        chunk_size_mb: Maximum chunk size in megabytes (1 MB = 1024 * 1024
            bytes). May be fractional but must amount to at least one byte.

    Raises:
        ValueError: If ``chunk_size_mb`` amounts to less than one byte.
    """
    # read() and range() need an integer, so fractional sizes are truncated.
    chunk_size_bytes = int(chunk_size_mb * 1024 * 1024)
    if chunk_size_bytes < 1:
        raise ValueError("chunk_size_mb must amount to at least one byte")

    # Get the total size of the input file
    total_size = os.path.getsize(input_file)

    # Ceiling division: an exact multiple of the chunk size must not yield
    # an extra, empty trailing chunk.
    num_chunks = -(-total_size // chunk_size_bytes)

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    chunks_written = 0
    with open(input_file, 'rb') as infile:
        for i in tqdm(range(num_chunks), desc="Splitting", unit="chunk"):
            chunk_data = infile.read(chunk_size_bytes)
            if not chunk_data:
                # The file is shorter than its reported size; stop rather
                # than emit empty chunk files.
                break
            chunk_filename = os.path.join(output_folder, f"chunk_{i+1}.bin")
            with open(chunk_filename, 'wb') as chunk_file:
                chunk_file.write(chunk_data)
            chunks_written += 1

    print(f"Splitting complete. {chunks_written} chunks created in {output_folder}")
# Interactive entry point for the large-file splitter: pick the input file
# and output folder through Tk dialogs, then split into fixed-size chunks.
if __name__ == "__main__":
    from tkinter import Tk

    # Open a Tkinter window to select the input file and output folder
    root = Tk()
    root.withdraw()  # Hide the main window
    try:
        # Get the input file and output folder from the user
        input_file = filedialog.askopenfilename(title="Select the input file")
        output_folder = filedialog.askdirectory(title="Select the output folder")
    finally:
        # Release the hidden root window once the dialogs are done.
        root.destroy()

    # Specify the chunk size in megabytes
    chunk_size_mb = 10  # You can adjust this based on your requirements

    # A cancelled dialog yields an empty path; passing that on would fail
    # inside split_large_file, so stop with a message instead.
    if input_file and output_folder:
        # Split the large file into chunks
        split_large_file(input_file, output_folder, chunk_size_mb)
    else:
        print("No input file or output folder selected. Nothing was split.")