• Join our Telegram Channel to receive notifications when new databases are released!

SCRIPT TO CALCULATE THE PERCENTAGE OF EACH EMAIL DOMAIN IN A COMBO FILE

cybercrow

Newbie
3
1
Python:
[COLOR=rgb(84, 172, 210)]import re
import tkinter as tk
from tkinter import filedialog
import pandas as pd

def calculate_domain_percentage(file_path):
    """Count email domains in a file and report each domain's share.

    Files whose name ends in ``.xlsx`` (case-insensitive) are read with
    pandas and only their first column is scanned. Every other file
    (including ``.csv`` and ``.txt``) is scanned line by line as text.

    Args:
        file_path: Path of the file to scan.

    Returns:
        A ``(percentages, total_emails)`` tuple, where ``percentages`` maps
        each lower-cased domain to ``{'percentage': float, 'count': int}``
        and ``total_emails`` is the number of addresses found. When no
        address is found, a message is printed and ``None`` is returned.
    """
    # The TLD class is [A-Za-z]; a '|' inside a character class is a literal
    # pipe, not alternation, and would let "co|m" pass as a TLD.
    email_pattern = re.compile(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    )
    domain_count = {}
    total_emails = 0

    def tally(text):
        """Add every address found in *text* to the running counts."""
        nonlocal total_emails
        for address in email_pattern.findall(text):
            # The pattern allows exactly one '@', so everything after it is
            # the domain. Domains are case-insensitive, so fold the case to
            # keep "Gmail.com" and "gmail.com" in one bucket.
            domain = address.rpartition('@')[2].lower()
            domain_count[domain] = domain_count.get(domain, 0) + 1
            total_emails += 1

    if file_path.lower().endswith(".xlsx"):
        # For large Excel files, you might need to explore other libraries
        # or optimize the reading process based on your specific requirements.
        # header=None keeps the first row as data, so an address in row one
        # of a header-less sheet is still counted; a textual header such as
        # "email" contains no address and is ignored by the pattern anyway.
        df = pd.read_excel(file_path, header=None)
        if df.shape[1] > 0:  # an empty sheet has no first column to index
            for cell in df.iloc[:, 0].astype(str):
                tally(cell)
    else:
        # Decode as UTF-8 and replace undecodable bytes so one stray byte
        # does not abort the scan; ASCII addresses are unaffected.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            for line in file:
                tally(line)

    if total_emails == 0:
        print("No email addresses found in the file.")
        return None

    percentages = {
        domain: {'percentage': (count / total_emails) * 100, 'count': count}
        for domain, count in domain_count.items()
    }

    return percentages, total_emails

def browse_file():
    """Ask the user for a file and print its email-domain breakdown.

    Opens a file-selection dialog, passes the chosen path to
    ``calculate_domain_percentage`` and prints one row per domain followed
    by the total number of addresses. Prints nothing further when the
    dialog is cancelled or when the file contains no addresses.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window
    file_path = filedialog.askopenfilename(filetypes=[("Text files", "*.txt"), ("CSV files", "*.csv"), ("Excel files", "*.xlsx")])
    # The hidden root only exists to host the dialog; release it once the
    # dialog has closed instead of leaving it alive for the rest of the run.
    root.destroy()

    if not file_path:
        return

    # calculate_domain_percentage returns None (after printing a message)
    # when no address is found, so it must not be unpacked directly.
    result = calculate_domain_percentage(file_path)
    if not result:
        return

    percentages, total_emails = result
    if not percentages:
        return

    print("\nDomain\t\tPercentage\tCount")
    print("===============================")
    for domain, stats in percentages.items():
        print(f"{domain.ljust(15)}{stats['percentage']:.2f}%\t{stats['count']}")

    print("\nTotal emails:", total_emails)

# Launch the interactive file picker only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    browse_file()[/COLOR]