# Python:
import re
import tkinter as tk
from collections import Counter
from tkinter import filedialog

import pandas as pd
# Matches a single e-mail address. The top-level-domain class is letters only:
# inside a character class "|" is a literal pipe, not an alternation, so the
# former ``[A-Z|a-z]`` wrongly accepted text such as "example.co|m".
_EMAIL_PATTERN = re.compile(
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
)


def _iter_text_chunks(file_path):
    """Yield every string from *file_path* that should be scanned for e-mails."""
    if file_path.lower().endswith(".xlsx"):
        # header=None keeps the first row as data, so an address in the top
        # cell of a header-less sheet is not dropped; a real header such as
        # "Email" simply produces no regex match.
        frame = pd.read_excel(file_path, header=None)
        if frame.shape[1] == 0:
            # Empty sheet: there is no first column to index.
            return
        # Only the first column is inspected.
        yield from frame.iloc[:, 0].astype(str)
    else:
        # CSV and every other extension are scanned line by line as text.
        # errors='replace' keeps one undecodable byte from aborting the scan.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
            yield from handle


def calculate_domain_percentage(file_path):
    """Compute how often each e-mail domain occurs in a file.

    ``.xlsx`` files (extension matched case-insensitively) are read with
    pandas and only their first column is scanned. Any other file is read
    as text, line by line.

    Args:
        file_path: Path of the file to scan.

    Returns:
        A tuple ``(percentages, total_emails)`` where ``percentages`` maps
        each lower-cased domain to ``{'percentage': float, 'count': int}``
        in first-seen order and ``total_emails`` is the number of addresses
        found. Returns ``None`` (after printing a notice) when the file
        contains no e-mail address.
    """
    domain_count = Counter()
    for chunk in _iter_text_chunks(file_path):
        for address in _EMAIL_PATTERN.findall(chunk):
            # The pattern admits exactly one "@", so everything after it is
            # the domain. Domains are case-insensitive, hence the lower().
            domain_count[address.rpartition('@')[2].lower()] += 1

    total_emails = sum(domain_count.values())
    if total_emails == 0:
        print("No email addresses found in the file.")
        return None

    percentages = {
        domain: {'percentage': (count / total_emails) * 100, 'count': count}
        for domain, count in domain_count.items()
    }
    return percentages, total_emails
def browse_file():
    """Ask the user for a file and print its e-mail domain statistics.

    Opens a file-selection dialog (text, CSV or Excel), passes the chosen
    path to ``calculate_domain_percentage`` and prints one row per domain
    followed by the total number of addresses. Prints no table when the
    dialog is cancelled or when the file contains no e-mail address.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window
    try:
        file_path = filedialog.askopenfilename(filetypes=[("Text files", "*.txt"), ("CSV files", "*.csv"), ("Excel files", "*.xlsx")])
    finally:
        # The hidden root exists only to host the dialog; release it.
        root.destroy()

    if not file_path:
        return

    # calculate_domain_percentage returns None (not a tuple) when no address
    # was found, so the result must be checked before it is unpacked.
    result = calculate_domain_percentage(file_path)
    if not result:
        return

    percentages, total_emails = result
    if not percentages:
        return

    print("\nDomain\t\tPercentage\tCount")
    print("===============================")
    for domain, stats in percentages.items():
        print(f"{domain.ljust(15)}{stats['percentage']:.2f}%\t{stats['count']}")
    print("\nTotal emails:", total_emails)
# Run the interactive file picker only when executed as a script.
if __name__ == "__main__":
    browse_file()