import os
import queue
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

from PIL import Image, ImageTk, ImageDraw
from pdf2image import convert_from_path
import pytesseract
from openpyxl import Workbook
# Tesseract executable location (example for a default Windows install).
# The override is applied only when that file exists; otherwise pytesseract
# keeps its own default command, so a tesseract reachable via PATH still works.
_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.exists(_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_EXE

# Poppler "bin" directory handed to pdf2image. It is reset to None (with a
# console warning) when the directory does not exist on this machine.
poppler_path = r"C:\Users\Murat\poppler\poppler-25.07.0\Library\bin"
if not os.path.exists(poppler_path):
    poppler_path = None
    print("⚠️ Poppler yolu bulunamadı. PDF işlemleri çalışmayabilir.")
class PDFImageAnalyzer(tk.Tk):
    """Main window: build a PDF from images and OCR-search a PDF for a keyword.

    The window offers four actions: pick an image folder, merge its images
    into a PDF, run an OCR keyword search over a chosen PDF, and export the
    matches to an Excel workbook. Double-clicking a result row opens a preview
    of the matching page with the keyword outlined in red.

    OCR runs in a background thread. That thread never touches Tk; it posts
    events to ``self._events`` and the main thread applies them to the widgets
    from a periodic ``after`` callback.
    """

    # Lower-case file extensions collected from the selected image folder.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp")
    # Delay, in milliseconds, between two polls of the worker event queue.
    POLL_INTERVAL_MS = 100

    def __init__(self):
        """Initialise the application state and build the widgets."""
        super().__init__()
        self.title("Resim → PDF → PDF Arama")
        self.geometry("900x600")
        self.image_folder = ""
        self.image_list = []
        self.pdf_path = ""
        self.search_results = []  # [(zero-based page index, [matching lines])]
        self.keyword = ""
        self._searched_pdf_path = ""  # PDF that search_results refers to
        self._events = queue.Queue()  # worker thread -> main thread messages
        self._search_active = False
        self.create_widgets()

    def create_widgets(self):
        """Create the toolbar, the progress bar and the result list."""
        frame = tk.Frame(self)
        frame.pack(pady=10, fill=tk.X)
        tk.Button(frame, text="Resim Klasörü Seç", command=self.select_folder).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Resimleri PDF’ye Çevir", command=self.images_to_pdf).pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="PDF Seç", command=self.select_pdf).pack(side=tk.LEFT, padx=5)
        tk.Label(frame, text="Aranacak Kelime:").pack(side=tk.LEFT, padx=5)
        self.search_entry = tk.Entry(frame)
        self.search_entry.pack(side=tk.LEFT, padx=5)
        # Kept as an attribute so it can be disabled while a search is running.
        self.search_button = tk.Button(frame, text="Ara", command=self.start_search_thread)
        self.search_button.pack(side=tk.LEFT, padx=5)
        tk.Button(frame, text="Excel Raporu Kaydet", command=self.save_excel).pack(side=tk.LEFT, padx=5)
        self.progress = ttk.Progressbar(self, orient="horizontal", length=800, mode="determinate")
        self.progress.pack(pady=10)
        self.result_listbox = tk.Listbox(self, height=20)
        self.result_listbox.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
        # An empty event sequence is rejected by Tk; open the preview on
        # double-click of a result row.
        self.result_listbox.bind("<Double-Button-1>", self.preview_page)

    def select_folder(self):
        """Ask for a folder and collect the image files directly inside it."""
        folder = filedialog.askdirectory()
        if not folder:
            return  # dialog cancelled: keep the previous selection
        self.image_folder = folder
        # Sorted so the page order of the generated PDF is deterministic
        # (os.listdir returns entries in arbitrary order).
        self.image_list = sorted(
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        messagebox.showinfo("Bilgi", f"{len(self.image_list)} resim bulundu.")

    def images_to_pdf(self):
        """Merge the collected images into one multi-page PDF chosen by the user."""
        if not self.image_list:
            messagebox.showerror("Hata", "Lütfen önce bir klasör seçin.")
            return
        # Ask for the destination first so nothing is decoded if the user cancels.
        save_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF dosyaları", "*.pdf")])
        if not save_path:
            return
        try:
            images = []
            for image_path in self.image_list:
                # convert() returns an independent in-memory image, so the
                # source file can be closed right away.
                with Image.open(image_path) as source:
                    images.append(source.convert("RGB"))
            images[0].save(save_path, save_all=True, append_images=images[1:])
        except OSError as exc:
            messagebox.showerror("Hata", f"PDF oluşturulamadı: {exc}")
            return
        messagebox.showinfo("Başarılı", f"PDF kaydedildi: {save_path}")

    def select_pdf(self):
        """Ask for the PDF file that will be searched."""
        chosen = filedialog.askopenfilename(filetypes=[("PDF dosyaları", "*.pdf")])
        if not chosen:
            return  # dialog cancelled: keep the previous selection
        self.pdf_path = chosen
        messagebox.showinfo("Bilgi", f"Seçilen PDF: {self.pdf_path}")

    def start_search_thread(self):
        """Validate the inputs on the main thread and start the OCR worker."""
        if self._search_active:
            return  # a search is already running
        if not poppler_path:
            messagebox.showerror("Hata", "Poppler bulunamadı. Lütfen sistemine kur ve tekrar çalıştır.")
            return
        keyword = self.search_entry.get()
        if not self.pdf_path or not keyword.strip():
            messagebox.showerror("Hata", "Lütfen PDF seçin ve arama kelimesi girin.")
            return
        # State is replaced only after validation, so the keyword and PDF path
        # always describe the results currently shown in the list.
        self.keyword = keyword
        self._searched_pdf_path = self.pdf_path
        self.search_results.clear()
        self.result_listbox.delete(0, tk.END)
        self.progress["value"] = 0
        self._search_active = True
        self.search_button.config(state=tk.DISABLED)
        # Daemon thread: closing the window must not leave the process alive.
        threading.Thread(target=self.search_pdf, daemon=True).start()
        self.after(self.POLL_INTERVAL_MS, self._poll_search_events)

    def search_pdf(self):
        """Worker: OCR every page of the PDF and report lines containing the keyword.

        Runs in a background thread and never touches Tk. Progress and results
        are posted to ``self._events`` as tuples:
        ``("start", page_count)``, ``("page", index, matching_lines)``,
        ``("error", message)`` or ``("finished",)``. Exactly one of the last
        two is posted, as the final event.
        """
        pdf_path, keyword = self.pdf_path, self.keyword
        if not pdf_path or not keyword:
            self._events.put(("error", "Lütfen PDF seçin ve arama kelimesi girin."))
            return
        try:
            pages = convert_from_path(pdf_path, poppler_path=poppler_path)
        except Exception as e:  # thread boundary: report instead of dying silently
            self._events.put(("error", f"PDF okunamadı: {str(e)}"))
            return
        self._events.put(("start", len(pages)))
        needle = keyword.lower()
        try:
            for index, page in enumerate(pages):
                text = pytesseract.image_to_string(page)
                found_lines = [line.strip() for line in text.split("\n") if needle in line.lower()]
                self._events.put(("page", index, found_lines))
        except Exception as exc:  # e.g. Tesseract missing or OCR failure
            self._events.put(("error", f"OCR sırasında hata oluştu: {exc}"))
            return
        self._events.put(("finished",))

    def _poll_search_events(self):
        """Apply queued worker events on the main thread; reschedule until done."""
        while True:
            try:
                event = self._events.get_nowait()
            except queue.Empty:
                break
            if self._handle_search_event(event):
                self._search_active = False
                self.search_button.config(state=tk.NORMAL)
                return
        self.after(self.POLL_INTERVAL_MS, self._poll_search_events)

    def _handle_search_event(self, event):
        """Update the widgets for one worker event; return True if it ends the search."""
        kind = event[0]
        if kind == "start":
            self.progress["maximum"] = event[1]
            self.progress["value"] = 0
        elif kind == "page":
            _, index, found_lines = event
            if found_lines:
                self.search_results.append((index, found_lines))
                self.result_listbox.insert(tk.END, f"Sayfa {index+1}: {len(found_lines)} adet bulundu")
                for found_line in found_lines:
                    self.result_listbox.insert(tk.END, f" → {found_line}")
            self.progress["value"] = index + 1
        elif kind == "error":
            messagebox.showerror("Hata", event[1])
            return True
        elif kind == "finished":
            if not self.search_results:
                messagebox.showinfo("Sonuç Yok", "Kelime bulunamadı.")
            else:
                messagebox.showinfo("Tamamlandı", f"{len(self.search_results)} sayfada sonuç bulundu.")
            return True
        return False

    def save_excel(self):
        """Write the search results to an .xlsx file, one row per matching page."""
        if not self.search_results:
            messagebox.showerror("Hata", "Önce arama yapın.")
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel dosyaları", "*.xlsx")])
        if not file_path:
            return
        wb = Workbook()
        ws = wb.active
        ws.append(["Sayfa Numarası", "Bulunan Satırlar"])
        for page_num, lines in self.search_results:
            # Stored page indices are zero-based; the report is one-based.
            ws.append([page_num + 1, "\n".join(lines)])
        try:
            wb.save(file_path)
        except OSError as exc:  # e.g. the target file is open in another program
            messagebox.showerror("Hata", f"Excel raporu kaydedilemedi: {exc}")
            return
        messagebox.showinfo("Başarılı", f"Excel raporu kaydedildi: {file_path}")

    @staticmethod
    def _page_for_row(search_results, row):
        """Return the zero-based page index that owns listbox row ``row``.

        Each entry of ``search_results`` occupies one header row followed by
        one row per matching line. Returns None when ``row`` is outside the
        rows produced by ``search_results``.
        """
        first_row = 0
        for page_index, lines in search_results:
            next_first_row = first_row + 1 + len(lines)
            if first_row <= row < next_first_row:
                return page_index
            first_row = next_first_row
        return None

    def preview_page(self, event):
        """Open a preview of the page behind the selected row, keyword outlined in red."""
        selection = self.result_listbox.curselection()
        if not selection or not self.search_results:
            return
        # Find which page the selected row belongs to.
        page_num = self._page_for_row(self.search_results, selection[0])
        if page_num is None:
            return
        # Preview the PDF the results came from, even if another PDF was
        # selected after the search.
        pdf_path = self._searched_pdf_path or self.pdf_path
        try:
            # Render only the needed page (pdf2image page numbers are one-based).
            pages = convert_from_path(
                pdf_path,
                poppler_path=poppler_path,
                first_page=page_num + 1,
                last_page=page_num + 1,
            )
            if not pages:
                return
            image = pages[0]
            # Highlighting: outline every OCR word that contains the keyword.
            data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
        except Exception as exc:  # UI callback boundary: show the failure
            messagebox.showerror("Hata", f"PDF okunamadı: {str(exc)}")
            return
        needle = self.keyword.lower()
        draw = ImageDraw.Draw(image)
        for i, word in enumerate(data['text']):
            if needle in word.lower():
                (x, y, w, h) = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
                draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
        preview_win = tk.Toplevel(self)
        preview_win.title(f"Sayfa Önizleme: {page_num + 1}")
        img = ImageTk.PhotoImage(image.resize((800, 1000)))
        label = tk.Label(preview_win, image=img)
        label.image = img  # keep a reference so the image is not garbage-collected
        label.pack()
def main():
    """Create the application window and run the Tk event loop until it closes."""
    window = PDFImageAnalyzer()
    window.mainloop()


if __name__ == "__main__":
    main()