# -*- coding: utf-8 -*-
"""Text-extraction window: dump the text of a PDF into one or several files."""

import logging
import os
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
from tkinter import ttk

import PyPDF2

from pdfembannersrc import subwindows

logger = logging.getLogger()


class Interface(tk.Toplevel):
    """Top-level window that extracts the text of a PDF into text files.

    The window is made transient to ``parent`` and grabs input. The user
    picks a PDF with "Open" and then extracts its text either into a single
    ``<name>.txt`` file next to the PDF, or into one text file per page
    inside a ``<name>/`` folder next to the PDF.

    Attributes set by ``__init__``:
        file: path of the selected PDF, or ``None`` until one is opened.
        f: the frame holding every widget of the window.
        open_label: label showing the selected path (``"-"`` when none).
        message: status label at the bottom of the window.
    """

    def __init__(self, parent, **kwargs):
        """Build the window and its widgets.

        Args:
            parent: the widget this window is transient to.
            **kwargs: extra options forwarded to the inner ``tk.Frame``.
        """
        tk.Toplevel.__init__(self, parent)
        self.transient(parent)
        self.grab_set()
        self.title("PDF Embanner : full split")
        self.geometry("800x160")
        self.protocol("WM_DELETE_WINDOW", self.close)
        # NOTE(review): the event sequence was an empty string in the source,
        # which Tk rejects ("no events specified in binding"). The
        # angle-bracket sequence was most likely lost; <Escape> is assumed
        # as the key that dismisses the window -- confirm.
        self.bind("<Escape>", self.close)
        self.file = None
        self.f = tk.Frame(self, width=768, height=576, **kwargs)
        self.f.pack(fill=tk.BOTH)

        # Create the widgets.
        self.f.columnconfigure(1, weight=1)
        self.f.rowconfigure(12, weight=1)

        tk.Label(
            self.f, text="Extract text", bg="blue", fg="white", padx=20
        ).grid(row=0, column=0, columnspan=3, sticky=tk.W)
        tk.Label(
            self.f,
            text="Return the text in PDF in one or several text files",
        ).grid(row=1, column=0, columnspan=3, sticky=tk.W)

        tk.Button(self.f, text="Open", command=self.open).grid(row=2, column=0)
        self.open_label = tk.Label(
            self.f, text="-" if self.file is None else self.file
        )
        self.open_label.grid(row=2, column=1, columnspan=3, sticky=tk.W)

        tk.Button(
            self.f, text="Extract text in one file", command=self.do_one,
            fg="blue",
        ).grid(row=3, column=3)
        tk.Button(
            self.f, text="Extract text per page", command=self.do_ppage,
            fg="blue",
        ).grid(row=3, column=2)

        ttk.Separator(self.f, orient="horizontal").grid(
            row=5, column=0, columnspan=4, sticky=tk.W + tk.E, padx=5, pady=10
        )

        tk.Button(self.f, text="Close", command=self.close).grid(
            row=11, column=3
        )

        self.message = tk.Label(self.f, text="Welcome!")
        self.message.grid(row=13, column=0, columnspan=4, sticky=tk.W)

    def open(self, *args):
        """Ask the user for a PDF and remember the chosen path.

        Nothing changes when the dialog is cancelled. ``*args`` is accepted
        so the method can also be used as an event callback.
        """
        ftypes = [
            ('PDF files (Portable Document Format)', '*.pdf'),
            ('All files', '*'),
        ]
        fl = filedialog.askopenfilename(filetypes=ftypes)
        # A cancelled dialog yields an empty value ('' or, on some Tk
        # builds, an empty tuple); a truthiness test covers both.
        if fl:
            self.file = fl
            self.open_label["text"] = fl

    def do_one(self, *args):
        """Extract the text of every page into a single ``.txt`` file.

        The output file sits next to the PDF and shares its name, with the
        extension replaced by ``.txt``. Each page's text is followed by a
        newline. Errors are logged and reported in a dialog; when reading
        the PDF fails, no output file is written.
        """
        if self.file is None:
            messagebox.showwarning(
                title="PDF Output",
                message="Please open the PDF to split before !",
            )
            return

        self.message["text"] = "Extracting"
        outbasename = os.path.splitext(self.file)[0] + ".txt"
        progress = None
        pages = []
        try:
            with open(self.file, 'rb') as in_f:
                inpdf = PyPDF2.PdfFileReader(in_f)
                page_count = inpdf.getNumPages()
                progress = subwindows.Progress(
                    self, page_count, "Producing PDFs..."
                )
                progress.message["text"] = 'Reading files'
                for i in range(page_count):
                    pages.append(inpdf.getPage(i).extractText() + "\n")
                    progress.next()
        except IOError:
            logger.warning(
                "Text extraction : Could not open PDF file %s.", self.file
            )
            messagebox.showerror(
                title="Error",
                message="Impossible to open PDF file {}".format(self.file),
            )
            # Do not fall through to the write step with missing text.
            return
        except Exception as e:
            logger.warning(
                "Text extraction : Unknown error occured during PDF "
                "production. %s", e
            )
            messagebox.showerror(
                title="Error",
                message="An Error occured :\n{}\nNo output produced!".format(e),
            )
            return
        finally:
            # Runs on success and on both early returns above.
            if progress is not None:
                progress.close()

        try:
            # Explicit UTF-8 so extracted non-ASCII text never hits a
            # platform-default codec that cannot encode it.
            with open(outbasename, 'w', encoding="utf-8") as out_f:
                out_f.write("".join(pages))
            self.message["text"] = "Done"
        except IOError:
            logger.warning(
                "Text extraction : Could not open output file %s.",
                outbasename,
            )
            messagebox.showerror(
                title="Error",
                message="Impossible to open output file {}".format(outbasename),
            )
        except Exception as e:
            logger.warning(
                "Text extraction : Unknown error occured during PDF "
                "production. %s", e
            )
            messagebox.showerror(
                title="Error",
                message="An Error occured :\n{}\nNo output produced!".format(e),
            )

    def do_ppage(self, *args):
        """Extract the text of each page into its own ``.txt`` file.

        A folder named after the PDF (extension removed) is created next to
        it, and page ``i`` (counted from 0) is written to
        ``<folder>/<name>_<i as 3 digits>.txt``. If the folder already
        exists an error dialog is shown and the folder is reused; if it
        cannot be created at all, the extraction is aborted.
        """
        if self.file is None:
            messagebox.showwarning(
                title="PDF Output",
                message="Please open the PDF to split before !",
            )
            return

        self.message["text"] = "Splitting"
        outfoldername = os.path.splitext(self.file)[0]
        outbasename = os.path.join(
            outfoldername, os.path.basename(outfoldername)
        )
        try:
            os.mkdir(outfoldername)
        except OSError as e:
            logger.warning(
                "Text extraction : Could not create folder %s :: %s",
                outfoldername, e,
            )
            messagebox.showerror(
                title="Error",
                message="Folder {} already exists or could not be created".format(
                    outfoldername
                ),
            )
            # A pre-existing folder is still usable; without a folder every
            # page write would fail, so stop here instead of cascading into
            # a second error dialog.
            if not os.path.isdir(outfoldername):
                return

        progress = None
        try:
            with open(self.file, 'rb') as in_f:
                inpdf = PyPDF2.PdfFileReader(in_f)
                page_count = inpdf.getNumPages()
                progress = subwindows.Progress(
                    self, page_count, "Producing PDFs..."
                )
                progress.message["text"] = 'Reading files'
                for i in range(page_count):
                    output = inpdf.getPage(i).extractText()
                    progress.next()
                    page_path = "{}_{:03d}.txt".format(outbasename, i)
                    with open(page_path, 'w', encoding="utf-8") as out_f:
                        out_f.write(output)
                self.message["text"] = "Done"
        except IOError:
            logger.warning(
                "Text extraction : Could not open one of the files."
            )
            messagebox.showerror(
                title="Error",
                message="IO Error occured :\nImpossible to open one of the files\nNo output produced!",
            )
        except Exception as e:
            logger.warning(
                "Text extraction : Unknown error occured during PDF "
                "production. %s", e
            )
            messagebox.showerror(
                title="Error",
                message="An Error occured :\n{}\nNo output produced!".format(e),
            )
        finally:
            if progress is not None:
                progress.close()

    def close(self, *args):
        """Destroy the window; ``*args`` absorbs an optional Tk event."""
        self.destroy()