# -*- coding: utf-8 -*-
|
|
|
|
import tkinter as tk
|
|
from tkinter import filedialog
|
|
from tkinter import messagebox
|
|
from tkinter import ttk
|
|
from pdfembannersrc import subwindows
|
|
import logging
|
|
import PyPDF2
|
|
import os
|
|
logger = logging.getLogger()
|
|
|
|
class Interface(tk.Toplevel):
    """
    Text extraction window.

    Modal child window that lets the user pick a PDF file and dump its
    text either into a single ``.txt`` file written next to the PDF, or
    into one ``.txt`` file per page inside a folder named after the PDF.
    """

    def __init__(self, parent, **kwargs):
        """
        Build the window and its widgets.

        parent -- the Tk widget this window is transient for.
        kwargs -- extra options forwarded to the inner ``tk.Frame``.
        """
        super().__init__(parent)
        self.transient(parent)
        self.grab_set()
        self.title("PDF Embanner : full split")
        self.geometry("800x160")
        self.protocol("WM_DELETE_WINDOW", self.close)
        self.bind("<Escape>", self.close)

        # Path of the PDF currently selected, None until the user opens one.
        self.file = None

        self.f = tk.Frame(self, width=768, height=576, **kwargs)
        self.f.pack(fill=tk.BOTH)

        # Build the widgets
        self.f.columnconfigure(1, weight=1)
        self.f.rowconfigure(12, weight=1)

        tk.Label(self.f, text="Extract text", bg="blue", fg="white", padx=20).grid(
            row=0, column=0, columnspan=3, sticky=tk.W)
        tk.Label(self.f, text="Return the text in PDF in one or several text files").grid(
            row=1, column=0, columnspan=3, sticky=tk.W)

        tk.Button(self.f, text="Open", command=self.open).grid(row=2, column=0)
        self.open_label = tk.Label(self.f, text="-" if self.file is None else self.file)
        self.open_label.grid(row=2, column=1, columnspan=3, sticky=tk.W)
        tk.Button(self.f, text="Extract text in one file", command=self.do_one, fg="blue").grid(
            row=3, column=3)
        tk.Button(self.f, text="Extract text per page", command=self.do_ppage, fg="blue").grid(
            row=3, column=2)

        ttk.Separator(self.f, orient="horizontal").grid(
            row=5, column=0, columnspan=4, sticky=tk.W + tk.E, padx=5, pady=10)

        tk.Button(self.f, text="Close", command=self.close).grid(row=11, column=3)

        # Status line at the bottom of the window.
        self.message = tk.Label(self.f, text="Welcome!")
        self.message.grid(row=13, column=0, columnspan=4, sticky=tk.W)

    @staticmethod
    def _strip_extension(path):
        """Return *path* without its final extension (``a/b.pdf`` -> ``a/b``)."""
        return os.path.splitext(path)[0]

    def open(self, *args):
        """Ask the user for a PDF file and remember the selection."""
        ftypes = [('PDF files (Portable Document Format)', '*.pdf'), ('All files', '*')]
        fl = filedialog.askopenfilename(filetypes=ftypes)
        # A cancelled dialog yields an empty value ('' or an empty tuple
        # depending on the platform); keep the previous selection then.
        if fl:
            self.file = fl
            self.open_label["text"] = fl

    def do_one(self, *args):
        """
        Extract the text of every page into a single text file.

        The output is written next to the PDF, with the extension replaced
        by ``.txt``. Nothing is written when the PDF could not be read.
        """
        if self.file is None:
            messagebox.showwarning(title="PDF Output", message="Please open the PDF to split before !")
            return

        self.message["text"] = "Extracting"
        outbasename = self._strip_extension(self.file) + ".txt"
        progress = None
        pages = []
        try:
            # Inside a method body `open` resolves to the builtin, not self.open.
            with open(self.file, 'rb') as in_f:
                inpdf = PyPDF2.PdfFileReader(in_f)
                page_count = inpdf.getNumPages()
                progress = subwindows.Progress(self, page_count, "Producing PDFs...")
                progress.message["text"] = 'Reading files'
                for i in range(page_count):
                    pages.append(inpdf.getPage(i).extractText() + "\n")
                    progress.next()
        except OSError:
            logger.warning("Text extraction : Could not open PDF file %s.", self.file)
            messagebox.showerror(title="Error",
                                 message="Impossible to open PDF file {}".format(self.file))
            # Do not create/truncate the output file when reading failed.
            return
        except Exception as e:
            logger.warning("Text extraction : Unknown error occured during PDF production. %s", e)
            messagebox.showerror(title="Error",
                                 message="An Error occured :\n{}\nNo output produced!".format(e))
            return
        finally:
            # Runs on success and on the early returns above.
            if progress is not None:
                progress.close()

        try:
            # Explicit UTF-8 so the result does not depend on the locale encoding.
            with open(outbasename, 'w', encoding="utf-8") as out_f:
                out_f.write("".join(pages))
            self.message["text"] = "Done"
        except OSError:
            logger.warning("Text extraction : Could not open output file %s.", outbasename)
            messagebox.showerror(title="Error",
                                 message="Impossible to open output file {}".format(outbasename))
        except Exception as e:
            logger.warning("Text extraction : Unknown error occured during PDF production. %s", e)
            messagebox.showerror(title="Error",
                                 message="An Error occured :\n{}\nNo output produced!".format(e))

    def do_ppage(self, *args):
        """
        Extract the text of each page into its own text file.

        Files are named ``<pdf name>_<page index on 3 digits>.txt`` (index
        starting at 0) and stored in a folder named after the PDF, created
        next to it when missing.
        """
        if self.file is None:
            messagebox.showwarning(title="PDF Output", message="Please open the PDF to split before !")
            return

        self.message["text"] = "Splitting"
        outfoldername = self._strip_extension(self.file)
        outbasename = os.path.join(outfoldername, os.path.basename(outfoldername))
        try:
            # An already existing folder is reused; anything else is fatal.
            os.makedirs(outfoldername, exist_ok=True)
        except OSError as e:
            logger.warning("Text extraction : Could not create folder %s :: %s", outfoldername, e)
            messagebox.showerror(title="Error",
                                 message="Folder {} already exists or could not be created".format(outfoldername))
            return

        progress = None
        try:
            with open(self.file, 'rb') as in_f:
                inpdf = PyPDF2.PdfFileReader(in_f)
                page_count = inpdf.getNumPages()
                progress = subwindows.Progress(self, page_count, "Producing PDFs...")
                progress.message["text"] = 'Reading files'
                for i in range(page_count):
                    output = inpdf.getPage(i).extractText()
                    progress.next()
                    with open("{}_{:03d}.txt".format(outbasename, i), 'w', encoding="utf-8") as out_f:
                        out_f.write(output)
                self.message["text"] = "Done"
        except OSError:
            logger.warning("Text extraction : Could not open one of the files.")
            messagebox.showerror(title="Error",
                                 message="IO Error occured :\nImpossible to open one of the files\nNo output produced!")
        except Exception as e:
            logger.warning("Text extraction : Unknown error occured during PDF production. %s", e)
            messagebox.showerror(title="Error",
                                 message="An Error occured :\n{}\nNo output produced!".format(e))
        finally:
            if progress is not None:
                progress.close()

    def close(self, *args):
        """Destroy the window (also bound to Escape and the window-manager close button)."""
        self.destroy()
|
|
|