Anasayfa / python / Python Pdf Split and read invoice no

Python Pdf Split and read invoice no

Her sayfasında ayrı bir fatura bulunan bir PDF dosyasının her sayfasını ayrı bir PDF dosyası hâline getiren, ardından her sayfanın içindeki fatura numarasına göre ilgili PDF dosyasının adını değiştiren program.

import os
import shutil
import tkinter as tk
from datetime import datetime
from tkinter import filedialog
from PyPDF2 import PdfFileWriter, PdfFileReader
import lxml.html
import pdfquery


def get_digit_count(number):
    """Return how many decimal digits the integer part of *number* has.

    The sign is ignored and zero counts as one digit, so the result is
    always at least 1 and can be used directly as a ``str.zfill`` width.

    >>> get_digit_count(0)
    1
    >>> get_digit_count(1234)
    4
    """
    remaining = abs(number)
    count = 1
    # Every floor division by ten strips one digit; stop as soon as only a
    # single digit is left so that 0..9 all report exactly one digit.
    while remaining >= 10:
        remaining //= 10
        count += 1
    return count


def get_count_files_in_folder(path):
    """Return the number of regular files directly inside *path*.

    Subdirectories are neither counted nor descended into.
    """
    # os.listdir() yields bare names, so each one has to be joined with
    # *path* before it is tested; a bare name would be resolved against the
    # current working directory instead of the folder being counted.
    return sum(
        1
        for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )


def get_list_files_in_folder(path, suffix=""):
    """Return the names of the regular files directly inside *path*.

    Args:
        path: Directory to scan; subdirectories are skipped.
        suffix: Optional file-name ending (for example ``".pdf"``) that a
            name must have to be included. The default empty string
            matches every file, so existing callers see no change.

    Returns:
        A list of bare file names (without the directory part), in the
        order ``os.scandir`` reports them.
    """
    with os.scandir(path) as entries:
        return [
            entry.name
            for entry in entries
            if entry.is_file() and entry.name.endswith(suffix)
        ]


def split_pdf_pages(inputpdf: str, output_folder: str) -> None:
    """Write every page of a PDF as its own single-page PDF file.

    Args:
        inputpdf: Path of the PDF file to split.
        output_folder: Existing directory that receives the page files.

    Each output file is named ``document-page<N>.pdf`` where ``<N>`` is the
    zero-based page index, zero-padded to the number of digits in the total
    page count.
    """
    # The source must stay open while pages are copied out of the reader,
    # and the ``with`` block guarantees the handle is released afterwards
    # (the reader used to be given a file object that was never closed).
    with open(inputpdf, 'rb') as source:
        reader = PdfFileReader(source)
        page_total = reader.numPages
        width = get_digit_count(page_total)
        for index in range(page_total):
            writer = PdfFileWriter()
            writer.addPage(reader.getPage(index))
            page_name = 'document-page%s.pdf' % str(index).zfill(width)
            with open(os.path.join(output_folder, page_name), 'wb') as target:
                writer.write(target)


def pdf_to_xml(pdf_file_path):
    """Return pdfquery's element tree for page 0 of a PDF, as a string.

    Args:
        pdf_file_path: Path of the PDF file to load.

    Returns:
        ``str()`` of the PyQuery object pdfquery produces for page index 0.
    """
    pdf = pdfquery.PDFQuery(pdf_file_path)
    try:
        pdf.load()
        return str(pdf.get_pyquery(None, page_numbers=[0]))
    finally:
        # Close the underlying file even when loading or querying fails, so
        # the PDF is not left locked and can still be renamed afterwards.
        pdf.file.close()


def get_pdf_invoice_number(pdf_file_path):
    """Extract the invoice number from a single-invoice PDF.

    The number is read from a fixed position in the page layout (the XPath
    below), so this only works for PDFs that share that layout.

    Args:
        pdf_file_path: Path of the PDF file to inspect.

    Returns:
        The invoice number as a string of digits without leading zeros.

    Raises:
        IndexError: If the expected text node is not present in the page.
        ValueError: If the text found there is not a number.
    """
    html = lxml.html.document_fromstring(pdf_to_xml(pdf_file_path))
    nodes = html.xpath('/html/body/pdfxml/ltpage/ltrect[1]/lttextlinehorizontal[7]/lttextboxhorizontal/ text()')
    if not nodes:
        # Same exception type as indexing the empty result would raise, but
        # with a message that names the offending file.
        raise IndexError('no invoice number text found in %s' % pdf_file_path)
    txt = nodes[0].strip()
    # Only the last 14 characters hold the number; drop the dashes and
    # round-trip through int() to discard leading zeros.
    digits = txt[-14:].strip().replace('-', '')
    try:
        return str(int(digits))
    except ValueError as err:
        raise ValueError(
            'invalid invoice number %r in %s' % (digits, pdf_file_path)
        ) from err


def pdf_files_rename(path, master):
    """Rename every file in *path* to ``<invoice number>.pdf``.

    Args:
        path: Folder containing the single-page PDF files.
        master: Tk window; its title is updated to ``' <done>/<total>'``
            after each file so the user can follow the progress.

    NOTE(review): two files yielding the same invoice number collide on
    rename; how ``os.rename`` reacts to an existing target is platform
    dependent, so confirm whether duplicates can occur in practice.
    """
    # List the folder once: the same snapshot provides both the total and
    # the names to process, and renames cannot disturb the iteration.
    file_names = get_list_files_in_folder(path)
    total = len(file_names)
    for done, file_name in enumerate(file_names, start=1):
        # Build the path once with the platform separator; the invoice
        # reader used to get a hard-coded backslash path that only resolves
        # on Windows.
        srcname = os.path.join(path, file_name)
        desname = os.path.join(path, get_pdf_invoice_number(srcname) + '.pdf')
        master.title(' ' + str(done) + '/' + str(total))
        master.update_idletasks()
        os.rename(srcname, desname)
        master.update_idletasks()


class Application(tk.Frame):
    """Main window: pick a PDF, split it into pages and rename the pages.

    The split pages are written to a new folder, named after the current
    timestamp, next to the selected PDF file.
    """

    # Caption of the file-selection button while the application is idle.
    _PROMPT_TEXT = "Pdf Dosyası Seçmek İçin\n(Tıklayınız)"

    def __init__(self, master=None):
        """Create the frame, centre a fixed 400x300 window and add the widgets.

        Args:
            master: The Tk root window that hosts this frame.
        """
        super().__init__(master)
        self.master = master
        self.with_of_window = 400
        self.height_of_window = 300
        self.screen_with = master.winfo_screenwidth()
        self.screen_height = master.winfo_screenheight()
        # Top-left corner that puts the window in the middle of the screen.
        self.x_coordinate = (self.screen_with / 2) - (self.with_of_window / 2)
        self.y_coordinate = (self.screen_height / 2) - (self.height_of_window / 2)
        self.pdf_file = ''
        self.dir_path = ''
        self.folder_name = ''
        self.master.geometry(
            '%dx%d+%d+%d' % (self.with_of_window, self.height_of_window, self.x_coordinate, self.y_coordinate))
        self.master.title('Pdf Spliter')
        self.master.resizable(width=False, height=False)
        self.pack()
        self.create_widgets()

    def create_widgets(self):
        """Create the file-selection button and the exit button."""
        self.btn_pdf_sec = tk.Button(self)
        self.btn_pdf_sec["text"] = self._PROMPT_TEXT
        self.btn_pdf_sec["command"] = self.select_pdf_file
        self.btn_pdf_sec.pack(side="top")

        self.quit = tk.Button(self, text="Çıkış", fg="red",
                              command=self.master.destroy)
        self.quit.pack(side="bottom")

    def select_pdf_file(self):
        """Ask for a PDF, split it into pages and rename them by invoice number.

        Cancelling the file dialog leaves the window open and does nothing
        else. If processing fails, the button caption is reset to the idle
        prompt and the exception is re-raised for Tk to report.
        """
        self.pdf_file = filedialog.askopenfilename(initialdir='', title="Select pdf file ",
                                                   filetypes=(('', "*.pdf"),))
        # A cancelled dialog yields an empty value (an empty string, or an
        # empty tuple on some platforms). Just return: calling the site
        # builtin exit() would kill the whole application, and that builtin
        # is not guaranteed to exist (e.g. in frozen executables).
        if not self.pdf_file or not self.pdf_file.strip():
            return
        self.btn_pdf_sec['text'] = 'Lütfen Bekleyiniz\n'
        self.btn_pdf_sec.update_idletasks()
        self.dir_path = os.path.dirname(os.path.realpath(self.pdf_file))
        self.folder_name = datetime.now().strftime("%Y-%m-%d-%H%M%S")
        out_folder = os.path.join(self.dir_path, self.folder_name)
        try:
            # Start from an empty output folder even if one with the same
            # timestamp name already exists.
            if os.path.isdir(out_folder):
                shutil.rmtree(out_folder, ignore_errors=True)
            os.mkdir(out_folder)

            split_pdf_pages(self.pdf_file, out_folder)
            pdf_files_rename(out_folder, self.master)
        except Exception:
            # Do not leave the button stuck on the "please wait" caption.
            self.btn_pdf_sec['text'] = self._PROMPT_TEXT
            raise
        self.btn_pdf_sec['text'] = 'İşlem Tamamlandı\n'


# Script entry: create the Tk root window, attach the application frame to
# it, then run the Tk event loop until the window is closed.
root = tk.Tk()
app = Application(master=root)
root.mainloop()

 

Hakkında ibrahim

İlgili Makaleler

python DownloadProgressBar

import urllib import urllib.request from tqdm import tqdm class DownloadProgressBar(tqdm): def update_to(self, b=1, bsize=1, tsize=None): …

Bir cevap yazın

E-posta hesabınız yayımlanmayacak. Gerekli alanlar * ile işaretlenmişlerdir