"""Minimal Flask service that extracts the text of an uploaded PDF.

POST a multipart form with a ``file`` field to ``/upload``; the response is
JSON of the form ``{"text": ...}`` on success or ``{"error": ...}`` with
status 400 when the request is rejected.
"""

import os
import tempfile

import fitz  # PyMuPDF
from flask import Flask, jsonify, request

app = Flask(__name__)

# Directory that temporarily holds uploads while they are being processed.
UPLOAD_FOLDER = 'uploads'
# exist_ok avoids the check-then-create race when several workers start.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


def allowed_file(filename: str) -> bool:
    """Return True when *filename* has a ``.pdf`` extension (any case)."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() == 'pdf'


def extract_text_from_pdf(filepath: str) -> str:
    """Return the concatenated plain text of every page of the PDF.

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        The text of all pages, in page order, joined without a separator.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(filepath) as document:
        return "".join(page.get_text("text") for page in document)


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle a PDF upload and respond with its extracted text.

    Returns:
        A ``(response, status)`` tuple: ``{"text": ...}`` with 200 on
        success, or ``{"error": ...}`` with 400 when the ``file`` part is
        missing, no file was selected, or the file is not a PDF.
    """
    # Check that the POST request has the file part.
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request'}), 400

    file = request.files['file']

    # If the user does not select a file, the browser submits an empty
    # file without a filename (treat a missing filename the same way).
    if not file.filename:
        return jsonify({'error': 'No selected file'}), 400

    if not allowed_file(file.filename):
        return jsonify(
            {'error': 'Invalid file type. Only PDF files are allowed.'}
        ), 400

    # Save under a server-generated unique name instead of the client's
    # filename: this prevents path traversal (e.g. "../../x.pdf") and stops
    # concurrent uploads with the same name from clobbering each other.
    fd, filepath = tempfile.mkstemp(suffix='.pdf', dir=UPLOAD_FOLDER)
    try:
        with os.fdopen(fd, 'wb') as destination:
            file.save(destination)

        # Extract text from the PDF.
        text = extract_text_from_pdf(filepath)
    finally:
        # Always remove the temporary file, even when extraction fails.
        os.remove(filepath)

    return jsonify({'text': text}), 200


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and
    # reloader; keep it for local development only.
    app.run(debug=True)