| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051 |
- from flask import Flask, request, jsonify
- import fitz # PyMuPDF
- import os
# Flask application object on which the upload route is registered.
app = Flask(__name__)

# Directory (relative to the current working directory) where uploaded
# files are staged while their text is extracted.
UPLOAD_FOLDER = 'uploads'
# exist_ok=True creates the folder when it is missing and is a no-op when it
# already exists, avoiding the check-then-create race of testing
# os.path.exists() first.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle a PDF upload and return its extracted text as JSON.

    Reads the uploaded file from the ``file`` field of the POST request,
    stages it in ``UPLOAD_FOLDER``, extracts its text, and deletes the
    staged copy again.

    Returns:
        A ``(response, status)`` tuple: ``{'text': ...}`` with 200 on
        success, or ``{'error': ...}`` with 400 when the file part is
        missing, no file was selected, or the name is not a ``.pdf``.
    """
    import tempfile

    # Check if the post request has the file part.
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request'}), 400

    uploaded = request.files['file']

    # If the user does not select a file, the browser submits an
    # empty file without a filename.
    if uploaded.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if not (uploaded and allowed_file(uploaded.filename)):
        return jsonify({'error': 'Invalid file type. Only PDF files are allowed.'}), 400

    # The client-supplied filename is untrusted: joining it into the path
    # would allow traversal (e.g. "../../x.pdf") and lets two concurrent
    # uploads with the same name overwrite or delete each other's file.
    # Stage the upload under a unique, server-generated name instead.
    fd, filepath = tempfile.mkstemp(suffix='.pdf', dir=UPLOAD_FOLDER)
    os.close(fd)  # only the path is needed; save() reopens the file itself
    try:
        uploaded.save(filepath)
        # Extract text from the PDF.
        text = extract_text_from_pdf(filepath)
    finally:
        # Always remove the staged file, even when saving or extraction fails.
        os.remove(filepath)

    return jsonify({'text': text}), 200
def allowed_file(filename):
    """Return True when *filename* has a ``pdf`` extension (case-insensitive).

    Only the text after the last dot is considered, and a name without any
    dot is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    # rpartition yields an empty separator when the name contains no dot.
    if dot != '.':
        return False
    return extension.lower() == 'pdf'
def extract_text_from_pdf(filepath):
    """Return the plain text of every page of the PDF at *filepath*.

    Args:
        filepath: Path of the PDF file to open with PyMuPDF.

    Returns:
        The "text"-mode output of all pages, concatenated in page order
        (an empty string for a document with no pages).
    """
    # The context manager closes the document when the block exits, so the
    # underlying file is released even if a page fails to render and the
    # caller can delete the file afterwards.
    with fitz.open(filepath) as document:
        # Iterating the document yields its pages in order; join once
        # instead of growing a string with repeated +=.
        return "".join(page.get_text("text") for page in document)
if __name__ == '__main__':
    # Debug mode enables the interactive debugger, which lets anyone who can
    # reach the server execute arbitrary code. Keep it off unless explicitly
    # requested, e.g. FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled)
|