|
|
@@ -0,0 +1,51 @@
|
|
|
+from flask import Flask, request, jsonify
|
|
|
+import fitz # PyMuPDF
|
|
|
+import os
|
|
|
+
|
|
|
# WSGI application object that the route decorators register against.
app = Flask(__name__)

# Folder (relative to the current working directory) where uploaded PDFs are
# staged while their text is extracted.
UPLOAD_FOLDER = 'uploads'

# exist_ok=True creates the folder when missing and is a no-op otherwise,
# avoiding the race between a separate existence check and the creation
# (e.g. when several worker processes start at the same time).
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
|
|
+
|
|
|
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a PDF upload and return its extracted text as JSON.

    Expects a multipart POST request carrying the PDF under the ``file``
    form field.

    Returns:
        A ``(response, status)`` tuple: ``{'text': ...}`` with status 200 on
        success, or ``{'error': ...}`` with status 400 when the file part is
        missing, no file was selected, or the file name is not a ``.pdf``.
    """
    # Function-scope import so this handler does not rely on a module-level
    # import being added elsewhere.
    import tempfile

    # Check if the post request has the file part.
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request'}), 400

    file = request.files['file']

    # If the user does not select a file, the browser submits an
    # empty file without a filename.
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if not (file and allowed_file(file.filename)):
        return jsonify({'error': 'Invalid file type. Only PDF files are allowed.'}), 400

    # Never build the on-disk path from the client-supplied filename: it is
    # untrusted (e.g. "../../x.pdf") and two concurrent uploads with the same
    # name would overwrite each other. Stage under a unique, server-generated
    # name instead.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    fd, filepath = tempfile.mkstemp(suffix='.pdf', dir=UPLOAD_FOLDER)
    os.close(fd)  # file.save() reopens the path itself
    try:
        file.save(filepath)
        # Extract text from the PDF.
        text = extract_text_from_pdf(filepath)
    finally:
        # Remove the staged file even when saving or extraction fails, so
        # failed requests do not accumulate files on disk.
        os.remove(filepath)

    return jsonify({'text': text}), 200
|
|
|
+
|
|
|
def allowed_file(filename):
    """Return True when *filename* has a ``pdf`` extension (case-insensitive).

    The extension is whatever follows the last ``.`` in the name; a name
    without any dot is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No '.' anywhere in the name, so there is no extension to check.
        return False
    return extension.lower() == 'pdf'
|
|
|
+
|
|
|
def extract_text_from_pdf(filepath):
    """Return the plain text of every page of the PDF at *filepath*.

    Args:
        filepath: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of all pages concatenated in page order.
    """
    # The context manager closes the document (and its file handle) even if
    # a page fails to extract; an open handle would otherwise leak and can
    # prevent the caller from deleting the file on Windows.
    with fitz.open(filepath) as document:
        # Join once instead of repeated "+=" to avoid quadratic string
        # building on long documents.
        return "".join(page.get_text("text") for page in document)
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Debug mode turns on the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary code. Keep it off
    # unless explicitly requested for local development via FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)
|