# app.py
import os
import tempfile

import fitz  # PyMuPDF
from flask import Flask, request, jsonify
  4. app = Flask(__name__)
  5. # Define the upload folder
  6. UPLOAD_FOLDER = 'uploads'
  7. if not os.path.exists(UPLOAD_FOLDER):
  8. os.makedirs(UPLOAD_FOLDER)
  9. @app.route('/upload', methods=['POST'])
  10. def upload_file():
  11. # Check if the post request has the file part
  12. if 'file' not in request.files:
  13. return jsonify({'error': 'No file part in the request'}), 400
  14. file = request.files['file']
  15. # If the user does not select a file, the browser submits an
  16. # empty file without a filename.
  17. if file.filename == '':
  18. return jsonify({'error': 'No selected file'}), 400
  19. if file and allowed_file(file.filename):
  20. filepath = os.path.join(UPLOAD_FOLDER, file.filename)
  21. file.save(filepath)
  22. # Extract text from the PDF
  23. text = extract_text_from_pdf(filepath)
  24. # Optionally, remove the file after processing
  25. os.remove(filepath)
  26. return jsonify({'text': text}), 200
  27. else:
  28. return jsonify({'error': 'Invalid file type. Only PDF files are allowed.'}), 400
  29. def allowed_file(filename):
  30. return '.' in filename and filename.rsplit('.', 1)[1].lower() == 'pdf'
  31. def extract_text_from_pdf(filepath):
  32. document = fitz.open(filepath)
  33. text = ""
  34. for page_num in range(len(document)):
  35. page = document.load_page(page_num)
  36. text += page.get_text("text")
  37. return text
  38. if __name__ == '__main__':
  39. app.run(debug=True)