How to create a Docker container to run the Document Processing Agent by AI-Team:
Let us generate these files now.
from flask import Flask, jsonify, request
from flask_cors import CORS # Import CORS
import requests
from functools import lru_cache
import asyncio
import easyocr
from PIL import Image
import io
import re
app = Flask(__name__)
CORS(app) # Enable CORS globally for all routes
# EasyOCR reader is created once at import time (model load is expensive);
# 'en' restricts recognition to English text.
reader = easyocr.Reader(['en'])
# Mock database (replace with real database later)
mock_user_data = {
    "userId": 1,
    "panNumber": "ABCDE1234F",
    "registeredAddress": "123 Business Park, Mumbai, Maharashtra, India"
}
# Cache LLM responses for frequently asked questions
@lru_cache(maxsize=128)
def get_llm_response(prompt):
    """Query the local Ollama server and return a short answer (max 10 words).

    Results are memoized per prompt via lru_cache, so identical prompts hit
    the LLM only once per process lifetime.
    NOTE(review): error strings are cached too, so a transient outage can
    pin a failure response for a prompt until restart — confirm acceptable.

    Args:
        prompt: The user-facing question to send to the model.

    Returns:
        The model's answer truncated to 10 words, or an error string when
        the LLM is unreachable or responds with a non-200 status.
    """
    full_prompt = f"{prompt} Please provide a short and relevant answer (max 10 words)."
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": "llama3.2",
        "prompt": full_prompt,
        "stream": False
    }
    try:
        # A timeout prevents a hung Ollama server from blocking the request
        # thread indefinitely; connection failures are handled instead of
        # propagating an unhandled exception to the endpoint.
        response = requests.post(url, json=payload, timeout=60)
    except requests.RequestException as exc:
        print(f"LLM request failed: {exc}")
        return "Error communicating with LLM."
    if response.status_code == 200:
        llm_response = response.json().get("response", "I am not sure about this.")
        if len(llm_response.split()) > 10:  # Limit to 10 words
            llm_response = " ".join(llm_response.split()[:10]) + "..."
        return llm_response
    return "Error communicating with LLM."
# Endpoint for basic and generic data
@app.route('/api/auto-fill', methods=['GET'])
async def auto_fill():
    """Return stored user details plus two LLM-generated form fields.

    Both LLM prompts run concurrently on worker threads, so the endpoint
    waits only as long as the slower of the two calls.
    """
    prompts = (
        "Describe the primary business activity for a company in India.",
        "What are common ownership types for Indian companies?",
    )
    pending = [asyncio.to_thread(get_llm_response, text) for text in prompts]
    business_activity, ownership_type = await asyncio.gather(*pending)
    return jsonify({
        "panNumber": mock_user_data["panNumber"],
        "registeredAddress": mock_user_data["registeredAddress"],
        "businessActivity": business_activity,
        "ownershipType": ownership_type
    })
# Other endpoints remain unchanged...
# (The EasyOCR reader used below was initialized once at module load,
# with English as the only recognition language.)
# Endpoint for document processing
@app.route('/api/process-documents', methods=['POST'])
def process_documents():
    """OCR uploaded images/PDFs and extract GST and Aadhaar numbers.

    Accepts multipart uploads under the 'documents' key. Supported types:
    PNG/JPG/JPEG images and PDFs (converted page-by-page via pdf2image).

    Returns:
        JSON {"gstNumber": ..., "directorAadhaar": ...} with empty strings
        for anything not found; 400 for missing/unsupported files; 500 on
        OCR/conversion failure.
    """
    try:
        print("Processing uploaded documents...")  # Log the start of the process
        files = request.files.getlist('documents')
        if not files:
            return jsonify({"error": "No files uploaded"}), 400
        extracted_data = {"gstNumber": "", "directorAadhaar": ""}
        for file in files:
            print(f"Processing file: {file.filename}")  # Log the file name
            # Check file type
            if file.filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                print("Detected image file.")  # Log file type
                try:
                    image = Image.open(io.BytesIO(file.read()))
                    # Re-encode as PNG bytes so EasyOCR gets a uniform input format
                    image_bytes = io.BytesIO()
                    image.save(image_bytes, format='PNG')
                    image_bytes.seek(0)
                    # Extract text using EasyOCR; each result item is
                    # (bbox, text, confidence) — we only keep the text
                    result = reader.readtext(image_bytes.getvalue())
                    text = " ".join([item[1] for item in result])
                    print(f"Extracted text from image: {text[:100]}...")  # Log extracted text
                except Exception as e:
                    print(f"Error processing image: {e}")
                    return jsonify({"error": f"Failed to process image file: {file.filename}"}), 500
            elif file.filename.lower().endswith('.pdf'):
                print("Detected PDF file.")  # Log file type
                try:
                    from pdf2image import convert_from_bytes
                    images = convert_from_bytes(file.read())
                    text = ""
                    for img in images:
                        # Convert each page to PNG bytes for EasyOCR
                        img_byte_arr = io.BytesIO()
                        img.save(img_byte_arr, format='PNG')
                        img_byte_arr.seek(0)
                        result = reader.readtext(img_byte_arr.getvalue())
                        text += " ".join([item[1] for item in result])
                    print(f"Extracted text from PDF: {text[:100]}...")  # Log extracted text
                except Exception as e:
                    print(f"Error processing PDF: {e}")
                    return jsonify({"error": f"Failed to process PDF file: {file.filename}"}), 500
            else:
                return jsonify({"error": f"Unsupported file type: {file.filename}"}), 400
            # Extract GST and Aadhaar numbers using regex
            gst_match = re.search(r'\b\d{2}[A-Z]{5}\d{4}[A-Z]{1}[A-Z\d]{1}[Z]{1}[A-Z\d]{1}\b', text)
            # Aadhaar numbers are printed with either space- or dash-separated
            # groups of four digits; accept both separators (previously only
            # dashes matched, missing the common space-separated form).
            aadhaar_match = re.search(r'\b\d{4}[-\s]\d{4}[-\s]\d{4}\b', text)
            if gst_match:
                extracted_data["gstNumber"] = gst_match.group(0)
                print(f"Extracted GST Number: {gst_match.group(0)}")  # Log GST number
            if aadhaar_match:
                extracted_data["directorAadhaar"] = aadhaar_match.group(0)
                print(f"Extracted Aadhaar Number: {aadhaar_match.group(0)}")  # Log Aadhaar number
        return jsonify(extracted_data)
    except Exception as e:
        print(f"Unexpected error: {e}")  # Log unexpected errors
        return jsonify({"error": "An unexpected error occurred while processing the documents."}), 500
# Mock function to validate company names against Indian naming conventions
def validate_company_name(name):
    """Return True unless *name* contains a prohibited word (case-insensitive)."""
    prohibited_words = ("bank", "government", "reserve")  # Example prohibited words
    lowered = name.lower()
    return not any(word in lowered for word in prohibited_words)
# Function to generate AI-based name suggestions
def get_ai_suggestions(firstName):
    """Ask the local Ollama LLM for up to 5 company-name suggestions.

    Args:
        firstName: The user's preferred base name to riff on.

    Returns:
        A list of at most 5 non-empty suggestion strings, or [] when the
        LLM is unreachable or responds with a non-200 status (callers
        already treat an empty list as failure).
    """
    # Craft a prompt for the LLM
    prompt = f"Generate 5 creative and unique company name suggestions based on '{firstName}' that adhere to Indian company naming conventions."
    # Call the LLM via Ollama
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": "llama3.2",
        "prompt": prompt,
        "stream": False
    }
    try:
        # Timeout guards against a hung Ollama server; network failures
        # degrade to an empty suggestion list instead of an unhandled
        # exception bubbling out of the endpoint.
        response = requests.post(url, json=payload, timeout=60)
    except requests.RequestException as exc:
        print(f"LLM request failed: {exc}")
        return []
    if response.status_code == 200:
        llm_response = response.json().get("response", "")
        # One suggestion per non-blank line of the model's reply
        suggestions = [name.strip() for name in llm_response.split("\n") if name.strip()]
        return suggestions[:5]  # Return only 5 suggestions
    return []
# Endpoint for AI-based name suggestions
@app.route('/api/suggest-names', methods=['POST'])
def suggest_names():
    """Validate the user's preferred name and return 5 AI-generated alternatives.

    Expects JSON {"firstName": ...}; responds 400 on a missing or invalid
    name, 500 when suggestion generation fails or an unexpected error occurs.
    """
    try:
        payload = request.get_json()
        preferred = payload.get("firstName", "").strip()
        # Guard clauses: reject missing or non-conforming names up front
        if not preferred:
            return jsonify({"error": "First name preference is required."}), 400
        if not validate_company_name(preferred):
            return jsonify({"error": "The provided name does not meet Indian company naming conventions."}), 400
        # Generate AI-based suggestions
        results = get_ai_suggestions(preferred)
        if not results:
            return jsonify({"error": "Failed to generate name suggestions."}), 500
        return jsonify({"suggestions": results})
    except Exception as e:
        print(f"Unexpected error: {e}")
        return jsonify({"error": "An unexpected error occurred while generating name suggestions."}), 500
# Run the Flask development server on port 3000 (debug mode; not for production).
if __name__ == '__main__':
    app.run(debug=True, port=3000)
The frontend file provides a simple interface to interact with the backend.
The Dockerfile creates a containerized environment for the Flask application.
The requirements.txt file lists all dependencies.
Now you have everything needed to run the AI-powered form generator and document processor inside a Docker container. Let me know if you need any modifications.