"""
LLM Operations Module
Handles website extraction and LLM analysis operations
"""

import os
import logging
from datetime import datetime
from typing import List
from dotenv import load_dotenv

from langchain.chat_models import init_chat_model
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from langgraph.graph import START, StateGraph
from typing_extensions import TypedDict

from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

from .crawler import Crawler

# Load environment variables
load_dotenv()

# Configuration
MODEL_NAME = 'gpt-5-mini'
EMBEDDINGS_MODEL_NAME = 'text-embedding-3-small'
PROMPT_FILE = 'proprietary/prompt.txt'

# Initialize the shared Qdrant client.
# os.getenv() yields a string (or None when the variable is unset), while the
# client expects an integer port. Convert it, and only pass `port` when it is
# actually configured so that an unset variable falls back to the client's own
# default instead of an explicit None.
_qdrant_port = os.getenv('QDRANT_PORT')
_qdrant_kwargs = {
    'url': os.getenv('QDRANT_URL'),
    'api_key': os.getenv('QDRANT_API_KEY'),
    'timeout': 100,
}
if _qdrant_port:
    _qdrant_kwargs['port'] = int(_qdrant_port)

client = QdrantClient(**_qdrant_kwargs)

def extract_website(company, url):
    """Crawl a website and (re)build the company's vector collection.

    The pages reported by the crawler are downloaded, split into overlapping
    text chunks, stamped with the time of the scan and embedded into a Qdrant
    collection named after ``company``. A collection that already exists
    under that name is dropped and recreated, so each call replaces the
    previously stored content.

    Args:
        company: Passed to the crawler and used as the Qdrant collection name.
        url: Passed to the crawler as the site to crawl.
    """
    # Discover the pages that belong to the site
    site_crawler = Crawler(company=company, url=url)
    site_crawler.run()
    page_urls = site_crawler.get_pages()
    logging.debug(page_urls)

    # Download the pages and cut them into overlapping chunks
    pages = UnstructuredURLLoader(urls=page_urls, show_progress_bar=True).load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    ).split_documents(pages)

    # Every chunk of this run carries the same scan timestamp
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    for chunk in chunks:
        chunk.metadata['scan_timestamp'] = stamp

    embedder = OpenAIEmbeddings(model=EMBEDDINGS_MODEL_NAME)

    # Start from an empty collection: drop any previous scan first
    already_indexed = client.collection_exists(collection_name=company)
    if already_indexed:
        client.delete_collection(collection_name=company)

    # The vector size is fixed here and has to agree with the dimensionality
    # of the embeddings produced by EMBEDDINGS_MODEL_NAME.
    client.create_collection(
        collection_name=company,
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

    QdrantVectorStore(
        client=client,
        collection_name=company,
        embedding=embedder,
        retrieval_mode=RetrievalMode.DENSE,
    ).add_documents(documents=chunks)


def run_llm(company: str, question: str, clarification: str = "") -> dict:
    """Answer a question about a company using its vector collection.

    Builds a two-step LangGraph pipeline: ``retrieve`` fetches the five
    chunks most similar to the question from the Qdrant collection named
    ``company``, and ``generate`` fills the prompt template from
    ``PROMPT_FILE`` with the question, the retrieved context and the
    clarification, then sends it to the chat model.

    Args:
        company: Name of the Qdrant collection to search.
        question: The question to answer; also used as the retrieval query.
        clarification: Optional extra text substituted into the prompt.

    Returns:
        The result of invoking the compiled graph; the steps write the
        ``context`` (retrieved documents) and ``answer`` (model output) keys.

    Raises:
        OSError: If the prompt template file cannot be opened.
    """
    llm = init_chat_model(MODEL_NAME, model_provider="openai")

    vector_store = QdrantVectorStore(
        client=client,
        collection_name=company,
        embedding=OpenAIEmbeddings(model=EMBEDDINGS_MODEL_NAME),
        retrieval_mode=RetrievalMode.DENSE,
    )

    # Load prompt template from file (before the steps that close over it)
    with open(PROMPT_FILE, 'r', encoding='utf-8') as f:
        prompt = PromptTemplate.from_template(f.read())

    # Define state for application
    class State(TypedDict):
        question: str
        clarification: str
        context: List[Document]
        answer: str

    # Define application steps
    def retrieve(state: State):
        """Fetch the chunks most similar to the question."""
        retrieved_docs = vector_store.similarity_search(state["question"], k=5)
        return {"context": retrieved_docs}

    def generate(state: State):
        """Render the prompt with the retrieved context and query the model."""
        # One "Bron/Inhoud" (source/content) entry per retrieved chunk. The
        # source is stringified on its own, so a non-string or missing value
        # can no longer break the concatenation.
        docs_content = '\n\n' + ''.join(
            f"Bron: {doc.metadata.get('source', '')}\n"
            f"Inhoud: {doc.page_content}\n"
            for doc in state['context']
        )

        messages = prompt.invoke({
            "question": state["question"],
            "context": docs_content,
            "clarification": state["clarification"]
        })

        logging.info(messages)
        response = llm.invoke(messages)
        return {"answer": response.content}

    # Compile the graph: START -> retrieve -> generate
    graph_builder = StateGraph(State).add_sequence([retrieve, generate])
    graph_builder.add_edge(START, "retrieve")
    graph = graph_builder.compile()

    return graph.invoke({"question": question, "clarification": clarification})


def get_qdrant_client() -> "QdrantClient":
    """Return the module-level Qdrant client.

    The same ``client`` object created when this module is imported is handed
    out on every call; no new connection object is constructed here.
    """
    shared_client = client
    return shared_client
