"""
Gemini 2.0 Flash Dosage Extractor
AI fallback for low-confidence dosage extractions.
Fast (~200ms), cheap (~$0.0001/request), and accurate.
"""

import os
import json
import time
from typing import Dict, List, Optional

from src.utils.logger import get_logger

logger = get_logger(__name__)


# Try new google.genai first, then fall back to legacy google.generativeai
# NOTE(review): _GENAI_CLIENT is assigned here but is not referenced in the
# visible part of this file — confirm whether it is still needed.
_GENAI_CLIENT = None
# Name of the SDK that imported successfully ("google.genai" or
# "google.generativeai"); stays None when neither package can be imported.
_GENAI_BACKEND = None

try:
    # Preferred: the newer SDK exposed as `google.genai`.
    from google import genai
    _GENAI_BACKEND = "google.genai"
except ImportError:
    try:
        # Fallback: the legacy SDK exposed as `google.generativeai`.
        import google.generativeai as genai_legacy
        _GENAI_BACKEND = "google.generativeai"
    except ImportError:
        # Neither SDK is importable; _GENAI_BACKEND remains None.
        logger.warning("Neither google-genai nor google-generativeai installed. Gemini unavailable.")


class GeminiDosageExtractor:
    """
    Extract and verify dosage information using a Google Gemini model
    (selected via the GEMINI_MODEL environment variable; defaults to
    "gemini-2.5-flash-lite").
    Used as AI fallback when regex confidence < 0.85.
    """

    EXTRACTION_PROMPT = """You are a medical dosage extraction expert. Extract structured dosage information from the following medicine data.

Medicine Name: {medicine_name}
Composition: {composition}
How to Use: {how_to_use}
Medicine Type: {medicine_type}
Primary Use: {primary_use}

Extract and return ONLY a JSON object with these fields:
{{
    "quantity": "dosage strength (e.g., '500 mg', '1 tablet', '10 ml', 'As prescribed')",
    "dosage": "frequency code: OD (once daily), BD (twice daily), TDS (thrice daily), QDS (four times daily), OW (once weekly), BW (twice weekly), OM (once monthly), OY (once yearly), or ONCE (single dose/one-time)",
    "frequency": "human readable frequency (e.g., 'Once daily', 'Twice daily')",
    "dose_morning": 0 or 1 or 0.5,
    "dose_afternoon": 0 or 1 or 0.5,
    "dose_night": 0 or 1 or 0.5,
    "meal_preference": "one of: Empty Stomach, Before Meal, After Meal, With Meal, At Bedtime, As advised",
    "duration": "treatment duration (e.g., '5-7 days', 'Long-term', 'As prescribed')",
    "confidence": 0.0 to 1.0
}}

Rules:
- Use the composition to determine quantity/strength
- Use how_to_use text to determine frequency and meal timing
- Use primary_use to infer duration (antibiotics: 5-7 days, chronic conditions: Long-term)
- dose_morning/dose_afternoon/dose_night = how many units at breakfast/lunch/dinner:
  - OD morning: 1,0,0 | OD bedtime: 0,0,1 | BD: 1,0,1 | TDS: 1,1,1 | OW/BW/OM/OY/ONCE: 1,0,0
  - Use 0.5 for half-tablet dosing
- For meal_preference, be specific: prefer "Before Meal" or "After Meal" over "With Meal". Use "With Meal" only when the medicine must be taken during a meal (e.g., Metformin, enzyme supplements). Use "Before Meal" for medicines that need an empty or near-empty stomach for absorption (e.g., PPIs, antacids, H2 blockers). Use "After Meal" for medicines that may cause gastric irritation (e.g., NSAIDs, steroids).
- If information is missing or unclear, use safe defaults and lower confidence
- Return ONLY the JSON object, no markdown or explanation"""

    VERIFICATION_PROMPT = """You are a medical dosage verification expert. Verify if the following extracted dosage data is medically accurate.

Medicine: {medicine_name}
Composition: {composition}
Extracted Data:
- Dosage: {dosage} ({frequency})
- Meal Preference: {meal_preference}
- Duration: {duration}

Is this medically appropriate? Respond ONLY with a JSON object:
{{
    "is_correct": true or false,
    "corrected_dosage": "OD/BD/TDS/QDS or keep original",
    "corrected_meal": "corrected meal preference or keep original",
    "corrected_duration": "corrected duration or keep original",
    "confidence": 0.0 to 1.0,
    "explanation": "brief reason"
}}

Return ONLY the JSON object."""

    def __init__(self):
        self._available = False
        self._client = None  # For google.genai (new SDK)
        self._model = None   # For google.generativeai (legacy SDK)
        self._backend = _GENAI_BACKEND
        self._cache = {}
        self._cache_max = 10000
        self._model_name = os.getenv("GEMINI_MODEL", "gemini-2.5-flash-lite")

        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            logger.warning("GOOGLE_API_KEY not set in environment")
            return

        if not self._backend:
            return

        try:
            if self._backend == "google.genai":
                from google import genai as genai_new
                self._client = genai_new.Client(api_key=api_key)
                self._available = True
                logger.info(f"Gemini extractor initialized (google.genai SDK, model={self._model_name})")
            elif self._backend == "google.generativeai":
                import google.generativeai as genai_legacy
                genai_legacy.configure(api_key=api_key)
                self._model = genai_legacy.GenerativeModel(self._model_name)
                self._available = True
                logger.info(f"Gemini extractor initialized (legacy SDK, model={self._model_name})")
        except Exception as e:
            logger.warning(f"Gemini setup failed: {e}")

    def is_available(self) -> bool:
        return self._available

    def _generate(self, prompt: str, max_tokens: int = 1024) -> Optional[str]:
        """Generate content using whichever SDK is available. No retries on rate-limit to avoid extra billing."""
        try:
            if self._backend == "google.genai" and self._client:
                response = self._client.models.generate_content(
                    model=self._model_name,
                    contents=prompt,
                    config={
                        "temperature": 0.1,
                        "max_output_tokens": max_tokens,
                    },
                )
                return response.text
            elif self._backend == "google.generativeai" and self._model:
                import google.generativeai as genai_legacy
                response = self._model.generate_content(
                    prompt,
                    generation_config=genai_legacy.types.GenerationConfig(
                        temperature=0.1,
                        max_output_tokens=max_tokens,
                    ),
                )
                return response.text
            return None
        except Exception as e:
            err_str = str(e)
            if '429' in err_str or 'RESOURCE_EXHAUSTED' in err_str:
                logger.warning(f"Gemini rate-limited — skipping (no retry to save billing)")
                return None
            raise

    def extract_dosage(
        self,
        medicine_name: str,
        composition: str,
        how_to_use: str,
        medicine_type: str = "",
        primary_use: str = "",
    ) -> Optional[Dict]:
        """
        Extract dosage information using Gemini 2.0 Flash.

        Returns dict with: quantity, dosage, frequency, meal_preference, duration, confidence
        Returns None if Gemini is unavailable or fails.
        """
        if not self._available:
            return None

        # Check cache
        cache_key = f"{medicine_name}|{composition}|{how_to_use}"
        if cache_key in self._cache:
            logger.debug(f"Cache hit for {medicine_name}")
            return self._cache[cache_key]

        prompt = self.EXTRACTION_PROMPT.format(
            medicine_name=medicine_name,
            composition=composition,
            how_to_use=how_to_use,
            medicine_type=medicine_type,
            primary_use=primary_use,
        )

        try:
            text = self._generate(prompt)
            if text:
                result = self._parse_json_response(text)
                if result:
                    result = self._normalize_result(result)
                    if len(self._cache) < self._cache_max:
                        self._cache[cache_key] = result
                    logger.info(f"Gemini extracted dosage for {medicine_name} (conf={result.get('confidence', 0):.2f})")
                    return result
        except Exception as e:
            logger.error(f"Gemini extraction failed for {medicine_name}: {e}")

        return None

    def verify_dosage(
        self,
        medicine_name: str,
        composition: str,
        extracted_data: Dict,
    ) -> Optional[Dict]:
        """
        Verify extracted dosage data using Gemini.
        Returns verification result or None on failure.
        """
        if not self._available:
            return None

        prompt = self.VERIFICATION_PROMPT.format(
            medicine_name=medicine_name,
            composition=composition,
            dosage=extracted_data.get('dosage', 'BD'),
            frequency=extracted_data.get('frequency', 'Twice daily'),
            meal_preference=extracted_data.get('meal_preference', 'As advised'),
            duration=extracted_data.get('duration', 'As prescribed'),
        )

        try:
            text = self._generate(prompt)
            if text:
                result = self._parse_json_response(text)
                if result:
                    logger.info(f"Gemini verified {medicine_name}: correct={result.get('is_correct')}")
                    return result
        except Exception as e:
            logger.error(f"Gemini verification failed for {medicine_name}: {e}")

        return None

    DOSAGE_LOOKUP_PROMPT = """You are a pharmaceutical expert. Given ONLY a medicine name, provide accurate dosage information about it.

Medicine Name: {medicine_name}

Return ONLY a JSON object with these fields:
{{
    "composition": "active ingredients with strengths (e.g., 'Paracetamol 650mg')",
    "quantity": "dosage strength (e.g., '650 mg', '1 tablet', '10 ml')",
    "dosage": "frequency code: OD (once daily), BD (twice daily), TDS (thrice daily), QDS (four times daily), OW (once weekly), BW (twice weekly), OM (once monthly), OY (once yearly), or ONCE (single dose/one-time)",
    "frequency": "human readable (e.g., 'Twice daily')",
    "dose_morning": 0 or 1 or 0.5,
    "dose_afternoon": 0 or 1 or 0.5,
    "dose_night": 0 or 1 or 0.5,
    "meal_preference": "one of: Empty Stomach, Before Meal, After Meal, With Meal, At Bedtime, As advised",
    "duration": "typical duration (e.g., '5-7 days', 'Long-term', 'As prescribed')",
    "confidence": 0.0 to 1.0
}}

Rules:
- If you recognize the medicine, provide accurate pharmaceutical data
- dose_morning/dose_afternoon/dose_night = how many units at breakfast/lunch/dinner:
  - OD morning: 1,0,0 | OD bedtime: 0,0,1 | BD: 1,0,1 | TDS: 1,1,1 | OW/BW/OM/OY/ONCE: 1,0,0
  - Use 0.5 for half-tablet dosing
- For meal_preference, be specific: prefer "Before Meal" or "After Meal" over "With Meal". Use "With Meal" only when the medicine must be taken during a meal (e.g., Metformin, enzyme supplements). Use "Before Meal" for medicines that need an empty or near-empty stomach for absorption (e.g., PPIs, antacids, H2 blockers). Use "After Meal" for medicines that may cause gastric irritation (e.g., NSAIDs, steroids).
- If you don't recognize the medicine name, set confidence below 0.5 and use safe defaults
- Return ONLY the JSON object, no markdown or explanation"""

    LOOKUP_PROMPT = """You are a pharmaceutical expert. Given ONLY a medicine name, provide complete structured information about it.

Medicine Name: {medicine_name}

Return ONLY a JSON object with these fields:
{{
    "composition": "active ingredients with strengths (e.g., 'Paracetamol 650mg')",
    "product_form": "physical form of the product (e.g., 'Tablet', 'Capsule', 'Syrup', 'Injection', 'Cream', 'Drops', 'Gel', 'Ointment', 'Inhaler', 'Suspension')",
    "medicine_type": "type (e.g., 'Tablet', 'Capsule', 'Syrup', 'Injection', 'Cream')",
    "primary_use": "main therapeutic use (e.g., 'Treatment of fever and mild to moderate pain')",
    "how_to_use": "usage instructions (e.g., 'Take with water after meal. BD (twice daily).')",
    "quantity": "dosage strength (e.g., '650 mg', '1 tablet')",
    "dosage": "frequency code: OD, BD, TDS, QDS, OW (once weekly), BW (twice weekly), OM (once monthly), OY (once yearly), or ONCE (single dose/one-time)",
    "frequency": "human readable (e.g., 'Twice daily')",
    "dose_morning": 0 or 1 or 0.5,
    "dose_afternoon": 0 or 1 or 0.5,
    "dose_night": 0 or 1 or 0.5,
    "meal_preference": "one of: Empty Stomach, Before Meal, After Meal, With Meal, At Bedtime, As advised",
    "duration": "typical duration (e.g., '5-7 days', 'Long-term', 'As prescribed')",
    "common_side_effects": "common side effects (e.g., 'Nausea, Dizziness, Headache')",
    "benefits": "key therapeutic benefits and advantages of this medicine",
    "how_it_works": "mechanism of action in simple terms",
    "safety_advice": "important warnings, precautions, and who should avoid this medicine",
    "allergens": "known allergens, drug allergies, and cross-sensitivities (e.g., 'Penicillin allergy patients should avoid')",
    "symptoms": "symptoms/conditions this medicine treats (e.g., 'fever, headache, body pain, mild to moderate pain')",
    "alcohol_interaction": "interaction with alcohol (Safe, Unsafe, or Caution with explanation)",
    "pregnancy_interaction": "safety during pregnancy (Safe, Unsafe, Consult Doctor, or specific category)",
    "lactation_interaction": "safety during breastfeeding",
    "age_group_dosages": {{
        "infant_0_2": {{"dosage": "dose for 0-2 years or 'Not recommended'", "notes": "special instructions"}},
        "child_2_12": {{"dosage": "dose for 2-12 years", "notes": "e.g., weight-based dosing"}},
        "adolescent_12_18": {{"dosage": "dose for 12-18 years", "notes": "special instructions"}},
        "adult_18_65": {{"dosage": "standard adult dose", "notes": "standard instructions"}},
        "elderly_65_plus": {{"dosage": "dose for 65+ years", "notes": "e.g., reduced dose, renal adjustment"}}
    }},
    "confidence": 0.0 to 1.0
}}

Rules:
- If you recognize the medicine, provide accurate pharmaceutical data
- Age group dosages must be medically accurate; use "Consult doctor" if unsure
- dose_morning/dose_afternoon/dose_night = how many units at breakfast/lunch/dinner:
  - OD morning: 1,0,0 | OD bedtime: 0,0,1 | BD: 1,0,1 | TDS: 1,1,1 | OW/BW/OM/OY/ONCE: 1,0,0
  - Use 0.5 for half-tablet dosing
- For meal_preference, be specific: prefer "Before Meal" or "After Meal" over "With Meal". Use "With Meal" only when the medicine must be taken during a meal (e.g., Metformin, enzyme supplements). Use "Before Meal" for medicines that need an empty or near-empty stomach for absorption (e.g., PPIs, antacids, H2 blockers). Use "After Meal" for medicines that may cause gastric irritation (e.g., NSAIDs, steroids).
- If you don't recognize the medicine name, set confidence below 0.5 and use safe defaults
- Return ONLY the JSON object, no markdown or explanation"""

    def lookup_medicine_dosage(self, medicine_name: str) -> Optional[Dict]:
        """
        Lightweight lookup: returns only dosage fields for a medicine name.
        Much smaller response than lookup_unknown_medicine, unlikely to truncate.
        """
        if not self._available:
            return None

        cache_key = f"dosage_lookup|{medicine_name}"
        if cache_key in self._cache:
            logger.debug(f"Cache hit for dosage lookup: {medicine_name}")
            return self._cache[cache_key]

        prompt = self.DOSAGE_LOOKUP_PROMPT.format(medicine_name=medicine_name)

        try:
            text = self._generate(prompt, max_tokens=1024)
            if text:
                result = self._parse_json_response(text)
                if result:
                    result = self._normalize_result(result)
                    result['medicine_name'] = medicine_name
                    if len(self._cache) < self._cache_max:
                        self._cache[cache_key] = result
                    logger.info(
                        f"Gemini dosage lookup: {medicine_name} "
                        f"(conf={result.get('confidence', 0):.2f})"
                    )
                    return result
        except Exception as e:
            logger.error(f"Gemini dosage lookup failed for {medicine_name}: {e}")

        return None

    def batch_lookup_medicine_dosages(self, medicines) -> Dict[str, Optional[Dict]]:
        """
        Batch lookup: returns dosage fields for multiple medicines.

        Args:
            medicines: Either a List[str] of medicine names (backward compatible),
                       or a List[dict] with keys: name, composition, how_to_use, primary_use
        """
        # Normalize input to list of dicts
        if not medicines:
            return {}
        if isinstance(medicines[0], str):
            med_list = [{"name": n} for n in medicines]
        else:
            med_list = medicines

        medicine_names = [m["name"] for m in med_list]

        if not self._available or not medicine_names:
            return {name: None for name in medicine_names}

        results = {}
        uncached = []
        uncached_meds = []
        for m in med_list:
            cache_key = f"dosage_lookup|{m['name']}"
            if cache_key in self._cache:
                results[m["name"]] = self._cache[cache_key]
            else:
                uncached.append(m["name"])
                uncached_meds.append(m)

        if not uncached:
            return results

        if len(uncached) == 1:
            results[uncached[0]] = self.lookup_medicine_dosage(uncached[0])
            return results

        # Build medicine list with context for better identification
        medicine_lines = []
        for m in uncached_meds:
            line = f"- {m['name']}"
            details = []
            if m.get("composition"):
                details.append(f"Composition: {m['composition']}")
            if m.get("how_to_use"):
                details.append(f"How to use: {m['how_to_use'][:200]}")
            if m.get("primary_use"):
                details.append(f"Primary use: {m['primary_use'][:200]}")
            if details:
                line += " | " + " | ".join(details)
            medicine_lines.append(line)

        medicines_text = "\n".join(medicine_lines)

        # Build a batch prompt with dosage + age-specific dosages
        prompt = f"""You are a pharmaceutical expert. Given a list of medicines with their composition and usage context, provide accurate dosage information including age-specific dosages for each.

Medicines:
{medicines_text}

Return ONLY a JSON array where each element has these fields:
[
  {{
    "medicine_name": "exact medicine name as provided",
    "composition": "active ingredients with strengths",
    "quantity": "dosage strength (e.g., '500 mg', '1 tablet', '10 ml')",
    "dosage": "frequency code: OD, BD, TDS, QDS, OW (once weekly), BW (twice weekly), OM (once monthly), OY (once yearly), or ONCE (single dose/one-time)",
    "frequency": "human readable frequency (e.g., 'Once daily', 'Twice daily')",
    "dose_morning": 0 or 1 or 0.5,
    "dose_afternoon": 0 or 1 or 0.5,
    "dose_night": 0 or 1 or 0.5,
    "meal_preference": "one of: Empty Stomach, Before Meal, After Meal, With Meal, At Bedtime, As advised",
    "duration": "typical duration (e.g., '5-7 days', 'Long-term', 'As prescribed')",
    "age_group_dosages": {{
      "infant_0_2": {{"dosage": "dose for 0-2 years or 'Not recommended'", "notes": "special instructions"}},
      "child_2_12": {{"dosage": "dose for 2-12 years", "notes": "e.g., weight-based dosing"}},
      "adolescent_12_18": {{"dosage": "dose for 12-18 years", "notes": "special instructions"}},
      "adult_18_65": {{"dosage": "standard adult dose", "notes": "standard instructions"}},
      "elderly_65_plus": {{"dosage": "dose for 65+ years", "notes": "e.g., reduced dose, renal adjustment"}}
    }},
    "confidence": 0.0 to 1.0
  }}
]

Rules:
- Return one object per medicine in the same order as provided
- dose_morning/dose_afternoon/dose_night represent how many units to take at breakfast/lunch/dinner:
  - OD morning medicine: dose_morning=1, dose_afternoon=0, dose_night=0
  - OD bedtime medicine: dose_morning=0, dose_afternoon=0, dose_night=1
  - BD medicine: dose_morning=1, dose_afternoon=0, dose_night=1
  - TDS medicine: dose_morning=1, dose_afternoon=1, dose_night=1
  - Half-tablet dosing: use 0.5 (e.g., dose_morning=0.5, dose_afternoon=0, dose_night=0.5)
- Age group dosages must be medically accurate; use "Consult doctor" if unsure
- For meal_preference, be specific: prefer "Before Meal" or "After Meal" over "With Meal". Use "With Meal" only when the medicine must be taken during a meal (e.g., Metformin). Use "Before Meal" for medicines needing empty stomach (e.g., PPIs, antacids). Use "After Meal" for medicines causing gastric irritation (e.g., NSAIDs, steroids).
- Provide medically accurate data. Use standard pharmaceutical references.
- If you don't recognize a medicine, set confidence below 0.5 and use safe defaults
- Return ONLY the JSON array, no markdown or explanation"""

        try:
            text = self._generate(prompt, max_tokens=1024 * len(uncached))
            if text:
                parsed = self._parse_json_array_response(text)
                if parsed and isinstance(parsed, list):
                    for i, item in enumerate(parsed):
                        if not isinstance(item, dict):
                            continue
                        med_name = item.get('medicine_name', uncached[i] if i < len(uncached) else None)
                        if med_name:
                            normalized = self._normalize_result(item)
                            normalized['medicine_name'] = med_name
                            cache_key = f"dosage_lookup|{med_name}"
                            if len(self._cache) < self._cache_max:
                                self._cache[cache_key] = normalized
                            results[med_name] = normalized
                            logger.info(
                                f"Gemini batch dosage lookup: {med_name} "
                                f"(conf={normalized.get('confidence', 0):.2f})"
                            )
        except Exception as e:
            logger.error(f"Gemini batch dosage lookup failed: {e}")

        # Mark any missing as None (no individual fallback to avoid rate limit storms)
        for name in uncached:
            if name not in results:
                results[name] = None

        return results

    def lookup_unknown_medicine(self, medicine_name: str) -> Optional[Dict]:
        """
        Look up an unknown medicine by name only using Gemini.
        Returns a dict with fields for both Medicine record creation and dosage extraction.
        Returns None if Gemini is unavailable or fails.
        """
        if not self._available:
            return None

        # Check cache
        cache_key = f"lookup|{medicine_name}"
        if cache_key in self._cache:
            logger.debug(f"Cache hit for lookup: {medicine_name}")
            return self._cache[cache_key]

        prompt = self.LOOKUP_PROMPT.format(medicine_name=medicine_name)

        try:
            text = self._generate(prompt, max_tokens=4096)
            if text:
                result = self._parse_json_response(text)
                if result:
                    result = self._normalize_lookup_result(result, medicine_name)
                    if len(self._cache) < self._cache_max:
                        self._cache[cache_key] = result
                    logger.info(
                        f"Gemini looked up unknown medicine: {medicine_name} "
                        f"(conf={result.get('confidence', 0):.2f})"
                    )
                    return result
        except Exception as e:
            logger.error(f"Gemini lookup failed for {medicine_name}: {e}")

        return None

    BATCH_LOOKUP_PROMPT = """You are a pharmaceutical expert. Given a list of medicine names, provide complete structured information about each one.

Medicines: {medicine_names}

Return ONLY a JSON array where each element is a JSON object with these fields:
[
  {{
    "medicine_name": "exact medicine name as provided",
    "composition": "active ingredients with strengths",
    "product_form": "physical form (e.g., 'Tablet', 'Capsule', 'Syrup', 'Injection', 'Cream', 'Drops', 'Gel', 'Ointment')",
    "medicine_type": "Tablet, Capsule, Syrup, etc.",
    "primary_use": "main therapeutic use",
    "how_to_use": "usage instructions with frequency",
    "quantity": "dosage strength (e.g., '650 mg')",
    "dosage": "frequency code: OD, BD, TDS, QDS, OW (once weekly), BW (twice weekly), OM (once monthly), OY (once yearly), or ONCE (single dose/one-time)",
    "frequency": "human readable (e.g., 'Twice daily')",
    "meal_preference": "one of: Empty Stomach, Before Meal, After Meal, With Meal, At Bedtime, As advised",
    "duration": "typical duration",
    "common_side_effects": "common side effects",
    "benefits": "key therapeutic benefits",
    "how_it_works": "mechanism of action in simple terms",
    "safety_advice": "important warnings and precautions",
    "allergens": "known allergens and cross-sensitivities",
    "symptoms": "symptoms/conditions this medicine treats (e.g., 'fever, headache, body pain')",
    "alcohol_interaction": "interaction with alcohol",
    "pregnancy_interaction": "safety during pregnancy",
    "lactation_interaction": "safety during breastfeeding",
    "age_group_dosages": {{
        "infant_0_2": {{"dosage": "dose or 'Not recommended'", "notes": "instructions"}},
        "child_2_12": {{"dosage": "dose for 2-12 years", "notes": "instructions"}},
        "adolescent_12_18": {{"dosage": "dose for 12-18 years", "notes": "instructions"}},
        "adult_18_65": {{"dosage": "standard adult dose", "notes": "instructions"}},
        "elderly_65_plus": {{"dosage": "dose for 65+", "notes": "instructions"}}
    }},
    "confidence": 0.0 to 1.0
  }}
]

Rules:
- Return one object per medicine in the same order as provided
- Age group dosages must be medically accurate; use "Consult doctor" if unsure
- For meal_preference, be specific: prefer "Before Meal" or "After Meal" over "With Meal". Use "With Meal" only when the medicine must be taken during a meal (e.g., Metformin, enzyme supplements). Use "Before Meal" for medicines that need an empty or near-empty stomach for absorption (e.g., PPIs, antacids, H2 blockers). Use "After Meal" for medicines that may cause gastric irritation (e.g., NSAIDs, steroids).
- If you don't recognize a medicine, set confidence below 0.5 and use safe defaults
- Return ONLY the JSON array, no markdown or explanation"""

    def batch_lookup_medicines(self, medicine_names: List[str]) -> Dict[str, Optional[Dict]]:
        """
        Look up multiple unknown medicines in a single Gemini call.
        Returns dict mapping medicine_name -> lookup result (or None).
        """
        if not self._available or not medicine_names:
            return {name: None for name in medicine_names}

        # Check cache first, collect uncached names
        results = {}
        uncached = []
        for name in medicine_names:
            cache_key = f"lookup|{name}"
            if cache_key in self._cache:
                results[name] = self._cache[cache_key]
                logger.debug(f"Cache hit for lookup: {name}")
            else:
                uncached.append(name)

        if not uncached:
            return results

        # Single medicine -> use regular lookup (already has retry)
        if len(uncached) == 1:
            results[uncached[0]] = self.lookup_unknown_medicine(uncached[0])
            return results

        prompt = self.BATCH_LOOKUP_PROMPT.format(
            medicine_names=", ".join(uncached)
        )

        try:
            text = self._generate(prompt, max_tokens=4096 * len(uncached))
            if text:
                parsed = self._parse_json_array_response(text)
                if parsed and isinstance(parsed, list):
                    for i, item in enumerate(parsed):
                        if not isinstance(item, dict):
                            continue
                        # Match by medicine_name field or by index
                        med_name = item.get('medicine_name', uncached[i] if i < len(uncached) else None)
                        if med_name:
                            normalized = self._normalize_lookup_result(item, med_name)
                            cache_key = f"lookup|{med_name}"
                            if len(self._cache) < self._cache_max:
                                self._cache[cache_key] = normalized
                            results[med_name] = normalized
                            logger.info(
                                f"Gemini batch lookup: {med_name} "
                                f"(conf={normalized.get('confidence', 0):.2f})"
                            )
        except Exception as e:
            logger.error(f"Gemini batch lookup failed: {e}")

        # Fill in any missing results with individual fallback
        for name in uncached:
            if name not in results:
                results[name] = self.lookup_unknown_medicine(name)

        return results

    def _parse_json_array_response(self, text: str) -> Optional[list]:
        """Parse a JSON array from Gemini response."""
        text = text.strip()

        # Remove markdown code blocks
        if '```json' in text:
            text = text.split('```json', 1)[1].split('```', 1)[0].strip()
        elif '```' in text:
            text = text.split('```', 1)[1].split('```', 1)[0].strip()

        # Find the array
        if not text.startswith('['):
            bracket_idx = text.find('[')
            if bracket_idx != -1:
                text = text[bracket_idx:]

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Use shared repair logic for truncated JSON
            fixed = self._repair_truncated_json(text)
            if fixed:
                try:
                    result = json.loads(fixed)
                    logger.info("Recovered truncated Gemini batch JSON successfully")
                    return result
                except json.JSONDecodeError:
                    pass
            logger.warning("Failed to parse Gemini batch JSON array")
            return None

    def _normalize_lookup_result(self, result: Dict, medicine_name: str) -> Dict:
        """Normalize lookup result fields."""
        # Reuse dosage normalization
        result = self._normalize_result(result)

        # Ensure medicine-record fields have defaults
        result.setdefault('composition', 'Consult physician')
        result.setdefault('product_form', '')
        result.setdefault('medicine_type', 'Unknown')
        result.setdefault('primary_use', 'Consult physician')
        result.setdefault('how_to_use', 'Take as directed by physician')
        result.setdefault('common_side_effects', '')
        result.setdefault('benefits', '')
        result.setdefault('how_it_works', '')
        result.setdefault('safety_advice', '')
        result.setdefault('allergens', '')
        result.setdefault('symptoms', '')
        result.setdefault('alcohol_interaction', '')
        result.setdefault('pregnancy_interaction', '')
        result.setdefault('lactation_interaction', '')
        result.setdefault('age_group_dosages', {})
        result['medicine_name'] = medicine_name

        # Ensure age_group_dosages is a dict
        if not isinstance(result.get('age_group_dosages'), dict):
            result['age_group_dosages'] = {}

        return result

    def explain_medicine(self, medicine_name: str, composition: str, primary_use: str) -> Optional[str]:
        """Generate a simple patient-friendly explanation of a medicine."""
        if not self._available:
            return None

        prompt = (
            f"Explain {medicine_name} (contains {composition}) "
            f"used for {primary_use} in simple terms for patients. "
            f"Keep it under 3 sentences."
        )

        try:
            return self._generate(prompt)
        except Exception as e:
            logger.error(f"Gemini explanation failed for {medicine_name}: {e}")
            return None

    def _repair_truncated_json(self, text: str) -> Optional[str]:
        """Attempt to repair truncated JSON by closing open strings, braces, and brackets."""
        fixed = text

        # Count unescaped quotes to detect unterminated strings
        in_string = False
        i = 0
        while i < len(fixed):
            ch = fixed[i]
            if ch == '\\' and in_string:
                i += 2  # skip escaped character
                continue
            if ch == '"':
                in_string = not in_string
            i += 1

        # If we're inside an unterminated string, close it and trim the incomplete value
        if in_string:
            # Find the last complete key-value pair by trimming back to last comma or colon
            # outside a string, then close the string
            last_quote = fixed.rfind('"')
            if last_quote > 0:
                # Truncate at the opening quote of the incomplete value and remove trailing key
                # Find the last complete entry: look for last ',' before the dangling quote
                last_comma = fixed.rfind(',', 0, last_quote)
                last_brace = fixed.rfind('{', 0, last_quote)
                last_bracket = fixed.rfind('[', 0, last_quote)
                cut_point = max(last_comma, last_brace, last_bracket)
                if cut_point > 0:
                    if fixed[cut_point] == ',':
                        fixed = fixed[:cut_point]  # remove trailing comma + incomplete pair
                    else:
                        fixed = fixed[:cut_point + 1]  # keep the opening brace/bracket

        # Close any open braces/brackets
        open_braces = fixed.count('{') - fixed.count('}')
        open_brackets = fixed.count('[') - fixed.count(']')
        if open_braces > 0 or open_brackets > 0:
            # Remove trailing comma if present
            stripped = fixed.rstrip()
            if stripped.endswith(','):
                fixed = stripped[:-1]
            fixed += '}' * max(open_braces, 0)
            fixed += ']' * max(open_brackets, 0)

        return fixed

    def _parse_json_response(self, text: str) -> Optional[Dict]:
        """Parse JSON from Gemini response, handling markdown code blocks and truncation."""
        text = text.strip()

        # Remove markdown code blocks
        if '```json' in text:
            text = text.split('```json', 1)[1].split('```', 1)[0].strip()
        elif '```' in text:
            text = text.split('```', 1)[1].split('```', 1)[0].strip()

        # Try to extract JSON object if there's extra text around it
        if not text.startswith('{'):
            brace_idx = text.find('{')
            if brace_idx != -1:
                text = text[brace_idx:]

        try:
            return json.loads(text)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse Gemini JSON: {e}")
            logger.debug(f"Raw response: {text[:500]}")

            # Attempt to repair truncated JSON (unterminated strings, missing braces)
            fixed = self._repair_truncated_json(text)
            if fixed and fixed != text:
                try:
                    result = json.loads(fixed)
                    logger.info("Recovered truncated Gemini JSON successfully")
                    return result
                except json.JSONDecodeError:
                    pass

            return None

    # Default meal-slot schedules when Gemini doesn't provide dose_morning/afternoon/night.
    # Keys are normalized dosage codes; each value is a
    # (dose_morning, dose_afternoon, dose_night) tuple, unpacked in that order
    # by _normalize_result.
    DOSAGE_CODE_TO_SCHEDULE = {
        'OD': (1, 0, 0),
        'BD': (1, 0, 1),
        'TDS': (1, 1, 1),
        'QDS': (1, 1, 1),  # QDS = 4x/day, best approximation in 3 slots
        # Non-daily codes (weekly, monthly, yearly, single dose) all map to
        # one dose in the morning slot.
        'OW': (1, 0, 0),
        'BW': (1, 0, 0),
        'OM': (1, 0, 0),
        'OY': (1, 0, 0),
        'ONCE': (1, 0, 0),
    }

    def _normalize_result(self, result: Dict) -> Dict:
        """Normalize extracted result fields."""
        # Normalize dosage code
        dosage = result.get('dosage', 'BD').upper().strip()
        if dosage not in ('OD', 'BD', 'TDS', 'QDS', 'OW', 'BW', 'OM', 'OY', 'ONCE'):
            dosage = 'BD'
        result['dosage'] = dosage

        # Normalize frequency
        freq_map = {
            'OD': 'Once daily',
            'BD': 'Twice daily',
            'TDS': 'Thrice daily',
            'QDS': 'Four times daily',
            'OW': 'Once weekly',
            'BW': 'Twice weekly',
            'OM': 'Once monthly',
            'OY': 'Once yearly',
            'ONCE': 'Single dose',
        }
        result['frequency'] = freq_map.get(dosage, result.get('frequency', 'Twice daily'))

        # Normalize meal-slot schedule
        # Use Gemini-provided values if present, otherwise derive from dosage code
        has_schedule = any(
            result.get(k) is not None
            for k in ('dose_morning', 'dose_afternoon', 'dose_night')
        )
        if has_schedule:
            for key in ('dose_morning', 'dose_afternoon', 'dose_night'):
                try:
                    result[key] = float(result.get(key, 0))
                except (ValueError, TypeError):
                    result[key] = 0
        else:
            # Derive from dosage code
            m, a, n = self.DOSAGE_CODE_TO_SCHEDULE.get(dosage, (1, 0, 1))
            # Special case: bedtime medicines
            meal_pref_raw = (result.get('meal_preference') or '').lower()
            if 'bedtime' in meal_pref_raw and dosage == 'OD':
                m, a, n = 0, 0, 1
            result['dose_morning'] = float(m)
            result['dose_afternoon'] = float(a)
            result['dose_night'] = float(n)

        # Build human-readable schedule string
        def _fmt(v):
            return str(int(v)) if v == int(v) else str(v)
        result['dose_schedule'] = (
            f"{_fmt(result['dose_morning'])}-"
            f"{_fmt(result['dose_afternoon'])}-"
            f"{_fmt(result['dose_night'])}"
        )

        # Normalize meal preference
        meal_pref = (result.get('meal_preference') or '').strip().lower()
        meal_map = {
            'empty stomach': 'Empty Stomach',
            'before meal': 'Before Meal',
            'before meals': 'Before Meal',
            'before food': 'Before Meal',
            'after meal': 'After Meal',
            'after meals': 'After Meal',
            'after food': 'After Meal',
            'with meal': 'With Meal',
            'with meals': 'With Meal',
            'with food': 'With Meal',
            'at bedtime': 'At Bedtime',
            'before bedtime': 'At Bedtime',
            'as advised': 'As advised',
            'as directed': 'As advised',
        }
        result['meal_preference'] = meal_map.get(meal_pref, 'As advised')

        # Ensure confidence is a float
        try:
            result['confidence'] = float(result.get('confidence', 0.85))
        except (ValueError, TypeError):
            result['confidence'] = 0.85

        # Default quantity and duration
        if not result.get('quantity'):
            result['quantity'] = 'As prescribed'
        if not result.get('duration'):
            result['duration'] = 'As prescribed'

        return result

    def clear_cache(self):
        """Clear the in-memory cache."""
        self._cache.clear()


# Module-wide extractor, created lazily by get_gemini_extractor()
_extractor_instance = None


def get_gemini_extractor() -> GeminiDosageExtractor:
    """Return the shared GeminiDosageExtractor, constructing it on first use.

    Later calls return the instance created by the first call.
    """
    global _extractor_instance
    if _extractor_instance is not None:
        return _extractor_instance
    _extractor_instance = GeminiDosageExtractor()
    return _extractor_instance
