{"api_version":"1.3.13","collections":{"blog":"Blog posts and articles","cursuri":"Course content - excluded from default searches, must be explicitly requested","cursuri_externe":"External course PDFs - excluded from default searches, must be explicitly requested","distribuitori":"Distributor information","docs":"Official documentation and manuals (articles)","docs_intrebari":"Q&A from Docs_Intrebari (documentation questions)","implementari":"Implementation guides","legal":"Legal and fiscal documentation (Romanian law corpus) - excluded from default searches, must be explicitly requested","page":"Website pages and general content","programari":"Programari (scheduled items with strong recency bias)","seminarii":"Seminar materials"},"endpoints":{"/api/db-assistant":{"description":"Single-endpoint orchestrator: infers intent, retrieves schema candidates, optionally runs safe SELECT queries","method":"POST","request_body":{"databases":{"description":"Optional database filter (e.g., ['xecutive'])","type":"array[string]"},"execute_data":{"default":true,"description":"If false, returns schema candidates only","type":"boolean"},"max_rows":{"default":20,"description":"Maximum rows for SELECT TOP","type":"integer"},"n_results":{"default":10,"description":"Schema candidate pool size","type":"integer"},"planner_mode":{"default":"heuristic","description":"heuristic | execution_guided (execution_guided tries multiple LLM plans and selects best executed result)","type":"string"},"query":{"description":"Natural language request","required":true,"type":"string"},"retrieval_mode":{"default":"standard","description":"standard | full (full uses smart retrieval with table+column+graph signals)","type":"string"}},"response_fields":{"action_taken":"Schema-only or SQL execution action","candidate_tables":"Ranked table candidates","execution":"Returned rows/columns (when executing)","intent_type":"Inferred intent: schema_info | data_rows | data_count | 
analytics_query","planner_mode":"Effective planner mode used","repair_attempts":"Candidate attempts that failed before fallback","retrieval_mode":"Effective retrieval mode used","sql_plan":"Generated SQL (when executing)"}},"/api/db-search":{"description":"Search database schema knowledge for Text-to-SQL / NL2SQL context","method":"POST","request_body":{"chunk_types":{"description":"Filter to specific chunk types: table, view, stored_procedure, function, relationship","type":"array[string]"},"databases":{"description":"Filter to specific databases (e.g., ['xecutive', 'sc_exemplu'])","type":"array[string]"},"format_for_llm":{"default":false,"description":"Return pre-formatted context string for LLM consumption","type":"boolean"},"n_results":{"default":10,"description":"Maximum results to return","type":"integer"},"query":{"description":"Natural language query about database schema","required":true,"type":"string"},"smart_mode":{"default":false,"description":"Use LLM intent analysis + graph traversal (QueryWeaver-style). 
Better for natural language queries.","type":"boolean"},"weights":{"description":"Override hybrid weights: {bm25_weight, vector_weight}","type":"object"}},"response_fields":{"intent":"Query intent analysis (only if smart_mode=true)","join_paths":"Discovered JOIN paths (only if smart_mode=true and multiple tables found)","llm_context":"Formatted context string (only if format_for_llm=true)","query":"The search query","result_count":"Number of results found","results":"Array of schema chunks with metadata"}},"/api/db-search-smart":{"description":"Smart semantic search using LLM intent analysis + graph traversal (dedicated endpoint)","method":"POST","request_body":{"databases":{"description":"Filter to specific databases","type":"array[string]"},"db_description":{"description":"Database description for LLM context","type":"string"},"debug":{"default":false,"description":"Return debug information","type":"boolean"},"include_connected":{"default":true,"description":"Include graph-connected tables","type":"boolean"},"n_results":{"default":10,"description":"Maximum results","type":"integer"},"query":{"description":"Natural language query","required":true,"type":"string"}},"response_fields":{"intent":"LLM-analyzed query intent with table/column descriptions","join_paths":"Discovered JOIN paths between result tables","query":"The search query","result_count":"Number of results found","results":"Array of matching schema chunks"}},"/api/dbk-integrity":{"description":"Quick integrity verdict for database_knowledge health after ETL runs","method":"GET","response_fields":{"index_name":"Evaluated index name","integrity.checks":"Detailed checks and failure reasons","integrity.score":"Lightweight health score (0-100)","integrity.status":"Overall verdict: clean | warning | error","integrity.summary":"Check counts by severity"}},"/api/dbk-status":{"description":"Inspect the current database_knowledge index contents, ETL hashes, and DBK source 
configuration","method":"GET","response_fields":{"configuration":"Current DBK source aliases and connection targets","counts":"Counts by chunk_type, feature_layer, and database","etl":"Last ETL run and schema hash files","index":"Health, docs count, deleted docs, and store size","index_exists":"Whether database_knowledge currently exists","integrity":"Computed clean/warning/error verdict with per-check breakdown","run_report":"Latest persisted DBK ETL verdict summary (if available)","samples":"One example document per feature layer","warnings":"Quick signals for missing or risky DBK coverage"}},"/api/status":{"description":"Get system status and collection statistics","method":"GET","response_fields":{"available_source_types":"List of searchable document types","collections":"Document counts per collection","embeddings_status":"Status of embeddings generation","status":"Service health status","uptime":"Process uptime","version":"API version"}},"/api/table-context":{"description":"Get full schema context for specific tables by name","method":"POST","request_body":{"database":{"description":"Optional: filter to specific database","type":"string"},"tables":{"description":"Table names to retrieve (e.g., ['Clienti', 'dbo.Comenzi'])","required":true,"type":"array[string]"}},"response_fields":{"llm_context":"Formatted context string for LLM","results":"Array of table chunks with full content","tables_found":"Number of tables found","tables_requested":"Input table names"}},"/help":{"description":"This endpoint - returns API documentation","method":"GET"},"/search":{"description":"Hybrid search endpoint with semantic search, BM25, and optional reranking","examples":{"advanced_config":{"description":"Search with custom ranking parameters and debug info","request":{"config":{"config_override":{"HALF_LIFE_DAYS":{"blog":30},"K_MAX":{"blog":0.8}},"debug":true,"n_results":10,"use_reranker":true},"query":"ultimele noutati"}},"advanced_filtered_search":{"description":"Search with 
advanced filters using filter parameter","request":{"filter":{"source_ids":[4824,4825],"source_types":["programari"]},"query":"TVA"}},"basic_search":{"description":"Simple search across all collections","request":{"query":"cum configurez casa de marcat"}},"distributor_partners_only":{"description":"Search only among partner distributors","request":{"filter":{"source_types":["distribuitori/partener"]},"query":"contact"}},"granular_subtype_filtering":{"description":"Filter by specific subtypes/subcategories","request":{"filter":{"source_types":["docs/retail_online"]},"query":"comenzi online"}},"type_filtered_search":{"description":"Search within specific document type using legacy doc_type parameter","request":{"doc_type":"docs","query":"factura"}}},"method":"POST","request_body":{"config":{"description":"Dynamic search behavior controls","properties":{"config_override":{"available_parameters":"\"\"\"\nRanking configuration for search quality and recency-aware ordering.\n\nAll parameters support runtime override via config_override in search API.\n\nIMPORTANT: These values are BOOTSTRAP DEFAULTS only.  At container startup,\nsrc.config.langfuse.initialize_config() overwrites them with the authoritative\nvalues from Langfuse (prompt: hybridsearch-search-config, label: production).\nThe disk cache (opensearch_data/langfuse_config_cache.json) persists across\nrestarts, so these hardcoded values are only used on the very first deployment\nif Langfuse is unreachable.  
Keep them roughly in sync with the Langfuse config\nbut do not rely on them being the source of truth.\n\"\"\"\n\n# ============================================================================\n# SEARCH PIPELINE CONFIGURATION\n# ============================================================================\n\n# Global recall limit - maximum candidates retrieved from OpenSearch before ranking/reranking\nTOTAL_RECALL = 500\n\n# Neural reranker configuration\nUSE_RERANKER = False  # Enable/disable cross-encoder reranking\nRERANK_TOP_K = 15  # Legacy/debug-only parameter (replaced by dynamic cliff detection in production)\nRERANK_MAX_LENGTH = 256  # Max tokens for reranking (truncation length for cross-encoder)\n\n# Dynamic reranking cliff detection parameters\nRERANK_CLIFF_MIN = 6           # Start scanning for cliff at position N\nRERANK_CLIFF_MAX = 30          # Stop scanning at position N / max K\nRERANK_CLIFF_WINDOW = 5        # Rolling window size for gap analysis\nRERANK_CLIFF_MULTIPLIER = 2.0  # Cliff threshold = gap \u2265 N\u00d7 recent average\nRERANK_FALLBACK = 30           # Fallback K if no cliff found\n\n# API result limits\nUI_TOP_K_DEFAULT = 5       # Default result count if not specified\nSCORE_DECIMAL_PLACES = 2   # Decimal places for score display\n\n\n# ============================================================================\n# RECENCY BOOST SYSTEM\n# ============================================================================\n\n# Collection importance multiplier - applied AFTER z-score normalization, BEFORE recency boost\nCOLLECTION_IMPORTANCE_MULTIPLIER = {\n    \"docs\": 1.3,                  # Documentation (high importance)\n    \"docs_intrebari\": 0.9,        # Q&A from docs (slightly lower than docs)\n    \"programari\": 1.2,            # Appointments (high importance)\n    \"legal\": 1.3,                 # Legal documents (high importance for accuracy)\n    \"blog\": 1.0,                  # Standard importance\n    \"page\": 1.0,\n    
\"implementari\": 0.9,\n    \"seminarii\": 1.0,\n    \"cursuri\": 1.0,\n    \"distribuitori\": 1.0,\n    \"cursuri_externe\": 1.0,\n}\n\n# Collections excluded from recency boost (use semantics only, no time decay)\n# Add collection names here to disable all recency calculations for them\nEXCLUDED_RECENCY_BOOST = {}\n\n# Exponential decay parameters - half-life in days for recency decay\n# Lower half-life = faster decay (aggressive freshness priority)\nHALF_LIFE_DAYS = {\n    \"docs\": 365,\n    \"docs_intrebari\": 365,\n    \"programari\": 7,\n    \"blog\": 180,\n    \"page\": 180,\n    \"implementari\": 180,\n    \"seminarii\": 180,\n    \"cursuri\": 180,\n    \"distribuitori\": 180,\n    \"cursuri_externe\": 365,\n}\n\n# Soft knee configuration - two-phase decay system\n# Only collections listed here use gentle decay after threshold\n# Others use single-phase exponential decay\n# Must be < HALF_LIFE_DAYS for that collection\nKNEE_THRESHOLD_DAYS = {\n    \"programari\": 14,   # Switch to gentle decay after 2 weeks\n}\n\n# Gentle half-life used after knee threshold (two-phase decay)\n# Must be > HALF_LIFE_DAYS for same collection\nGENTLE_HALF_LIFE_DAYS = {\n    \"programari\": 30,   # Gentle 1 month decay after 2 week knee\n}\n\n# Maximum recency boost strength - controls boost intensity\n# boost = K_MAX * exp(-age / half_life)\n# 0.0 = no boost, 1.0 = 100% boost, >1.0 = aggressive boost\nK_MAX = {\n    \"docs\": 0.5,\n    \"docs_intrebari\": 0.0,\n    \"programari\": 1.0,\n    \"blog\": 0.15,\n    \"page\": 0.15,\n    \"implementari\": 0.0,\n    \"seminarii\": 0.15,\n    \"cursuri\": 0.15,\n    \"distribuitori\": 0.15,\n    \"cursuri_externe\": 0.1,\n}\n\n\n# ============================================================================\n# NEGATIVE PENALTY SYSTEM\n# ============================================================================\n\n# Maximum penalty strength for very old documents\nK_PENALTY = {\n    \"programari\": 0.50,\n}\n\n# Age threshold in days 
after which penalties start applying\nPENALTY_THRESHOLD_DAYS = {\n    \"programari\": 360,\n}\n\n# Penalty half-life - how quickly penalties increase after threshold\nPENALTY_HALF_LIFE_DAYS = {\n    \"programari\": 720,\n}\n\n\n# ============================================================================\n# QUALITY GATE SYSTEM\n# ============================================================================\n\n# Determines how much recency boost is dampened based on document position in initial ranking\n# Quality coefficient is position-based: 1.0 at top, QUALITY_COEF_FLOOR at bottom\nQUALITY_COEF_FLOOR = 0.7   # Minimum quality coefficient (prevents fully zeroing recency)\n\n\n# ============================================================================\n# HYBRID SEARCH & FIELD BOOST CONFIGURATION\n# ============================================================================\n\n# Note: BM25/vector tier weights and token thresholds live in the HYBRID PIPELINE\n# section below (CHARS_PER_TOKEN, QUERY_*_THRESHOLD, BM25_WEIGHT_*).\n\n# BM25 field boost multipliers - relative importance of fields in keyword search\nCONTENT_FIELD_BOOST = 1.0       # Main document content field\nEXPANDED_FIELD_BOOST = 0.85     # Doc2query generated questions\nDB_KEYWORDS_FIELD_BOOST = 1.1   # Database-provided keywords\n\n\n# ============================================================================\n# FRESHNESS INTENT DETECTION\n# ============================================================================\n\n# Freshness intent boost multiplier - extra boost when query contains temporal keywords\n# Applied multiplicatively to base recency boost when freshness intent detected\n# Examples: \"ultimele noutati\", \"recent\", \"actualizari\"\n# Detection patterns and logic are in src/search/multi_collection_search.py\nFRESHNESS_INTENT_BOOST = 1.75\n\n\n# ============================================================================\n# HYBRID PIPELINE (OpenSearch search_pipeline + per-query 
BM25)\n# ============================================================================\n# Token-length tiering selects which pipeline runs. Vector weight is (1 - BM25 weight).\n# Pipeline definitions are created at container startup; changing weights or\n# normalization/combination requires restart to recreate pipelines.\n\nCHARS_PER_TOKEN = 3.5\nQUERY_SHORT_THRESHOLD = 3\nQUERY_LONG_THRESHOLD = 7\n\nBM25_WEIGHT_SHORT = 0.6\nBM25_WEIGHT_MEDIUM = 0.5\nBM25_WEIGHT_LONG = 0.4\n\nNORMALIZATION_TECHNIQUE = \"z_score\"\nCOMBINATION_TECHNIQUE = \"arithmetic_mean\"\nBM25_MATCH_TYPE = \"most_fields\"\n\n\n# ============================================================================\n# RERANK DECISION (dominant-winner skip + cliff analysis window)\n# ============================================================================\n\nDOMINANT_WINNER_FLOOR = 0.1\nDOMINANT_WINNER_MULTIPLIER = 2.0\nRERANK_ANALYSIS_WINDOW = 30\n\n\n# ============================================================================\n# RECENCY FALLBACKS & FRESHNESS QUERY STRIPPING\n# ============================================================================\n# Used when a collection is missing from HALF_LIFE_DAYS / K_MAX dicts, or for\n# missing-document-date behavior and freshness-intent half-life scaling.\n\nDEFAULT_HALF_LIFE_DAYS = 60\nDEFAULT_K_MAX = 0.10\nFRESHNESS_HALF_LIFE_DIVISOR = 2.0\nMISSING_DATE_DECAY = 0.01\nFRESHNESS_STRIP_MIN_LENGTH = 2\n","description":"Temporarily override ranking parameters for this search only. All values are validated and clamped to safe ranges. Changes do not persist across requests. See 'available_parameters' below for full configuration file with all parameters, types, defaults, and documentation.","required":false,"type":"object"},"content_variant":{"default":"display","description":"Controls response text fields only. 
'display' uses content_display when available; 'raw' uses ranking text (content).","options":["display","raw"],"type":"string"},"debug":{"default":false,"description":"Return detailed debug information including per-document scoring breakdown, query processing steps, and ranking multipliers","type":"boolean"},"disable_recency":{"default":false,"description":"Disable recency boost in ranking. When true, all documents are ranked purely by semantic/keyword relevance without time-based adjustments","type":"boolean"},"min_tokens":{"default":null,"description":"Minimum total token count across returned matching chunks. When set, results expand beyond n_results until the threshold is met. Accepts a positive integer or the string 'default' (resolved to n_results * min_tokens_per_result, configurable via Langfuse, fallback 500). Omit to disable.","type":"integer or string"},"n_results":{"default":5,"description":"Number of results to return","range":"1-200","type":"integer"},"use_reranker":{"default":false,"description":"Enable cross-encoder reranking for improved semantic relevance. When enabled, initial search results are re-scored using a neural model for better accuracy","type":"boolean"}},"required":false,"type":"object"},"doc_type":{"default":"all (excluding legal, cursuri, and cursuri_externe)","description":"Filter results by document type. 
Note: 'legal', 'cursuri', and 'cursuri_externe' are excluded by default - use doc_type='all' explicitly or filter.source_types to include them","options":["all","docs","docs_intrebari","blog","page","programari","implementari","seminarii","cursuri","distribuitori","legal","cursuri_externe"],"required":false,"type":"string"},"filter":{"description":"Advanced filtering options for targeted search","properties":{"date_after":{"description":"Only return documents with date >= this value (ISO format)","example":"2024-01-01","type":"string"},"date_before":{"description":"Only return documents with date <= this value (ISO format)","example":"2025-01-01","type":"string"},"metadata_filters":{"description":"Filter by any metadata field","example":{"author":"John Doe","category":"technical"},"type":"object"},"source_ids":{"description":"Filter by specific document IDs","example":[4824,4825,4826],"type":"array[integer]"},"source_types":{"available_types":{"blog":"Blog posts and articles","cursuri":"Course content (excluded from default search)","cursuri_externe":"External course PDFs (excluded from default search, must be explicitly requested)","distribuitori":"Distributor and partner information","docs":"Official documentation and manuals","docs_intrebari":"Q&A from Docs_Intrebari (documentation questions)","implementari":"Implementation guides","legal":"Romanian fiscal law corpus (excluded from default search, must be explicitly requested)","page":"Website pages (products, general content)","programari":"Scheduled items with recency bias","seminarii":"Seminar materials"},"description":"Filter by document types. Supports granular filtering using '/' separator for subtypes/subcategories. Use [\"all\"] to include every collection (except cursuri and database_knowledge). 
When omitted, searches all collections EXCEPT 'legal', 'cursuri', 'cursuri_externe', 'docs_intrebari', and 'database_knowledge'.","examples":{"search_all_food_service":["docs/nexus_fast_food","docs/nexus_restaurant","docs/nexus_ospatari","docs/nexus_bucatarie"],"search_all_sources":["all"],"search_doc_questions":["docs_intrebari"],"search_docs_and_blog":["docs","blog"],"search_erp_docs":["docs/erp"],"search_hr_modules":["docs/nexus_salarii","docs/nexus_resurse_umane","docs/nexus_pontaj","docs/nexus_hr_self_service","docs/nexus_condica_prezenta"],"search_legal_only":["legal"],"search_partners_only":["distribuitori/partener"],"search_retail_docs":["docs/retail"],"search_retail_online_docs":["docs/retail_online"],"search_vanzari_docs":["docs/vanzari"]},"granular_filters":{"distribuitori_subtypes":{"distribuitori/distribuitor":"Official distributors only","distribuitori/partener":"Partners only"},"docs_subcategories":{"docs/contclient":"ContClient documentation","docs/nexus_analist":"Nexus Analist documentation","docs/nexus_bord":"Nexus Bord (dashboard) documentation","docs/nexus_bucatarie":"Nexus Buc\u0103t\u0103rie (kitchen module) documentation","docs/nexus_catering":"Nexus Catering documentation","docs/nexus_condica_prezenta":"Nexus Condica Prezenta (attendance) documentation","docs/nexus_crm":"Nexus CRM documentation","docs/nexus_crm_online":"Nexus CRM Online documentation","docs/nexus_erp":"Nexus ERP core documentation","docs/nexus_facturare_online":"Nexus Facturare Online documentation","docs/nexus_fast_food":"Nexus Fast Food documentation","docs/nexus_gdpr":"Nexus GDPR documentation","docs/nexus_hotel":"Nexus Hotel documentation","docs/nexus_hr_self_service":"Nexus HR Self Service documentation","docs/nexus_info_manager":"Nexus Info Manager documentation","docs/nexus_info_produs":"Nexus Info Produs documentation","docs/nexus_instant_promo":"Nexus Instant Promo documentation","docs/nexus_livrari":"Nexus Livrari (deliveries) 
documentation","docs/nexus_magazin":"Nexus Magazin documentation","docs/nexus_ospatari":"Nexus Osp\u0103tari (waiters module) documentation","docs/nexus_parc_auto":"Nexus Parc Auto (fleet management) documentation","docs/nexus_pontaj":"Nexus Pontaj (time tracking) documentation","docs/nexus_rapoarte_online":"Nexus Rapoarte Online documentation","docs/nexus_restaurant":"Nexus Restaurant documentation","docs/nexus_resurse_umane":"Nexus Resurse Umane (HR) documentation","docs/nexus_salarii":"Nexus Salarii (payroll) documentation","docs/nexus_self_checkout_supermarket":"Nexus Self Checkout Supermarket documentation","docs/nexus_sfa_online":"Nexus SFA Online documentation","docs/nexus_supermarket":"Nexus Supermarket documentation","docs/nexyshop_emag_marketplace":"NexyShop eMAG Marketplace documentation","docs/nexyshop_magazin_online":"NexyShop Magazin Online documentation","docs/retail_online_dashboard":"Retail Online Dashboard documentation","docs/retail_online_kiosk":"Retail Online Kiosk documentation","docs/retail_online_orders":"Retail Online Orders documentation","docs/retail_online_register":"Retail Online Register (POS) documentation"},"page_subtypes":{"page/page":"General website pages only","page/produs":"Product pages only","page/studiu-de-caz":"Case study pages only"}},"type":"array[string]","umbrella_filters":{"docs/erp":"ERP core (Nexus ERP, Salarii, Resurse Umane, Analist, CRM, Parc Auto, GDPR)","docs/rapoarte":"Reporting (Info Manager, Rapoarte Online, Bord Online)","docs/restaurant":"Restaurant modules (Osp\u0103tari, Buc\u0103t\u0103rie)","docs/retail":"Retail & Hospitality (Supermarket, Magazin, Restaurant, Fast-Food, Catering, Self Checkout, Hotel)","docs/retail_online":"Retail Online (Kiosk, Dashboard, Register, Orders)","docs/salarizare":"Payroll & HR (Condica Prezenta, HR Self Service, Pontaj)","docs/vanzari":"Sales (SFA, Facturare, CRM Online, Livr\u0103ri, Instant Promo, Info Produs)","docs/vanzari_online":"Online Sales (NexyShop, eMAG 
Marketplace, ContClient)"}}},"required":false,"type":"object"},"query":{"description":"The search query text","required":true,"type":"string"},"top_k":{"default":null,"description":"(Deprecated) Legacy parameter for result count. Use config.n_results instead","range":"1-100","required":false,"type":"integer"}},"response_format":{"description":"Unified response format for all searches (with or without filters). Returns document-level results with rich content.","fields":{"chunk_id":"Chunk-level ID matching the OpenSearch _id (e.g., 'doc_5039_chunk_3'). Useful for deduplication across parallel searches.","date":"Document date in YYYY-MM-DD format (if available)","first_chunk":"First chunk of the document (always present)","full_document":"Complete document text (only if different from first_chunk)","link":"URL to the source document","matching_chunk":"The highest-scoring chunk (only if different from first_chunk)","score":"Relevance score (0-10+, higher is better)","source_id":"Unique document ID (can be used with filter.source_ids)","subtype":"Granular subtype (e.g., 'retail_online', 'produs', 'partener', 'intrebari', 'interventii') - only present when applicable","title":"Document title","type":"Top-level document type (e.g., 'docs', 'docs_intrebari', 'page', 'distribuitori', 'blog')"},"notes":["One result per document (not per chunk)","Filters are applied, but the response format remains the same","LLMs get up to 3 pieces of content per document: first_chunk, matching_chunk, full_document"]}}},"granular_filtering":{"description":"filter.source_types supports granular subcategories using slash notation","examples":[{"filter":"docs","matches":"All documentation"},{"filter":"docs/retail_online","matches":"Only Retail Online documentation"},{"filter":"docs/nexus_fast_food","matches":"Only Nexus Fast Food documentation"},{"filter":"retail_online","matches":"Retail Online content across ALL collections"}]}}
