{
  "openapi": "3.1.0",
  "info": {
    "title": "Voice AI Scoring API",
    "version": "1.0.0",
    "description": "Public API that scores voice-annotation transcripts against gold references using the Smallest ASR Labelling Framework (HARD regex/whitelist rules + SOFT rules with LLM-judge rescue). Designed for Indic + code-switched (Hindi/English) speech.",
    "contact": {
      "name": "Voice AI Scoring",
      "url": "https://github.com/LazarusStack/voice-ai-scoring-metrics"
    },
    "license": {
      "name": "MIT"
    }
  },
  "servers": [
    {
      "url": "/",
      "description": "Same-origin (relative)"
    }
  ],
  "tags": [
    {
      "name": "scoring",
      "description": "Score annotator submissions"
    },
    {
      "name": "meta",
      "description": "Spec and health"
    }
  ],
  "paths": {
    "/api/score": {
      "post": {
        "tags": [
          "scoring"
        ],
        "summary": "Score one annotator submission against a gold reference",
        "description": "Returns a `ScoreResult` with score in `[0, 1]`, pass/fail flag, all HARD violations, SOFT disagreements, rescued disagreements, lexical breakdown, and step-by-step scoring math.",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ScoreRequest"
              },
              "examples": {
                "hindi_english_pair": {
                  "$ref": "#/components/examples/SinglePair"
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Scored",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScoreResult"
                }
              }
            }
          },
          "400": {
            "description": "Malformed JSON, or a required key (`annotator`, `gold`) is missing"
          },
          "422": {
            "description": "Submissions could not be parsed"
          },
          "500": {
            "description": "Unexpected scorer failure"
          }
        }
      },
      "get": {
        "tags": [
          "meta"
        ],
        "summary": "Endpoint health",
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/api/batch": {
      "post": {
        "tags": [
          "scoring"
        ],
        "summary": "Score many pairs in one request; returns per-clip results plus an aggregate summary",
        "description": "Each pair is scored independently. Bad pairs return an `ok:false` entry and the batch continues. The `summary` aggregates only over successfully scored pairs.",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/BatchRequest"
              },
              "examples": {
                "two_clips": {
                  "$ref": "#/components/examples/Batch"
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Batch scored",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/BatchResponse"
                }
              }
            }
          },
          "400": {
            "description": "Malformed JSON or missing 'pairs'"
          },
          "500": {
            "description": "Unexpected batch failure"
          }
        }
      },
      "get": {
        "tags": [
          "meta"
        ],
        "summary": "Endpoint health",
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/api/docs": {
      "get": {
        "tags": [
          "meta"
        ],
        "summary": "This OpenAPI spec",
        "responses": {
          "200": {
            "description": "OpenAPI 3.1 JSON"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "Utterance": {
        "type": "object",
        "required": [
          "speaker",
          "text"
        ],
        "properties": {
          "speaker": {
            "type": "string",
            "description": "e.g., 'Speaker_1', 'Speaker_Machine'",
            "example": "Speaker_1"
          },
          "start": {
            "type": "string",
            "description": "'HH:MM:SS.mmm' or float seconds",
            "example": "00:00:00.000"
          },
          "end": {
            "type": "string",
            "description": "Utterance end time, e.g. 'HH:MM:SS.mmm'.",
            "example": "00:00:09.680"
          },
          "text": {
            "type": "string",
            "description": "Transcription. Trailing `[Label]` emotion tag (Happy/Angry/Sad/Fear/Surprise/Disgust/Excited/Sarcastic/None) is extracted into emotion_primary and stripped before lexical scoring.",
            "example": "मैंने कल Delhi से नया laptop खरीदा। [None]"
          }
        }
      },
      "ScoreRequest": {
        "type": "object",
        "required": [
          "annotator",
          "gold"
        ],
        "properties": {
          "annotator": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Utterance"
            }
          },
          "gold": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Utterance"
            }
          }
        }
      },
      "BatchRequest": {
        "type": "object",
        "required": [
          "pairs"
        ],
        "properties": {
          "pairs": {
            "type": "array",
            "items": {
              "type": "object",
              "required": [
                "annotator",
                "gold"
              ],
              "properties": {
                "file_id": {
                  "type": "string",
                  "description": "Optional. Used to label results."
                },
                "annotator": {
                  "type": "array",
                  "items": {
                    "$ref": "#/components/schemas/Utterance"
                  }
                },
                "gold": {
                  "type": "array",
                  "items": {
                    "$ref": "#/components/schemas/Utterance"
                  }
                }
              }
            }
          },
          "pass_threshold": {
            "type": "number",
            "default": 0.97,
            "description": "Pass threshold for this batch; overrides the 0.97 default when provided."
          }
        }
      },
      "Violation": {
        "type": "object",
        "properties": {
          "rule_id": {
            "type": "string",
            "example": "1.6"
          },
          "severity": {
            "type": "string",
            "enum": [
              "hard",
              "soft"
            ]
          },
          "message": {
            "type": "string"
          },
          "utterance_index": {
            "type": "integer"
          },
          "found": {
            "type": "string"
          },
          "expected": {
            "type": "string"
          }
        }
      },
      "ScoreMath": {
        "type": "object",
        "description": "Step-by-step math behind the final score.",
        "properties": {
          "lex_component": {
            "type": "number"
          },
          "blended_lexical_error": {
            "type": "number"
          },
          "hard_violation_count": {
            "type": "integer"
          },
          "hard_violation_penalty_per_item": {
            "type": "number"
          },
          "hard_penalty": {
            "type": "number"
          },
          "soft_disagreement_count": {
            "type": "integer"
          },
          "soft_disagreement_penalty_per_item": {
            "type": "number"
          },
          "soft_penalty": {
            "type": "number"
          },
          "raw_score": {
            "type": "number"
          },
          "final_score": {
            "type": "number"
          },
          "pass_threshold": {
            "type": "number"
          },
          "formula": {
            "type": "string"
          }
        }
      },
      "CategoryScore": {
        "type": "object",
        "description": "One of the three top-level buckets (wer / punctuation / tags). The portal and most integrations should read `breakdown.categories.<name>` rather than the flat `hard_violations` / `soft_disagreements` lists.",
        "properties": {
          "name": {
            "type": "string",
            "enum": [
              "wer",
              "punctuation",
              "tags"
            ]
          },
          "label": {
            "type": "string",
            "description": "Human-readable label, e.g. 'Lexical accuracy (WER)'."
          },
          "score": {
            "type": "number",
            "minimum": 0,
            "maximum": 1
          },
          "passed": {
            "type": "boolean"
          },
          "pass_threshold": {
            "type": "number",
            "default": 0.97
          },
          "hard_violation_count": {
            "type": "integer"
          },
          "soft_disagreement_count": {
            "type": "integer"
          },
          "rescued_count": {
            "type": "integer"
          },
          "violations": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Violation"
            },
            "description": "HARD violations in this category only."
          },
          "soft_disagreements": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Violation"
            }
          },
          "rescued": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Violation"
            }
          },
          "details": {
            "type": "object",
            "description": "Category-specific extras. For `wer` this is an object with the keys `blended_lexical_error`, `latin_token_count`, and `indic_token_count`. Empty for `punctuation` and `tags`."
          }
        }
      },
      "ScoreResult": {
        "type": "object",
        "required": [
          "file_id",
          "score",
          "passed",
          "breakdown"
        ],
        "properties": {
          "file_id": {
            "type": "string"
          },
          "score": {
            "type": "number",
            "minimum": 0,
            "maximum": 1,
            "description": "Overall composite score across all categories."
          },
          "passed": {
            "type": "boolean"
          },
          "breakdown": {
            "type": "object",
            "properties": {
              "categories": {
                "type": "object",
                "description": "The three top-level buckets — the primary shape the portal renders from.",
                "properties": {
                  "wer": {
                    "$ref": "#/components/schemas/CategoryScore"
                  },
                  "punctuation": {
                    "$ref": "#/components/schemas/CategoryScore"
                  },
                  "tags": {
                    "$ref": "#/components/schemas/CategoryScore"
                  }
                }
              },
              "hard_violations": {
                "type": "array",
                "description": "Flat list — kept for back-compat. Prefer `categories`.",
                "items": {
                  "$ref": "#/components/schemas/Violation"
                }
              },
              "soft_disagreements": {
                "type": "array",
                "items": {
                  "$ref": "#/components/schemas/Violation"
                }
              },
              "rescued_disagreements": {
                "type": "array",
                "items": {
                  "$ref": "#/components/schemas/Violation"
                }
              },
              "lexical": {
                "type": "object"
              },
              "math": {
                "$ref": "#/components/schemas/ScoreMath"
              }
            }
          }
        }
      },
      "BatchResponse": {
        "type": "object",
        "properties": {
          "results": {
            "type": "array",
            "items": {
              "oneOf": [
                {
                  "type": "object",
                  "properties": {
                    "file_id": {
                      "type": "string"
                    },
                    "ok": {
                      "type": "boolean",
                      "enum": [
                        true
                      ]
                    },
                    "result": {
                      "$ref": "#/components/schemas/ScoreResult"
                    }
                  }
                },
                {
                  "type": "object",
                  "properties": {
                    "file_id": {
                      "type": "string"
                    },
                    "ok": {
                      "type": "boolean",
                      "enum": [
                        false
                      ]
                    },
                    "error": {
                      "type": "string"
                    },
                    "detail": {
                      "type": "string"
                    }
                  }
                }
              ]
            }
          },
          "summary": {
            "type": "object",
            "properties": {
              "n_total": {
                "type": "integer"
              },
              "n_scored": {
                "type": "integer"
              },
              "n_failed_to_parse": {
                "type": "integer"
              },
              "n_passed": {
                "type": "integer"
              },
              "n_failed": {
                "type": "integer"
              },
              "pass_rate": {
                "type": "number"
              },
              "average_score": {
                "type": "number"
              },
              "avg_hard_violations": {
                "type": "number"
              },
              "avg_soft_disagreements": {
                "type": "number"
              },
              "pass_threshold": {
                "type": "number"
              },
              "elapsed_ms": {
                "type": "integer"
              }
            }
          }
        }
      }
    },
    "examples": {
      "SinglePair": {
        "summary": "Code-switched Hindi/English utterance",
        "value": {
          "annotator": [
            {
              "speaker": "Speaker_1",
              "start": "00:00:00.000",
              "end": "00:00:02.500",
              "text": "मैंने कल Delhi से नया laptop खरीदा। [None]"
            }
          ],
          "gold": [
            {
              "speaker": "Speaker_1",
              "start": "00:00:00.000",
              "end": "00:00:02.500",
              "text": "मैंने कल Delhi से नया laptop खरीदा। [None]"
            }
          ]
        }
      },
      "Batch": {
        "summary": "Two clips; the second annotator transcript has a stray semicolon that is absent from gold",
        "value": {
          "pairs": [
            {
              "file_id": "clip_001",
              "annotator": [
                {
                  "speaker": "Speaker_1",
                  "text": "मैंने कल खरीदा। [None]"
                }
              ],
              "gold": [
                {
                  "speaker": "Speaker_1",
                  "text": "मैंने कल खरीदा। [None]"
                }
              ]
            },
            {
              "file_id": "clip_002",
              "annotator": [
                {
                  "speaker": "Speaker_1",
                  "text": "hello; world। [None]"
                }
              ],
              "gold": [
                {
                  "speaker": "Speaker_1",
                  "text": "hello world। [None]"
                }
              ]
            }
          ]
        }
      }
    }
  }
}