Speech to Text API v3.0

Speech to Text API v3.0.

Request URL

Request headers

string
Media type of the body sent to the API.
string
Subscription key that provides access to this API. You can find it in your Cognitive Services account.

Request body

The details of the new transcription.

{
  "contentUrls": [
    "https://contoso.com/mystoragelocation",
    "https://contoso.com/myotherstoragelocation"
  ],
  "properties": {
    "diarizationEnabled": false,
    "wordLevelTimestampsEnabled": false,
    "punctuationMode": "DictatedAndAutomatic",
    "profanityFilterMode": "Masked"
  },
  "locale": "en-US",
  "displayName": "Transcription using default model for en-US"
}
{
  "title": "Transcription",
  "required": [
    "displayName",
    "locale"
  ],
  "type": "object",
  "properties": {
    "links": {
      "title": "Links",
      "type": "object",
      "properties": {
        "files": {
          "description": "The location to get all files of this entity.",
          "type": "string",
          "readOnly": true
        }
      },
      "description": "The links for additional actions or content related to this transcription.",
      "readOnly": true
    },
    "properties": {
      "title": "TranscriptionProperties",
      "type": "object",
      "properties": {
        "diarizationEnabled": {
          "description": "A value indicating whether diarization (speaker separation) is requested.",
          "type": "boolean",
          "readOnly": false
        },
        "wordLevelTimestampsEnabled": {
          "description": "A value indicating whether word level timestamps are requested.",
          "type": "boolean",
          "readOnly": false
        },
        "duration": {
          "description": "The duration of the transcription. The duration is encoded as ISO 8601 duration\r\n(\"PnYnMnDTnHnMnS\", see https://en.wikipedia.org/wiki/ISO_8601#Durations).",
          "type": "string",
          "readOnly": true
        },
        "channels": {
          "description": "A collection of the requested channel numbers.\r\nIn the default case, the channels 0 and 1 are considered.",
          "type": "array",
          "items": {
            "format": "int32",
            "type": "integer"
          },
          "readOnly": false
        },
        "destinationContainerUrl": {
          "description": "The requested destination container.",
          "type": "string",
          "readOnly": false
        },
        "punctuationMode": {
          "description": "The requested punctuation mode.",
          "enum": [
            "None",
            "Dictated",
            "Automatic",
            "DictatedAndAutomatic"
          ],
          "type": "string",
          "readOnly": false
        },
        "profanityFilterMode": {
          "description": "The requested profanity filter mode.",
          "enum": [
            "None",
            "Removed",
            "Tags",
            "Masked"
          ],
          "type": "string",
          "readOnly": false
        },
        "timeToLive": {
          "description": "How long the transcription will be kept in the system. Once the transcription reaches the time to live\r\nafter completion (successful or failed) it will be automatically deleted. Not setting this value or setting\r\nto 0 will disable automatic deletion.\r\nThe duration is encoded as ISO 8601 duration (\"PnYnMnDTnHnMnS\", see https://en.wikipedia.org/wiki/ISO_8601#Durations).",
          "type": "string",
          "readOnly": false
        },
        "email": {
          "description": "The email address to send email notifications to when the operation completes.\r\nThe value will be removed after successfully sending the email.",
          "type": "string",
          "readOnly": false
        },
        "error": {
          "title": "EntityError",
          "type": "object",
          "properties": {
            "code": {
              "description": "The code of this error.",
              "type": "string",
              "readOnly": true
            },
            "message": {
              "description": "The message for this error.",
              "type": "string",
              "readOnly": true
            }
          },
          "description": "The details of the error in case the entity is in a failed state.",
          "readOnly": true
        }
      },
      "description": "Additional configuration options when creating a new transcription and additional metadata provided by the service.",
      "readOnly": false
    },
    "self": {
      "description": "The location of this entity.",
      "type": "string",
      "readOnly": true
    },
    "model": {
      "title": "EntityReference",
      "required": [
        "self"
      ],
      "type": "object",
      "properties": {
        "self": {
          "description": "The location of the referenced entity.",
          "type": "string",
          "readOnly": false
        }
      },
      "description": "The model used in this transcription.",
      "readOnly": false
    },
    "project": {
      "title": "EntityReference",
      "required": [
        "self"
      ],
      "type": "object",
      "properties": {
        "self": {
          "description": "The location of the referenced entity.",
          "type": "string",
          "readOnly": false
        }
      },
      "description": "The project that the transcription is associated with.",
      "readOnly": false
    },
    "dataset": {
      "title": "EntityReference",
      "required": [
        "self"
      ],
      "type": "object",
      "properties": {
        "self": {
          "description": "The location of the referenced entity.",
          "type": "string",
          "readOnly": false
        }
      },
      "description": "A reference to an acoustic or audio files dataset containing the audio files that will be transcribed.",
      "readOnly": false
    },
    "contentUrls": {
      "description": "A list of content urls to get audio files from for transcription. Up to 1000 urls are allowed.\r\nThis property will not be returned in a response.",
      "type": "array",
      "items": {
        "type": "string"
      },
      "readOnly": false
    },
    "contentContainerUrl": {
      "description": "A URL for an Azure blob container that contains the audio files. A container is allowed to have a maximum size of 5GB and a maximum number of 10000 blobs.\r\nThe maximum size for a blob is 2.5GB. \r\nContainer SAS should contain 'r' (read) and 'l' (list) permissions. \r\nThis property will not be returned in a response.",
      "type": "string",
      "readOnly": false
    },
    "displayName": {
      "description": "The display name of the object.",
      "type": "string",
      "readOnly": false
    },
    "description": {
      "description": "The description of the object.",
      "type": "string",
      "readOnly": false
    },
    "customProperties": {
      "description": "The custom properties of this entity. The maximum allowed key length is 64 characters, the maximum\r\nallowed value length is 256 characters and the count of allowed entries is 10.",
      "type": "object",
      "additionalProperties": {
        "type": "string"
      },
      "readOnly": false
    },
    "locale": {
      "description": "The locale of the contained data.",
      "type": "string",
      "readOnly": false
    },
    "lastActionDateTime": {
      "format": "date-time",
      "description": "The time-stamp when the current status was entered.\r\nThe time stamp is encoded as ISO 8601 date and time format\r\n(\"YYYY-MM-DDThh:mm:ssZ\", see https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations).",
      "type": "string",
      "readOnly": true
    },
    "status": {
      "description": "The status of the object.",
      "enum": [
        "NotStarted",
        "Running",
        "Succeeded",
        "Failed"
      ],
      "type": "string",
      "readOnly": true,
      "x-ms-enum": {
        "name": "Status",
        "modelAsString": false
      }
    },
    "createdDateTime": {
      "format": "date-time",
      "description": "The time-stamp when the object was created.\r\nThe time stamp is encoded as ISO 8601 date and time format\r\n(\"YYYY-MM-DDThh:mm:ssZ\", see https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations).",
      "type": "string",
      "readOnly": true
    }
  },
  "example": {
    "contentUrls": [
      "https://contoso.com/mystoragelocation",
      "https://contoso.com/myotherstoragelocation"
    ],
    "properties": {
      "diarizationEnabled": false,
      "wordLevelTimestampsEnabled": false,
      "punctuationMode": "DictatedAndAutomatic",
      "profanityFilterMode": "Masked"
    },
    "locale": "en-US",
    "displayName": "Transcription using default model for en-US"
  }
}

Response 201

The response contains the location of the entity as a header.

{
  "self": "https://westus.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions/9c142230-a9e4-4dbb-8cc7-70ca43d5cc91",
  "contentUrls": [
    "https://contoso.com/",
    "https://contoso2.com/"
  ],
  "model": {
    "self": "https://westus.api.cognitive.microsoft.com/speechtotext/v3.0/models/021a72d0-54c4-43d3-8254-27336ead9037"
  },
  "links": {
    "files": "https://westus.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions/9c142230-a9e4-4dbb-8cc7-70ca43d5cc91/files"
  },
  "properties": {
    "diarizationEnabled": false,
    "wordLevelTimestampsEnabled": false,
    "channels": [
      0,
      1
    ],
    "punctuationMode": "DictatedAndAutomatic",
    "profanityFilterMode": "Masked",
    "duration": "PT42S"
  },
  "lastActionDateTime": "2019-01-07T11:36:07Z",
  "status": "Succeeded",
  "createdDateTime": "2019-01-07T11:34:12Z",
  "locale": "en-US",
  "displayName": "Transcription using adapted model en-US",
  "customProperties": {
    "key": "value"
  }
}
{
  "title": "Transcription",
  "required": [
    "displayName",
    "locale"
  ],
  "type": "object",
  "properties": {
    "links": {
      "title": "Links",
      "type": "object",
      "properties": {
        "files": {
          "description": "The location to get all files of this entity.",
          "type": "string",
          "readOnly": true
        }
      },
      "description": "The links for additional actions or content related to this transcription.",
      "readOnly": true
    },
    "properties": {
      "title": "TranscriptionProperties",
      "type": "object",
      "properties": {
        "diarizationEnabled": {
          "description": "A value indicating whether diarization (speaker separation) is requested.",
          "type": "boolean",
          "readOnly": false
        },
        "wordLevelTimestampsEnabled": {
          "description": "A value indicating whether word level timestamps are requested.",
          "type": "boolean",
          "readOnly": false
        },
        "duration": {
          "description": "The duration of the transcription. The duration is encoded as ISO 8601 duration\r\n(\"PnYnMnDTnHnMnS\", see https://en.wikipedia.org/wiki/ISO_8601#Durations).",
          "type": "string",
          "readOnly": true
        },
        "channels": {
          "description": "A collection of the requested channel numbers.\r\nIn the default case, the channels 0 and 1 are considered.",
          "type": "array",
          "items": {
            "format": "int32",
            "type": "integer"
          },
          "readOnly": false
        },
        "destinationContainerUrl": {
          "description": "The requested destination container.",
          "type": "string",
          "readOnly": false
        },
        "punctuationMode": {
          "description": "The requested punctuation mode.",
          "enum": [
            "None",
            "Dictated",
            "Automatic",
            "DictatedAndAutomatic"
          ],
          "type": "string",
          "readOnly": false
        },
        "profanityFilterMode": {
          "description": "The requested profanity filter mode.",
          "enum": [
            "None",
            "Removed",
            "Tags",
            "Masked"
          ],
          "type": "string",
          "readOnly": false
        },
        "timeToLive": {
          "description": "How long the transcription will be kept in the system. Once the transcription reaches the time to live\r\nafter completion (successful or failed) it will be automatically deleted. Not setting this value or setting\r\nto 0 will disable automatic deletion.\r\nThe duration is encoded as ISO 8601 duration (\"PnYnMnDTnHnMnS\", see https://en.wikipedia.org/wiki/ISO_8601#Durations).",
          "type": "string",
          "readOnly": false
        },
        "email": {
          "description": "The email address to send email notifications to when the operation completes.\r\nThe value will be removed after successfully sending the email.",
          "type": "string",
          "readOnly": false
        },
        "error": {
          "title": "EntityError",
          "type": "object",
          "properties": {
            "code": {
              "description": "The code of this error.",
              "type": "string",
              "readOnly": true
            },
            "message": {
              "description": "The message for this error.",
              "type": "string",
              "readOnly": true
            }
          },
          "description": "The details of the error in case the entity is in a failed state.",
          "readOnly": true
        }
      },
      "description": "Additional configuration options when creating a new transcription and additional metadata provided by the service.",
      "readOnly": false
    },
    "self": {
      "description": "The location of this entity.",
      "type": "string",
      "readOnly": true
    },
    "model": {
      "title": "EntityReference",
      "required": [
        "self"
      ],
      "type": "object",
      "properties": {
        "self": {
          "description": "The location of the referenced entity.",
          "type": "string",
          "readOnly": false
        }
      },
      "description": "The model used in this transcription.",
      "readOnly": false
    },
    "project": {
      "title": "EntityReference",
      "required": [
        "self"
      ],
      "type": "object",
      "properties": {
        "self": {
          "description": "The location of the referenced entity.",
          "type": "string",
          "readOnly": false
        }
      },
      "description": "The project that the transcription is associated with.",
      "readOnly": false
    },
    "dataset": {
      "title": "EntityReference",
      "required": [
        "self"
      ],
      "type": "object",
      "properties": {
        "self": {
          "description": "The location of the referenced entity.",
          "type": "string",
          "readOnly": false
        }
      },
      "description": "A reference to an acoustic or audio files dataset containing the audio files that will be transcribed.",
      "readOnly": false
    },
    "contentUrls": {
      "description": "A list of content urls to get audio files from for transcription. Up to 1000 urls are allowed.\r\nThis property will not be returned in a response.",
      "type": "array",
      "items": {
        "type": "string"
      },
      "readOnly": false
    },
    "contentContainerUrl": {
      "description": "A URL for an Azure blob container that contains the audio files. A container is allowed to have a maximum size of 5GB and a maximum number of 10000 blobs.\r\nThe maximum size for a blob is 2.5GB. \r\nContainer SAS should contain 'r' (read) and 'l' (list) permissions. \r\nThis property will not be returned in a response.",
      "type": "string",
      "readOnly": false
    },
    "displayName": {
      "description": "The display name of the object.",
      "type": "string",
      "readOnly": false
    },
    "description": {
      "description": "The description of the object.",
      "type": "string",
      "readOnly": false
    },
    "customProperties": {
      "description": "The custom properties of this entity. The maximum allowed key length is 64 characters, the maximum\r\nallowed value length is 256 characters and the count of allowed entries is 10.",
      "type": "object",
      "additionalProperties": {
        "type": "string"
      },
      "readOnly": false
    },
    "locale": {
      "description": "The locale of the contained data.",
      "type": "string",
      "readOnly": false
    },
    "lastActionDateTime": {
      "format": "date-time",
      "description": "The time-stamp when the current status was entered.\r\nThe time stamp is encoded as ISO 8601 date and time format\r\n(\"YYYY-MM-DDThh:mm:ssZ\", see https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations).",
      "type": "string",
      "readOnly": true
    },
    "status": {
      "description": "The status of the object.",
      "enum": [
        "NotStarted",
        "Running",
        "Succeeded",
        "Failed"
      ],
      "type": "string",
      "readOnly": true,
      "x-ms-enum": {
        "name": "Status",
        "modelAsString": false
      }
    },
    "createdDateTime": {
      "format": "date-time",
      "description": "The time-stamp when the object was created.\r\nThe time stamp is encoded as ISO 8601 date and time format\r\n(\"YYYY-MM-DDThh:mm:ssZ\", see https://en.wikipedia.org/wiki/ISO_8601#Combined_date_and_time_representations).",
      "type": "string",
      "readOnly": true
    }
  },
  "example": {
    "contentUrls": [
      "https://contoso.com/mystoragelocation",
      "https://contoso.com/myotherstoragelocation"
    ],
    "properties": {
      "diarizationEnabled": false,
      "wordLevelTimestampsEnabled": false,
      "punctuationMode": "DictatedAndAutomatic",
      "profanityFilterMode": "Masked"
    },
    "locale": "en-US",
    "displayName": "Transcription using default model for en-US"
  }
}

Response 400

In case the operation cannot be performed successfully with the specified values.

{
  "code": "InvalidRequest",
  "message": "The base model isn't valid for this operation.",
  "innerError": {
    "code": "InvalidBaseModel",
    "message": "The base model isn't valid for this operation."
  }
}

Response 401

In case the user isn't authorized.

{
  "code": "Unauthorized",
  "message": "Authentication is required to access the resource."
}

Response 403

In case the authorized user isn't known or doesn't have the required permissions.

{
  "code": "Forbidden",
  "message": "No permission to access this resource."
}

Response 429

In case the rate limit has been exceeded.

{
  "code": "TooManyRequests",
  "message": "The rate limit has been reached. The timeout in seconds can be found in the Retry-After header."
}

Code samples

@ECHO OFF

REM Creates a new transcription by POSTing a JSON body to the transcriptions endpoint.
REM Replace {subscription key} and {body} with real values before running.
REM The trailing carets (^) continue the single curl command across lines; without
REM them each line would be executed as a separate command.
curl -v -X POST "https://switzerlandwest.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions" ^
-H "Content-Type: application/json" ^
-H "Ocp-Apim-Subscription-Key: {subscription key}" ^
--data-ascii "{body}"
using System;
using System.Net.Http.Headers;
using System.Text;
using System.Net.Http;
using System.Threading.Tasks;
using System.Web;

namespace CSHttpClientSample
{
    /// <summary>
    /// Console sample that creates a transcription by POSTing a JSON body to the
    /// Speech to Text v3.0 transcriptions endpoint.
    /// </summary>
    static class Program
    {
        static void Main()
        {
            // Wait for the request to finish so its outcome (or its exception) is
            // observed before the prompt is shown.
            MakeRequest().GetAwaiter().GetResult();
            Console.WriteLine("Hit ENTER to exit...");
            Console.ReadLine();
        }

        /// <summary>
        /// Sends the POST request and writes the response status and body to the console.
        /// Replace "{subscription key}" and "{body}" with real values before running.
        /// </summary>
        /// <returns>A task that completes once the response has been read.</returns>
        static async Task MakeRequest()
        {
            // This operation takes no query parameters; the collection stays empty.
            var queryString = HttpUtility.ParseQueryString(string.Empty);
            var uri = "https://switzerlandwest.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions?" + queryString;

            // Request body
            byte[] byteData = Encoding.UTF8.GetBytes("{body}");

            using (var client = new HttpClient())
            using (var content = new ByteArrayContent(byteData))
            {
                // Request headers
                client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", "{subscription key}");

                // MediaTypeHeaderValue rejects anything that is not a valid media type,
                // so a real value is required here rather than a descriptive placeholder.
                content.Headers.ContentType = new MediaTypeHeaderValue("application/json");

                using (HttpResponseMessage response = await client.PostAsync(uri, content))
                {
                    Console.WriteLine((int)response.StatusCode + " " + response.ReasonPhrase);
                    Console.WriteLine(await response.Content.ReadAsStringAsync());
                }
            }
        }
    }
}
// // This sample uses the Apache HTTP client from HTTP Components (http://hc.apache.org/httpcomponents-client-ga/)
import java.net.URI;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class JavaSample 
{
    public static void main(String[] args) 
    {
        HttpClient httpclient = HttpClients.createDefault();

        try
        {
            URIBuilder builder = new URIBuilder("https://switzerlandwest.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions");


            URI uri = builder.build();
            HttpPost request = new HttpPost(uri);
            request.setHeader("Content-Type", "application/json");
            request.setHeader("Ocp-Apim-Subscription-Key", "{subscription key}");


            // Request body
            StringEntity reqEntity = new StringEntity("{body}");
            request.setEntity(reqEntity);

            HttpResponse response = httpclient.execute(request);
            HttpEntity entity = response.getEntity();

            if (entity != null) 
            {
                System.out.println(EntityUtils.toString(entity));
            }
        }
        catch (Exception e)
        {
            System.out.println(e.getMessage());
        }
    }
}

<!DOCTYPE html>
<html>
<head>
    <title>JSSample</title>
    <!-- Loaded over HTTPS so the script is not blocked as mixed content when this page is served over HTTPS. -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.0/jquery.min.js"></script>
</head>
<body>

<!-- Creates a transcription by POSTing a JSON body to the transcriptions endpoint.
     Replace {subscription key} and {body} with real values before use. -->
<script type="text/javascript">
    $(function() {
        var params = {
            // Request parameters
        };
      
        $.ajax({
            url: "https://switzerlandwest.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions?" + $.param(params),
            beforeSend: function(xhrObj){
                // Request headers
                xhrObj.setRequestHeader("Content-Type","application/json");
                xhrObj.setRequestHeader("Ocp-Apim-Subscription-Key","{subscription key}");
            },
            type: "POST",
            // Request body
            data: "{body}"
        })
        .done(function(data) {
            alert("success");
        })
        .fail(function() {
            alert("error");
        });
    });
</script>
</body>
</html>
#import <Foundation/Foundation.h>

// Creates a transcription by POSTing a JSON body to the transcriptions endpoint,
// then logs the raw response and the parsed JSON.
// Replace {subscription key} and {body} with real values before running.
// This sample uses manual reference counting (explicit NSAutoreleasePool).
int main(int argc, const char * argv[])
{
    NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
    
    NSString* path = @"https://switzerlandwest.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions";
    NSArray* array = @[
                         // Request parameters (none are sent, matching the other samples for this operation)
                      ];
    
    // Append a query string only when there is at least one parameter.
    if ([array count] > 0)
    {
        NSString* string = [array componentsJoinedByString:@"&"];
        path = [path stringByAppendingFormat:@"?%@", string];
    }

    NSLog(@"%@", path);

    NSMutableURLRequest* _request = [NSMutableURLRequest requestWithURL:[NSURL URLWithString:path]];
    [_request setHTTPMethod:@"POST"];
    // Request headers
    [_request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"];
    [_request setValue:@"{subscription key}" forHTTPHeaderField:@"Ocp-Apim-Subscription-Key"];
    // Request body
    [_request setHTTPBody:[@"{body}" dataUsingEncoding:NSUTF8StringEncoding]];
    
    NSURLResponse *response = nil;
    NSError *error = nil;
    NSData* _connectionData = [NSURLConnection sendSynchronousRequest:_request returningResponse:&response error:&error];

    if (nil != error)
    {
        NSLog(@"Error: %@", error);
    }
    else
    {
        // Separate variable for the parse step so it does not shadow the request error.
        NSError* parseError = nil;
        NSMutableDictionary* json = nil;
        // alloc/init returns an owned object; autorelease it so the pool drain below
        // frees it instead of leaking it.
        NSString* dataString = [[[NSString alloc] initWithData:_connectionData encoding:NSUTF8StringEncoding] autorelease];
        NSLog(@"%@", dataString);
        
        if (nil != _connectionData)
        {
            json = [NSJSONSerialization JSONObjectWithData:_connectionData options:NSJSONReadingMutableContainers error:&parseError];
        }
        
        if (parseError || !json)
        {
            NSLog(@"Could not parse loaded json with error:%@", parseError);
        }
        else
        {
            NSLog(@"%@", json);
        }
        
        _connectionData = nil;
    }
    
    [pool drain];

    return 0;
}
<?php
// This sample uses the PEAR HTTP_Request2 package (https://pear.php.net/package/HTTP_Request2).
// It creates a transcription by POSTing a JSON body to the transcriptions endpoint.
// Replace {subscription key} and {body} with real values before running.
require_once 'HTTP/Request2.php';

$request = new HTTP_Request2('https://switzerlandwest.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions');
$url = $request->getUrl();

$headers = array(
    // Request headers
    'Content-Type' => 'application/json',
    'Ocp-Apim-Subscription-Key' => '{subscription key}',
);

$request->setHeader($headers);

$parameters = array(
    // Request parameters
);

$url->setQueryVariables($parameters);

$request->setMethod(HTTP_Request2::METHOD_POST);

// Request body
$request->setBody("{body}");

try
{
    $response = $request->send();
    echo $response->getBody();
}
// HTTP_Request2 reports its failures through HTTP_Request2_Exception and its subclasses.
catch (HTTP_Request2_Exception $ex)
{
    echo $ex;
}

?>
########### Python 2.7 #############
# Creates a transcription by POSTing a JSON body to the transcriptions endpoint
# and prints the raw response body.
# Replace {subscription key} and {body} with real values before running.
import httplib, urllib, base64

headers = {
    # Request headers
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': '{subscription key}',
}

# This operation takes no query parameters, so the encoded string is empty.
params = urllib.urlencode({
})

conn = None
try:
    conn = httplib.HTTPSConnection('switzerlandwest.api.cognitive.microsoft.com')
    conn.request("POST", "/speechtotext/v3.0/transcriptions?%s" % params, "{body}", headers)
    response = conn.getresponse()
    data = response.read()
    print(data)
except EnvironmentError as e:
    # Socket and OS level failures carry errno/strerror.
    print("[Errno {0}] {1}".format(e.errno, e.strerror))
except Exception as e:
    # Other failures (e.g. httplib.HTTPException) have no errno/strerror;
    # print the exception itself instead of raising AttributeError here.
    print(e)
finally:
    # Release the connection whether or not the request succeeded.
    if conn is not None:
        conn.close()

####################################

########### Python 3.2 #############
# Creates a transcription by POSTing a JSON body to the transcriptions endpoint
# and prints the raw response body.
# Replace {subscription key} and {body} with real values before running.
import http.client, urllib.request, urllib.parse, urllib.error, base64

headers = {
    # Request headers
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': '{subscription key}',
}

# This operation takes no query parameters, so the encoded string is empty.
params = urllib.parse.urlencode({
})

conn = None
try:
    conn = http.client.HTTPSConnection('switzerlandwest.api.cognitive.microsoft.com')
    conn.request("POST", "/speechtotext/v3.0/transcriptions?%s" % params, "{body}", headers)
    response = conn.getresponse()
    data = response.read()
    print(data)
except EnvironmentError as e:
    # Socket and OS level failures carry errno/strerror
    # (EnvironmentError is an alias of OSError from Python 3.3 on).
    print("[Errno {0}] {1}".format(e.errno, e.strerror))
except Exception as e:
    # Other failures (e.g. http.client.HTTPException) have no errno/strerror;
    # print the exception itself instead of raising AttributeError here.
    print(e)
finally:
    # Release the connection whether or not the request succeeded.
    if conn is not None:
        conn.close()

####################################
require 'net/http'

# Creates a transcription by POSTing a JSON body to the transcriptions endpoint
# and prints the response body.
# Replace {subscription key} and {body} with real values before running.
endpoint = URI('https://switzerlandwest.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions')

post = Net::HTTP::Post.new(endpoint.request_uri)

# Request headers
{
  'Content-Type' => 'application/json',
  'Ocp-Apim-Subscription-Key' => '{subscription key}'
}.each { |header_name, header_value| post[header_name] = header_value }

# Request body
post.body = "{body}"

# TLS is enabled only when the endpoint scheme is https.
tls_enabled = endpoint.scheme == 'https'
reply = Net::HTTP.start(endpoint.host, endpoint.port, :use_ssl => tls_enabled) { |http| http.request(post) }

puts reply.body