Amazon Transcribe converts audio to text (Speech-to-Text):
# Start a transcription job for a WAV file stored in S3 (language: en-US);
# the transcript is written to the bucket passed via --output-bucket-name
aws transcribe start-transcription-job \
--transcription-job-name <JOB_NAME> \
--language-code en-US \
--media-format wav \
--media '{"MediaFileUri":"s3://<BUCKET>/<KEY>"}' \
--output-bucket-name <BUCKET> \
--region <REGION>
# Get the status and details of a single job, looked up by its name
aws transcribe get-transcription-job \
--transcription-job-name <JOB_NAME> \
--region <REGION>
# Or list all transcription jobs in the region
aws transcribe list-transcription-jobs \
--region <REGION>
Results are written to the output S3 bucket as a JSON transcript file; download or view it as follows:
# Download the transcript JSON from S3 into the current directory
aws s3 cp s3://<BUCKET>/<TRANSCRIPT_KEY> transcript.json
# Or fetch it with the lower-level s3api call (the last argument is the
# local output file), then print the file contents
aws s3api get-object \
--bucket <BUCKET> \
--key <TRANSCRIPT_KEY> \
transcript.json
cat transcript.json
# Delete the transcription job by name
aws transcribe delete-transcription-job \
--transcription-job-name <JOB_NAME> \
--region <REGION>
import boto3
import json
class TranscribeStreamingService:
    """Print partial and final transcripts from a streaming transcription.

    NOTE(review): boto3 does not appear to ship a 'transcribe-streaming'
    client, and the keyword names passed to ``start_stream_transcription``
    below mix snake_case with ``AudioStream``. AWS documents a separate
    Python SDK for streaming transcription -- confirm which client this
    example is meant to target before relying on it.
    """

    def __init__(self, region: str = 'ap-southeast-1'):
        """Create the streaming client for *region*."""
        self.client = boto3.client('transcribe-streaming', region_name=region)

    def transcribe_stream(
        self,
        audio_stream,
        *,
        language_code: str = 'en-US',
        media_sample_rate_hz: int = 16000,
        media_encoding: str = 'pcm',
    ) -> None:
        """Transcribe an audio stream and print each result as it arrives.

        Args:
            audio_stream: Audio source, passed through unchanged as the
                ``AudioStream`` argument of the client call.
            language_code: Language of the audio (default ``'en-US'``).
            media_sample_rate_hz: Sample rate of the audio (default 16000).
            media_encoding: Encoding of the audio (default ``'pcm'``).

        Each result is printed as ``Partial: <text>`` while it is still
        being refined and as ``Final: <text>`` once it is complete. Events
        without a ``TranscriptEvent`` key and results without any
        alternatives are skipped.
        """
        response = self.client.start_stream_transcription(
            language_code=language_code,
            media_sample_rate_hz=media_sample_rate_hz,
            media_encoding=media_encoding,
            AudioStream=audio_stream,
        )

        for event in response['TranscriptResultStream']:
            if 'TranscriptEvent' not in event:
                continue
            for item in event['TranscriptEvent']['Transcript']['Results']:
                alternatives = item['Alternatives']
                # A result with no alternatives carries no text to print;
                # indexing [0] on it would raise IndexError.
                if not alternatives:
                    continue
                label = 'Partial' if item['IsPartial'] else 'Final'
                print(f"{label}: {alternatives[0]['Transcript']}")
import uuid
from typing import Optional

import boto3
class TranscribeService:
    """Start Amazon Transcribe jobs and look up where their transcript is."""

    def __init__(
        self,
        region: str = 'ap-southeast-1',
        bucket_name: str = 'lexi-be-speakingaudiobucket',
    ):
        """Create the Transcribe client.

        Args:
            region: AWS region for the client.
            bucket_name: S3 bucket that transcripts are written to.
        """
        self.client = boto3.client('transcribe', region_name=region)
        self.bucket_name = bucket_name

    def transcribe_audio(
        self,
        audio_url: str,
        media_format: str = 'wav',
        language_code: str = 'en-US',
    ) -> str:
        """Start a transcription job for an audio file.

        This only submits the job; it does not return the transcript text.
        Pass the returned name to ``get_transcription`` to find the result.

        Args:
            audio_url: Location of the audio, sent as ``MediaFileUri``.
            media_format: Format of the audio file (default ``'wav'``).
            language_code: Language of the audio (default ``'en-US'``).

        Returns:
            The name of the job that was started. The transcript is
            requested at ``transcripts/<job name>.json`` in
            ``self.bucket_name``.
        """
        # A random UUID keeps job names unique across calls.
        job_name = f"lexi-transcribe-{uuid.uuid4()}"

        response = self.client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': audio_url},
            MediaFormat=media_format,
            LanguageCode=language_code,
            OutputBucketName=self.bucket_name,
            OutputKey=f'transcripts/{job_name}.json',
        )
        return response['TranscriptionJob']['TranscriptionJobName']

    def get_transcription(self, job_name: str) -> Optional[str]:
        """Return the transcript file URI of a completed job.

        Args:
            job_name: Name returned by ``transcribe_audio``.

        Returns:
            The job's ``TranscriptFileUri`` when its status is
            ``COMPLETED``; ``None`` for every other status. A failed job
            therefore also yields ``None`` -- callers that poll should
            bound their retries.
        """
        response = self.client.get_transcription_job(
            TranscriptionJobName=job_name
        )
        job = response['TranscriptionJob']

        if job['TranscriptionJobStatus'] != 'COMPLETED':
            return None
        return job['Transcript']['TranscriptFileUri']
# NOTE(review): the AWS/Transcribe namespace publishes request-level metrics
# (e.g. SuccessfulRequestCount, AsyncUserErrorCount, AsyncServerErrorCount)
# rather than per-job success/failure counters. Confirm the metric names and
# the dimensions they are published with before relying on these queries:
#   aws cloudwatch list-metrics --namespace AWS/Transcribe --region <REGION>
# If the metrics carry dimensions, add a matching --dimensions argument.

# View successful requests (hourly sum)
aws cloudwatch get-metric-statistics \
--namespace AWS/Transcribe \
--metric-name SuccessfulRequestCount \
--start-time 2026-05-01T00:00:00Z \
--end-time 2026-05-02T00:00:00Z \
--period 3600 \
--statistics Sum \
--region <REGION>
# View failed asynchronous (batch job) requests caused by user errors (hourly sum)
aws cloudwatch get-metric-statistics \
--namespace AWS/Transcribe \
--metric-name AsyncUserErrorCount \
--start-time 2026-05-01T00:00:00Z \
--end-time 2026-05-02T00:00:00Z \
--period 3600 \
--statistics Sum \
--region <REGION>
Issue: Job failed
# Look up the job and print only its failure reason (selected with --query)
aws transcribe get-transcription-job \
--transcription-job-name <JOB_NAME> \
--region <REGION> \
--query 'TranscriptionJob.FailureReason'
Issue: Audio file not found
# Check whether the audio object exists at the expected S3 location
aws s3 ls s3://<BUCKET>/<KEY>
# Or upload the local audio file to that location
aws s3 cp <LOCAL_FILE> s3://<BUCKET>/<KEY>
Issue: Unsupported format
# Convert the input to 16-bit little-endian PCM WAV at a 16 kHz sample rate
ffmpeg -i input.mp3 -acodec pcm_s16le -ar 16000 output.wav
# Or keep the file as-is and pass a different --media-format when starting the job
# Supported: mp3, mp4, wav, flac, ogg, amr, webm
Continue to Polly to learn how to synthesize speech from text.