feat: Implement native Android MediaSession and foreground service for TTS playback

- Add `ReaderTtsMediaService` to handle background playback, media controls, and notifications on Android
- Integrate `MediaSessionCompat` to support external media controls and lock screen integration
- Add `ReaderTtsMediaBridge` for synchronized state communication between Kotlin and Flutter
- Update `TtsNotifier` to use the native Android service when available, with a fallback for other platforms
- Implement sentence-level highlighting and tapping to start reading from a specific location
- Update Android manifest with necessary permissions for foreground services and notifications
- Adjust TTS speech rate constants and improve playback health monitoring and recovery logic
This commit is contained in:
2026-04-10 18:56:36 +07:00
parent 2d41121b84
commit 76edaa25a4
9 changed files with 1706 additions and 214 deletions
+5
View File
@@ -4,6 +4,7 @@ import java.util.Properties
plugins {
id("com.android.application")
id("kotlin-android")
id("kotlin-parcelize")
// The Flutter Gradle Plugin must be applied after the Android and Kotlin Gradle plugins.
id("dev.flutter.flutter-gradle-plugin")
id("com.google.gms.google-services")
@@ -66,6 +67,10 @@ android {
}
}
dependencies {
implementation("androidx.media:media:1.7.0")
}
flutter {
source = "../.."
}
+16 -37
View File
@@ -5,43 +5,6 @@
"storage_bucket": "reader-1658c.firebasestorage.app"
},
"client": [
{
"client_info": {
"mobilesdk_app_id": "1:308259929553:android:9142ae16d9ddd8a91c34f0",
"android_client_info": {
"package_name": "com.example.reader_app"
}
},
"oauth_client": [
{
"client_id": "308259929553-7cdc4g8fe7os799trig7hk7ugkuansov.apps.googleusercontent.com",
"client_type": 1,
"android_info": {
"package_name": "com.example.reader_app",
"certificate_hash": "f7e9f7ec9bafd1de69934b2c9b52ee491d73bad7"
}
},
{
"client_id": "308259929553-9oame596io3s4lcj9cdb5db6v3i6f6rk.apps.googleusercontent.com",
"client_type": 3
}
],
"api_key": [
{
"current_key": "AIzaSyBibgTrvBWtJBL4PGeIyahBwRlYKcjQ47k"
}
],
"services": {
"appinvite_service": {
"other_platform_oauth_client": [
{
"client_id": "308259929553-9oame596io3s4lcj9cdb5db6v3i6f6rk.apps.googleusercontent.com",
"client_type": 3
}
]
}
}
},
{
"client_info": {
"mobilesdk_app_id": "1:308259929553:android:14f7828b9b9ca9d31c34f0",
@@ -50,6 +13,22 @@
}
},
"oauth_client": [
{
"client_id": "308259929553-fd8teopc4chi2jjd8kr5vn9inn35ar6j.apps.googleusercontent.com",
"client_type": 1,
"android_info": {
"package_name": "dev.fevirtus.reader",
"certificate_hash": "fa21a3e6a319b71b2dd0ef9573b22046dba5d55c"
}
},
{
"client_id": "308259929553-kdfvnu11cq6k9a2l1b3gtrmfmtsggduk.apps.googleusercontent.com",
"client_type": 1,
"android_info": {
"package_name": "dev.fevirtus.reader",
"certificate_hash": "f7e9f7ec9bafd1de69934b2c9b52ee491d73bad7"
}
},
{
"client_id": "308259929553-9oame596io3s4lcj9cdb5db6v3i6f6rk.apps.googleusercontent.com",
"client_type": 3
+7
View File
@@ -2,6 +2,9 @@
<uses-permission android:name="android.permission.INTERNET"/>
<uses-permission android:name="android.permission.WAKE_LOCK"/>
<uses-permission android:name="android.permission.REQUEST_IGNORE_BATTERY_OPTIMIZATIONS"/>
<uses-permission android:name="android.permission.FOREGROUND_SERVICE"/>
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK"/>
<uses-permission android:name="android.permission.POST_NOTIFICATIONS"/>
<application
android:label="reader_app"
android:name="${applicationName}"
@@ -34,6 +37,10 @@
<meta-data
android:name="flutterEmbedding"
android:value="2" />
<service
android:name=".tts.ReaderTtsMediaService"
android:exported="false"
android:foregroundServiceType="mediaPlayback" />
</application>
<!-- Required to query activities that can process text, see:
https://developer.android.com/training/package-visibility and
@@ -6,12 +6,19 @@ import android.net.Uri
import android.os.Build
import android.os.PowerManager
import android.provider.Settings
import androidx.core.app.NotificationManagerCompat
import io.flutter.embedding.engine.FlutterEngine
import io.flutter.embedding.android.FlutterActivity
import io.flutter.plugin.common.EventChannel
import io.flutter.plugin.common.MethodChannel
import com.example.reader_app.tts.ReaderTtsMediaBridge
import com.example.reader_app.tts.ReaderTtsMediaService
import com.example.reader_app.tts.ReaderTtsSegment
class MainActivity : FlutterActivity() {
private val channelName = "reader_app/tts_background"
private val mediaChannelName = "reader_app/tts_media"
private val mediaEventsChannelName = "reader_app/tts_media_events"
private var wakeLock: PowerManager.WakeLock? = null
override fun configureFlutterEngine(flutterEngine: FlutterEngine) {
@@ -35,6 +42,117 @@ class MainActivity : FlutterActivity() {
else -> result.notImplemented()
}
}
MethodChannel(flutterEngine.dartExecutor.binaryMessenger, mediaChannelName)
.setMethodCallHandler { call, result ->
when (call.method) {
"initialize" -> {
val enabled = call.argument<Boolean>("backgroundModeEnabled") ?: true
ReaderTtsMediaService.initialize(this, enabled)
result.success(ReaderTtsMediaBridge.snapshot())
}
"getSnapshot" -> result.success(ReaderTtsMediaBridge.snapshot())
"startReading" -> {
val startIndex = call.argument<Int>("startIndex") ?: 0
val contentKey = call.argument<String>("contentKey")
val title = call.argument<String>("title")
val speed = call.argument<Double>("speed") ?: 0.9
val language = call.argument<String>("language") ?: "vi-VN"
val voiceName = call.argument<String>("voiceName")
val backgroundModeEnabled = call.argument<Boolean>("backgroundModeEnabled") ?: true
ReaderTtsMediaService.startReading(
this,
parseSegments(call.argument<List<*>>("segments")),
startIndex,
contentKey,
title,
speed,
language,
voiceName,
backgroundModeEnabled,
)
result.success(null)
}
"pause" -> {
ReaderTtsMediaService.pause(this)
result.success(null)
}
"resume" -> {
ReaderTtsMediaService.resume(this)
result.success(null)
}
"stop" -> {
ReaderTtsMediaService.stop(this)
result.success(null)
}
"skipForward" -> {
ReaderTtsMediaService.skipForward(this)
result.success(null)
}
"skipBack" -> {
ReaderTtsMediaService.skipBack(this)
result.success(null)
}
"setSpeed" -> {
val speed = call.argument<Double>("speed") ?: 0.9
ReaderTtsMediaService.setSpeed(this, speed)
result.success(null)
}
"setVoiceByName" -> {
ReaderTtsMediaService.setVoice(
this,
call.argument<String>("voiceName"),
call.argument<String>("language"),
)
result.success(null)
}
"setBackgroundModeEnabled" -> {
val enabled = call.argument<Boolean>("enabled") ?: true
ReaderTtsMediaService.setBackgroundModeEnabled(this, enabled)
result.success(null)
}
"areNotificationsEnabled" -> {
result.success(NotificationManagerCompat.from(this).areNotificationsEnabled())
}
"openNotificationSettings" -> {
openNotificationSettings()
result.success(null)
}
"dispose" -> result.success(null)
else -> result.notImplemented()
}
}
EventChannel(flutterEngine.dartExecutor.binaryMessenger, mediaEventsChannelName)
.setStreamHandler(
object : EventChannel.StreamHandler {
override fun onListen(arguments: Any?, events: EventChannel.EventSink) {
ReaderTtsMediaBridge.attachSink(events)
}
override fun onCancel(arguments: Any?) {
ReaderTtsMediaBridge.detachSink()
}
},
)
}
/**
 * Converts the raw `segments` argument received over the method channel into
 * [ReaderTtsSegment] values.
 *
 * Entries that are not maps, or that have no `text` value, are dropped.
 * Missing or non-numeric `paragraphIndex` / `start` / `end` values become -1.
 *
 * @param rawSegments the decoded list from the method call, or null.
 * @return the parsed segments in their original order; empty when nothing is usable.
 */
private fun parseSegments(rawSegments: List<*>?): ArrayList<ReaderTtsSegment> {
    // Reads a numeric field, tolerating any Number subtype the codec produced.
    fun Map<*, *>.intOrMinusOne(key: String): Int = (this[key] as? Number)?.toInt() ?: -1

    return rawSegments.orEmpty().mapNotNullTo(ArrayList<ReaderTtsSegment>()) { entry ->
        val fields = entry as? Map<*, *> ?: return@mapNotNullTo null
        val spokenText = fields["text"]?.toString() ?: return@mapNotNullTo null
        ReaderTtsSegment(
            text = spokenText,
            paragraphIndex = fields.intOrMinusOne("paragraphIndex"),
            start = fields.intOrMinusOne("start"),
            end = fields.intOrMinusOne("end"),
        )
    }
}
private fun isIgnoringBatteryOptimizations(): Boolean {
@@ -76,6 +194,19 @@ class MainActivity : FlutterActivity() {
wakeLock = null
}
/**
 * Opens the system screen where the user can enable notifications for this app.
 *
 * On API 26+ the per-app notification settings screen is requested; on older
 * releases the generic application-details screen is used instead. Either
 * screen may be absent on a given device, so a missing activity is handled
 * rather than allowed to crash the method-channel call: the app-details
 * screen is tried as a fallback and a final failure is only logged.
 */
private fun openNotificationSettings() {
    val appDetailsIntent = Intent(Settings.ACTION_APPLICATION_DETAILS_SETTINGS).apply {
        data = Uri.fromParts("package", packageName, null)
    }
    val preferredIntent = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        Intent(Settings.ACTION_APP_NOTIFICATION_SETTINGS).apply {
            putExtra(Settings.EXTRA_APP_PACKAGE, packageName)
        }
    } else {
        appDetailsIntent
    }
    try {
        startActivity(preferredIntent)
    } catch (missingScreen: android.content.ActivityNotFoundException) {
        // The preferred screen is not provided by this device; try the generic one once.
        if (preferredIntent === appDetailsIntent) {
            android.util.Log.w("MainActivity", "No settings screen available", missingScreen)
            return
        }
        try {
            startActivity(appDetailsIntent)
        } catch (missingFallback: android.content.ActivityNotFoundException) {
            android.util.Log.w("MainActivity", "No settings screen available", missingFallback)
        }
    }
}
override fun onDestroy() {
setWakeLockEnabled(false)
super.onDestroy()
@@ -0,0 +1,44 @@
package com.example.reader_app.tts
import io.flutter.plugin.common.EventChannel
/**
 * Process-wide hand-off point for playback state snapshots.
 *
 * The latest published snapshot is cached so that it can be returned on demand
 * via [snapshot] and replayed to an event sink as soon as one attaches.
 * All entry points are `@Synchronized` on this object, and every map handed
 * out or stored is a fresh [HashMap] copy.
 */
object ReaderTtsMediaBridge {
    private var eventSink: EventChannel.EventSink? = null
    private var latestSnapshot: Map<String, Any?> = defaultSnapshot()

    /** Registers [sink] as the receiver and immediately sends it the cached snapshot. */
    @Synchronized
    fun attachSink(sink: EventChannel.EventSink) {
        eventSink = sink
        sink.success(copyOfLatest())
    }

    /** Forgets the current sink; later snapshots are cached but not delivered. */
    @Synchronized
    fun detachSink() {
        eventSink = null
    }

    /** Caches a copy of [snapshot] and forwards a copy to the attached sink, if any. */
    @Synchronized
    fun publish(snapshot: Map<String, Any?>) {
        latestSnapshot = HashMap(snapshot)
        eventSink?.success(copyOfLatest())
    }

    /** Returns a copy of the most recently published (or default) snapshot. */
    @Synchronized
    fun snapshot(): Map<String, Any?> = copyOfLatest()

    // Defensive copy so callers can never mutate the cached state.
    private fun copyOfLatest(): HashMap<String, Any?> = HashMap(latestSnapshot)

    // Snapshot reported before anything has been published.
    private fun defaultSnapshot(): Map<String, Any?> {
        val defaults = HashMap<String, Any?>()
        defaults["status"] = "idle"
        defaults["paragraphIndex"] = 0
        defaults["totalParagraphs"] = 0
        defaults["activeParagraphIndex"] = -1
        defaults["progressStart"] = -1
        defaults["progressEnd"] = -1
        defaults["contentKey"] = null
        defaults["completedCount"] = 0
        defaults["backgroundModeEnabled"] = true
        defaults["language"] = "vi-VN"
        defaults["voiceName"] = null
        defaults["availableVietnameseVoices"] = emptyList<Map<String, String>>()
        return defaults
    }
}
@@ -0,0 +1,924 @@
package com.example.reader_app.tts
import android.annotation.SuppressLint
import android.app.NotificationChannel
import android.app.NotificationManager
import android.app.PendingIntent
import android.app.Service
import android.content.Context
import android.content.Intent
import android.media.AudioAttributes
import android.media.AudioFocusRequest
import android.media.AudioManager
import android.os.Build
import android.os.Bundle
import android.os.Handler
import android.os.IBinder
import android.os.Looper
import android.os.Parcelable
import android.speech.tts.TextToSpeech
import android.speech.tts.UtteranceProgressListener
import android.util.Log
import androidx.core.app.NotificationCompat
import androidx.core.app.NotificationManagerCompat
import androidx.core.content.ContextCompat
import androidx.media.app.NotificationCompat.MediaStyle
import android.support.v4.media.MediaMetadataCompat
import android.support.v4.media.session.MediaSessionCompat
import android.support.v4.media.session.PlaybackStateCompat
import com.example.reader_app.R
import kotlinx.parcelize.Parcelize
import java.util.Locale
/**
 * One unit of text handed to the TTS engine, parcelable so a list of them can
 * travel inside the start-reading intent.
 *
 * @property text the string passed to the engine's speak call.
 * @property paragraphIndex published as `activeParagraphIndex` while this segment is audible.
 * @property start published as `progressStart` while this segment is audible
 *   (presumably an offset inside the paragraph; confirm against the Flutter caller).
 * @property end published as `progressEnd` while this segment is audible
 *   (presumably the matching end offset; confirm against the Flutter caller).
 */
@Parcelize
data class ReaderTtsSegment(
val text: String,
val paragraphIndex: Int,
val start: Int,
val end: Int,
) : Parcelable
class ReaderTtsMediaService : Service(), TextToSpeech.OnInitListener {
/**
 * Intent protocol for [ReaderTtsMediaService] plus one static helper per command.
 *
 * Every helper builds an explicit intent carrying one of the `ACTION_*`
 * constants (and the relevant `EXTRA_*` values) and delivers it to the service.
 */
companion object {
    private const val NOTIFICATION_ID = 46021
    private const val CHANNEL_ID = "reader_tts_playback"
    private const val CHANNEL_NAME = "Reader TTS"
    private const val BASE_SPEED = 0.9
    private const val TAG = "ReaderTtsMediaService"
    private const val HEALTH_CHECK_INTERVAL_MS = 1500L

    const val ACTION_INIT = "com.example.reader_app.tts.INIT"
    const val ACTION_START_READING = "com.example.reader_app.tts.START_READING"
    const val ACTION_PAUSE = "com.example.reader_app.tts.PAUSE"
    const val ACTION_RESUME = "com.example.reader_app.tts.RESUME"
    const val ACTION_STOP = "com.example.reader_app.tts.STOP"
    const val ACTION_SKIP_FORWARD = "com.example.reader_app.tts.SKIP_FORWARD"
    const val ACTION_SKIP_BACK = "com.example.reader_app.tts.SKIP_BACK"
    const val ACTION_SET_SPEED = "com.example.reader_app.tts.SET_SPEED"
    const val ACTION_SET_VOICE = "com.example.reader_app.tts.SET_VOICE"
    const val ACTION_SET_BACKGROUND_MODE = "com.example.reader_app.tts.SET_BACKGROUND_MODE"

    const val EXTRA_SEGMENTS = "segments"
    const val EXTRA_START_INDEX = "startIndex"
    const val EXTRA_CONTENT_KEY = "contentKey"
    const val EXTRA_TITLE = "title"
    const val EXTRA_SPEED = "speed"
    const val EXTRA_LANGUAGE = "language"
    const val EXTRA_VOICE_NAME = "voiceName"
    const val EXTRA_BACKGROUND_MODE_ENABLED = "backgroundModeEnabled"
    const val EXTRA_CLEAR_CONTENT_KEY = "clearContentKey"

    /** Builds the explicit service intent for [action], letting [configure] add extras. */
    private fun commandIntent(
        context: Context,
        action: String,
        configure: Intent.() -> Unit = {},
    ): Intent = Intent(context, ReaderTtsMediaService::class.java).apply {
        this.action = action
        configure()
    }

    /** Starts the service (if needed) and tells it whether background mode is enabled. */
    fun initialize(context: Context, backgroundModeEnabled: Boolean) {
        context.startService(
            commandIntent(context, ACTION_INIT) {
                putExtra(EXTRA_BACKGROUND_MODE_ENABLED, backgroundModeEnabled)
            },
        )
    }

    /**
     * Asks the service to start reading [segments] from [startIndex].
     *
     * The service is started as a foreground service only when
     * [backgroundModeEnabled] is true. With background mode off the service
     * never promotes itself to the foreground, and a foreground-service start
     * that is not followed by `startForeground` is treated as a fatal error by
     * the platform, so a plain `startService` is used in that case.
     */
    fun startReading(
        context: Context,
        segments: ArrayList<ReaderTtsSegment>,
        startIndex: Int,
        contentKey: String?,
        title: String?,
        speed: Double,
        language: String,
        voiceName: String?,
        backgroundModeEnabled: Boolean,
    ) {
        val intent = commandIntent(context, ACTION_START_READING) {
            putParcelableArrayListExtra(EXTRA_SEGMENTS, segments)
            putExtra(EXTRA_START_INDEX, startIndex)
            putExtra(EXTRA_CONTENT_KEY, contentKey)
            putExtra(EXTRA_TITLE, title)
            putExtra(EXTRA_SPEED, speed)
            putExtra(EXTRA_LANGUAGE, language)
            putExtra(EXTRA_VOICE_NAME, voiceName)
            putExtra(EXTRA_BACKGROUND_MODE_ENABLED, backgroundModeEnabled)
        }
        if (backgroundModeEnabled) {
            ContextCompat.startForegroundService(context, intent)
        } else {
            context.startService(intent)
        }
    }

    /** Sends the pause command. */
    fun pause(context: Context) =
        context.startService(commandIntent(context, ACTION_PAUSE))

    /** Sends the resume command. */
    fun resume(context: Context) =
        context.startService(commandIntent(context, ACTION_RESUME))

    /** Sends the stop command; [clearContentKey] controls whether the content key is reset. */
    fun stop(context: Context, clearContentKey: Boolean = true) =
        context.startService(
            commandIntent(context, ACTION_STOP) {
                putExtra(EXTRA_CLEAR_CONTENT_KEY, clearContentKey)
            },
        )

    /** Sends the skip-to-next-segment command. */
    fun skipForward(context: Context) =
        context.startService(commandIntent(context, ACTION_SKIP_FORWARD))

    /** Sends the skip-to-previous-segment command. */
    fun skipBack(context: Context) =
        context.startService(commandIntent(context, ACTION_SKIP_BACK))

    /** Sends a new speech rate to the service. */
    fun setSpeed(context: Context, speed: Double) =
        context.startService(
            commandIntent(context, ACTION_SET_SPEED) {
                putExtra(EXTRA_SPEED, speed)
            },
        )

    /** Sends a new voice name and (optionally) language to the service. */
    fun setVoice(context: Context, voiceName: String?, language: String?) =
        context.startService(
            commandIntent(context, ACTION_SET_VOICE) {
                putExtra(EXTRA_VOICE_NAME, voiceName)
                putExtra(EXTRA_LANGUAGE, language)
            },
        )

    /** Tells the service whether it may show its notification and run in the foreground. */
    fun setBackgroundModeEnabled(context: Context, enabled: Boolean) =
        context.startService(
            commandIntent(context, ACTION_SET_BACKGROUND_MODE) {
                putExtra(EXTRA_BACKGROUND_MODE_ENABLED, enabled)
            },
        )
}
// Handler bound to the main looper; utterance callbacks, watchdogs and the
// periodic health check are all posted through it.
private val mainHandler = Handler(Looper.getMainLooper())
// Platform collaborators, assigned in onCreate / setupMediaSession.
private lateinit var notificationManager: NotificationManagerCompat
private lateinit var mediaSession: MediaSessionCompat
private lateinit var audioManager: AudioManager
// Lazily built (API 26+) focus request, reused for request and abandon.
private var audioFocusRequest: AudioFocusRequest? = null
private var tts: TextToSpeech? = null
// True once onInit reported SUCCESS for the current engine instance.
private var isTtsReady = false
// Mirrors whether startForeground is currently in effect.
private var isForegroundActive = false
// Playback status; the code uses the values "idle", "playing" and "paused".
private var status = "idle"
private var speed = 0.9
private var language = "vi-VN"
private var voiceName: String? = null
private var contentKey: String? = null
private var title: String? = null
private var segments: List<ReaderTtsSegment> = emptyList()
// Index into segments of the segment being (or about to be) spoken.
private var currentIndex = 0
// Incremented each time the last segment of a list finishes.
private var completedCount = 0
private var backgroundModeEnabled = true
private var availableVoices: List<Map<String, String>> = emptyList()
// Bumped on start/pause/resume/stop/skip; embedded in utterance IDs so that
// callbacks belonging to an earlier generation can be ignored.
private var sessionGeneration = 0
private var lastStartedUtterance: String? = null
private var currentUtteranceId: String? = null
// Set when onStart arrives for currentUtteranceId.
private var currentUtteranceStarted = false
// Set while the engine is being rebuilt; onInit replays the current segment when true.
private var pendingReplayAfterInit = false
// Recovery attempts made for the current segment.
private var currentSegmentRetry = 0
// Consecutive health checks that found a started utterance no longer speaking.
private var consecutiveSilentHealthChecks = 0
private var utteranceWatchdog: Runnable? = null
// True when the most recent pause was triggered by an audio-focus loss.
private var pausedByAudioFocus = false
// Wall-clock time (System.currentTimeMillis) of the latest speak request.
private var lastSpeakRequestTimeMs = 0L
// Self-rescheduling task: runs one health check, then re-posts itself on the
// main handler after HEALTH_CHECK_INTERVAL_MS. It keeps running until it is
// removed from the handler.
private val playbackHealthRunnable = object : Runnable {
override fun run() {
runPlaybackHealthCheck()
mainHandler.postDelayed(this, HEALTH_CHECK_INTERVAL_MS)
}
}
// Reacts to audio-focus changes on the main handler: a permanent or transient
// loss pauses active playback and remembers why; a later gain resumes only if
// that focus-driven pause is still in effect. Other focus events are ignored.
private val audioFocusListener = AudioManager.OnAudioFocusChangeListener { change ->
    mainHandler.post {
        val focusLost = change == AudioManager.AUDIOFOCUS_LOSS ||
            change == AudioManager.AUDIOFOCUS_LOSS_TRANSIENT
        val focusRegained = change == AudioManager.AUDIOFOCUS_GAIN
        if (focusLost) {
            if (status == "playing") {
                pausedByAudioFocus = true
                handlePause()
            }
        } else if (focusRegained) {
            if (pausedByAudioFocus && status == "paused") {
                pausedByAudioFocus = false
                handleResume()
            }
        }
    }
}
/**
 * Wires up the service: system managers, notification channel, media session
 * and TTS engine, then schedules the first periodic health check and publishes
 * the initial snapshot.
 */
override fun onCreate() {
super.onCreate()
notificationManager = NotificationManagerCompat.from(this)
audioManager = getSystemService(Context.AUDIO_SERVICE) as AudioManager
createNotificationChannel()
setupMediaSession()
setupTextToSpeech()
// First health check fires one interval from now; the runnable re-posts itself afterwards.
mainHandler.postDelayed(playbackHealthRunnable, HEALTH_CHECK_INTERVAL_MS)
publishSnapshot()
}
/** Binding is not supported; the service is driven through start commands only. */
override fun onBind(intent: Intent?): IBinder? = null
/**
 * Dispatches one command intent to the matching handler.
 *
 * The service keeps all playback state in memory, so there is nothing to
 * restore if the process is killed. It therefore returns [START_NOT_STICKY]
 * instead of asking the system to recreate it, and a start without an intent
 * on an idle instance simply stops the service again rather than leaving an
 * idle TTS engine and media session running.
 *
 * @return always [START_NOT_STICKY].
 */
override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
    if (intent == null) {
        // No command to execute; do not linger if nothing is in progress.
        if (status == "idle") {
            stopSelf()
        }
        return START_NOT_STICKY
    }
    when (intent.action) {
        ACTION_INIT -> {
            backgroundModeEnabled = intent.getBooleanExtra(
                EXTRA_BACKGROUND_MODE_ENABLED,
                backgroundModeEnabled,
            )
            publishSnapshot()
        }
        ACTION_START_READING -> handleStartReading(intent)
        ACTION_PAUSE -> handlePause()
        ACTION_RESUME -> handleResume()
        ACTION_STOP -> handleStop(
            clearContentKey = intent.getBooleanExtra(EXTRA_CLEAR_CONTENT_KEY, true),
        )
        ACTION_SKIP_FORWARD -> handleSkip(1)
        ACTION_SKIP_BACK -> handleSkip(-1)
        ACTION_SET_SPEED -> {
            speed = intent.getDoubleExtra(EXTRA_SPEED, speed)
            applyVoiceAndSpeedSettings()
            publishSnapshot()
        }
        ACTION_SET_VOICE -> {
            voiceName = intent.getStringExtra(EXTRA_VOICE_NAME)
            // A missing language extra keeps the current language.
            language = intent.getStringExtra(EXTRA_LANGUAGE) ?: language
            applyVoiceAndSpeedSettings()
            publishSnapshot()
        }
        ACTION_SET_BACKGROUND_MODE -> {
            backgroundModeEnabled = intent.getBooleanExtra(
                EXTRA_BACKGROUND_MODE_ENABLED,
                backgroundModeEnabled,
            )
            syncNotificationState()
            publishSnapshot()
        }
    }
    return START_NOT_STICKY
}
/**
 * Creates a new [TextToSpeech] engine (with this service as its init listener)
 * and attaches the utterance progress listener.
 *
 * Every progress callback is re-posted to the main handler and acted upon only
 * if the utterance belongs to the current session generation and is still the
 * current utterance; anything else is silently dropped.
 */
private fun setupTextToSpeech() {
    val progressListener = object : UtteranceProgressListener() {
        // Runs [action] on the main handler when [utteranceId] is still the live utterance.
        private fun whenCurrent(utteranceId: String?, action: (String) -> Unit) {
            val id = utteranceId ?: return
            mainHandler.post {
                if (isActiveUtterance(id) && id == currentUtteranceId) {
                    action(id)
                }
            }
        }

        override fun onStart(utteranceId: String?) = whenCurrent(utteranceId) { id ->
            lastStartedUtterance = id
            currentUtteranceStarted = true
            currentSegmentRetry = 0
            status = "playing"
            scheduleUtteranceWatchdog(id)
            syncNotificationState()
            publishSnapshot()
        }

        override fun onDone(utteranceId: String?) = whenCurrent(utteranceId) { id ->
            clearUtteranceRuntimeState()
            handleUtteranceCompleted(parseUtteranceIndex(id))
        }

        @Deprecated("Deprecated in Java")
        override fun onError(utteranceId: String?) {
            onError(utteranceId, TextToSpeech.ERROR)
        }

        override fun onError(utteranceId: String?, errorCode: Int) = whenCurrent(utteranceId) {
            clearUtteranceRuntimeState()
            handlePlaybackFailure()
        }
    }
    tts = TextToSpeech(applicationContext, this)
    tts?.setOnUtteranceProgressListener(progressListener)
}
/**
 * Engine initialisation callback.
 *
 * On success the voice list and voice/speed settings are applied, and the
 * current segment is (re)spoken if a replay was pending or playback had
 * already been requested. On failure the status falls back to "idle".
 * The notification and the published snapshot are refreshed either way.
 */
override fun onInit(initStatus: Int) {
    isTtsReady = initStatus == TextToSpeech.SUCCESS
    if (!isTtsReady) {
        status = "idle"
    } else {
        refreshAvailableVoices()
        applyVoiceAndSpeedSettings()
        val playbackWanted = pendingReplayAfterInit || status == "playing"
        if (playbackWanted && segments.isNotEmpty()) {
            pendingReplayAfterInit = false
            speakCurrentSegment(forceRestart = true)
        }
    }
    syncNotificationState()
    publishSnapshot()
}
/**
 * Rebuilds [availableVoices] from the engine's voices whose language tag starts
 * with "vi" (case-insensitive), de-duplicated by name+locale and sorted by name.
 *
 * When no voice name has been chosen yet, a default is picked: the first voice
 * whose name contains "female" or "natural", otherwise the first voice in the
 * list. The language is updated to that voice's locale when one is found.
 */
private fun refreshAvailableVoices() {
    val engine = tts ?: return
    val vietnamese = engine.voices.orEmpty()
        .mapNotNull { candidate ->
            val tag = candidate.locale?.toLanguageTag() ?: return@mapNotNull null
            if (tag.lowercase().startsWith("vi")) {
                mapOf("name" to candidate.name, "locale" to tag)
            } else {
                null
            }
        }
        .distinctBy { entry -> "${entry["name"]}:${entry["locale"]}" }
        .sortedBy { entry -> entry["name"] }
    availableVoices = vietnamese

    // An explicit choice made earlier is left untouched.
    if (!voiceName.isNullOrBlank()) return

    val favoured = vietnamese.firstOrNull { entry ->
        val lowered = entry["name"]?.lowercase().orEmpty()
        lowered.contains("female") || lowered.contains("natural")
    }
    val chosen = favoured ?: vietnamese.firstOrNull()
    voiceName = chosen?.get("name")
    language = chosen?.get("locale") ?: language
}
/**
 * Pushes the current speed, language and voice selection to the engine.
 *
 * The language string is normalised (underscores to hyphens) and compared
 * case-insensitively when matching voices, so that a value such as `vi_VN`
 * selects the same voice as `vi-VN`; the locale passed to `setLanguage` is
 * derived from the same string. An unsupported language is logged rather than
 * ignored silently. If no voice matches, the engine keeps its current voice.
 */
private fun applyVoiceAndSpeedSettings() {
    val engine = tts ?: return
    engine.setSpeechRate(speed.toFloat())

    val languageResult = engine.setLanguage(language.toLocale())
    if (languageResult == TextToSpeech.LANG_MISSING_DATA ||
        languageResult == TextToSpeech.LANG_NOT_SUPPORTED
    ) {
        Log.w(TAG, "TTS language not available: $language (result=$languageResult)")
    }

    // Without a requested voice name there is nothing to match.
    val requestedName = voiceName ?: return
    val requestedTag = language.replace('_', '-')
    val matchingVoice = engine.voices?.firstOrNull { voice ->
        voice.name == requestedName &&
            voice.locale?.toLanguageTag().equals(requestedTag, ignoreCase = true)
    }
    if (matchingVoice != null) {
        engine.voice = matchingVoice
    }
}
/**
 * Starts a new reading session from the extras carried by [intent].
 *
 * All session parameters are replaced, the session generation is bumped so
 * that callbacks from the previous session are ignored, and any utterance in
 * progress is stopped.
 *
 * The notification state is synchronised immediately, before waiting for the
 * engine or for audio focus. This command may arrive through a
 * foreground-service start, which obliges the service to enter the foreground
 * promptly; deferring that until the first utterance would miss the deadline
 * when the engine is still initialising or when playback fails straight away.
 *
 * An empty segment list is treated as a playback failure instead of leaving
 * the status stuck on "playing".
 */
private fun handleStartReading(intent: Intent) {
    backgroundModeEnabled = intent.getBooleanExtra(
        EXTRA_BACKGROUND_MODE_ENABLED,
        backgroundModeEnabled,
    )
    speed = intent.getDoubleExtra(EXTRA_SPEED, speed)
    language = intent.getStringExtra(EXTRA_LANGUAGE) ?: language
    voiceName = intent.getStringExtra(EXTRA_VOICE_NAME)
    contentKey = intent.getStringExtra(EXTRA_CONTENT_KEY)
    title = intent.getStringExtra(EXTRA_TITLE)
    segments = extractSegments(intent)
    // Clamp the requested index into the list (0 when the list is empty).
    currentIndex = intent.getIntExtra(EXTRA_START_INDEX, 0)
        .coerceIn(0, (segments.size - 1).coerceAtLeast(0))
    sessionGeneration += 1
    clearUtteranceRuntimeState()
    status = "playing"
    pausedByAudioFocus = false
    pendingReplayAfterInit = false
    tts?.stop()
    // Enter the foreground (when background mode allows it) right away.
    syncNotificationState()
    publishSnapshot()
    if (segments.isEmpty()) {
        // Nothing to read: report idle and shut down cleanly.
        handlePlaybackFailure()
        return
    }
    // When the engine is not ready yet, onInit starts playback once it is.
    if (!isTtsReady) return
    speakCurrentSegment(forceRestart = true)
}
/**
 * Pauses playback. Does nothing unless the status is "playing".
 *
 * The session generation is bumped and the utterance state cleared before the
 * engine is stopped, so callbacks for the interrupted utterance are ignored.
 */
private fun handlePause() {
if (status != "playing") return
sessionGeneration += 1
clearUtteranceRuntimeState()
status = "paused"
pendingReplayAfterInit = false
tts?.stop()
syncNotificationState()
publishSnapshot()
}
/**
 * Resumes playback by re-speaking the current segment from its beginning.
 * Does nothing when there are no segments.
 *
 * The focus-pause marker is cleared here: once playback has been resumed, an
 * earlier focus-driven pause is no longer in effect. Leaving the marker set
 * would let a later focus gain restart playback after a manual pause.
 */
private fun handleResume() {
    if (segments.isEmpty()) return
    pausedByAudioFocus = false
    status = "playing"
    sessionGeneration += 1
    clearUtteranceRuntimeState()
    pendingReplayAfterInit = false
    publishSnapshot()
    // When the engine is not ready yet, onInit starts playback once it is.
    if (!isTtsReady) return
    speakCurrentSegment(forceRestart = true)
}
/**
 * Stops playback, discards the loaded segments and title, releases audio
 * focus, refreshes notification and snapshot, and asks the service to stop.
 *
 * @param clearContentKey when true the content key is reset as well; when
 *   false it is kept and continues to be published in snapshots.
 */
private fun handleStop(clearContentKey: Boolean) {
// Invalidate callbacks from the utterance that is about to be interrupted.
sessionGeneration += 1
clearUtteranceRuntimeState()
status = "idle"
currentIndex = 0
segments = emptyList()
title = null
if (clearContentKey) {
contentKey = null
}
tts?.stop()
abandonAudioFocus()
syncNotificationState()
publishSnapshot()
stopSelf()
}
/**
 * Moves to a neighbouring segment and starts speaking it.
 *
 * @param direction offset added to the current index (the callers pass 1 or -1);
 *   the result is clamped to the segment list.
 *
 * Does nothing when there are no segments, or when the clamped index is
 * unchanged while the status is "idle". In every other case playback
 * (re)starts at the target segment, including from the paused state.
 */
private fun handleSkip(direction: Int) {
    if (segments.isEmpty()) return
    val target = (currentIndex + direction).coerceIn(0, segments.lastIndex)
    val stuckAtEdgeWhileIdle = target == currentIndex && status == "idle"
    if (stuckAtEdgeWhileIdle) return

    currentIndex = target
    sessionGeneration += 1
    clearUtteranceRuntimeState()
    status = "playing"
    pendingReplayAfterInit = false
    tts?.stop()
    publishSnapshot()
    if (isTtsReady) {
        speakCurrentSegment(forceRestart = true)
    }
}
/**
 * Advances after a segment has finished.
 *
 * Ignored unless the status is "playing" and [completedIndex] is the current
 * index. If another segment follows it is spoken next; otherwise the session
 * ends: the status becomes "idle", the completion counter is incremented,
 * audio focus is released and the service is asked to stop.
 */
private fun handleUtteranceCompleted(completedIndex: Int) {
    if (status != "playing" || completedIndex != currentIndex) return

    val followingIndex = currentIndex + 1
    if (followingIndex < segments.size) {
        currentIndex = followingIndex
        speakCurrentSegment(forceRestart = false)
        return
    }

    // Last segment done: wind the session down.
    status = "idle"
    currentIndex = 0
    completedCount += 1
    clearUtteranceRuntimeState()
    abandonAudioFocus()
    syncNotificationState()
    publishSnapshot()
    stopSelf()
}
/**
 * Gives up on playback: status becomes "idle", utterance state and any pending
 * replay are cleared, audio focus is released, notification and snapshot are
 * refreshed, and the service is asked to stop. The loaded segments and the
 * current index are left as they are.
 */
private fun handlePlaybackFailure() {
status = "idle"
clearUtteranceRuntimeState()
pendingReplayAfterInit = false
abandonAudioFocus()
syncNotificationState()
publishSnapshot()
stopSelf()
}
/**
 * Speaks the segment at the current index.
 *
 * Returns without doing anything when there are no segments or the engine is
 * not ready. Failing to obtain audio focus, or a current index outside the
 * list, is treated as a playback failure.
 *
 * @param forceRestart true when the same segment is being (re)started
 *   (start, resume, skip, recovery); false when advancing to the next segment,
 *   in which case the per-segment retry counter is reset.
 *
 * The legacy (pre-API 21) speak call now carries the utterance ID in its
 * parameter map. Without it the progress callbacks for that utterance arrive
 * with no ID and are all discarded, so playback could never advance there.
 */
private fun speakCurrentSegment(forceRestart: Boolean) {
    if (segments.isEmpty() || !isTtsReady) return
    if (!requestAudioFocus()) {
        handlePlaybackFailure()
        return
    }
    val segment = segments.getOrNull(currentIndex) ?: run {
        handlePlaybackFailure()
        return
    }
    applyVoiceAndSpeedSettings()
    status = "playing"
    // Reset retry counter when advancing to a new segment; keep it when retrying same segment.
    if (!forceRestart) {
        currentSegmentRetry = 0
    }
    syncNotificationState()
    publishSnapshot()

    // ID layout: "<generation>:<segment index>:<nanoTime>"; the first two parts
    // are parsed back by the progress callbacks.
    val utteranceId = "${sessionGeneration}:${currentIndex}:${System.nanoTime()}"
    if (forceRestart) {
        lastStartedUtterance = null
    }
    currentUtteranceId = utteranceId
    currentUtteranceStarted = false
    lastSpeakRequestTimeMs = System.currentTimeMillis()
    // Armed before speak() so a request that never starts is still caught.
    scheduleUtteranceWatchdog(utteranceId)

    val speakResult = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
        tts?.speak(segment.text, TextToSpeech.QUEUE_FLUSH, Bundle(), utteranceId)
    } else {
        @Suppress("DEPRECATION")
        tts?.speak(
            segment.text,
            TextToSpeech.QUEUE_FLUSH,
            hashMapOf(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID to utteranceId),
        )
    }
    if (speakResult == TextToSpeech.ERROR) {
        recoverFromSilentPlayback("speak_error")
    }
}
/**
 * Replaces any existing watchdog with a new one for [utteranceId].
 *
 * The watchdog fires after the estimated timeout for the current segment's
 * text and triggers recovery only if playback is still "playing" and
 * [utteranceId] is still the current utterance. Nothing is scheduled when
 * there is no current segment.
 */
private fun scheduleUtteranceWatchdog(utteranceId: String) {
    clearUtteranceWatchdog()
    val spokenText = currentSegment()?.text ?: return
    val delayMs = estimateUtteranceTimeoutMs(spokenText)
    val watchdog = Runnable {
        val stillWaiting = status == "playing" && utteranceId == currentUtteranceId
        if (stillWaiting) {
            recoverFromSilentPlayback("watchdog_timeout")
        }
    }
    utteranceWatchdog = watchdog
    mainHandler.postDelayed(watchdog, delayMs)
}
/** Cancels the pending utterance watchdog, if one is scheduled, and forgets it. */
private fun clearUtteranceWatchdog() {
    val pending = utteranceWatchdog
    if (pending != null) {
        mainHandler.removeCallbacks(pending)
    }
    utteranceWatchdog = null
}
/**
 * Resets everything tied to the utterance in flight: the watchdog, the
 * started/current utterance IDs, the started flag and the silent-check
 * counter. The per-segment retry counter is not touched.
 */
private fun clearUtteranceRuntimeState() {
clearUtteranceWatchdog()
lastStartedUtterance = null
currentUtteranceId = null
currentUtteranceStarted = false
consecutiveSilentHealthChecks = 0
}
/**
 * Estimates how long to wait for [text] to finish before the watchdog fires.
 *
 * The estimate is 200 ms per character, scaled by BASE_SPEED relative to the
 * current speed (speed clamped to 0.2..1.5, scale clamped to 0.5..3.0), plus a
 * 10 s buffer. The result is clamped to 15 s..180 s.
 *
 * @return the timeout in milliseconds.
 */
private fun estimateUtteranceTimeoutMs(text: String): Long {
    val clampedSpeed = speed.coerceIn(0.2, 1.5)
    val speedScale = (BASE_SPEED / clampedSpeed).coerceIn(0.5, 3.0)
    // Generous per-character budget and buffer so the watchdog does not fire
    // prematurely on longer Vietnamese sentences.
    val speakingBudgetMs = (text.length * 200L * speedScale).toLong()
    val withBufferMs = speakingBudgetMs + 10_000L
    return withBufferMs.coerceIn(15_000L, 180_000L)
}
/**
 * Escalating recovery for a segment that produced no audible progress.
 *
 * Only acts while the status is "playing". Per segment the ladder is:
 * first call - stop the engine and speak the segment again;
 * second call - rebuild the whole engine (the segment is replayed from onInit);
 * third call - give up via handlePlaybackFailure.
 * The retry counter is reset elsewhere once an utterance actually starts.
 *
 * @param reason short tag included in the log line (and forwarded to the rebuild).
 */
private fun recoverFromSilentPlayback(reason: String) {
if (status != "playing") return
Log.w(TAG, "Recover from silent playback: $reason (index=$currentIndex retry=$currentSegmentRetry)")
if (segments.isEmpty()) {
handlePlaybackFailure()
return
}
clearUtteranceRuntimeState()
// Both lighter recovery steps have already been tried for this segment.
if (currentSegmentRetry >= 2) {
handlePlaybackFailure()
return
}
currentSegmentRetry += 1
// Second attempt: the simple re-speak did not help, so replace the engine.
if (currentSegmentRetry >= 2) {
rebuildTtsEngineForRecovery(reason)
return
}
// First attempt: flush the engine and speak the same segment again.
tts?.stop()
speakCurrentSegment(forceRestart = true)
}
/**
 * Shuts the current engine down and creates a fresh one.
 *
 * The engine is marked not ready and a replay is flagged as pending, so the
 * current segment is spoken again from onInit once the new engine reports
 * success.
 *
 * @param reason short tag included in the log line.
 */
private fun rebuildTtsEngineForRecovery(reason: String) {
Log.w(TAG, "Rebuilding TextToSpeech engine for recovery: $reason")
pendingReplayAfterInit = true
isTtsReady = false
tts?.stop()
tts?.shutdown()
setupTextToSpeech()
}
/**
 * Periodic sanity check that playback is really progressing.
 *
 * Only relevant while the status is "playing" with segments loaded:
 * - no engine instance: rebuild it;
 * - engine not ready: rebuild it unless a rebuild/replay is already pending;
 * - utterance requested but not started, and the engine is silent for more
 *   than 4 s since the speak request: trigger recovery;
 * - utterance started but the engine is silent on two consecutive checks:
 *   assume the completion callback was lost and advance manually.
 */
private fun runPlaybackHealthCheck() {
    if (status != "playing" || segments.isEmpty()) return

    val engine = tts
    if (engine == null) {
        rebuildTtsEngineForRecovery("tts_instance_null")
        return
    }
    if (!isTtsReady) {
        if (!pendingReplayAfterInit) {
            rebuildTtsEngineForRecovery("tts_not_ready")
        }
        return
    }

    // A throwing engine is treated the same as a silent one.
    val engineBusy = try {
        engine.isSpeaking
    } catch (_: Exception) {
        false
    }

    when {
        !currentUtteranceStarted -> {
            // Allow a grace period after speak() before flagging the engine as silent.
            val waitedMs = System.currentTimeMillis() - lastSpeakRequestTimeMs
            if (!engineBusy && waitedMs > 4_000L) {
                recoverFromSilentPlayback("no_onStart_and_not_speaking")
            }
        }
        engineBusy -> consecutiveSilentHealthChecks = 0
        else -> {
            consecutiveSilentHealthChecks += 1
            if (consecutiveSilentHealthChecks >= 2) {
                // Engine stopped speaking but onDone was never delivered; advance manually.
                consecutiveSilentHealthChecks = 0
                clearUtteranceRuntimeState()
                handleUtteranceCompleted(currentIndex)
            }
        }
    }
}
/**
 * Requests permanent audio focus for media/speech playback.
 *
 * On API 26+ a single [AudioFocusRequest] is built on first use, cached and
 * reused; older releases use the deprecated stream-based request.
 *
 * @return true only when focus was granted immediately.
 */
private fun requestAudioFocus(): Boolean {
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.O) {
        @Suppress("DEPRECATION")
        val legacyResult = audioManager.requestAudioFocus(
            audioFocusListener,
            AudioManager.STREAM_MUSIC,
            AudioManager.AUDIOFOCUS_GAIN,
        )
        return legacyResult == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
    }

    var focusRequest = audioFocusRequest
    if (focusRequest == null) {
        val playbackAttributes = AudioAttributes.Builder()
            .setUsage(AudioAttributes.USAGE_MEDIA)
            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .build()
        focusRequest = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN)
            .setAudioAttributes(playbackAttributes)
            .setAcceptsDelayedFocusGain(false)
            .setOnAudioFocusChangeListener(audioFocusListener)
            .build()
        audioFocusRequest = focusRequest
    }
    return audioManager.requestAudioFocus(focusRequest) == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
}
/**
 * Releases audio focus. On API 26+ this is a no-op when no focus request has
 * been created yet; older releases abandon via the listener.
 */
private fun abandonAudioFocus() {
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.O) {
        @Suppress("DEPRECATION")
        audioManager.abandonAudioFocus(audioFocusListener)
        return
    }
    val heldRequest = audioFocusRequest
    if (heldRequest != null) {
        audioManager.abandonAudioFocusRequest(heldRequest)
    }
}
/**
 * Tells whether [utteranceId] was issued by the current session generation.
 * The generation is the integer before the first ':'; an ID without a
 * parsable generation is never active.
 */
private fun isActiveUtterance(utteranceId: String): Boolean {
    val issuedBy = utteranceId.substringBefore(':').toIntOrNull()
    return issuedBy != null && issuedBy == sessionGeneration
}
/**
 * Extracts the segment index (second ':'-separated field) from [utteranceId],
 * falling back to the current index when the field is missing or not a number.
 */
private fun parseUtteranceIndex(utteranceId: String): Int {
    val indexField = utteranceId.split(':').getOrNull(1)
    return indexField?.toIntOrNull() ?: currentIndex
}
/** Returns the segment at the current index, or null when the index is outside the list. */
private fun currentSegment(): ReaderTtsSegment? = segments.getOrNull(currentIndex)
/**
 * Secondary text for the notification and media metadata: a 1-based
 * "current/total" sentence counter, or the voice name (else the language)
 * when no segments are loaded.
 */
private fun currentProgressLabel(): String =
    if (segments.isEmpty()) {
        voiceName ?: language
    } else {
        "Câu ${currentIndex + 1}/${segments.size}"
    }
/** Returns the application's label as resolved by the package manager. */
private fun appLabel(): String = applicationInfo.loadLabel(packageManager).toString()
/**
 * Builds the pending intent that brings the app's launcher activity to the
 * front (single-top, clear-top).
 *
 * @return the pending intent, or null when the package has no launch intent.
 */
private fun buildLaunchIntent(): PendingIntent? {
    val appIntent = packageManager.getLaunchIntentForPackage(packageName) ?: return null
    appIntent.flags = Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP
    val pendingFlags = PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE
    return PendingIntent.getActivity(this, 100, appIntent, pendingFlags)
}
/**
 * Builds a pending intent that delivers [action] to this service.
 *
 * The action's hash code is used as the request code, so each action gets its
 * own pending intent. The stop action always asks for the content key to be
 * cleared.
 */
private fun buildServicePendingIntent(action: String): PendingIntent {
    val command = Intent(this, ReaderTtsMediaService::class.java)
    command.action = action
    if (action == ACTION_STOP) {
        command.putExtra(EXTRA_CLEAR_CONTENT_KEY, true)
    }
    val pendingFlags = PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE
    return PendingIntent.getService(this, action.hashCode(), command, pendingFlags)
}
/**
 * Builds the media-style playback notification for the current state.
 *
 * Actions are added in the order previous / play-or-pause / stop / next; the
 * compact view shows previous, play-or-pause and next (indices 0, 1, 3).
 * The middle action toggles between pause and resume depending on whether the
 * status is "playing". Swiping the notification away sends the stop command.
 */
@SuppressLint("MissingPermission")
private fun buildNotification(): android.app.Notification {
    val isPlaying = status == "playing"
    val toggleIcon =
        if (isPlaying) android.R.drawable.ic_media_pause else android.R.drawable.ic_media_play
    val toggleLabel = if (isPlaying) "Tạm dừng" else "Tiếp tục"
    val toggleAction = if (isPlaying) ACTION_PAUSE else ACTION_RESUME

    val mediaStyle = MediaStyle()
        .setMediaSession(mediaSession.sessionToken)
        .setShowActionsInCompactView(0, 1, 3)

    return NotificationCompat.Builder(this, CHANNEL_ID)
        .setSmallIcon(R.mipmap.ic_launcher)
        .setContentTitle(title ?: appLabel())
        .setContentText(currentProgressLabel())
        .setContentIntent(buildLaunchIntent())
        .setDeleteIntent(buildServicePendingIntent(ACTION_STOP))
        .setOnlyAlertOnce(true)
        .setOngoing(isPlaying)
        .setVisibility(NotificationCompat.VISIBILITY_PUBLIC)
        .setCategory(NotificationCompat.CATEGORY_TRANSPORT)
        .addAction(
            android.R.drawable.ic_media_previous,
            "Lùi câu",
            buildServicePendingIntent(ACTION_SKIP_BACK),
        )
        .addAction(toggleIcon, toggleLabel, buildServicePendingIntent(toggleAction))
        .addAction(
            android.R.drawable.ic_menu_close_clear_cancel,
            "Dừng",
            buildServicePendingIntent(ACTION_STOP),
        )
        .addAction(
            android.R.drawable.ic_media_next,
            "Tới câu",
            buildServicePendingIntent(ACTION_SKIP_FORWARD),
        )
        .setStyle(mediaStyle)
        .build()
}
/**
 * Creates and activates the media session, routing its transport callbacks
 * (play, pause, stop, next, previous) to the same handlers used by the intent
 * commands, then publishes the initial playback state and metadata.
 */
private fun setupMediaSession() {
mediaSession = MediaSessionCompat(this, "ReaderTtsMediaSession")
mediaSession.setCallback(
object : MediaSessionCompat.Callback() {
override fun onPlay() = handleResume()
override fun onPause() = handlePause()
// A stop issued through the session always clears the content key.
override fun onStop() = handleStop(clearContentKey = true)
override fun onSkipToNext() = handleSkip(1)
override fun onSkipToPrevious() = handleSkip(-1)
},
)
mediaSession.isActive = true
updateMediaSessionState()
}
/**
 * Mirrors the service state into the media session.
 *
 * The status maps to playing / paused / stopped; the advertised actions are
 * always play, pause, stop, next and previous. The current segment index is
 * reported in the position field. Title and progress label are published as
 * the session's title and artist metadata.
 */
private fun updateMediaSessionState() {
    val sessionState = when (status) {
        "playing" -> PlaybackStateCompat.STATE_PLAYING
        "paused" -> PlaybackStateCompat.STATE_PAUSED
        else -> PlaybackStateCompat.STATE_STOPPED
    }
    val supportedActions = PlaybackStateCompat.ACTION_PLAY or
        PlaybackStateCompat.ACTION_PAUSE or
        PlaybackStateCompat.ACTION_STOP or
        PlaybackStateCompat.ACTION_SKIP_TO_NEXT or
        PlaybackStateCompat.ACTION_SKIP_TO_PREVIOUS

    val stateBuilder = PlaybackStateCompat.Builder()
    stateBuilder.setActions(supportedActions)
    stateBuilder.setState(sessionState, currentIndex.toLong(), 1.0f)
    mediaSession.setPlaybackState(stateBuilder.build())

    val metadataBuilder = MediaMetadataCompat.Builder()
    metadataBuilder.putString(MediaMetadataCompat.METADATA_KEY_TITLE, title ?: appLabel())
    metadataBuilder.putString(MediaMetadataCompat.METADATA_KEY_ARTIST, currentProgressLabel())
    mediaSession.setMetadata(metadataBuilder.build())
}
/**
 * Brings the media session, the foreground state and the notification in line
 * with the current status.
 *
 * - Background mode on and "playing": run in the foreground with the
 *   notification (or refresh it when already in the foreground).
 * - Background mode on and "paused": leave the foreground but keep the
 *   notification posted.
 * - Anything else (including background mode off): leave the foreground and
 *   remove the notification.
 */
@SuppressLint("MissingPermission")
private fun syncNotificationState() {
    updateMediaSessionState()

    // Leaves the foreground if currently in it; otherwise does nothing.
    fun leaveForeground(removeNotification: Boolean) {
        if (isForegroundActive) {
            stopForeground(removeNotification)
            isForegroundActive = false
        }
    }

    when {
        backgroundModeEnabled && status == "playing" -> {
            val playingNotification = buildNotification()
            if (isForegroundActive) {
                notificationManager.notify(NOTIFICATION_ID, playingNotification)
            } else {
                startForeground(NOTIFICATION_ID, playingNotification)
                isForegroundActive = true
            }
        }
        backgroundModeEnabled && status == "paused" -> {
            val pausedNotification = buildNotification()
            leaveForeground(removeNotification = false)
            notificationManager.notify(NOTIFICATION_ID, pausedNotification)
        }
        else -> {
            leaveForeground(removeNotification = true)
            notificationManager.cancel(NOTIFICATION_ID)
        }
    }
}
/**
 * Publishes the current playback state through [ReaderTtsMediaBridge].
 *
 * The highlight fields (`activeParagraphIndex`, `progressStart`,
 * `progressEnd`) carry the current segment's values only while the status is
 * "playing" and the current utterance has actually started; otherwise they
 * are -1.
 */
private fun publishSnapshot() {
    val highlightVisible = status == "playing" && currentUtteranceStarted
    val highlighted = if (highlightVisible) currentSegment() else null

    val payload = HashMap<String, Any?>()
    payload["status"] = status
    payload["paragraphIndex"] = currentIndex
    payload["totalParagraphs"] = segments.size
    payload["activeParagraphIndex"] = highlighted?.paragraphIndex ?: -1
    payload["progressStart"] = highlighted?.start ?: -1
    payload["progressEnd"] = highlighted?.end ?: -1
    payload["contentKey"] = contentKey
    payload["completedCount"] = completedCount
    payload["backgroundModeEnabled"] = backgroundModeEnabled
    payload["language"] = language
    payload["voiceName"] = voiceName
    payload["availableVietnameseVoices"] = availableVoices
    ReaderTtsMediaBridge.publish(payload)
}
/**
 * Registers the low-importance, badge-less notification channel used for the
 * playback notification. Does nothing below API 26, where channels do not exist.
 */
private fun createNotificationChannel() {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        val playbackChannel = NotificationChannel(
            CHANNEL_ID,
            CHANNEL_NAME,
            NotificationManager.IMPORTANCE_LOW,
        )
        playbackChannel.description = "Điều khiển đọc truyện bằng TTS"
        playbackChannel.setShowBadge(false)
        val systemManager = getSystemService(Context.NOTIFICATION_SERVICE) as NotificationManager
        systemManager.createNotificationChannel(playbackChannel)
    }
}
/**
 * Reads the parcelled segment list from [intent], using the type-safe getter
 * on API 33+ and the deprecated one below that.
 *
 * @return the segments, or an empty list when the extra is absent.
 */
private fun extractSegments(intent: Intent): List<ReaderTtsSegment> {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
        val typed = intent.getParcelableArrayListExtra(EXTRA_SEGMENTS, ReaderTtsSegment::class.java)
        return typed ?: arrayListOf()
    }
    @Suppress("DEPRECATION")
    val legacy = intent.getParcelableArrayListExtra<ReaderTtsSegment>(EXTRA_SEGMENTS)
    return legacy ?: arrayListOf()
}
/**
 * Tears the service down: stops the health check, publishes a final idle
 * snapshot, shuts the engine down, releases audio focus, leaves the
 * foreground and releases the media session.
 *
 * The playback notification is cancelled unconditionally. While paused the
 * notification is posted without the service being in the foreground, so
 * leaving the foreground alone would not remove it, and a destroyed service
 * would leave a stale notification whose actions refer to a session that no
 * longer exists.
 */
override fun onDestroy() {
    mainHandler.removeCallbacks(playbackHealthRunnable)
    status = "idle"
    currentIndex = 0
    segments = emptyList()
    clearUtteranceRuntimeState()
    pendingReplayAfterInit = false
    // Let listeners see the idle state before resources are released.
    publishSnapshot()
    tts?.stop()
    tts?.shutdown()
    abandonAudioFocus()
    if (isForegroundActive) {
        stopForeground(true)
        isForegroundActive = false
    }
    // Covers the paused case, where the notification is not tied to the foreground state.
    notificationManager.cancel(NOTIFICATION_ID)
    mediaSession.isActive = false
    mediaSession.release()
    super.onDestroy()
}
}
/**
 * Parses this string as a language tag, accepting '_' as well as '-' as the
 * separator. Falls back to Vietnamese (Vietnam) when the tag yields no language.
 */
private fun String.toLocale(): Locale {
    val parsed = Locale.forLanguageTag(replace('_', '-'))
    return if (parsed.language.isNotBlank()) parsed else Locale("vi", "VN")
}