Add stress tester that simulates super high chance of BLE failures

and fix bug that it revealed (missing handling of error during discovery)
1.2-legacy
geeksville 2020-07-02 09:38:08 -07:00
rodzic e5d6ffc4bd
commit 103b21aafd
4 zmienionych plików z 60 dodań i 31 usunięć

Wyświetl plik

@ -1,5 +1,6 @@
# Remaining tasks before declaring 1.0
- add faq entry about range and antennas and rain
- first message sent is still doubled for some people
- disable software update button after update finishes
- let users set arbitrary params in android

Wyświetl plik

@ -354,35 +354,40 @@ class BluetoothInterface(val service: RadioInterfaceService, val address: String
private var isFirstTime = true
/**
 * Kick off BLE service discovery and, once it completes, finish initialising the link.
 *
 * The discovery result is unwrapped with getOrThrow(); on success a coroutine is launched on
 * service.serviceScope which waits one second, looks up the BTM_FROMNUM_CHARACTER characteristic,
 * marks the next send as the first one, notifies the service via onConnect() and then performs an
 * initial doReadFromRadio(true). A BLEException raised while unwrapping the discovery result or
 * during that initial setup is turned into a scheduleReconnect() call.
 */
private fun doDiscoverServicesAndInit() {
// FIXME - no need to discover services more than once - instead use lazy() to use them in future attempts
safe!!.asyncDiscoverServices { discRes ->
discRes.getOrThrow() // FIXME, instead just try to reconnect?
try {
discRes.getOrThrow()
service.serviceScope.handledLaunch {
try {
debug("Discovered services!")
delay(1000) // android BLE is buggy and needs a short sleep (originally noted as 500ms, we wait 1000ms) before calling getCharacteristic, or you might get back null
service.serviceScope.handledLaunch {
try {
debug("Discovered services!")
delay(1000) // android BLE is buggy and needs a short sleep (originally noted as 500ms, we wait 1000ms) before calling getCharacteristic, or you might get back null
/* if (isFirstTime) {
isFirstTime = false
throw BLEException("Faking a BLE failure")
} */
/* if (isFirstTime) {
isFirstTime = false
throw BLEException("Faking a BLE failure")
} */
fromNum = getCharacteristic(BTM_FROMNUM_CHARACTER)
fromNum = getCharacteristic(BTM_FROMNUM_CHARACTER)
// We treat the first send by a client as special
isFirstSend = true
// We treat the first send by a client as special
isFirstSend = true
// Now tell clients they can (finally) use the api
service.onConnect()
// Now tell clients they can (finally) use the api
service.onConnect()
// Immediately broadcast any queued packets sitting on the device
doReadFromRadio(true)
} catch (ex: BLEException) {
scheduleReconnect(
"Unexpected error in initial device enumeration, forcing disconnect $ex"
)
// Immediately broadcast any queued packets sitting on the device
doReadFromRadio(true)
} catch (ex: BLEException) {
scheduleReconnect(
"Unexpected error in initial device enumeration, forcing disconnect $ex"
)
}
}
} catch (ex: BLEException) {
// Discovery itself failed (getOrThrow threw), so force a reconnect
scheduleReconnect(
"Unexpected error discovering services, forcing disconnect $ex"
)
}
}
}

Wyświetl plik

@ -85,8 +85,11 @@ class SafeBluetooth(private val context: Context, private val device: BluetoothD
}
override fun toString(): String {
return super.toString()
return "Work:$tag"
}
/// Connection work items are treated specially: this returns true when the work item's
/// tag is "connect" or "reconnect"
fun isConnect() = tag == "connect" || tag == "reconnect"
}
/**
@ -122,6 +125,7 @@ class SafeBluetooth(private val context: Context, private val device: BluetoothD
private val STATUS_RELIABLE_WRITE_FAILED = 4403
private val STATUS_TIMEOUT = 4404
private val STATUS_NOSTART = 4405
private val STATUS_SIMFAILURE = 4406
private val gattCallback = object : BluetoothGattCallback() {
@ -157,7 +161,11 @@ class SafeBluetooth(private val context: Context, private val device: BluetoothD
if (oldstate == BluetoothProfile.STATE_CONNECTED) {
info("Lost connection - aborting current work: $currentWork")
lostConnection("lost connection")
// If we get a disconnect, just try again otherwise fail all current operations
if (currentWork?.isConnect() == true)
dropAndReconnect()
else
lostConnection("lost connection")
} else if (status == 133) {
// We were not previously connected and we just failed with our non-auto connection attempt. Therefore we now need
// to do an autoconnection attempt. When that attempt succeeds/fails the normal callbacks will be called
@ -285,6 +293,12 @@ class SafeBluetooth(private val context: Context, private val device: BluetoothD
}
}
// To test handling of BLE faults we can randomly fail a certain % of all work items. We
// skip this for "connect" items because the handling for connection failure is special
// Set to true to enable the simulated failures
var simFailures = false
// Percentage (compared against a random value in 0..99) of non-connect work items to fail
var failPercent =
10 // 15% failure is unusably high because of constant reconnects, 7% somewhat usable, 10% pretty bad
// Random source used to decide whether a given work item gets a simulated failure
private val failRandom = Random()
private var activeTimeout: Job? = null
@ -311,13 +325,22 @@ class SafeBluetooth(private val context: Context, private val device: BluetoothD
isSettingMtu =
false // Most work is not doing MTU stuff, the work that is will re set this flag
val started = newWork.startWork()
if (!started) {
errormsg("Failed to start work, returned error status")
completeWork(
STATUS_NOSTART,
Unit
) // abandon the current attempt and try for another
val failThis =
simFailures && !newWork.isConnect() && failRandom.nextInt(100) < failPercent
if (failThis) {
errormsg("Simulating random work failure!")
completeWork(STATUS_SIMFAILURE, Unit)
} else {
val started = newWork.startWork()
if (!started) {
errormsg("Failed to start work, returned error status")
completeWork(
STATUS_NOSTART,
Unit
) // abandon the current attempt and try for another
}
}
}
}

@ -1 +1 @@
Subproject commit 2f9243202d9db33b0aa0c7656bc8916ad3712914
Subproject commit 95c5a9aa950f917857a3cc0c7cd84a4a56993032