Discussion:
[PATCH] wifi: Reset device->scanning if scan has not returned in 15 secs
p***@jolla.com
2015-10-22 21:47:34 UTC
Permalink
From: Pasi Sjöholm <***@jollamobile.com>

Due unknown reason sometimes device->scanning is not set to false after
wifi scanning (connman 1.30 and wpa_supplicant 2.5).
This is probably due callback-function not being called after wifi scan
and therefore it needs to have a timer to prevent deadlock.
---
plugins/wifi.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/plugins/wifi.c b/plugins/wifi.c
index dfe849f..35d4fe6 100644
--- a/plugins/wifi.c
+++ b/plugins/wifi.c
@@ -61,6 +61,7 @@

#define CLEANUP_TIMEOUT 8 /* in seconds */
#define INACTIVE_TIMEOUT 12 /* in seconds */
+#define SCAN_FAIL_TIMEOUT 15 /* in seconds */
#define FAVORITE_MAXIMUM_RETRIES 2

#define BGSCAN_DEFAULT "simple:30:-45:300"
@@ -131,6 +132,7 @@ struct wifi_data {
struct hidden_params *hidden;
bool postpone_hidden;
struct wifi_tethering_info *tethering_param;
+ unsigned int scan_fail_timeout;
/**
* autoscan "emulation".
*/
@@ -817,9 +819,25 @@ static void reset_autoscan(struct connman_device *device)
connman_device_unref(device);
}

+static gboolean scan_fail_timeout(gpointer data)
+{
+ struct connman_device *device = data;
+ struct wifi_data *wifi = connman_device_get_data(device);
+
+ DBG("");
+
+ if (!wifi)
+ return FALSE;
+
+ connman_device_set_scanning(device, CONNMAN_SERVICE_TYPE_WIFI, false);
+ wifi->scan_fail_timeout = 0;
+
+ return FALSE;
+}
+
static void stop_autoscan(struct connman_device *device)
{
- const struct wifi_data *wifi = connman_device_get_data(device);
+ struct wifi_data *wifi = connman_device_get_data(device);

if (!wifi || !wifi->autoscan)
return;
@@ -827,6 +845,11 @@ static void stop_autoscan(struct connman_device *device)
reset_autoscan(device);

connman_device_set_scanning(device, CONNMAN_SERVICE_TYPE_WIFI, false);
+
+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
}

static void check_p2p_technology(void)
@@ -876,6 +899,10 @@ static void wifi_remove(struct connman_device *device)
if (wifi->p2p_connection_timeout)
g_source_remove(wifi->p2p_connection_timeout);

+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ }
+
remove_networks(device, wifi);

connman_device_set_powered(device, false);
@@ -1193,6 +1220,11 @@ static int throw_wifi_scan(struct connman_device *device,
if (ret == 0) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, true);
+
+ wifi->scan_fail_timeout = g_timeout_add_seconds(
+ SCAN_FAIL_TIMEOUT,
+ scan_fail_timeout,
+ device);
} else
connman_device_unref(device);

@@ -1262,6 +1294,11 @@ static void scan_callback(int result, GSupplicantInterface *interface,
if (scanning) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, false);
+
+ if (wifi && wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
}

if (result != -ENOLINK)
@@ -1516,6 +1553,11 @@ static int wifi_disable(struct connman_device *device)
connman_device_unref(wifi->device);
}

+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
+
/* In case of a user scan, device is still referenced */
if (connman_device_get_scanning(device)) {
connman_device_set_scanning(device,
@@ -1864,6 +1906,12 @@ static int wifi_scan(enum connman_service_type type,
if (ret == 0) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, true);
+
+ wifi->scan_fail_timeout = g_timeout_add_seconds(
+ SCAN_FAIL_TIMEOUT,
+ scan_fail_timeout,
+ device);
+
} else {
g_supplicant_free_scan_params(scan_params);
connman_device_unref(device);
--
2.1.4
Patrik Flykt
2015-10-28 09:32:12 UTC
Permalink
Hi,
Post by p***@jolla.com
Due unknown reason sometimes device->scanning is not set to false after
wifi scanning (connman 1.30 and wpa_supplicant 2.5).
This is probably due callback-function not being called after wifi scan
and therefore it needs to have a timer to prevent deadlock.
---
You probably have evidence that the callback is not called, right?
Please state that in the commit message.

What will happen if wpa_supplicant will call the callback after 16
seconds, i.e. after ConnMan has triggered scan_fail_timeout? Will there
be any artefacts if this happens?

Cheers,

Patrik
Post by p***@jolla.com
plugins/wifi.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)
diff --git a/plugins/wifi.c b/plugins/wifi.c
index dfe849f..35d4fe6 100644
--- a/plugins/wifi.c
+++ b/plugins/wifi.c
@@ -61,6 +61,7 @@
#define CLEANUP_TIMEOUT 8 /* in seconds */
#define INACTIVE_TIMEOUT 12 /* in seconds */
+#define SCAN_FAIL_TIMEOUT 15 /* in seconds */
#define FAVORITE_MAXIMUM_RETRIES 2
#define BGSCAN_DEFAULT "simple:30:-45:300"
@@ -131,6 +132,7 @@ struct wifi_data {
struct hidden_params *hidden;
bool postpone_hidden;
struct wifi_tethering_info *tethering_param;
+ unsigned int scan_fail_timeout;
/**
* autoscan "emulation".
*/
@@ -817,9 +819,25 @@ static void reset_autoscan(struct connman_device *device)
connman_device_unref(device);
}
+static gboolean scan_fail_timeout(gpointer data)
+{
+ struct connman_device *device = data;
+ struct wifi_data *wifi = connman_device_get_data(device);
+
+ DBG("");
+
+ if (!wifi)
+ return FALSE;
+
+ connman_device_set_scanning(device, CONNMAN_SERVICE_TYPE_WIFI, false);
+ wifi->scan_fail_timeout = 0;
+
+ return FALSE;
+}
+
static void stop_autoscan(struct connman_device *device)
{
- const struct wifi_data *wifi = connman_device_get_data(device);
+ struct wifi_data *wifi = connman_device_get_data(device);
if (!wifi || !wifi->autoscan)
return;
@@ -827,6 +845,11 @@ static void stop_autoscan(struct connman_device *device)
reset_autoscan(device);
connman_device_set_scanning(device, CONNMAN_SERVICE_TYPE_WIFI, false);
+
+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
}
static void check_p2p_technology(void)
@@ -876,6 +899,10 @@ static void wifi_remove(struct connman_device *device)
if (wifi->p2p_connection_timeout)
g_source_remove(wifi->p2p_connection_timeout);
+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ }
+
remove_networks(device, wifi);
connman_device_set_powered(device, false);
@@ -1193,6 +1220,11 @@ static int throw_wifi_scan(struct connman_device *device,
if (ret == 0) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, true);
+
+ wifi->scan_fail_timeout = g_timeout_add_seconds(
+ SCAN_FAIL_TIMEOUT,
+ scan_fail_timeout,
+ device);
} else
connman_device_unref(device);
@@ -1262,6 +1294,11 @@ static void scan_callback(int result, GSupplicantInterface *interface,
if (scanning) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, false);
+
+ if (wifi && wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
}
if (result != -ENOLINK)
@@ -1516,6 +1553,11 @@ static int wifi_disable(struct connman_device *device)
connman_device_unref(wifi->device);
}
+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
+
/* In case of a user scan, device is still referenced */
if (connman_device_get_scanning(device)) {
connman_device_set_scanning(device,
@@ -1864,6 +1906,12 @@ static int wifi_scan(enum connman_service_type type,
if (ret == 0) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, true);
+
+ wifi->scan_fail_timeout = g_timeout_add_seconds(
+ SCAN_FAIL_TIMEOUT,
+ scan_fail_timeout,
+ device);
+
} else {
g_supplicant_free_scan_params(scan_params);
connman_device_unref(device);
Pasi Sjöholm
2015-10-28 23:59:58 UTC
Permalink
Post by Patrik Flykt
Post by p***@jolla.com
Due unknown reason sometimes device->scanning is not set to false after
wifi scanning (connman 1.30 and wpa_supplicant 2.5).
This is probably due callback-function not being called after wifi scan
and therefore it needs to have a timer to prevent deadlock.
---
You probably have evidence that the callback is not called, right?
Please state that in the commit message.
I don't have logs stored anymore on my disk but I'm rather sure that
wifi.c:scan_callback() is not being called when this happens as all
attemps to scan by connman are blocked by device->scanning being true (I
usually get bored after waiting for couple of hours ;)).

When the deadlock happens and I can manually ask wpa_supplicant to scan
available networks from the commandline. After doing this ConnMan will
survive from the deadlock through NetworkAdded being signaled and
autoconnectable network being available as ConnMan gets connected and
autoscan will be stopped.

The problem is hard to reproduce (no known steps) but it has happened to
me around 4 times within last few weeks.
Post by Patrik Flykt
What will happen if wpa_supplicant will call the callback after 16
seconds, i.e. after ConnMan has triggered scan_fail_timeout? Will there
be any artefacts if this happens?
By quickly looking at the code there shouldn't be any issues but I can't
be 100% sure. In theory one could stop autoscan after scan has been
started and restart it immediately before the scan_callback gets called
to have the same effect and ConnMan must be able to handle it.

Currently the TIMEOUT defined in the dbus.c seems to be 30 secs and
after that the callback-function should be called if no reply has been
received (if I read and understand the code correctly). So having 60
seconds timeout should be ok as there should not be any calls coming
from wpa_supplicant after that.

I will modify the timeout and edit the commit message a bit.

Br,
Pasi
p***@jolla.com
2015-10-29 00:16:13 UTC
Permalink
From: Pasi Sjöholm <***@jollamobile.com>

Due unknown reason sometimes scan_callback()-function is not being called
after the scan has been succesfully initiated.

This will lead into deadlock situation where device->scanning is
indefinitely true and will forbid any future attemps of scanning available
wifi networks.

The deadlock can be manually sorted out by commanding wpa_supplicant
to scan available networks. If any networks are available and one or
more of them being autoconnectable ConnMan will connect them after
receiving NetworkAdded-signal from wpa_supplicant. This will cause
autoscan-feature to stop and setting the device->scanning to false.
---
plugins/wifi.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/plugins/wifi.c b/plugins/wifi.c
index dfe849f..1e6507b 100644
--- a/plugins/wifi.c
+++ b/plugins/wifi.c
@@ -61,6 +61,7 @@

#define CLEANUP_TIMEOUT 8 /* in seconds */
#define INACTIVE_TIMEOUT 12 /* in seconds */
+#define SCAN_FAIL_TIMEOUT 60 /* in seconds */
#define FAVORITE_MAXIMUM_RETRIES 2

#define BGSCAN_DEFAULT "simple:30:-45:300"
@@ -131,6 +132,7 @@ struct wifi_data {
struct hidden_params *hidden;
bool postpone_hidden;
struct wifi_tethering_info *tethering_param;
+ unsigned int scan_fail_timeout;
/**
* autoscan "emulation".
*/
@@ -817,9 +819,25 @@ static void reset_autoscan(struct connman_device *device)
connman_device_unref(device);
}

+static gboolean scan_fail_timeout(gpointer data)
+{
+ struct connman_device *device = data;
+ struct wifi_data *wifi = connman_device_get_data(device);
+
+ DBG("");
+
+ if (!wifi)
+ return FALSE;
+
+ connman_device_set_scanning(device, CONNMAN_SERVICE_TYPE_WIFI, false);
+ wifi->scan_fail_timeout = 0;
+
+ return FALSE;
+}
+
static void stop_autoscan(struct connman_device *device)
{
- const struct wifi_data *wifi = connman_device_get_data(device);
+ struct wifi_data *wifi = connman_device_get_data(device);

if (!wifi || !wifi->autoscan)
return;
@@ -827,6 +845,11 @@ static void stop_autoscan(struct connman_device *device)
reset_autoscan(device);

connman_device_set_scanning(device, CONNMAN_SERVICE_TYPE_WIFI, false);
+
+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
}

static void check_p2p_technology(void)
@@ -876,6 +899,10 @@ static void wifi_remove(struct connman_device *device)
if (wifi->p2p_connection_timeout)
g_source_remove(wifi->p2p_connection_timeout);

+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ }
+
remove_networks(device, wifi);

connman_device_set_powered(device, false);
@@ -1193,6 +1220,11 @@ static int throw_wifi_scan(struct connman_device *device,
if (ret == 0) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, true);
+
+ wifi->scan_fail_timeout = g_timeout_add_seconds(
+ SCAN_FAIL_TIMEOUT,
+ scan_fail_timeout,
+ device);
} else
connman_device_unref(device);

@@ -1262,6 +1294,11 @@ static void scan_callback(int result, GSupplicantInterface *interface,
if (scanning) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, false);
+
+ if (wifi && wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
}

if (result != -ENOLINK)
@@ -1516,6 +1553,11 @@ static int wifi_disable(struct connman_device *device)
connman_device_unref(wifi->device);
}

+ if (wifi->scan_fail_timeout) {
+ g_source_remove(wifi->scan_fail_timeout);
+ wifi->scan_fail_timeout = 0;
+ }
+
/* In case of a user scan, device is still referenced */
if (connman_device_get_scanning(device)) {
connman_device_set_scanning(device,
@@ -1864,6 +1906,12 @@ static int wifi_scan(enum connman_service_type type,
if (ret == 0) {
connman_device_set_scanning(device,
CONNMAN_SERVICE_TYPE_WIFI, true);
+
+ wifi->scan_fail_timeout = g_timeout_add_seconds(
+ SCAN_FAIL_TIMEOUT,
+ scan_fail_timeout,
+ device);
+
} else {
g_supplicant_free_scan_params(scan_params);
connman_device_unref(device);
--
2.1.4
Loading...