diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 1b7abaa..7f9d27d 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,17 +7,18 @@ The format is based on `Keep a Changelog `
and this project adheres to `Semantic Versioning <https://semver.org/spec/v2.0.0.html>`_.
----------
-Unreleased
+`0.7.0`_ - 2023-01-14
----------
Fixed
-----
- Sticker pack names save correctly now
-- Explicitly add psycopg2-binary as dependency because sqlalchemy extra doesn't seem to work anymore.
+- Explicitly add psycopg2-binary as dependency because sqlalchemy extra doesn't seem to work anymore
- Try to map user ids to names during json dump import. (#17)
Added
-----
- Add script to import data from desktop client json dumps
+- Add ECDF plot for message counts by user with ``/stats count-dist``
-------------
`0.6.4`_ - 2022-02-27
@@ -131,7 +132,7 @@ Fixed
----------------------
- Initial release
-.. _Unreleased: https://github.com/mkdryden/telegram-stats-bot/compare/v0.6.2...HEAD
+.. _Unreleased: https://github.com/mkdryden/telegram-stats-bot/compare/v0.7.0...HEAD
.. _0.1.1: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.1.1
.. _0.2.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.2.0
.. _0.3.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.3.0
@@ -142,3 +143,5 @@ Fixed
.. _0.6.1: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.1
.. _0.6.2: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.2
.. _0.6.3: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.3
+.. _0.6.4: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.4
+.. _0.7.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.7.0
diff --git a/README.rst b/README.rst
index a4ba154..d68ff62 100644
--- a/README.rst
+++ b/README.rst
@@ -45,6 +45,8 @@ Table of contents
- `counts`_
+ - `count-dist`_
+
- `hours`_
- `days`_
@@ -251,6 +253,13 @@ counts
@WhereAreMyManners 30481 5.1
@TheWorstOfTheBest 28705 4.8
+count-dist
+----------
+``/stats count-dist`` returns an empirical cumulative distribution function (ECDF) plot of the group's users by message count.
+
+.. image:: examples/count-dist.png
+ :alt: Example of count-dist plot
+
hours
-----
``/stats hours`` returns a plot of message frequency for the hours of the day.
diff --git a/examples/count-dist.png b/examples/count-dist.png
new file mode 100644
index 0000000..51a0250
Binary files /dev/null and b/examples/count-dist.png differ
diff --git a/pyproject.toml b/pyproject.toml
index e906e4d..023fcb1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "telegram-stats-bot"
-version = "0.6.4"
+version = "0.7.0"
description = "A logging and statistics bot for Telegram based on python-telegram-bot."
authors = ["Michael DM Dryden "]
repository = "https://github.com/mkdryden/telegram-stats-bot"
diff --git a/telegram_stats_bot/stats.py b/telegram_stats_bot/stats.py
index 5f03cc4..525fefa 100644
--- a/telegram_stats_bot/stats.py
+++ b/telegram_stats_bot/stats.py
@@ -72,6 +72,7 @@ class InternalParser(argparse.ArgumentParser):
class StatsRunner(object):
allowed_methods = {'counts': "get_chat_counts",
+ 'count-dist': 'get_chat_ecdf',
'hours': "get_counts_by_hour",
'days': "get_counts_by_day",
'week': "get_week_by_hourday",
@@ -205,6 +206,75 @@ class StatsRunner(object):
return f"```\n{text}\n```", None
+    def get_chat_ecdf(self, lquery: str = None, mtype: str = None, start: str = None, end: str = None,
+                      log: bool = False) -> Tuple[Union[str, None], Union[None, BytesIO]]:
+        """
+        Get message counts by number of users as an ECDF plot.
+        :param lquery: Limit results to lexical query (&, |, !, <n>)
+        :param mtype: Limit results to message type (text, sticker, photo, etc.)
+        :param start: Start timestamp (e.g. 2019, 2019-01, 2019-01-01, "2019-01-01 14:21")
+        :param end: End timestamp (e.g. 2019, 2019-01, 2019-01-01, "2019-01-01 14:21")
+        :param log: Plot with log scale.
+        """
+        # Returns (text, image): a message and no image when nothing matches,
+        # otherwise no text and a PNG buffer positioned at its start.
+        params = {}
+        filters = []
+
+        # NOTE(review): random_quote is defined elsewhere in this module; presumably it
+        # quotes lquery so it can be embedded in the SQL text safely -- confirm.
+        if lquery:
+            filters.append(f"text_index_col @@ to_tsquery( {random_quote(lquery)} )")
+
+        # mtype is interpolated directly into the SQL below, so it is only accepted
+        # when it matches one of these fixed literals.
+        known_types = ('text', 'sticker', 'photo', 'animation', 'video', 'voice', 'location', 'video_note',
+                       'audio', 'document', 'poll')
+        if mtype and mtype not in known_types:
+            raise HelpException(f'mtype {mtype} is invalid.')
+        if mtype:
+            filters.append(f"type = '{mtype}'")
+
+        # Half-open date window: start is inclusive, end is exclusive. Both bounds are
+        # passed as bound parameters rather than formatted into the query.
+        for key, raw, op in (('start_dt', start, '>='), ('end_dt', end, '<')):
+            if raw:
+                params[key] = pd.to_datetime(raw)
+                filters.append(f"date {op} %({key})s")
+
+        query_where = f"WHERE {' AND '.join(filters)}" if filters else ""
+
+        query = f"""
+                SELECT "from_user", COUNT(*) as "count"
+                FROM "messages_utc"
+                {query_where}
+                GROUP BY "from_user"
+                ORDER BY "count" DESC;
+                """
+
+        with self.engine.connect() as con:
+            counts = pd.read_sql_query(query, con, params=params)
+
+        # The query always selects two columns, so an empty frame means no rows matched.
+        if counts.empty:
+            return "No matching messages", None
+
+        fig = Figure(constrained_layout=True)
+        ax = fig.subplots()
+
+        # y='count' puts the per-user message count on the y axis; with stat='count'
+        # the other axis is the cumulative number of users.
+        sns.ecdfplot(counts, y='count', stat='count', log_scale=log, ax=ax)
+        ax.set_xlabel('User')
+        ax.set_ylabel('Messages')
+        ax.set_title(f"Messages by User for {lquery}" if lquery else "Messages by User")
+
+        sns.despine(fig=fig)
+
+        # Render into an in-memory PNG; the name attribute gives the buffer a filename.
+        buffer = BytesIO()
+        buffer.name = 'plot.png'
+        fig.savefig(buffer, bbox_inches='tight')
+        buffer.seek(0)
+
+        return None, buffer
+
def get_counts_by_hour(self, user: Tuple[int, str] = None, lquery: str = None, start: str = None, end: str = None) \
-> Tuple[Union[str, None], Union[None, BytesIO]]:
"""