From 9040001b59573193ffe4509b0610a5b0dcf2d6c8 Mon Sep 17 00:00:00 2001
From: gouravkr <gourav2711@gmail.com>
Date: Sun, 9 Jan 2022 09:23:54 +0530
Subject: [PATCH] separated scrape function, completed synonym functionality

---
 readme.md   |  4 +--
 word_bot.py | 76 +++++++++++++++++++++++++++++++++--------------------
 2 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/readme.md b/readme.md
index 60f3344..4128ec1 100644
--- a/readme.md
+++ b/readme.md
@@ -4,7 +4,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
 
 ### To run the bot on your own server
 1. Open Telegram and create a bot using [@BotFather](https://t.me/BotFather)
-2. Get the bots token
+2. Get the bot's token
 3. Clone this repository
 4. Rename .env.sample to .env
 5. Add all the values in the .env file including the bot token
@@ -14,7 +14,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
 Have fun learning random words!
 
 ### To-do
-1. complete synonyms functionality
+1. ~~Complete synonyms functionality~~
 2. Add the ability to fetch specific words
 3. Get a complete list of words (current list is less common words only)
 4. Linkify synonyms
diff --git a/word_bot.py b/word_bot.py
index c464ecb..dd35947 100644
--- a/word_bot.py
+++ b/word_bot.py
@@ -40,32 +40,23 @@ def start(update, context):
     context.bot.send_message(chat_id=update.effective_chat.id, text=start_text)
     
 
-def slugify(message):
+def slugify(message: str) -> str:
+    """ Backslash-escape the characters . * ( ) - in message so Telegram's MarkdownV2 parser treats them literally """
+
     message = message.replace(".", "\\.").replace("*", "\\*").replace("(", "\\(").replace(")", "\\)").replace("-", "\\-")
     return message
 
 
-def word_def(update, context):
-    pgcon = connect_db()
-    if pgcon is None:
-        print("Db connection failed")
+def scrape_def(word: str) -> dict:
+    """ Scrape the definition of a word from lexico.com; returns None if the page does not respond with HTTP 200 """
 
-    word_query = 'select * from gre_words order by random() limit 1'
-
-    ox_base_url = 'https://www.lexico.com/definition/{}?locale=en'
-    while True:
-        cur = pgcon.cursor()
-        cur.execute(word_query)
-        word_tup = cur.fetchall()
-        selected_word = word_tup[0][1].lower()
-        cur.close()
-        def_page = requests.get(ox_base_url.format(selected_word))
-        #print(selected_word, def_page.status_code)
-        if def_page.status_code != 200:
-            print(f'Word not found: {selected_word}')
-        else:
-            break
+    ox_url = f'https://www.lexico.com/definition/{word}?locale=en'
+    def_page = requests.get(ox_url)
 
+    if def_page.status_code != 200:  # any non-200 response is treated as "no entry for this word"
+        print(f'Word not found: {word}')
+        return None
+    
     def_soup = BeautifulSoup(def_page.content, 'html.parser')
 
     my_dict = {}
@@ -82,20 +73,41 @@ def word_def(update, context):
     my_dict['definition'] = []
     for i, j in enumerate(definition):
         my_dict['definition'].append(j.text)
+
+    return my_dict
+
+
+def word_def(update, context):
+    pgcon = connect_db()
+    if pgcon is None:
+        print("Db connection failed")
+
+    word_query = 'select * from gre_words order by random() limit 1'
+
+    while True:
+        cur = pgcon.cursor()
+        cur.execute(word_query)
+        row = cur.fetchall()
+        selected_word = row[0][1].lower()
+        cur.close()
+
+        my_dict = scrape_def(selected_word)
+        if my_dict is not None:
+            break
     
     msg = "*{word}*\n_{type}_\n{defn}".format(
         word=my_dict['word'], type=slugify(my_dict['type']),
         defn='\n'.join([slugify(i) for i in my_dict['definition']]))
 
-    button_callback_data = f"p~{my_dict['word']}~{my_dict['audio']}"
-    print(f"{msg=}\n{my_dict=}\n{button_callback_data=}")
+    button_callback_data_audio = f"p~{my_dict['word']}~{my_dict['audio']}"
+    button_callback_data_synonyms = f"s~{my_dict['word']}"
     
     keyboard = [
-                       [
-                           InlineKeyboardButton("Pronunciation", callback_data=button_callback_data),
-                           InlineKeyboardButton("Synonyms", callback_data=button_callback_data)
-                       ]
+                   [
+                       InlineKeyboardButton("Pronunciation", callback_data=button_callback_data_audio),
+                       InlineKeyboardButton("Synonyms", callback_data=button_callback_data_synonyms)
                    ]
+               ]
     print(keyboard)
     reply_markup = InlineKeyboardMarkup(keyboard)
     msg_response = context.bot.send_message(update.effective_chat.id, text=msg, parse_mode=ParseMode.MARKDOWN_V2, reply_markup=reply_markup)
@@ -112,21 +124,29 @@ def get_synonyms(word):
 
 
 def button(update, context):
+    """ Responds to callback buttons in the original message.
+
+        If the callback type is audio, then responds with the already available audio URL
+        If the callback type is synonyms, then fetches the synonyms and responds
+    """
+
     query = update.callback_query
     data = query.data
     request_type = data.split('~')[0]
     word = data.split('~')[1]
-    audio = f"https://lex-audio.useremarkable.com/mp3/{data.split('~')[2]}"
+
     query.answer()
     if request_type == 'p':
         #query.edit_message_reply_markup(reply_markup=None)
+        audio = f"https://lex-audio.useremarkable.com/mp3/{data.split('~')[2]}"
         context.bot.send_audio(update.effective_chat.id, audio=audio, reply_to_message_id=query.message.message_id)
     elif request_type == 's':
         #query.edit_message_reply_markup(reply_markup=None)
         synonyms = get_synonyms(word)
         print(synonyms)
         context.bot.send_message(update.effective_chat.id, text=synonyms, reply_to_message_id=query.message.message_id, parse_mode=ParseMode.MARKDOWN_V2)
-    
+
+
 def main():
     wordbot_token = os.getenv('WORDBOT_TOKEN')
     updater = Updater(token=wordbot_token, use_context=True)