separated scrape function, completed synonym functionality

2022-01-09 09:23:54 +05:30 · 2022-01-09 09:23:54 +05:30 · 9040001b59
commit 9040001b59
parent 84b24305b6
2 changed files with 50 additions and 30 deletions
--- a/readme.md
+++ b/readme.md
@@ -4,7 +4,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
 ### To run the bot on your own server
 1. Open Telegram and create a bot using [@BotFather](https://t.me/BotFather)
-2. Get the bots token
+2. Get the bot's token
 3. Clone this repository
 4. Rename .env.sample to .env
 5. Add all the values in the .env file including the bot token
@@ -14,7 +14,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
 Have fun learning random words!
 ### To-do
-1. complete synonyms functionality
+1. ~~Complete synonyms functionality~~
 2. Add the ability to fetch specific words
 3. Get a complete list of words (current list is less common words only)
 4. Linkify synonyms
--- a/word_bot.py
+++ b/word_bot.py
@@ -40,32 +40,23 @@ def start(update, context):
    context.bot.send_message(chat_id=update.effective_chat.id, text=start_text)
-def slugify(message):
+def slugify(message: str) -> str:
    """ This function adds relevant escape characters as per Telegram's markdown parsing rules """
    message = message.replace(".", "\\.").replace("*", "\\*").replace("(", "\\(").replace(")", "\\)").replace("-", "\\-")
    return message
-def word_def(update, context):
+def scrape_def(word: str) -> dict:
-    pgcon = connect_db()
+    """ This function scrapes the definition of a word from lexico.com"""
    if pgcon is None:
        print("Db connection failed")
-    word_query = 'select * from gre_words order by random() limit 1'
+    ox_url = f'https://www.lexico.com/definition/{word}?locale=en'
-
+    def_page = requests.get(ox_url)
    ox_base_url = 'https://www.lexico.com/definition/{}?locale=en'
    while True:
        cur = pgcon.cursor()
        cur.execute(word_query)
        word_tup = cur.fetchall()
        selected_word = word_tup[0][1].lower()
        cur.close()
        def_page = requests.get(ox_base_url.format(selected_word))
        #print(selected_word, def_page.status_code)
        if def_page.status_code != 200:
            print(f'Word not found: {selected_word}')
        else:
            break
    if def_page.status_code != 200:
        print(f'Word not found: {selected_word}')
        return None
    def_soup = BeautifulSoup(def_page.content, 'html.parser')
    my_dict = {}
@@ -82,20 +73,41 @@ def word_def(update, context):
    my_dict['definition'] = []
    for i, j in enumerate(definition):
        my_dict['definition'].append(j.text)
    return my_dict
 def word_def(update, context):
    pgcon = connect_db()
    if pgcon is None:
        print("Db connection failed")
    word_query = 'select * from gre_words order by random() limit 1'
    while True:
        cur = pgcon.cursor()
        cur.execute(word_query)
        row = cur.fetchall()
        selected_word = row[0][1].lower()
        cur.close()
        my_dict = scrape_def(selected_word)
        if my_dict is not None:
            break
    msg = "*{word}*\n_{type}_\n{defn}".format(
        word=my_dict['word'], type=slugify(my_dict['type']),
        defn='\n'.join([slugify(i) for i in my_dict['definition']]))
-    button_callback_data = f"p~{my_dict['word']}~{my_dict['audio']}"
+    button_callback_data_audio = f"p~{my_dict['word']}~{my_dict['audio']}"
-    print(f"{msg=}\n{my_dict=}\n{button_callback_data=}")
+    button_callback_data_synonyms = f"s~{my_dict['word']}"
    keyboard = [
-                       [
+                   [
-                           InlineKeyboardButton("Pronunciation", callback_data=button_callback_data),
+                       InlineKeyboardButton("Pronunciation", callback_data=button_callback_data_audio),
-                           InlineKeyboardButton("Synonyms", callback_data=button_callback_data)
+                       InlineKeyboardButton("Synonyms", callback_data=button_callback_data_synonyms)
                       ]
                   ]
               ]
    print(keyboard)
    reply_markup = InlineKeyboardMarkup(keyboard)
    msg_response = context.bot.send_message(update.effective_chat.id, text=msg, parse_mode=ParseMode.MARKDOWN_V2, reply_markup=reply_markup)
@@ -112,21 +124,29 @@ def get_synonyms(word):
 def button(update, context):
    """ Responds to callback buttons in the original message.
        If the callback type is audio, then responds with already available audio URL
        If the callback type is synonyms, then fetchs the synonyms and responds
    """
    query = update.callback_query
    data = query.data
    request_type = data.split('~')[0]
    word = data.split('~')[1]
-    audio = f"https://lex-audio.useremarkable.com/mp3/{data.split('~')[2]}"
+
    query.answer()
    if request_type == 'p':
        #query.edit_message_reply_markup(reply_markup=None)
        audio = f"https://lex-audio.useremarkable.com/mp3/{data.split('~')[2]}"
        context.bot.send_audio(update.effective_chat.id, audio=audio, reply_to_message_id=query.message.message_id)
    elif request_type == 's':
        #query.edit_message_reply_markup(reply_markup=None)
        synonyms = get_synonyms(word)
        print(synonyms)
        context.bot.send_message(update.effective_chat.id, text=synonyms, reply_to_message_id=query.message.message_id, parse_mode=ParseMode.MARKDOWN_V2)
-    
+
 def main():
    wordbot_token = os.getenv('WORDBOT_TOKEN')
    updater = Updater(token=wordbot_token, use_context=True)