From 9040001b59573193ffe4509b0610a5b0dcf2d6c8 Mon Sep 17 00:00:00 2001 From: gouravkr Date: Sun, 9 Jan 2022 09:23:54 +0530 Subject: [PATCH] separated scrape function, completed synonym functionality --- readme.md | 4 +-- word_bot.py | 76 +++++++++++++++++++++++++++++++++-------------------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/readme.md b/readme.md index 60f3344..4128ec1 100644 --- a/readme.md +++ b/readme.md @@ -4,7 +4,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have ### To run the bot on your own server 1. Open Telegram and create a bot using [@BotFather](https://t.me/BotFather) -2. Get the bots token +2. Get the bot's token 3. Clone this repository 4. Rename .env.sample to .env 5. Add all the values in the .env file including the bot token @@ -14,7 +14,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have Have fun learning random words! ### To-do -1. complete synonyms functionality +1. ~~Complete synonyms functionality~~ 2. Add the ability to fetch specific words 3. Get a complete list of words (current list is less common words only) 4. 
Linkify synonyms diff --git a/word_bot.py b/word_bot.py index c464ecb..dd35947 100644 --- a/word_bot.py +++ b/word_bot.py @@ -40,32 +40,23 @@ def start(update, context): context.bot.send_message(chat_id=update.effective_chat.id, text=start_text) -def slugify(message): +def slugify(message: str) -> str: + """ This function adds relevant escape characters as per Telegram's markdown parsing rules """ + message = message.replace(".", "\\.").replace("*", "\\*").replace("(", "\\(").replace(")", "\\)").replace("-", "\\-") return message -def word_def(update, context): - pgcon = connect_db() - if pgcon is None: - print("Db connection failed") +def scrape_def(word: str) -> dict: + """ This function scrapes the definition of a word from lexico.com""" - word_query = 'select * from gre_words order by random() limit 1' - - ox_base_url = 'https://www.lexico.com/definition/{}?locale=en' - while True: - cur = pgcon.cursor() - cur.execute(word_query) - word_tup = cur.fetchall() - selected_word = word_tup[0][1].lower() - cur.close() - def_page = requests.get(ox_base_url.format(selected_word)) - #print(selected_word, def_page.status_code) - if def_page.status_code != 200: - print(f'Word not found: {selected_word}') - else: - break + ox_url = f'https://www.lexico.com/definition/{word}?locale=en' + def_page = requests.get(ox_url) + if def_page.status_code != 200: + print(f'Word not found: {selected_word}') + return None + def_soup = BeautifulSoup(def_page.content, 'html.parser') my_dict = {} @@ -82,20 +73,41 @@ def word_def(update, context): my_dict['definition'] = [] for i, j in enumerate(definition): my_dict['definition'].append(j.text) + + return my_dict + + +def word_def(update, context): + pgcon = connect_db() + if pgcon is None: + print("Db connection failed") + + word_query = 'select * from gre_words order by random() limit 1' + + while True: + cur = pgcon.cursor() + cur.execute(word_query) + row = cur.fetchall() + selected_word = row[0][1].lower() + cur.close() + + my_dict = 
scrape_def(selected_word) + if my_dict is not None: + break msg = "*{word}*\n_{type}_\n{defn}".format( word=my_dict['word'], type=slugify(my_dict['type']), defn='\n'.join([slugify(i) for i in my_dict['definition']])) - button_callback_data = f"p~{my_dict['word']}~{my_dict['audio']}" - print(f"{msg=}\n{my_dict=}\n{button_callback_data=}") + button_callback_data_audio = f"p~{my_dict['word']}~{my_dict['audio']}" + button_callback_data_synonyms = f"s~{my_dict['word']}" keyboard = [ - [ - InlineKeyboardButton("Pronunciation", callback_data=button_callback_data), - InlineKeyboardButton("Synonyms", callback_data=button_callback_data) - ] + [ + InlineKeyboardButton("Pronunciation", callback_data=button_callback_data_audio), + InlineKeyboardButton("Synonyms", callback_data=button_callback_data_synonyms) ] + ] print(keyboard) reply_markup = InlineKeyboardMarkup(keyboard) msg_response = context.bot.send_message(update.effective_chat.id, text=msg, parse_mode=ParseMode.MARKDOWN_V2, reply_markup=reply_markup) @@ -112,21 +124,29 @@ def get_synonyms(word): def button(update, context): + """ Responds to callback buttons in the original message. 
+ + If the callback type is audio, then responds with already available audio URL + If the callback type is synonyms, then fetches the synonyms and responds + """ + query = update.callback_query data = query.data request_type = data.split('~')[0] word = data.split('~')[1] - audio = f"https://lex-audio.useremarkable.com/mp3/{data.split('~')[2]}" + query.answer() if request_type == 'p': #query.edit_message_reply_markup(reply_markup=None) + audio = f"https://lex-audio.useremarkable.com/mp3/{data.split('~')[2]}" context.bot.send_audio(update.effective_chat.id, audio=audio, reply_to_message_id=query.message.message_id) elif request_type == 's': #query.edit_message_reply_markup(reply_markup=None) synonyms = get_synonyms(word) print(synonyms) context.bot.send_message(update.effective_chat.id, text=synonyms, reply_to_message_id=query.message.message_id, parse_mode=ParseMode.MARKDOWN_V2) - + + def main(): wordbot_token = os.getenv('WORDBOT_TOKEN') updater = Updater(token=wordbot_token, use_context=True)