From c4e3c9956bcfacfa1bc9895f486a0d5ca03dfb83 Mon Sep 17 00:00:00 2001
From: Gourav Kumar
Date: Sun, 9 Jan 2022 08:25:50 +0530
Subject: [PATCH] First commit

---
 .gitignore  |   4 +
 word_bot.py | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 212 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 word_bot.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f2dba00
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+botenv
+logs
+.env
+logs/*
\ No newline at end of file
diff --git a/word_bot.py b/word_bot.py
new file mode 100644
--- /dev/null
+++ b/word_bot.py
@@ -0,0 +1,208 @@
+"""
+This is a telegram bot to fetch random word definition.
+Currently supported commands are:
+word - get a random word with definition
+"""
+
+import datetime
+import logging
+import os
+import psycopg2
+import requests
+import time
+
+from bs4 import BeautifulSoup
+from dotenv import load_dotenv
+from telegram import ParseMode, InlineKeyboardButton, InlineKeyboardMarkup
+from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler
+
+load_dotenv()
+
+# The logs directory is git-ignored, so a fresh checkout does not have it yet.
+os.makedirs('logs', exist_ok=True)
+logging.basicConfig(filename='logs/word_bot.log', filemode='a', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                    level=logging.DEBUG)
+logger = logging.getLogger()
+
+WORD_QUERY = 'select * from gre_words order by random() limit 1'
+DEFINITION_URL = 'https://www.lexico.com/definition/{}?locale=en'
+SYNONYM_URL = 'https://www.lexico.com/synonyms/{}?locale=en'
+AUDIO_BASE_URL = 'https://lex-audio.useremarkable.com/mp3/'
+REQUEST_TIMEOUT = 10  # seconds, so a stalled HTTP request cannot hang a handler
+MAX_WORD_ATTEMPTS = 5  # random words tried before a /word request gives up
+CALLBACK_DATA_LIMIT = 64  # bytes Telegram accepts in a button's callback_data
+ERROR_TEXT = "Sorry, I could not fetch a word right now. Please try again."
+
+# Every character MarkdownV2 treats as markup, plus the escape character itself.
+MARKDOWN_V2_ESCAPES = str.maketrans({char: '\\' + char for char in '\\_*[]()~`>#+-=|{}.!'})
+
+
+def connect_db():
+    """Open a PostgreSQL connection configured by the DB_* environment variables.
+
+    psycopg2 raises on failure instead of returning None, so callers catch
+    psycopg2.Error and close the connection once they are done with it.
+    """
+    return psycopg2.connect(
+        database=os.getenv("DB_NAME"),
+        user=os.getenv("DB_USER"),
+        password=os.getenv("DB_PWD"),
+        host=os.getenv("DB_HOST"),
+        port=os.getenv("DB_PORT")
+    )
+
+
+def start(update, context):
+    """Handle /start: greet the user and point them at /word."""
+    start_text = "Welcome to Random Word Bot. \nTo fetch a word, just send /word"
+    context.bot.send_message(chat_id=update.effective_chat.id, text=start_text)
+
+
+def slugify(message):
+    """Escape message so Telegram shows it literally under MarkdownV2."""
+    return message.translate(MARKDOWN_V2_ESCAPES)
+
+
+def parse_definition(html):
+    """Extract a dictionary entry from a definition page.
+
+    Returns None when the page has no headword. The values are unescaped;
+    escaping happens where the Telegram message is built.
+    """
+    soup = BeautifulSoup(html, 'html.parser')
+    headword = soup.find("span", {"class": "hw"})
+    if headword is None:
+        return None
+    pos = soup.find("span", {"class": "pos"})
+    audio = soup.find("audio")
+    audio_src = audio.get('src', '') if audio is not None else ''
+    return {
+        'word': headword.text.capitalize(),
+        'type': (u'\u2022 ' + pos.text.capitalize()) if pos is not None else '',
+        'audio': audio_src.replace(AUDIO_BASE_URL, ''),
+        'definition': [tag.text for tag in soup.find_all("span", {"class": "ind"})],
+    }
+
+
+def fetch_random_entry(pgcon):
+    """Return the parsed entry for a random word, or None if every attempt fails.
+
+    A word without a usable definition page is skipped and another one is
+    drawn, at most MAX_WORD_ATTEMPTS times.
+    """
+    for _ in range(MAX_WORD_ATTEMPTS):
+        with pgcon.cursor() as cur:
+            cur.execute(WORD_QUERY)
+            row = cur.fetchone()
+        if row is None:
+            logger.error('gre_words returned no rows')
+            return None
+        selected_word = row[1].lower()  # the word is read from the second column
+        try:
+            def_page = requests.get(DEFINITION_URL.format(selected_word), timeout=REQUEST_TIMEOUT)
+        except requests.RequestException:
+            logger.exception('Definition request failed: %s', selected_word)
+            continue
+        if def_page.status_code != 200:
+            logger.info('Word not found: %s', selected_word)
+            continue
+        entry = parse_definition(def_page.content)
+        if entry is not None:
+            return entry
+        logger.info('No headword on page: %s', selected_word)
+    return None
+
+
+def word_def(update, context):
+    """Handle /word: send a random word, its definitions and action buttons."""
+    chat_id = update.effective_chat.id
+    try:
+        pgcon = connect_db()
+    except psycopg2.Error:
+        logger.exception('Db connection failed')
+        context.bot.send_message(chat_id=chat_id, text=ERROR_TEXT)
+        return
+    try:
+        entry = fetch_random_entry(pgcon)
+    except psycopg2.Error:
+        logger.exception('Word query failed')
+        entry = None
+    finally:
+        pgcon.close()
+    if entry is None:
+        context.bot.send_message(chat_id=chat_id, text=ERROR_TEXT)
+        return
+
+    lines = ['*{}*'.format(slugify(entry['word']))]
+    if entry['type']:
+        lines.append('_{}_'.format(slugify(entry['type'])))
+    lines.extend(slugify(text) for text in entry['definition'])
+
+    # Each button carries only what its handler needs: "p~<audio file>" or
+    # "s~<word>". The word stays unescaped so the synonym lookup can use it.
+    actions = [("Synonyms", f"s~{entry['word']}")]
+    if entry['audio']:
+        actions.insert(0, ("Pronunciation", f"p~{entry['audio']}"))
+    buttons = [
+        InlineKeyboardButton(label, callback_data=data)
+        for label, data in actions
+        if len(data.encode('utf-8')) <= CALLBACK_DATA_LIMIT
+    ]
+    reply_markup = InlineKeyboardMarkup([buttons]) if buttons else None
+    context.bot.send_message(chat_id, text='\n'.join(lines), parse_mode=ParseMode.MARKDOWN_V2,
+                             reply_markup=reply_markup)
+
+
+def get_synonyms(word):
+    """Return the synonyms of word as MarkdownV2 text, or '' when none are found."""
+    try:
+        response = requests.get(SYNONYM_URL.format(word.lower()), timeout=REQUEST_TIMEOUT)
+    except requests.RequestException:
+        logger.exception('Synonym request failed: %s', word)
+        return ''
+    if response.status_code != 200:
+        return ''
+    soup = BeautifulSoup(response.content, 'html.parser')
+    parts = []
+    for tag in soup.find_all(['strong', 'span'], {'class': 'syn'}):
+        # get_text() also copes with nested markup, where .string is None.
+        text = slugify(tag.get_text())
+        parts.append(f'\n*{text}*' if tag.name == 'strong' else text)
+    return ''.join(parts).strip()
+
+
+def button(update, context):
+    """Handle an inline button press: "p~<audio file>" or "s~<word>"."""
+    query = update.callback_query
+    request_type, _, payload = (query.data or '').partition('~')
+    query.answer()
+    chat_id = update.effective_chat.id
+    reply_to = query.message.message_id
+    if request_type == 'p':
+        context.bot.send_audio(chat_id, audio=AUDIO_BASE_URL + payload, reply_to_message_id=reply_to)
+    elif request_type == 's':
+        # Telegram refuses empty messages, so say so when nothing was found.
+        synonyms = get_synonyms(payload) or slugify('No synonyms found.')
+        context.bot.send_message(chat_id, text=synonyms, reply_to_message_id=reply_to,
+                                 parse_mode=ParseMode.MARKDOWN_V2)
+    else:
+        logger.warning('Unknown callback data: %s', query.data)
+
+
+def main():
+    """Register the handlers and poll Telegram until interrupted."""
+    wordbot_token = os.getenv('WORDBOT_TOKEN')
+    if not wordbot_token:
+        raise SystemExit('WORDBOT_TOKEN is not set')
+    updater = Updater(token=wordbot_token, use_context=True)
+    dispatcher = updater.dispatcher
+    dispatcher.add_handler(CommandHandler('start', start))
+    dispatcher.add_handler(CommandHandler('word', word_def))
+    dispatcher.add_handler(CallbackQueryHandler(button))
+    updater.start_polling()
+    updater.idle()
+
+
+if __name__ == '__main__':
+    print('word_bot is running. Press ctrl+c to stop.')
+    main()