separated scrape function, completed synonym functionality

This commit is contained in:
Gourav Kumar 2022-01-09 09:23:54 +05:30
parent 84b24305b6
commit 9040001b59
2 changed files with 50 additions and 30 deletions

View File

@ -4,7 +4,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
### To run the bot on your own server
1. Open Telegram and create a bot using [@BotFather](https://t.me/BotFather)
2. Get the bots token
2. Get the bot's token
3. Clone this repository
4. Rename .env.sample to .env
5. Add all the values in the .env file including the bot token
@ -14,7 +14,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
Have fun learning random words!
### To-do
1. complete synonyms functionality
1. ~~Complete synonyms functionality~~
2. Add the ability to fetch specific words
3. Get a complete list of words (the current list contains only less common words)
4. Linkify synonyms

View File

@ -40,32 +40,23 @@ def start(update, context):
context.bot.send_message(chat_id=update.effective_chat.id, text=start_text)
# Characters that Telegram's MarkdownV2 parser reserves in ordinary text, plus
# the backslash itself so that backslashes already in the text stay literal.
_MARKDOWN_V2_SPECIALS = "\\_*[]()~`>#+-=|{}.!"
_MARKDOWN_V2_ESCAPES = str.maketrans(
    {char: "\\" + char for char in _MARKDOWN_V2_SPECIALS}
)


def slugify(message: str) -> str:
    """Escape *message* for use as plain text in a MarkdownV2 message.

    Every character reserved by Telegram's MarkdownV2 parsing rules is
    prefixed with a backslash. Escaping only a subset (as was done before for
    ``.``, ``*``, ``(``, ``)`` and ``-``) lets text containing e.g. ``!`` or
    ``_`` through unescaped, which Telegram rejects as unparsable.

    The translation is done in a single pass, so the backslashes inserted
    here are never escaped a second time.
    """
    return message.translate(_MARKDOWN_V2_ESCAPES)
def word_def(update, context):
pgcon = connect_db()
if pgcon is None:
print("Db connection failed")
def scrape_def(word: str) -> dict:
""" This function scrapes the definition of a word from lexico.com"""
word_query = 'select * from gre_words order by random() limit 1'
ox_base_url = 'https://www.lexico.com/definition/{}?locale=en'
while True:
cur = pgcon.cursor()
cur.execute(word_query)
word_tup = cur.fetchall()
selected_word = word_tup[0][1].lower()
cur.close()
def_page = requests.get(ox_base_url.format(selected_word))
#print(selected_word, def_page.status_code)
if def_page.status_code != 200:
print(f'Word not found: {selected_word}')
else:
break
ox_url = f'https://www.lexico.com/definition/{word}?locale=en'
def_page = requests.get(ox_url)
if def_page.status_code != 200:
print(f'Word not found: {selected_word}')
return None
def_soup = BeautifulSoup(def_page.content, 'html.parser')
my_dict = {}
@ -82,20 +73,41 @@ def word_def(update, context):
my_dict['definition'] = []
for i, j in enumerate(definition):
my_dict['definition'].append(j.text)
return my_dict
def word_def(update, context):
    """Send a random word with its type and definitions to the chat.

    Random rows are drawn from the ``gre_words`` table until ``scrape_def``
    returns a result for the drawn word. The result is formatted as a
    MarkdownV2 message and sent with two inline buttons whose callback data
    is ``p~<word>~<audio>`` (pronunciation) and ``s~<word>`` (synonyms).

    Returns early, after printing a diagnostic, when the database connection
    is unavailable or the query yields no rows.
    """
    pgcon = connect_db()
    if pgcon is None:
        # Without a connection the query below cannot run; stop here instead
        # of failing later on ``pgcon.cursor()``.
        print("Db connection failed")
        return

    word_query = 'select * from gre_words order by random() limit 1'

    # NOTE(review): this retries without bound while scrape_def keeps
    # returning None - consider capping the number of attempts.
    while True:
        cur = pgcon.cursor()
        try:
            cur.execute(word_query)
            rows = cur.fetchall()
        finally:
            # Release the cursor even when the query raises.
            cur.close()

        if not rows:
            print("No words found in gre_words")
            return

        # The second column of the row is used as the word.
        selected_word = rows[0][1].lower()
        my_dict = scrape_def(selected_word)
        if my_dict is not None:
            break

    # The headword sits inside bold markers, so it needs escaping just like
    # the type and the definitions.
    msg = "*{word}*\n_{type}_\n{defn}".format(
        word=slugify(my_dict['word']),
        type=slugify(my_dict['type']),
        defn='\n'.join(slugify(line) for line in my_dict['definition']),
    )

    button_callback_data_audio = f"p~{my_dict['word']}~{my_dict['audio']}"
    button_callback_data_synonyms = f"s~{my_dict['word']}"
    print(f"{msg=}\n{my_dict=}\n{button_callback_data_audio=}\n{button_callback_data_synonyms=}")

    keyboard = [
        [
            InlineKeyboardButton("Pronunciation", callback_data=button_callback_data_audio),
            InlineKeyboardButton("Synonyms", callback_data=button_callback_data_synonyms),
        ]
    ]
    print(keyboard)
    reply_markup = InlineKeyboardMarkup(keyboard)

    msg_response = context.bot.send_message(update.effective_chat.id, text=msg, parse_mode=ParseMode.MARKDOWN_V2, reply_markup=reply_markup)
@ -112,21 +124,29 @@ def get_synonyms(word):
def button(update, context):
    """Respond to the callback buttons attached to a word message.

    The callback data is a ``~``-separated string whose first field is the
    request type and whose second field is the word:

    * ``p~<word>~<audio file>`` - reply with the pronunciation audio, using
      the audio file name carried in the callback data.
    * ``s~<word>`` - fetch the synonyms of the word and reply with them.

    Any other request type is answered and otherwise ignored.
    """
    query = update.callback_query
    fields = query.data.split('~')
    request_type = fields[0]
    word = fields[1]

    query.answer()

    if request_type == 'p':
        # Only pronunciation callbacks carry a third field, so the URL is
        # built here rather than up front; building it unconditionally raised
        # IndexError for ``s~<word>`` callbacks.
        audio = f"https://lex-audio.useremarkable.com/mp3/{fields[2]}"
        context.bot.send_audio(update.effective_chat.id, audio=audio, reply_to_message_id=query.message.message_id)
    elif request_type == 's':
        synonyms = get_synonyms(word)
        print(synonyms)
        context.bot.send_message(update.effective_chat.id, text=synonyms, reply_to_message_id=query.message.message_id, parse_mode=ParseMode.MARKDOWN_V2)
def main():
wordbot_token = os.getenv('WORDBOT_TOKEN')
updater = Updater(token=wordbot_token, use_context=True)