Browse Source

separated scrape function, completed synonym functionality

master
Gourav Kumar 2 years ago
parent
commit
9040001b59
  1. 4
      readme.md
  2. 76
      word_bot.py

4
readme.md

@ -4,7 +4,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
### To run the bot on your own server
1. Open Telegram and create a bot using [@BotFather](https://t.me/BotFather)
2. Get the bots token
2. Get the bot's token
3. Clone this repository
4. Rename .env.sample to .env
5. Add all the values in the .env file including the bot token
@ -14,7 +14,7 @@ This bot helps you learn new words for your GRE/GMAT preparations. All you have
Have fun learning random words!
### To-do
1. complete synonyms functionality
1. ~~Complete synonyms functionality~~
2. Add the ability to fetch specific words
3. Get a complete list of words (current list is less common words only)
4. Linkify synonyms

76
word_bot.py

@ -40,32 +40,23 @@ def start(update, context):
context.bot.send_message(chat_id=update.effective_chat.id, text=start_text)
def slugify(message: str) -> str:
    """Escape text so Telegram's MarkdownV2 parser treats it as plain text.

    Every character that MarkdownV2 reserves for markup is prefixed with a
    backslash, so arbitrary scraped text can be embedded in a message sent
    with ``ParseMode.MARKDOWN_V2`` without triggering a parse error.
    Backslashes already present in ``message`` are left untouched.

    Args:
        message: Raw text to embed in a MarkdownV2 message.

    Returns:
        The text with each reserved character preceded by a backslash.
    """
    # Full reserved set for MarkdownV2; escaping only a subset makes Telegram
    # reject messages that happen to contain e.g. "!", "=" or "_".
    reserved = "_*[]()~`>#+-=|{}.!"
    # A translation table escapes everything in a single pass over the text.
    escape_table = str.maketrans({char: "\\" + char for char in reserved})
    return message.translate(escape_table)
def word_def(update, context):
pgcon = connect_db()
if pgcon is None:
print("Db connection failed")
def scrape_def(word: str) -> dict:
""" This function scrapes the definition of a word from lexico.com"""
word_query = 'select * from gre_words order by random() limit 1'
ox_base_url = 'https://www.lexico.com/definition/{}?locale=en'
while True:
cur = pgcon.cursor()
cur.execute(word_query)
word_tup = cur.fetchall()
selected_word = word_tup[0][1].lower()
cur.close()
def_page = requests.get(ox_base_url.format(selected_word))
#print(selected_word, def_page.status_code)
if def_page.status_code != 200:
print(f'Word not found: {selected_word}')
else:
break
ox_url = f'https://www.lexico.com/definition/{word}?locale=en'
def_page = requests.get(ox_url)
if def_page.status_code != 200:
print(f'Word not found: {selected_word}')
return None
def_soup = BeautifulSoup(def_page.content, 'html.parser')
my_dict = {}
@ -82,20 +73,41 @@ def word_def(update, context):
my_dict['definition'] = []
for i, j in enumerate(definition):
my_dict['definition'].append(j.text)
return my_dict
def word_def(update, context):
pgcon = connect_db()
if pgcon is None:
print("Db connection failed")
word_query = 'select * from gre_words order by random() limit 1'
while True:
cur = pgcon.cursor()
cur.execute(word_query)
row = cur.fetchall()
selected_word = row[0][1].lower()
cur.close()
my_dict = scrape_def(selected_word)
if my_dict is not None:
break
msg = "*{word}*\n_{type}_\n{defn}".format(
word=my_dict['word'], type=slugify(my_dict['type']),
defn='\n'.join([slugify(i) for i in my_dict['definition']]))
button_callback_data = f"p~{my_dict['word']}~{my_dict['audio']}"
print(f"{msg=}\n{my_dict=}\n{button_callback_data=}")
button_callback_data_audio = f"p~{my_dict['word']}~{my_dict['audio']}"
button_callback_data_synonyms = f"s~{my_dict['word']}"
keyboard = [
[
InlineKeyboardButton("Pronunciation", callback_data=button_callback_data),
InlineKeyboardButton("Synonyms", callback_data=button_callback_data)
]
[
InlineKeyboardButton("Pronunciation", callback_data=button_callback_data_audio),
InlineKeyboardButton("Synonyms", callback_data=button_callback_data_synonyms)
]
]
print(keyboard)
reply_markup = InlineKeyboardMarkup(keyboard)
msg_response = context.bot.send_message(update.effective_chat.id, text=msg, parse_mode=ParseMode.MARKDOWN_V2, reply_markup=reply_markup)
@ -112,21 +124,29 @@ def get_synonyms(word):
def button(update, context):
    """Respond to the callback buttons attached to the original message.

    The callback data is a '~'-separated string whose first field is the
    request type and whose second field is the word:

    * ``p~<word>~<audio file>``: replies with the pronunciation audio, using
      the audio file name carried in the callback data to build the URL.
    * ``s~<word>``: fetches the synonyms of the word and replies with them.

    Any other request type is answered and otherwise ignored.
    """
    query = update.callback_query
    # Split once. Synonym payloads carry only two fields, so the audio field
    # must not be read before the request type is known (doing so raises
    # IndexError for every synonyms request).
    parts = query.data.split('~')
    request_type = parts[0]
    word = parts[1]
    query.answer()
    if request_type == 'p':
        audio = f"https://lex-audio.useremarkable.com/mp3/{parts[2]}"
        context.bot.send_audio(update.effective_chat.id, audio=audio, reply_to_message_id=query.message.message_id)
    elif request_type == 's':
        synonyms = get_synonyms(word)
        print(synonyms)
        context.bot.send_message(update.effective_chat.id, text=synonyms, reply_to_message_id=query.message.message_id, parse_mode=ParseMode.MARKDOWN_V2)
def main():
wordbot_token = os.getenv('WORDBOT_TOKEN')
updater = Updater(token=wordbot_token, use_context=True)

Loading…
Cancel
Save