# telegram-wordbot/word_bot.py

"""
This is a Telegram bot that fetches a random word together with its definition.
Currently supported commands are:
start - show a welcome message
word - get a random word with its definition
"""

import datetime
import logging
import os
import psycopg2
import requests
import time

from bs4 import BeautifulSoup
from dotenv import load_dotenv
from telegram import ParseMode, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler

# Load variables from a .env file into the environment; the DB settings and
# the bot token are read later through os.getenv.
load_dotenv()

# basicConfig opens the log file immediately and raises FileNotFoundError
# when its directory is missing, so make sure the directory exists first.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    filename='logs/word_bot.log',
    filemode='a',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger()


def connect_db():
    """Open a new PostgreSQL connection configured from the environment.

    The connection parameters are read with os.getenv from DB_NAME, DB_USER,
    DB_PWD, DB_HOST and DB_PORT and handed to psycopg2.connect.

    Returns:
        The connection object returned by psycopg2.connect.
    """
    settings = {
        "database": os.getenv("DB_NAME"),
        "user": os.getenv("DB_USER"),
        "password": os.getenv("DB_PWD"),
        "host": os.getenv("DB_HOST"),
        "port": os.getenv("DB_PORT"),
    }
    return psycopg2.connect(**settings)


def start(update, context):
    """Handle /start by sending a short welcome and usage message to the chat."""
    context.bot.send_message(
        chat_id=update.effective_chat.id,
        text="Welcome to Random Word Bot. To fetch a word, just send /word",
    )


# Characters Telegram's MarkdownV2 parser treats as markup; each must be
# preceded by a backslash to be shown literally. The backslash itself is
# included so an existing backslash is not read as an escape prefix.
_MARKDOWN_V2_ESCAPES = str.maketrans(
    {char: "\\" + char for char in "\\_*[]()~`>#+-=|{}.!"}
)


def slugify(message: str) -> str:
    """Escape *message* so Telegram renders it literally under MarkdownV2.

    Every MarkdownV2 reserved character (and the backslash) is prefixed with
    a backslash. Formatting markers such as ``*`` or ``_`` must therefore be
    added by the caller *after* escaping the text they wrap.

    Args:
        message: Plain text to be embedded in a MarkdownV2 message.

    Returns:
        The escaped text; characters without special meaning are unchanged.
    """
    return message.translate(_MARKDOWN_V2_ESCAPES)


def scrape_def(word: str) -> dict:
    """Scrape the definition of *word* from lexico.com.

    Args:
        word: The word to look up; it is interpolated into the lexico URL.

    Returns:
        A dict with the keys ``word`` (capitalized headword, escaped with
        slugify), ``type`` (bullet-prefixed, capitalized part of speech),
        ``audio`` (the audio ``src`` with the lex-audio mp3 base URL removed)
        and ``definition`` (list of definition strings), or ``None`` when the
        page does not answer with HTTP 200 or lacks the headword, part of
        speech or audio element.

    Raises:
        requests.RequestException: If the HTTP request itself fails or
            times out.
    """
    ox_url = f'https://www.lexico.com/definition/{word}?locale=en'
    # A timeout keeps a stalled server from blocking the handler forever.
    def_page = requests.get(ox_url, timeout=10)

    if def_page.status_code != 200:
        logger.warning('Word not found: %s', word)
        return None

    def_soup = BeautifulSoup(def_page.content, 'html.parser')

    # find() yields the first match or None, so a page without the expected
    # markup is reported as "not found" instead of raising IndexError.
    headword = def_soup.find("span", {"class": "hw"})
    grammatical_type = def_soup.find("span", {"class": "pos"})
    audio = def_soup.find("audio")
    audio_src = audio.get('src') if audio is not None else None

    if headword is None or grammatical_type is None or not audio_src:
        logger.warning('Incomplete definition page for word: %s', word)
        return None

    return {
        'word': slugify(headword.text.capitalize()),
        'type': '\u2022 ' + grammatical_type.text.capitalize(),
        # Only the path below the mp3 base URL is kept; the button handler
        # prepends the base URL again when sending the audio.
        'audio': audio_src.replace('https://lex-audio.useremarkable.com/mp3/', ''),
        'definition': [tag.text for tag in def_soup.find_all("span", {"class": "ind"})],
    }


# Upper bound on how many random words are tried per /word request, so that a
# scraping outage cannot keep the handler looping forever.
_MAX_WORD_ATTEMPTS = 5


def word_def(update, context):
    """Handle /word: pick a random word from the DB and send its definition.

    A random row is drawn from ``gre_words`` and looked up with scrape_def;
    when the lookup yields ``None`` another row is tried, up to
    ``_MAX_WORD_ATTEMPTS`` times. On success a MarkdownV2 message with the
    word, its part of speech and definitions is sent, together with inline
    "Pronunciation" and "Synonyms" buttons. If no word could be resolved, a
    plain-text apology is sent instead.

    Args:
        update: Incoming Telegram update; its effective chat receives the reply.
        context: Callback context providing ``context.bot``.
    """
    chat_id = update.effective_chat.id
    word_query = 'select * from gre_words order by random() limit 1'

    selected_word = None
    my_dict = None
    pgcon = connect_db()
    try:
        for _ in range(_MAX_WORD_ATTEMPTS):
            with pgcon.cursor() as cur:
                cur.execute(word_query)
                row = cur.fetchone()
            if row is None:
                # Empty table: retrying cannot produce a word.
                break

            # NOTE(review): assumes the second column of gre_words holds the
            # word text - confirm against the table schema.
            selected_word = row[1].lower()
            my_dict = scrape_def(selected_word)
            if my_dict is not None:
                break
    finally:
        # Always release the connection; one is opened per request.
        pgcon.close()

    if my_dict is None:
        logger.warning('No definition found after %d attempts', _MAX_WORD_ATTEMPTS)
        context.bot.send_message(
            chat_id=chat_id,
            text="Sorry, I could not fetch a word right now. Please try again later.")
        return

    msg = "*{word}*\n_{pos}_\n{defn}".format(
        word=my_dict['word'], pos=slugify(my_dict['type']),
        defn='\n'.join(slugify(line) for line in my_dict['definition']))

    # Callback data carries the raw looked-up word, not my_dict['word']: the
    # latter is Markdown-escaped and its backslashes would end up in the
    # synonyms URL. Telegram limits callback data to 64 bytes.
    button_callback_data_audio = f"p~{selected_word}~{my_dict['audio']}"
    button_callback_data_synonyms = f"s~{selected_word}"

    keyboard = [
        [
            InlineKeyboardButton("Pronunciation", callback_data=button_callback_data_audio),
            InlineKeyboardButton("Synonyms", callback_data=button_callback_data_synonyms),
        ]
    ]
    reply_markup = InlineKeyboardMarkup(keyboard)
    context.bot.send_message(chat_id, text=msg, parse_mode=ParseMode.MARKDOWN_V2, reply_markup=reply_markup)


def get_synonyms(word):
    """Fetch the synonyms of *word* from lexico.com as MarkdownV2 text.

    All ``strong`` and ``span`` elements with class ``syn`` are collected in
    page order. ``strong`` entries start a new line and are rendered bold;
    ``span`` entries are appended as they appear. Every piece of page text is
    escaped with slugify so it cannot break Telegram's MarkdownV2 parser.

    Args:
        word: The word to look up; it is lower-cased before building the URL.

    Returns:
        The formatted synonyms, or an empty string when the page does not
        answer with HTTP 200 or contains no synonym elements.

    Raises:
        requests.RequestException: If the HTTP request itself fails or
            times out.
    """
    syn_url = 'https://www.lexico.com/synonyms/{}?locale=en'
    response = requests.get(syn_url.format(word.lower()), timeout=10)
    if response.status_code != 200:
        logger.warning('Synonyms not found: %s', word)
        return ''

    soup = BeautifulSoup(response.content, 'html.parser')
    pieces = []
    for tag in soup.find_all(['strong', 'span'], {'class': 'syn'}):
        # get_text() always returns a str, whereas .string is None for tags
        # with nested children.
        text = slugify(tag.get_text())
        pieces.append(f'\n*{text}*' if tag.name == 'strong' else text)
    return ''.join(pieces)


def button(update, context):
    """Respond to the inline buttons attached to a word message.

    Callback data has the form ``p~<word>~<audio path>`` for pronunciation
    and ``s~<word>`` for synonyms.

    If the callback type is audio, replies with the already known audio URL.
    If the callback type is synonyms, fetches the synonyms and replies with
    them, or with a short notice when none were found.

    Args:
        update: Incoming Telegram update carrying the callback query.
        context: Callback context providing ``context.bot``.
    """
    query = update.callback_query
    # partition() never raises on malformed data and leaves any further '~'
    # inside the audio path untouched.
    request_type, _, remainder = query.data.partition('~')
    word, _, audio_path = remainder.partition('~')
    # Callback data may contain the Markdown-escaped headword; drop the
    # escape backslashes before using it for a lookup.
    word = word.replace('\\', '')

    query.answer()
    if request_type == 'p':
        if not audio_path:
            logger.warning('Pronunciation callback without audio path: %s', query.data)
            return
        audio = f"https://lex-audio.useremarkable.com/mp3/{audio_path}"
        context.bot.send_audio(update.effective_chat.id, audio=audio, reply_to_message_id=query.message.message_id)
    elif request_type == 's':
        # Telegram rejects empty messages, so fall back to a notice that
        # contains no MarkdownV2 reserved characters.
        synonyms = get_synonyms(word) or 'No synonyms found'
        context.bot.send_message(update.effective_chat.id, text=synonyms, reply_to_message_id=query.message.message_id, parse_mode=ParseMode.MARKDOWN_V2)


def main():
    """Create the updater from WORDBOT_TOKEN, register handlers and poll.

    The /start and /word commands and the inline-button callback handler are
    registered in that order; the function then starts polling and blocks in
    updater.idle().
    """
    updater = Updater(token=os.getenv('WORDBOT_TOKEN'), use_context=True)
    dispatcher = updater.dispatcher

    handlers = (
        CommandHandler('start', start),
        CommandHandler('word', word_def),
        CallbackQueryHandler(button),
    )
    for handler in handlers:
        dispatcher.add_handler(handler)

    updater.start_polling()
    updater.idle()


if __name__ == "__main__":
    # Announce startup on stdout, then hand control to main().
    print("word_bot is running. Press ctrl+c to stop.")
    main()