Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Python3 program for a word frequency
# counter after crawling a web-page
import requests
from bs4 import BeautifulSoup
import operator
from collections import Counter
import matplotlib.pyplot as plt
import numpy as np

'''Function defining the web-crawler/core
spider, which will fetch information from
a given website, and push the contents to
the second function clean_wordlist()'''
def start(url):
    """Fetch *url*, harvest its article text, and feed the lower-cased
    words to clean_wordlist().

    Text on the target site is stored under <div> tags with class
    "entry-content", so only those containers are scanned.
    """
    # Accumulates every word fetched from the page.
    wordlist = []
    source_code = requests.get(url).text

    # Parse the downloaded HTML.
    soup = BeautifulSoup(source_code, 'html.parser')

    # find_all is the modern bs4 name; findAll is a deprecated alias.
    for each_text in soup.find_all('div', {'class': 'entry-content'}):
        # split() breaks on any whitespace; lower() makes the later
        # counting case-insensitive.  extend() replaces the original
        # word-by-word append loop.
        wordlist.extend(each_text.text.lower().split())

    clean_wordlist(wordlist)

# Function removes any unwanted symbols
def clean_wordlist(wordlist):
    """Strip punctuation/symbol characters from every word and forward
    the non-empty results to create_dictionary().

    Parameters
    ----------
    wordlist : list of str
        Raw whitespace-split tokens from the crawled page.
    """
    # Build one translation table: str.translate removes every listed
    # character in a single C-level pass instead of ~30 chained
    # str.replace calls per word.  The backslash is escaped explicitly —
    # the original string relied on the invalid escape sequences '\!'
    # and '\;' (a DeprecationWarning in modern Python) to include it.
    symbols = '\\!@#$%^&*()_-+={[}]|;:"<>?/., '
    table = str.maketrans('', '', symbols)

    clean_list = []
    for word in wordlist:
        stripped = word.translate(table)
        if stripped:  # drop words that were nothing but symbols
            clean_list.append(stripped)
    create_dictionary(clean_list)

# Creates a dictionary containing each word's count and plots the
# top-10 most frequently occurring words.
def create_dictionary(clean_list):
    """Count word frequencies and visualise the ten most common words.

    Parameters
    ----------
    clean_list : list of str
        Words already stripped of punctuation (see clean_wordlist()).

    Returns
    -------
    list of (str, int)
        The ten most common (word, count) pairs, most frequent first.
        (The original returned None; returning the data is
        backward-compatible and makes the result reusable.)
    """
    # Counter performs the frequency count in one pass, replacing the
    # manual if/else accumulation loop the original wrapped in Counter
    # anyway.
    c = Counter(clean_list)

    # The most occurring elements as (word, count) pairs.
    top = c.most_common(10)
    print(top)
    print(type(top))

    # Guard the empty case: 'labels, ys = zip()' would raise ValueError.
    if top:
        labels, ys = zip(*top)
    else:
        labels, ys = (), ()
    xs = np.arange(len(labels))
    width = 1

    plt.bar(xs, ys, width, align='center')
    plt.title("Avi Kasliwal Submission")
    plt.xticks(xs, labels)  # label each bar with its word (was missing)
    plt.ylabel("Count")
    plt.show()  # without this the figure never appears when run as a script
    return top

# Driver code
if __name__ == '__main__':
    # Crawl a sample GeeksforGeeks page and run the full word-frequency
    # pipeline (start -> clean_wordlist -> create_dictionary) on it.
    start("https://www.geeksforgeeks.org/binary-tree-data-structure/")

Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('tree', 6), ('binary', 5), ('a', 4), ('child', 3), ('data', 2), ('have', 2), ('2', 2), ('children', 2), ('left', 2), ('right', 2)]\n",
"<class 'list'>\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEICAYAAABYoZ8gAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAATMklEQVR4nO3dfbAldX3n8fdHBgUEgoa7BBgnV1fiQ6wC3AsRCT4AZSDjamL5gGuUENdZ82DQsLqjW+6u2S2XbLmJJnFNZolCwoMBxIpKFLICE90SlAFXwdFoyAAj4ICKPCRGhnz3j+6rh8kwt2fm9j0z83u/qm7d7j7d/ft235nP6f6dPt2pKiRJ7XjMtAuQJC0tg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvxZFknckOWeJ2zw3yX/rh09I8rVFXv9/SXL+Yq6zX+9skkqybJHW98kkp+/E8n+U5J2LUYt2D4vyD097liTXAEcCP1FV/zhkmap69zbW9wLg/Kpa3o8/FvgwcAhwalXdt7M1V9VngKft7Hq2R5J3AG8AZoB7gf9bVa9ayhoAqurUnVz+jYtVi3YPHvHrEZLMAicABbxkhPU/DrgMOAh40WKE/jT0R9ivBU6uqv2BOeDT061KGsbg15ZeB1wLnAv8sPsgyXOS3JVkr4lpv5jkS/3wgt0iSfYDPg7sDaysqgf76ccm+VySe5PcmeQP+7MC0vm9JJuSfC/Jl5I8ayvrfkGSjf3wGUk+PvHaN5JcPDF+e5Kj+uH39eP3JVmX5ISB++kY4Iqq+luAqrqrqtZMtLEhyckT41vbP7+S5I5+m8/aYt5Lkpyf5P4kX07yU0ne3u+H25O8aGL+a5L82374qUnW9vvqniR/vtB+nOwy68ff0O+z7yT5WJLDJl6rJG9M8vUk303y/iQZuM+0izD4taXXARf0Pz+X5BCAqroWeBA4cWLefwNcOHC9jwM+CXwfeElV/cPEaw8DbwEOBo4DTgJ+rX/tRcDzgJ+iO0t4FfDtBdpaC5yQ5DFJDqV7ozkeIMlTgP2BL/XzfgE4Cnhivy2XJNlnwPZcC7wuyVuTzE2+IW6HFwJH0G3j6sk3CuBfA38GPAG4EbiC7v/r4cBvA3/8KOv8r8CV/XLLgT/opw/aj0lOBP478ErgUOBWum65SS+me+M7sp/v5wZur3YRBr9+KMnPAj8JXFxV64C/pQv3eRcBr+7nPQD4+X7aEAfQhfp5W35uUFXrquraqtpcVRvoQu35/csP9cs+HUhVra+qO7fVUFXdAtxPF+jPpwvNbyZ5ej/+mar6p37e86vq233b/5PuDWrBzwqq6nzgTXShtxbYlGT1kB0x4V1V9WBVfRn4EP2+7X2mqq6oqs3AJXSfI5xdVQ/RBfFskoO2ss6H6P6Gh1XV96vqsxPTh+zH1wAfrKob+r/T24Hj+i7AeWdX1b1VdRtwNd1+1m7E4Nek04Erq+qefvxCJrp7+vGX9f30LwNuqKpbB677HuA04LwkjzhC7LsxPtF3Jd0HvJvu6J+qugr4Q+D9wLeSrEly4ID21gIvoDvKXQtcQxf6z+/H59s+K8n6vvvjXuDH5tteSFVdUFUn0x1BvxH47S23bQG3TwzfChw2Mf6tieF/AO6pqocnxqE7c9nS24AAn09yc5Jf6Wsduh8P62uhX+4BujODwyfmuWti+O8fpQ7twgx+AZBkX7rT9uf3AXwXXffLkUmOBKiqr9CFwqlsXzcP/fKX0V0Fc2mSF0689AHgq8ARVXUg8A668Jpf7ver6l8BP03XVfHWAc3NB/8J/fBatgj+vj//P/Tb/YSqOgj43mTbA7froaq6hK77aP7zhweB/SZm+4mtLPqkieEVwB3b0+6j1HJXVb2hqg4D/h3wv5I8tX9tyH68g+6MAYAkjwd+HPjmztamXYfBr3m/QNfX/ky6U/ejgGcAn6Hr9593IfCbdEfSl2xvI1
V1EfAbwF8kOb6ffABwH/BA3x3zq/PzJzkmyc8k2ZsuTL/f17mQtXR96PtW1cZ+O06hC7EbJ9rdDNwNLEvyn4AhZxMk+eUkK5Mc0H+WcCpdoF7Xz/JF4LQkeyeZA16+ldW8M8l+SX4aOAP48yFtL1DXK5Is70e/S3d11sPbsR8vBM5IclR/Zvdu4Lq+C057CINf804HPlRVt/VHjXdV1V103QOvyY++bHQR3ZH0VRNdQtulqs4DzgIuT3Is8O/pziDuB/43jwzAA/tp36U72/g28J4BbfwN8ABd4NNfNnoL3bX284F3Bd0Hzn/Tr/v7PLL7ZVvuozszuY3uGv7/AfzqRJ/6O4F/2df9LrZ+drQW+AbdZaDvqaorB7a9LccA1yV5APgYcGZV/R0D92NVfbqv/SPAnf02nLYIdWkXEh/EIklt8Yhfkhpj8EtSYwx+SWqMwS9Jjdml7s558MEH1+zs7LTLkKTdxrp16+6pqpntWWaXCv7Z2Vmuv/76aZchSbuNJEO/Pf9DdvVIUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxowa/EkOSnJpkq/2D7s4bsz2JEkLG/s6/vcBn6qql6d7ePZ+Cy0gSRrXaMHfP9btecAvA1TVD4AfjNWeJGmYMY/4n0L3ZKMP9Y/uW0f3UIgHJ2dKsgpYBbBixYodbmx29eU7XulO2HD2yqm0K0k7asw+/mXAs4EPVNXRdI97W73lTFW1pqrmqmpuZma7bjchSdoBYwb/RmBjVc0/g/RSujcCSdIUjRb8/fNab0/ytH7SScBXxmpPkjTM2Ff1vAm4oL+i5xbgjJHbkyQtYNTgr6ovAnNjtiFJ2j5+c1eSGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGrNszJUn2QDcDzwMbK6quTHbkyQtbNTg772wqu5ZgnYkSQPY1SNJjRn7iL+AK5MU8MdVtWbLGZKsAlYBrFixYuRyFt/s6sun1vaGs1dOrW1Ju6+xj/iPr6pnA6cCv57keVvOUFVrqmququZmZmZGLkeSNGrwV9Ud/e9NwEeBY8dsT5K0sNGCP8njkxwwPwy8CLhprPYkScOM2cd/CPDRJPPtXFhVnxqxPUnSAKMFf1XdAhw51volSTvGyzklqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1ZvTgT7JXkhuTfGLstiRJC1uKI/4zgfVL0I4kaYBRgz/JcmAlcM6Y7UiShls28vrfC7wNOODRZkiyClgFsGLFipHL2bPMrr58Ku1uOHvlVNqVtDhGO+JP8mJgU1Wt29Z8VbWmquaqam5mZmasciRJvTG7eo4HXpJkA/Bh4MQk54/YniRpgNGCv6reXlXLq2oWOA24qqp+aaz2JEnDeB2/JDVm7A93Aaiqa4BrlqItSdK2ecQvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1ZlDwJzl+yDRJ0q5v6BH/HwycJknaxW3z7pxJjgOeC8wk+a2Jlw4E9hqzMEnSOBa6LfNjgf37+Safm3sf8PKxipIkjWebwV9Va4G1Sc6tqluXqCZJ0oiGPojlcUnWALOTy1TViWMUJUkaz9DgvwT4I+Ac4OHxypEkjW1o8G+uqg+MWokkaUkMvZzz40l+LcmhSZ44/zNqZZKkUQw94j+9//3WiWkFPGVxy5EkjW1Q8FfVk8cuRJK0NAYFf5LXbW16Vf3p4pYjSRrb0K6eYyaG9wFOAm4ADH
5J2s0M7ep50+R4kh8D/myUiiRJo9rR2zL/PXDEtmZIsk+Szyf5f0luTvKuHWxLkrSIhvbxf5zuKh7obs72DODiBRb7R+DEqnogyd7AZ5N8sqqu3eFqJUk7bWgf/3smhjcDt1bVxm0tUFUFPNCP7t3/1KMvIUlaCkP7+NcmOYQffcj79SHLJdkLWAc8FXh/VV23lXlWAasAVqxYMWS1mrLZ1ZdPre0NZ6+cWtvSnmLoE7heCXweeAXwSuC6JAvelrmqHq6qo4DlwLFJnrWVedZU1VxVzc3MzGxf9ZKk7Ta0q+c/AsdU1SaAJDPA/wEuHbJwVd2b5BrgFOCmHahTkrRIhl7V85j50O99e6Flk8wkOagf3hc4GfjqDlUpSVo0Q4/4P5XkCuCifvxVwF8usMyhwHl9P/9jgIur6hM7VqYkabEs9MzdpwKHVNVbk7wM+FkgwOeAC7a1bFV9CTh6sQqVJC2Ohbp63gvcD1BVl1XVb1XVW+iO9t87dnGSpMW3UPDP9kfuj1BV19M9hlGStJtZKPj32cZr+y5mIZKkpbFQ8H8hyRu2nJjk9XRfzJIk7WYWuqrnzcBHk7yGHwX9HPBY4BfHLEySNI5tBn9VfQt4bpIXAvPfur28qq4avTJJ0iiG3qvnauDqkWuRJC2BHb0fvyRpN2XwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqzGjBn+RJSa5Osj7JzUnOHKstSdJwgx62voM2A2dV1Q1JDgDWJfmrqvrKiG1KkhYw2hF/Vd1ZVTf0w/cD64HDx2pPkjTMmEf8P5RkFjgauG4rr60CVgGsWLFiKcrRbmx29eXTLkFadBvOXrmk7Y3+4W6S/YGPAG+uqvu2fL2q1lTVXFXNzczMjF2OJDVv1OBPsjdd6F9QVZeN2ZYkaZgxr+oJ8CfA+qr63bHakSRtnzGP+I8HXgucmOSL/c/Pj9ieJGmA0T7crarPAhlr/ZKkHeM3dyWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMaMFf5IPJtmU5Kax2pAkbb8xj/jPBU4Zcf2SpB0wWvBX1V8D3xlr/ZKkHTP1Pv4kq5Jcn+T6u+++e9rlSNIeb+rBX1VrqmququZmZmamXY4k7fGmHvySpKVl8EtSY8a8nPMi4HPA05JsTPL6sdqSJA23bKwVV9Wrx1q3JGnH2dUjSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMaMGf5JTknwtyTeSrB6zLUnSMKMFf5K9gPcDpwLPBF6d5JljtSdJGmbMI/5jgW9U1S1V9QPgw8BLR2xPkjTAshHXfThw+8T4RuBntpwpySpgVT/6QJKvjVjTozkYuGcK7U6T29wGt3k3kN/ZqcWftr0LjBn82cq0+mcTqtYAa0asY0FJrq+quWnWsNTc5ja4zXu+JNdv7zJjdvVsBJ40Mb4cuGPE9iRJA4wZ/F8Ajkjy5CSPBU4DPjZie5KkAUbr6qmqzUl+A7gC2Av4YFXdPFZ7O2mqXU1T4ja3wW3e82339qbqn3W7S5L2YH5zV5IaY/BLUmOaD/6WbiuR5ElJrk6yPsnNSc6cdk1LJcleSW5M8olp17IUkhyU5NIkX+3/3sdNu6axJXlL/+/6piQXJdln2jUttiQfTLIpyU0T056Y5K+SfL3//YSF1tN08Dd4W4nNwFlV9QzgOc
Cv7+HbO+lMYP20i1hC7wM+VVVPB45kD9/2JIcDvwnMVdWz6C4oOW26VY3iXOCULaatBj5dVUcAn+7Ht6np4Kex20pU1Z1VdUM/fD9dGBw+3arGl2Q5sBI4Z9q1LIUkBwLPA/4EoKp+UFX3TreqJbEM2DfJMmA/9sDvDVXVXwPf2WLyS4Hz+uHzgF9YaD2tB//WbiuxxwchQJJZ4GjguulWsiTeC7wN+KdpF7JEngLcDXyo7946J8njp13UmKrqm8B7gNuAO4HvVdWV061qyRxSVXdCd3AH/IuFFmg9+AfdVmJPk2R/4CPAm6vqvmnXM6YkLwY2VdW6adeyhJYBzwY+UFVHAw8y4PR/d9b3a78UeDJwGPD4JL803ap2Xa0Hf3O3lUiyN13oX1BVl027niVwPPCSJBvouvJOTHL+dEsa3UZgY1XNn81dSvdGsCc7Gfi7qrq7qh4CLgOeO+Walsq3khwK0P/etNACrQd/U7eVSBK6ft/1VfW7065nKVTV26tqeVXN0v19r6qqPfpIsKruAm5PMn/XxpOAr0yxpKVwG/CcJPv1/85PYg//QHvCx4DT++HTgb9YaIEx7865y9vNbiuxGI4HXgt8OckX+2nvqKq/nGJNGsebgAv6A5pbgDOmXM+oquq6JJcCN9BdvXYje+CtG5JcBLwAODjJRuA/A2cDFyd5Pd0b4CsWXI+3bJCktrTe1SNJzTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmP+PxjGGEqlNlVlAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Python3 program for a word frequency \n",
"# counter after crawling a web-page \n",
"import requests \n",
"from bs4 import BeautifulSoup \n",
"import operator \n",
"from collections import Counter \n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
" \n",
"'''Function defining the web-crawler/core \n",
"spider, which will fetch information from \n",
"a given website, and push the contents to \n",
"the second function clean_wordlist()'''\n",
"def start(url): \n",
" \n",
" # empty list to store the contents of \n",
" # the website fetched from our web-crawler \n",
" wordlist = [] \n",
" source_code = requests.get(url).text \n",
" \n",
" # BeautifulSoup object which will \n",
" # ping the requested url for data \n",
" soup = BeautifulSoup(source_code, 'html.parser') \n",
" \n",
" # Text in given web-page is stored under \n",
" # the <div> tags with class <entry-content> \n",
" for each_text in soup.findAll('div', {'class':'entry-content'}): \n",
" content = each_text.text \n",
" \n",
" # use split() to break the sentence into \n",
" # words and convert them into lowercase \n",
" words = content.lower().split() \n",
" \n",
" for each_word in words: \n",
" wordlist.append(each_word) \n",
" clean_wordlist(wordlist) \n",
" \n",
"# Function removes any unwanted symbols \n",
"def clean_wordlist(wordlist): \n",
" \n",
" clean_list =[] \n",
" for word in wordlist: \n",
" symbols = '\\!@#$%^&*()_-+={[}]|\\;:\"<>?/., '\n",
" \n",
" for i in range (0, len(symbols)): \n",
" word = word.replace(symbols[i], '') \n",
" \n",
" if len(word) > 0: \n",
" clean_list.append(word) \n",
" create_dictionary(clean_list) \n",
" \n",
"# Creates a dictionary conatining each word's \n",
"# count and top_20 ocuuring words \n",
"def create_dictionary(clean_list): \n",
" word_count = {} \n",
" \n",
" for word in clean_list: \n",
" if word in word_count: \n",
" word_count[word] += 1\n",
" else: \n",
" word_count[word] = 1\n",
" \n",
" ''' To get count of each word in \n",
" the crawled page --> \n",
" \n",
" # operator.itemgetter() takes one \n",
" # parameter either 1(denotes keys) \n",
" # or 0 (denotes corresponding values) \n",
" \n",
" for key, value in sorted(word_count.items(), \n",
" key = operator.itemgetter(1)): \n",
" print (\"% s : % s \" % (key, value)) \n",
" \n",
" <-- '''\n",
" \n",
" \n",
" c = Counter(word_count) \n",
" \n",
" # returns the most occurring elements \n",
" top = c.most_common(10) \n",
" print(top) \n",
" print(type(top))\n",
" labels, ys = zip(*top)\n",
" xs = np.arange(len(labels)) \n",
" width = 1\n",
"\n",
" plt.bar(xs, ys, width, align = 'center')\n",
" plt.title(\"Avi Kasliwal Submission\")\n",
" plt.xticks(DayOfWeekOfCall, LABELS)\n",
" plt.ylabel(\"Count\")\n",
" \n",
"# Driver code \n",
"if __name__ == '__main__': \n",
" start(\"https://www.geeksforgeeks.org/binary-tree-data-structure/\") \n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pyttsx3 #pip install pyttsx3
import speech_recognition as sr #pip install speechRecognition
from textblob import TextBlob
import flair

# Initialise the text-to-speech engine.  'sapi5' selects the Windows
# Speech API backend, so this script is Windows-specific as written.
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
# print(voices[1].id)
engine.setProperty('voice', voices[0].id)  # use the first installed voice

def speak(sent):
    """Speak *sent* aloud via the module-level pyttsx3 engine.

    runAndWait() blocks until the queued utterance finishes playing.
    """
    engine.say(sent)
    engine.runAndWait()

def listen():
    """Capture one utterance from the microphone and return it as text.

    Returns
    -------
    str
        The recognised sentence, or the literal string "None" when
        recognition fails.  (The "None" string sentinel is kept for
        backward compatibility with existing callers.)
    """
    # It takes microphone input from the user and returns string output.
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        r.pause_threshold = 1  # seconds of silence that end the utterance
        audio = r.listen(source)

    try:
        print("Recognizing...")
        # Google Web Speech API with the Indian-English language model.
        query = r.recognize_google(audio, language='en-in')
        print(f"User said: {query}\n")
    except sr.UnknownValueError:
        # Audio was captured but could not be understood.  The original
        # caught bare Exception, which hid network/API errors too.
        print("Say that again please...")
        return "None"
    except sr.RequestError as e:
        # API unreachable, bad key, quota, etc. — report instead of
        # silently pretending the speech was unintelligible.
        print(f"Could not request results: {e}")
        return "None"
    return query


if __name__ == "__main__":
sent = listen()
speak(f"input sentence is : {sent}")
speak("Sentiment of the sentence according to textblob is ...")
speak(TextBlob(sent).sentiment.polarity)
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')
s = flair.data.Sentence(sent)
flair_sentiment.predict(s)
total_sentiment = s.labels
speak("Sentiment of the sentence according to flair is ...")
speak(total_sentiment)