Programming > CODING SOLUTION > Questions and Answers > NLP_Using_Python_ College of Engineering & Technology CSE AI (All)
# NLP Using Python
#
# Each problem below is a self-contained script as presented in the source
# document; the shared preamble is therefore repeated per problem.

# ---------------------------------------------------------------------------
# 1. HandsOn - Simple Operations with text
# ---------------------------------------------------------------------------

# a. Problem 1
#!/bin/python3

import math
import os
import random
import re
import sys
import zipfile

# Point NLTK_DATA at the local data directory before the nltk imports below
# (presumably so nltk picks the path up when it is imported).
os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"

from nltk.corpus import gutenberg
from nltk.text import Text


#
# Complete the 'calculateWordCounts' function below.
#
def calculateWordCounts(text):
    """Print three statistics about *text*, one per line.

    Printed values, in order: the total number of tokens, the number of
    distinct tokens, and the floor of (total tokens / distinct tokens).
    """
    n_words = len(text)
    print(n_words)

    n_unique_words = len(set(text))
    print(n_unique_words)

    # Integer floor division; equals math.floor(n_words / n_unique_words)
    # for these non-negative counts.
    word_coverage1 = n_words // n_unique_words
    print(word_coverage1)


if __name__ == '__main__':
    text = input()

    # Unpack the bundled corpus archive on first run only.
    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    text = Text(gutenberg.words(text))
    calculateWordCounts(text)


# b. Problem 2
#!/bin/python3

import math
import os
import random
import re
import sys
import zipfile

os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"

from nltk.corpus import gutenberg
from nltk.text import Text


#
# Complete the 'filterWords' function below.
#
def filterWords(text):
    """Return ``(ing_words, big_words, upper_words)`` selected from *text*.

    * ``ing_words``   - distinct tokens ending with ``'ing'``.
    * ``big_words``   - tokens longer than 15 characters.
    * ``upper_words`` - distinct tokens for which ``str.isupper()`` is true.

    All three are lists; the caller sorts them before printing.
    """
    unique_words = set(text)

    ing_words = [word for word in unique_words if word.endswith('ing')]
    # NOTE(review): big_words is drawn from the full text, not unique_words,
    # so repeated tokens appear repeatedly - confirm this is intended.
    big_words = [word for word in text if len(word) > 15]
    upper_words = [word for word in unique_words if word.isupper()]

    return (ing_words, big_words, upper_words)


if __name__ == '__main__':
    text = input()

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    text = Text(gutenberg.words(text))
    ing_words, big_words, upper_words = filterWords(text)

    print(sorted(ing_words))
    print(sorted(big_words))
    print(sorted(upper_words))


# c. Problem 3
#!/bin/python3

import math
import os
import random
import re
import sys
import zipfile

os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"

from nltk.corpus import gutenberg
from nltk.text import Text
import nltk


#
# Complete the 'findWordFreq' function below.
#
def findWordFreq(text, word):
    """Return ``(wordfreq, mostfreq)`` for *text*.

    ``wordfreq`` is the number of tokens in *text* equal to *word*
    (exact, case-sensitive comparison).  ``mostfreq`` is the value of
    ``FreqDist.max()`` computed over the alphabetic tokens of *text*.
    """
    wordfreq = sum(1 for w in text if w == word)

    # Only purely alphabetic tokens take part in the frequency distribution.
    textfreq = nltk.FreqDist(token for token in text if token.isalpha())
    mostfreq = textfreq.max()

    return (wordfreq, mostfreq)


if __name__ == '__main__':
    text = input()
    word = input()

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    text = Text(gutenberg.words(text))
    word_freq, max_freq = findWordFreq(text, word)

    print(word_freq)
    print(max_freq)


# ---------------------------------------------------------------------------
# 2. Hands-On - Accessing Text Corpora
# ---------------------------------------------------------------------------

# a. Problem 1
#!/bin/python3

import math
import os
import random
import re
import sys
import zipfile

os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"

import nltk
from nltk.corpus import inaugural


#
# Complete the 'accessTextCorpora' function below.
#
# The function accepts following parameters:
#  1. STRING fileid
#  2. STRING word
#
def accessTextCorpora(fileid, word):
    """Return ``(wordcoverage, ed_words, wordfreq)`` for an inaugural file.

    * ``wordcoverage`` - total tokens divided by distinct tokens, as an int.
    * ``ed_words``     - list of distinct tokens ending with ``'ed'``.
    * ``wordfreq``     - count of *word* among the lower-cased alphabetic
      tokens.  *word* itself is looked up as given (it is not lower-cased
      here).
    """
    file_words = inaugural.words(fileid)
    unique_words = set(file_words)

    # Integer floor division; equals int(total / distinct) for these
    # non-negative counts.
    wordcoverage = len(file_words) // len(unique_words)

    ed_words = [words for words in unique_words if words.endswith('ed')]

    textfreq2 = [token.lower() for token in file_words if token.isalpha()]
    textfreq = nltk.FreqDist(textfreq2)
    wordfreq = textfreq[word]

    return wordcoverage, ed_words, wordfreq


if __name__ == '__main__':
    fileid = input()
    word = input()

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    word_coverage, ed_words, word_freq = accessTextCorpora(fileid, word)

    print(word_coverage)
    print(sorted(ed_words))
    print(word_freq)


# b. Problem 2 [Show More]
Last updated: 2 years ago
Preview 1 out of 12 pages
Buy this document to get the full access instantly
Instant Download Access after purchase
Buy Now · Instant download
We Accept:
Can't find what you want? Try our AI powered Search
Connected school, study & course
About the document
Uploaded On
Jan 17, 2023
Number of pages
12
Written in
All
This document has been written for:
Uploaded
Jan 17, 2023
Downloads
0
Views
163
Scholarfriends.com Online Platform by Browsegrades Inc. 651N South Broad St, Middletown DE. United States.
We're available through e-mail, Twitter, Facebook, and live chat.
FAQ
Questions? Leave a message!
Copyright © Scholarfriends · High quality services·