NLP Using Python: Fresco Play Hands-On Solutions (HackerRank)

NLP Using Python Fresco Play hands-on solutions: Raw Text, Corpora, Conditional Frequency Distribution, Bigrams, N-grams, Collocations, POS Tagging, and Stemming.

Note: If you run into any errors with these programs, please comment below.

LAB 1: Welcome to NLP Using Python - Simple Text Operations.

Question 1: Hands on NLP Python simple text operations - 1

Solution 1:

#!/bin/python3

#Write your code here


import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"
from nltk.corpus import gutenberg
from nltk.text import Text

#
# Complete the 'calculateWordCounts' function below.
#

def calculateWordCounts(text):
    # Write your code here
    
    # Task 1:
    # Find the number of words in 'text', and print the result.
    total_words = len(text)
    print(total_words)

    # Task 2:
    # Find the number of unique words in 'text', and print the result.
    unique_words = len(set(text))
    print(unique_words)

    # Task 3:
    # Calculate the word coverage of 'text' from the number of words and the number
    # of unique words, and print the result.
    word_coverage = total_words / unique_words
    print(math.floor(word_coverage))
    
if __name__ == '__main__':
    text = input()
    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())
    
    text = Text(gutenberg.words(text))

    calculateWordCounts(text)
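
For intuition, here is a minimal, self-contained sketch of the same word-coverage calculation on a hypothetical token list (no corpus download needed; the tokens are made up purely for illustration):

import math

# Hypothetical token list standing in for gutenberg.words(fileid).
tokens = ["the", "cat", "sat", "on", "the", "mat", "the", "end"]

total_words = len(tokens)            # 8 tokens in total
unique_words = len(set(tokens))      # 6 distinct tokens
word_coverage = math.floor(total_words / unique_words)

print(total_words, unique_words, word_coverage)  # 8 6 1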

Question 2: Hands on NLP Python simple text operations - 2

Solution 2:

#!/bin/python3

#Write your code here


import math
import os
import random
import re
import sys
import zipfile

os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"
from nltk.corpus import gutenberg
from nltk.text import Text


#
# Complete the 'filterWords' function below.
#


def filterWords(text):
    # Write your code here
    
    # Task 1:
    # Filter the words ending with 'ing' from the set of unique words of 'text',
    # and store them in the 'ing_words' variable as a list.
    unique_words = set(text)
    ing_words = [word for word in unique_words if word.endswith('ing')]

    # Task 2:
    # Filter the words whose length is greater than 15 from the complete set of 'text',
    # and store them in the 'large_words' variable as a list.
    large_words = [word for word in text if len(word.strip()) > 15]

    # Task 3:
    # Filter the words having all letters in upper case from the set of unique words
    # of 'text', and store them in the 'upper_words' variable as a list.
    upper_words = [word for word in unique_words if isinstance(word, str) and word.isupper()]

    return ing_words, large_words, upper_words

if __name__ == '__main__':
    text = input()
    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())
            
    text = Text(gutenberg.words(text))

    ing_words, big_words, upper_words = filterWords(text)

    print(sorted(ing_words))
    print(sorted(big_words))
    print(sorted(upper_words))

Question 3: Hands on NLP Python simple text operations - 3.

Solution 3:

#!/bin/python3

#Write your code here



import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"
from nltk.corpus import gutenberg
from nltk.text import Text
import nltk


#
# Complete the 'findWordFreq' function below.
#
# 


def findWordFreq(text, word):
    # Write your code here
    
    # Task 1:
    # Find the frequency for the given 'word', and store it into the variable 'wordfreq'.
    textfreq = [word for word in text if word.isalpha()]
    FreqDist = nltk.FreqDist(textfreq)
    wordfreq=  FreqDist[word]
    
    # Task 2:
    # Find the word which has a maximum frequency from the 'textfreq', and store into the variable 'maxfreq'.
    max_value = max(FreqDist.values())    
    maxfreq= [ item for item in FreqDist if FreqDist[item]== max_value][0]
    
    return    wordfreq, maxfreq


if __name__ == '__main__':
    text = input()
    word = input()
    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    text = Text(gutenberg.words(text))

    word_freq, max_freq = findWordFreq(text, word)

    print(word_freq)
    print(max_freq)
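
For reference, here is a minimal sketch of how nltk.FreqDist behaves on a toy token list (the tokens are made up for illustration and stand in for the Gutenberg text):

import nltk

# Hypothetical token list standing in for the Gutenberg text.
tokens = ['the', 'whale', 'and', 'the', 'sea', 'and', 'the', 'ship']

freqdist = nltk.FreqDist(tokens)
print(freqdist['whale'])   # 1     -> frequency of a given word
print(freqdist.max())      # 'the' -> the most frequent word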

LAB 2: Welcome to NLP Using Python - Accessing Text Corpora

Question 1: Hands on - NLP Python accessing text corpora.

Solution 1: NLP Python accessing text corpora.

#!/bin/python3

#Write your code here



import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"
import nltk


#
# Complete the 'accessTextCorpora' function below.
#
# The function accepts following parameters:
#  1. STRING fileid
#  2. STRING word
#
from nltk.corpus import inaugural
def accessTextCorpora(fileid, word):
    # Write your code here
    
    # Task 1:
    # Compute the word coverage for the given 'fileid' of the 'inaugural' text corpus,
    # and store the result in 'wordcoverage'.
    corpus_words = inaugural.words(fileid)
    total_words_length = len(corpus_words)
    unique_words_length = len(set(corpus_words))

    wordcoverage = int(total_words_length / unique_words_length)

    # Task 2:
    # Filter the words ending with 'ed' from the set of unique words for the given
    # 'fileid' of the 'inaugural' corpus, and store them in the list 'ed_words'.
    ed_words = [w for w in set(corpus_words) if w.endswith("ed")]

    # Task 3:
    # Convert all the words into lowercase, keep only the words made up of alphabets,
    # and find the frequency of the given 'word'. Store the result in 'wordfreq'.
    lower_alpha_words = [w.lower() for w in corpus_words if w.isalpha()]
    wordfreq = lower_alpha_words.count(word)
    
    # Return wordcoverage, ed_words ,wordfreq
    return wordcoverage, ed_words ,wordfreq

if __name__ == '__main__':
    fileid = input()
    word = input()

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    word_coverage, ed_words, word_freq = accessTextCorpora(fileid, word)

    print(word_coverage)
    print(sorted(ed_words))
    print(word_freq)
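
As a quick sanity check, the same three computations can be run interactively. This sketch assumes the NLTK 'inaugural' corpus is available locally (e.g. via nltk.download('inaugural')) and uses the real fileid '1789-Washington.txt'; the word 'citizens' is just an example query.

from nltk.corpus import inaugural

# Assumes the 'inaugural' corpus has already been downloaded.
fileid = '1789-Washington.txt'
words = inaugural.words(fileid)

print(int(len(words) / len(set(words))))                       # word coverage
print(sorted(w for w in set(words) if w.endswith('ed'))[:5])   # a few 'ed' words
print([w.lower() for w in words if w.isalpha()].count('citizens'))  # frequency of 'citizens'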

Question 2: Hands on NLP python user specific text corpora.

Solution 2:

#!/bin/python3

#Write your code here



import math
import os
import random
import re
import sys
import nltk


#
# Complete the 'createUserTextCorpora' function below.
#
# The function accepts following parameters:
#  1. STRING filecontent1
#  2. STRING filecontent2
#
from nltk.corpus import PlaintextCorpusReader
def createUserTextCorpora(filecontent1, filecontent2):
    # Write your code here

    # Task 1:
    # Create a text file named 'content1.txt' inside the 'nltk_data' folder,
    # and write 'filecontent1' into it.
    #
    with open(os.path.join('nltk_data/', 'content1.txt'), "w") as f:
        f.write(filecontent1)

    # Task 2:
    # Create a text file named 'content2.txt' inside the 'nltk_data' folder,
    # and write 'filecontent2' into it.
    #
    with open(os.path.join('nltk_data/', 'content2.txt'), "w") as f2:
        f2.write(filecontent2)
     
    # Task 3:
    # Convert your collection of text files inside the 'nltk_data' folder into a text corpus, store it into the 'text_corpus' variable.
    #
    corpus_root_directory = 'nltk_data/'
    text_corpus = PlaintextCorpusReader(corpus_root_directory,r'.*\.txt')
    
    
    p1= text_corpus.words('content1.txt')
    p2= text_corpus.words('content2.txt')
    
    # Task 4:
    # Compute the number of words and the number of unique words for each file ID in the
    # text corpus, and store them in 'no_of_words_corpus1', 'no_of_unique_words_corpus1',
    # 'no_of_words_corpus2' and 'no_of_unique_words_corpus2'.
    #
    no_of_words_corpus1 = len(p1)
    no_of_unique_words_corpus1 = len(set(p1))

    no_of_words_corpus2 = len(p2)
    no_of_unique_words_corpus2 = len(set(p2))
    
    return text_corpus, no_of_words_corpus1, no_of_unique_words_corpus1, no_of_words_corpus2, no_of_unique_words_corpus2
        
   

if __name__ == '__main__':
    filecontent1 = input()

    filecontent2 = input()

    path = os.path.join(os.getcwd(), "nltk_data")
    os.makedirs(path, exist_ok=True)
    for file in os.listdir(path):
        os.remove(os.path.join(path, file))


    text_corpus, no_of_words_corpus1, no_of_unique_words_corpus1, no_of_words_corpus2, no_of_unique_words_corpus2 = createUserTextCorpora(filecontent1, filecontent2)
    expected_corpus_files = ['content1.txt', 'content2.txt']
    if type(text_corpus) == nltk.corpus.reader.plaintext.PlaintextCorpusReader and sorted(list(text_corpus.fileids())) == expected_corpus_files:
        print(no_of_words_corpus1)
        print(no_of_unique_words_corpus1)
        print(no_of_words_corpus2)
        print(no_of_unique_words_corpus2)
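
To see what PlaintextCorpusReader does in isolation, here is a minimal sketch that builds a tiny throw-away corpus in a temporary directory; the file name and contents are hypothetical and only NLTK itself is required (no corpus download).

import os
import tempfile
from nltk.corpus import PlaintextCorpusReader

# Build a tiny throw-away corpus in a temporary directory.
root = tempfile.mkdtemp()
with open(os.path.join(root, 'doc1.txt'), 'w') as f:
    f.write("Hello corpus world. Hello again.")

corpus = PlaintextCorpusReader(root, r'.*\.txt')
print(corpus.fileids())               # ['doc1.txt']
print(len(corpus.words('doc1.txt')))  # token count, punctuation included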

LAB 3: Welcome to NLP Using Python - Conditional Frequency Distribution.

Problem 3: Hands-On: Conditional Frequency.

Solution : Conditional Frequency

#!/bin/python3

#Write your code here



import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd()+"/nltk_data"
import nltk

#
# Complete the 'calculateCFD' function below.
#
# The function accepts following parameters:
#  1. STRING_ARRAY cfdconditions
#  2. STRING_ARRAY cfdevents
#
from nltk.corpus import brown, stopwords
# nltk.download('brown')
def calculateCFD(cfdconditions, cfdevents):
    
    # Task 1:
    # Determine the conditional frequency of all the words (converted into lower case,
    # with all the stop words removed) for the given categories 'cfdconditions' of the
    # Brown corpus. Store the result in 'cdev_cfd' and tabulate it for the given 'cfdevents'.
    stop_words = set(stopwords.words('english'))

    cdev_cfd = nltk.ConditionalFreqDist(
        (genre, word.lower())
        for genre in cfdconditions
        for word in brown.words(categories=genre)
        if word.lower() not in stop_words)

    cdev_cfd.tabulate(conditions=cfdconditions, samples=cfdevents)

    # Task 2:
    # Determine the words ending with 'ing' or 'ed' (ignoring stop words). Compute the
    # conditional frequency distribution, where the condition is the category from
    # 'cfdconditions' and the event is either 'ing' or 'ed'.
    # Store the conditional frequency distribution in the variable 'inged_cfd'.
    inged_cfd = nltk.ConditionalFreqDist(
        (genre, 'ing' if word.lower().endswith('ing') else 'ed')
        for genre in cfdconditions
        for word in brown.words(categories=genre)
        if word.lower().endswith(('ing', 'ed')) and word.lower() not in stop_words)

    inged_cfd.tabulate(conditions=cfdconditions, samples=['ed', 'ing'])
    

if __name__ == '__main__':
    cfdconditions_count = int(input().strip())

    cfdconditions = []

    for _ in range(cfdconditions_count):
        cfdconditions_item = input()
        cfdconditions.append(cfdconditions_item)

    cfdevents_count = int(input().strip())

    cfdevents = []

    for _ in range(cfdevents_count):
        cfdevents_item = input()
        cfdevents.append(cfdevents_item)

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    calculateCFD(cfdconditions, cfdevents)
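
If the Brown corpus is not at hand, the ConditionalFreqDist idea can be illustrated with a few hand-made (genre, word) pairs; the pairs below are invented purely for illustration.

import nltk

# Toy (condition, event) pairs standing in for the Brown corpus.
pairs = [('news', 'said'), ('news', 'voted'), ('news', 'voting'),
         ('romance', 'loving'), ('romance', 'loved'), ('romance', 'said')]

cfd = nltk.ConditionalFreqDist(pairs)
print(cfd['news']['said'])   # 1
cfd.tabulate(conditions=['news', 'romance'], samples=['said', 'loved'])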

LAB 4: Welcome to NLP Using Python - Processing Raw Text.

Problem 4: Hands-on Processing Raw Text

Solution:

#!/bin/python3

#Write your code here



import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"
import nltk

#
# Complete the 'processRawText' function below.
#
# The function accepts STRING textURL as parameter.
#
from urllib import request
def processRawText(textURL):
    # Write your code here
    
    # Task 1: 
    # Read the text content from the given link 'textURL'. Store the content in the variable 'textcontent'.
    #    
    url = textURL
    textcontent = request.urlopen(url).read()
    
    # Task 2:
    # Tokenize all the words in 'textcontent', and convert them into lower case.
    # Store the tokenized list of words in 'tokenizedlcwords'.
    #
    text_content1 = textcontent.decode('unicode_escape')  # convert the bytes to str
    tokens1 = nltk.word_tokenize(text_content1)

    tokenizedlcwords = [word.lower() for word in tokens1]
    
    # Task 3:
    # Find the number of words in 'tokenizedlcwords', and store the result in  'noofwords'.
    #
    noofwords = len(tokenizedlcwords)
    
    # Task 4:
    # Find the number of unique words in 'tokenizedlcwords', and store the result in  'noofunqwords'.
    #
    noofunqwords = len(set( tokenizedlcwords))   
    
    # Task 5 :
    # Calculate the word coverage of 'tokenizedlcwords' obtained from the number of words and number of unique words, Store the result in the 'wordcov'.
    #
    wordcov =  int( noofwords / noofunqwords)
    
    # Task 6:
    # Determine the frequency distribution of all words having only alphabets in
    # 'tokenizedlcwords'. Store the result in the variable 'wordfreq'.
    wordfreq = nltk.FreqDist(word for word in tokenizedlcwords if word.isalpha())

    # Task 7:
    # Find the most frequent word of 'tokenizedlcwords'. Store the result in the
    # variable 'maxfreq'.
    maxfreq = wordfreq.max()

    return noofwords, noofunqwords, wordcov, maxfreq

if __name__ == '__main__':
    textURL = input()

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    noofwords, noofunqwords, wordcov, maxfreq = processRawText(textURL)
    print(noofwords)
    print(noofunqwords)
    print(wordcov)
    print(maxfreq)
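
The same pipeline can be tried offline on a short string instead of a URL. This is only a sketch: the raw text is made up, and it assumes the 'punkt' tokenizer models are available (e.g. via nltk.download('punkt')).

import nltk

# Hypothetical raw text standing in for the downloaded document.
raw = "The quick brown fox jumps over the lazy dog. The dog sleeps."

tokens = [t.lower() for t in nltk.word_tokenize(raw)]
freq = nltk.FreqDist(t for t in tokens if t.isalpha())

print(len(tokens), len(set(tokens)))  # number of tokens and number of unique tokens
print(freq.max())                     # 'the'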

LAB 5: Welcome to NLP Using Python - Bigrams and Collocations.

Problem 5: Hands-on Bigrams, N-grams, and Collocations

Solution:

#!/bin/python3

#Write your code here



import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"

import nltk


#
# Complete the 'performBigramsAndCollocations' function below.
#
# The function accepts following parameters:
#  1. STRING textcontent
#  2. STRING word
#

#
from nltk.corpus import stopwords
def performBigramsAndCollocations(textcontent, word):
    # Write your code here
    
    # Task 1: Tokenize all the words given in 'textcontent'. A word may contain alphabets,
    # numbers or underscores. Store the tokenized list of words in 'tokenizedwords'.
    reg_exp = r'([A-Za-z0-9_]+)'
    tokenizedwords = nltk.regexp_tokenize(textcontent, pattern=reg_exp)

    # Task 2: Convert all the words into lowercase. Store the result in 'tokenizedwords'.
    tokenizedwords = [word.lower() for word in tokenizedwords]

    # Task 3: Compute bigrams of the list 'tokenizedwords'. Store the bigrams in
    # 'tokenizedwordsbigrams'.
    tokenizedwordsbigrams = nltk.bigrams(tokenizedwords)
    
    # Task 4:
    # Filter only the bigrams from 'tokenizedwordsbigrams' where neither word is a stop word.
    # Store the result in 'tokenizednonstopwordsbigrams'.
    sw = set(stopwords.words('english'))

    tokenizednonstopwordsbigrams = [bigram for bigram in tokenizedwordsbigrams
                                    if bigram[0] not in sw and bigram[1] not in sw]
    
    # Task 5:
    # Compute the Conditional Frequency of 'tokenizednonstopwordsbigrams', where condition and event refer to the word.
    # Store the result in 'cfd_bigrams'.
    
    cfd_bigrams = nltk.ConditionalFreqDist(tokenizednonstopwordsbigrams)
    
    # Task 6:
    # Determine the three most frequent words occurring after the given 'word'.
    # Store the result in 'mostfrequentwordafter'.
    mostfrequentwordafter = cfd_bigrams[word].most_common(3)
    
    # Task 7:
    # Generate collocations from 'tokenizedwords'. Store the list of collocation phrases
    # in 'collocationwords'.
    collocation_list = nltk.Text(tokenizedwords).collocation_list()
    collocationwords = [" ".join([w1, w2]) for w1, w2 in collocation_list]

    return mostfrequentwordafter, collocationwords
    
    

if __name__ == '__main__':
    textcontent = input()

    word = input()

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    mostfrequentwordafter, collocationwords = performBigramsAndCollocations(textcontent, word)
    print(sorted(mostfrequentwordafter, key=lambda element: (element[1], element[0]), reverse=True))
    print(sorted(collocationwords))
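
A quick way to see what bigrams and their conditional frequencies look like is to run them on a toy token list; the sentence below is invented for illustration and needs no corpus data.

import nltk

# Toy token list, purely for illustration.
tokens = "it was a bright cold day in april and the clocks were striking".split()

bigrams = list(nltk.bigrams(tokens))
print(bigrams[:3])               # [('it', 'was'), ('was', 'a'), ('a', 'bright')]

cfd = nltk.ConditionalFreqDist(bigrams)
print(cfd['a'].most_common(1))   # [('bright', 1)]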

LAB 6: Welcome to NLP Using Python - Stemming and Lemmatization.

Problem 6: Hands on Stemming and Lemmatization

Solution:

#!/bin/python3

#Write your code here



import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd()+"/nltk_data"
import nltk

#
# Complete the 'performStemAndLemma' function below.
#
# The function accepts STRING textcontent as parameter.

from nltk.corpus import stopwords
def performStemAndLemma(textcontent):
    # Write your code here
    # Task 1:
    # Tokenize all the words given in 'textcontent'. A word may contain alphabets,
    # numbers or underscores. Store the tokenized list of words in 'tokenizedwords'.
    reg_exp = r'([A-Za-z0-9_]+)'
    tokenizedwords = nltk.regexp_tokenize(textcontent, pattern=reg_exp)

    # Task 2: Convert all the unique words into lowercase. Store the result in 'tokenizedwords'.
    tokenizedwords = [word.lower() for word in set(tokenizedwords)]

    # Task 3:
    # Remove all the stop words from 'tokenizedwords'. Store the result in 'filteredwords'.
    sw = set(stopwords.words('english'))
    filteredwords = [word for word in tokenizedwords if word not in sw]

    # Task 4:
    # Stem each word in 'filteredwords' with 'PorterStemmer', and store the result in
    # the list 'porterstemmedwords'.
    porter = nltk.PorterStemmer()
    porterstemmedwords = [porter.stem(word) for word in filteredwords]

    # Task 5:
    # Stem each word in 'filteredwords' with 'LancasterStemmer', and store the result in
    # the list 'lancasterstemmedwords'.
    lancaster = nltk.LancasterStemmer()
    lancasterstemmedwords = [lancaster.stem(word) for word in filteredwords]

    # Task 6:
    # Lemmatize each word in 'filteredwords' with 'WordNetLemmatizer', and store the
    # result in the list 'lemmatizedwords'.
    lemmatizer = nltk.WordNetLemmatizer()
    lemmatizedwords = [lemmatizer.lemmatize(word) for word in filteredwords]
     
    
    return porterstemmedwords, lancasterstemmedwords, lemmatizedwords

if __name__ == '__main__':
    textcontent = input()

    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    porterstemmedwords, lancasterstemmedwords, lemmatizedwords = performStemAndLemma(textcontent)

    print(sorted(porterstemmedwords))
    print(sorted(lancasterstemmedwords))
    print(sorted(lemmatizedwords))
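
To see how the two stemmers and the lemmatizer differ, here is a small comparison sketch; the word list is made up, and the lemmatizer assumes the 'wordnet' data is available (e.g. nltk.download('wordnet')).

import nltk

# Compare PorterStemmer, LancasterStemmer and WordNetLemmatizer on sample words.
words = ['running', 'studies', 'better', 'leaves']

porter = nltk.PorterStemmer()
lancaster = nltk.LancasterStemmer()
lemmatizer = nltk.WordNetLemmatizer()

for w in words:
    print(w, porter.stem(w), lancaster.stem(w), lemmatizer.lemmatize(w))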



LAB 7: Welcome to NLP Using Python - POS Tagging

Problem 7: Hands on POS Tagging.

Solution: Hands on POS Tagging.

#!/bin/python3

#Write your code here
import math
import os
import random
import re
import sys
import zipfile
os.environ['NLTK_DATA'] = os.getcwd() + "/nltk_data"
from nltk.corpus import brown
import nltk



#
# Complete the 'tagPOS' function below.
#
# The function accepts following parameters:
#  1. STRING textcontent
#  2. STRING taggedtextcontent
#

def tagPOS(textcontent, taggedtextcontent, defined_tags):
    # Write your code here
    
    # Task 1:
    # Tag the part of speech for the given 'textcontent' words, store the result into the variable 'nltk_pos_tags'.
    words = nltk.word_tokenize(textcontent)    
    nltk_pos_tags= nltk.pos_tag(words)
    
    # Task 2:
    # Tag the part of speech for the given 'taggedtextcontent' words using the 'Tagged Text method'. Store the result into the variable 'tagged_pos_tag'.
    tagged_pos_tag =  [ nltk.tag.str2tuple(word) for word in taggedtextcontent.split() ]
    
    # Task 3:
    # Tag the part of speech for the given 'textcontent' words, using 'defined_tags' as
    # the model in the 'Lookup Tagger' method. Store the result in 'unigram_pos_tag'.
    #
    baseline_tagger = nltk.UnigramTagger(model=defined_tags)
    unigram_pos_tag = baseline_tagger.tag(words)
    
    return nltk_pos_tags, tagged_pos_tag, unigram_pos_tag
    

if __name__ == '__main__':
    textcontent = input()

    taggedtextcontent = input()
    
    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    defined_tags = dict(brown.tagged_words(tagset='universal'))

    nltk_pos_tags, tagged_pos_tag, unigram_pos_tag = tagPOS(textcontent, taggedtextcontent, defined_tags)

    print(nltk_pos_tags)
    print(tagged_pos_tag)
    print(unigram_pos_tag)
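
The three tagging styles used above can be tried in isolation with the sketch below. The sentence, the word/TAG string and the lookup dictionary are hypothetical examples, and the default tagger assumes the usual NLTK models are available (e.g. nltk.download('punkt') and nltk.download('averaged_perceptron_tagger')).

import nltk

sentence = "The quick brown fox jumps"

# 1. Default NLTK POS tagger.
print(nltk.pos_tag(nltk.word_tokenize(sentence)))

# 2. Tagged-text method: parse word/TAG strings into (word, tag) tuples.
print([nltk.tag.str2tuple(t) for t in "The/AT fox/NN".split()])

# 3. Lookup (unigram) tagger built from a hypothetical word-to-tag dictionary.
model = {'The': 'DET', 'fox': 'NOUN'}
tagger = nltk.UnigramTagger(model=model)
print(tagger.tag(['The', 'quick', 'fox']))  # unknown words get the tag None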

About the author

D Shwari
I'm a professor in the Department of Computer Science at National University. My main areas are data science and data analysis, along with project management for many computer-science-related sectors. My next project is on AI with deep learning.
