LAB 1
from gensim.downloader import load
print("Loading pre-trained GloVe model (50 dimensions)...")
model = load("glove-wiki-gigaword-50")
# Explore word relationships via vector arithmetic on the GloVe embeddings.
def ewr():
    result = model.most_similar(positive=['king', 'woman'], negative=['man'], topn=1)
    print("\nking - man + woman = ?", result[0][0])
    print("similarity:", result[0][1])

    result = model.most_similar(positive=['paris', 'italy'], negative=['france'], topn=1)
    print("\nparis - france + italy = ?", result[0][0])
    print("similarity:", result[0][1])

    result = model.most_similar(positive=['programming'], topn=5)
    print("\nTop 5 words similar to 'programming':")
    for word, similarity in result:
        print(word, similarity)

ewr()
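Optional check, not part of the original lab: the same analogy can be verified by hand from the raw 50-dimensional vectors, which makes the vector arithmetic behind most_similar concrete. A minimal sketch, assuming the model loaded above:

# Hedged sketch: reproduce the king - man + woman analogy from raw GloVe vectors.
import numpy as np

target = model['king'] - model['man'] + model['woman']
queen = model['queen']
cosine = np.dot(target, queen) / (np.linalg.norm(target) * np.linalg.norm(queen))
print("cosine(king - man + woman, queen) =", round(float(cosine), 4))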
LAB 2
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from gensim.downloader import load
def reduce_dimensions(embeddings):
    pca = PCA(n_components=2)
    reduced_embeddings = pca.fit_transform(embeddings)
    return reduced_embeddings

def visualize_embeddings(words, reduced_embeddings):
    plt.figure(figsize=(10, 6))
    for i, word in enumerate(words):
        x, y = reduced_embeddings[i]
        plt.scatter(x, y, color='blue', marker='o')
        plt.text(x + 0.02, y + 0.02, word, fontsize=12)
    plt.title("2D Visualization of Word Embeddings")
    plt.xlabel("PCA Component 1")
    plt.ylabel("PCA Component 2")
    plt.grid()
    plt.show()

def get_similar_words(word, model):
    print(f"Top 5 words similar to '{word}':")
    similar_words = model.most_similar(word, topn=5)
    for similar_word, similarity in similar_words:
        print(f"{similar_word} ({similarity:.4f})")

print("Loading pre-trained GloVe model (50 dimensions)...")
model = load("glove-wiki-gigaword-50")

words = ['football', 'basketball', 'soccer', 'tennis', 'cricket',
         'hockey', 'baseball', 'golf', 'volleyball', 'rugby']
embeddings = [model[word] for word in words]
reduced_embeddings = reduce_dimensions(embeddings)
visualize_embeddings(words, reduced_embeddings)
get_similar_words("programming", model)
LAB 3
from gensim.models import Word2Vec
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
corpus = [
"The patient was diagnosed with diabetes and hypertension.",
"RI scans reveal abnormalities in the brain tissue.",
"The treatment involves antibiotics and regular monitoring.",
"Symptoms include fever, fatigue, and muscle pain.",
"The vaccine is effective against several viral infections.",
"Doctors recommend physical therapy for recovery.",
"The clinical trial results were published in the journal.",
"The surgeon performed a minimally invasive procedure.",
"The prescription includes pain relievers and anti-inflammatory drugs.",
"The diagnosis confirmed a rare genetic disorder."
]
tokenized_corpus = [sentence.lower().split() for sentence in corpus]
model = Word2Vec(sentences=tokenized_corpus, vector_size=5, window=2, min_count=1, epochs=5)
word = input("Enter a word: ").lower()
if word in model.wv:
    similar = model.wv.most_similar(word, topn=5)
    print(f"Words similar to '{word}':")
    for i, (w, score) in enumerate(similar, 1):
        print(f"{i}. {w} (Similarity: {score:.4f})")
else:
    print("Word not found in vocabulary.")
words = list(model.wv.index_to_key)
word_vectors = model.wv[words]
pca = PCA(n_components=2)
result = pca.fit_transform(word_vectors)
plt.figure(figsize=(10, 8))
plt.scatter(result[:, 0], result[:, 1])
for i, word in enumerate(words):
    plt.annotate(word, xy=(result[i, 0], result[i, 1]))
plt.title("Word Embeddings (PCA Projection)")
plt.xlabel("PCA 1")
plt.ylabel("PCA 2")
plt.grid(True)
plt.show()
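A small follow-up, not in the original lab: the trained Word2Vec model also exposes raw vectors and pairwise similarities directly. A minimal sketch, assuming the model trained on the medical corpus above (note that the naive split() keeps punctuation attached, so only clean tokens such as 'patient' or 'treatment' are safe to look up):

# Hedged sketch: inspect the tiny 5-dimensional vectors learned above.
vector = model.wv['patient']                          # raw 5-d embedding for one token
score = model.wv.similarity('patient', 'treatment')   # cosine similarity between two tokens
print("Vector for 'patient':", vector)
print("similarity(patient, treatment):", round(float(score), 4))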
LAB 4
!pip install cohere gensim
import cohere
import gensim.downloader as api
co = cohere.Client("YOUR_COHERE_API_KEY")  # replace with your own Cohere API key
print("Loading word embeddings...")
model = api.load("glove-wiki-gigaword-100")
print("Model loaded successfully.")
prompt = "write an essay on natural disaster"
def get_first_enriched_prompt(prompt, topn=3):
    # Replace the first word that has GloVe neighbours with its most similar word.
    for word in prompt.split():
        try:
            similar_words = model.most_similar(word.strip('.,!?').lower(), topn=topn)
            for similar_word, _ in similar_words:
                enriched = prompt.replace(word, similar_word)
                return enriched
        except KeyError:
            continue
    return None

def get_response(text):
    try:
        return co.chat(model="command-r", message=text).text.strip()
    except Exception as e:
        return f"Error: {e}"
print(f"\nOriginal Prompt:\n{prompt}\nResponse:\n{get_response(prompt)}")
enriched_prompt = get_first_enriched_prompt(prompt)
if enriched_prompt:
    print(f"\nEnriched Prompt:\n{enriched_prompt}\nResponse:\n{get_response(enriched_prompt)}")
else:
    print("\nNo enriched prompt could be generated.")
LAB 5
from gensim.downloader import load
import random
print("Loading pre-trained Glove model (50 dimensions)...")
model =load("glove-wiki-gigaword-50")
print("Model loaded successfully!")
def create_paragraph (iw, sws):
paragraph ="The topic of (iw) is fascinating, often linked to terms like"
random.shuffle (sws)
for word in sws:
paragraph += str(word) + ","
paragraph = paragraph.rstrip(", ") + "."
return paragraph
iw = "hacking"
sws =model.most_similar(iw, topn=5)
words=[word for word, s in sws]
paragraph =create_paragraph (iw, words)
print (paragraph)
LAB 6
#6
!pip install transformers torch
from transformers import pipeline
sentiment_analyzer = pipeline("sentiment-analysis")
while True:
    user_input = input("\nPlease enter a sentence to analyze its sentiment (or type 'exit' to quit): ")
    if user_input.lower() == "exit":
        print("Exiting the program. Goodbye!")
        break
    if not user_input.strip():
        print("Please enter a non-empty sentence.")
        continue
    result = sentiment_analyzer(user_input)
    print("\nSentiment Analysis Result:")
    print(f"Label: {result[0]['label']}")
    print(f"Confidence: {result[0]['score']:.4f}")
LAB 7
#7
from transformers import pipeline
# Load the summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def summarize_text(text, max_length=100, min_length=30):
    if len(text.split()) < min_length:  # Avoid issues with very short text
        return "Text is too short to summarize."
    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
    return summary[0]['summary_text']
# Get input from the user
print("Enter the text you want to summarize:")
user_input = input()
# Get the summary
summary_result = summarize_text(user_input)
# Print the summarized text
print("\nSummary:", summary_result)
LAB 8
#8
!pip install langchain cohere langchain-community
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import Cohere
from langchain_community.document_loaders import TextLoader
cohere_api_key = "YOUR_COHERE_API_KEY"  # replace with your own Cohere API key
file_path = "/content/sample text.txt"
loader = TextLoader(file_path)
documents = loader.load()
text_content = documents[0].page_content
prompt_template = PromptTemplate(
    input_variables=["text"],
    template="Analyze the following text and summarize its key points:\n\nText: {text}\n\nSummary:",
)
cohere_llm = Cohere(cohere_api_key=cohere_api_key, temperature=0.7)
chain = LLMChain(llm=cohere_llm, prompt=prompt_template)
output = chain.run(text=text_content)
print("Generated Summary:")
print(output)
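Recent LangChain releases deprecate LLMChain and chain.run in favour of composing the prompt and the LLM directly. A minimal sketch of the equivalent call, assuming a LangChain version where PromptTemplate and the Cohere LLM are composable runnables:

# Hedged sketch: the same summary using prompt | llm composition instead of LLMChain.
chain = prompt_template | cohere_llm
output = chain.invoke({"text": text_content})
print("Generated Summary:")
print(output)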
LAB 9
#9
# Install required packages
!pip install wikipedia
import wikipedia
import requests
from bs4 import BeautifulSoup
import re
# Define InstitutionDetails class
class InstitutionDetails:
    def __init__(self, name, founded, headquarters, branches, summary):
        self.name = name
        self.founded = founded
        self.headquarters = headquarters
        self.branches = branches
        self.summary = summary

    def __str__(self):
        return f"""
Institution Details
-------------------
Name: {self.name}
Founded: {self.founded}
Headquarters: {self.headquarters}
Branches: {", ".join(self.branches) if self.branches else "Not Found"}
Summary:
{self.summary}
"""
# Function to fetch institution info
def fetch_institution_info(name):
    try:
        page = wikipedia.page(name)
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Disambiguation Error: {e.options}"
    except wikipedia.exceptions.PageError:
        return f"No Wikipedia page found for {name}"

    # Extract HTML and parse the infobox
    url = page.url
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'html.parser')
    infobox = soup.find("table", {"class": "infobox"})

    founded = "Not Found"
    headquarters = "Not Found"
    if infobox:
        for row in infobox.find_all("tr"):
            header = row.find("th")
            data = row.find("td")
            if header and data:
                key = header.text.strip().lower()
                value = data.text.strip()
                if "founded" in key or "established" in key:
                    founded = value
                elif "headquarters" in key or "location" in key:
                    headquarters = value

    # Branches detection (still heuristic)
    branch_keywords = ["New York", "San Francisco", "London", "Tokyo", "Bangalore",
                       "Cambridge", "Seattle", "Mountain View", "Davangere"]
    branches_found = {match for match in branch_keywords
                      if re.search(rf"\b{re.escape(match)}\b", page.content, re.IGNORECASE)}
    branches = list(branches_found) if branches_found else ["Not Found"]

    summary = wikipedia.summary(name, sentences=3)

    return InstitutionDetails(
        name=name,
        founded=founded,
        headquarters=headquarters,
        branches=branches,
        summary=summary
    )
# Example usage
institution_name = input("Enter institution name: ")
result = fetch_institution_info(institution_name)
print(result)
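Because fetch_institution_info returns a plain string on error and an InstitutionDetails object on success, callers can branch on the type. A minimal sketch (the institution name is only an example):

# Hedged sketch: distinguish error strings from successful lookups.
details = fetch_institution_info("Massachusetts Institute of Technology")
if isinstance(details, InstitutionDetails):
    print(details.name, "-", details.founded)
else:
    print("Lookup failed:", details)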
LAB 10
#10
# Install required packages
!pip install PyMuPDF faiss-cpu sentence-transformers langchain
import fitz # PyMuPDF
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sklearn.metrics.pairwise import cosine_similarity
# Step 1: Load and extract text from ipc.pdf
pdf_path = "ipc.pdf" # Adjust path if needed
pdf_document = fitz.open(pdf_path)
ipc_text = ""
for page_num in range(pdf_document.page_count):
    page = pdf_document.load_page(page_num)
    ipc_text += page.get_text()
# Step 2: Split the text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_text(ipc_text)
# Step 3: Create embeddings
model = SentenceTransformer("all-MiniLM-L6-v2")
document_embeddings = model.encode(texts, convert_to_tensor=True)
doc_embeddings_np = document_embeddings.cpu().numpy()
# Step 4: Index with FAISS
index = faiss.IndexFlatL2(doc_embeddings_np.shape[1])
index.add(doc_embeddings_np)
# Step 5: Chat function with similarity threshold
def get_response(user_query, threshold=0.6):
    query_embedding = model.encode([user_query], convert_to_tensor=True)
    query_embedding_np = query_embedding.cpu().numpy()

    # Get top match
    _, I = index.search(query_embedding_np, k=1)
    best_match_index = I[0][0]
    best_match_text = texts[best_match_index]

    # Calculate cosine similarity
    similarity_score = cosine_similarity(query_embedding_np, [doc_embeddings_np[best_match_index]])[0][0]

    if similarity_score >= threshold:
        return best_match_text.strip()
    else:
        return "I'm sorry, I couldn't find relevant information in the IPC for your query."
# Step 6: Chat loop
print("IPC Chatbot is ready! (type 'bye' to exit)")
while True:
    user_input = input("You: ")
    if user_input.lower() == "bye":
        print("Bot: Goodbye!")
        break
    response = get_response(user_input)
    print(f"Bot: {response}")