LAB 1
from gensim.downloader import load
print("Loading pre-trained GloVe model (50 dimensions)...")
model = load("glove-wiki-gigaword-50")
# Explore word relationships via vector arithmetic on the GloVe embeddings.
def ewr():
    result = model.most_similar(positive=['king', 'woman'], negative=['man'], topn=1)
    print("\nking - man + woman = ?", result[0][0])
    print("similarity:", result[0][1])

    result = model.most_similar(positive=['paris', 'italy'], negative=['france'], topn=1)
    print("\nparis - france + italy = ?", result[0][0])
    print("similarity:", result[0][1])

    result = model.most_similar(positive=['programming'], topn=5)
    print("\nTop 5 words similar to 'programming':")
    for word, similarity in result:
        print(word, similarity)

ewr()
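Optional check, not part of the original lab: the same analogy can be verified by hand from the raw 50-dimensional vectors, which makes the vector arithmetic behind most_similar concrete. A minimal sketch, assuming the model loaded above:

# Hedged sketch: reproduce the king - man + woman analogy from raw GloVe vectors.
import numpy as np

target = model['king'] - model['man'] + model['woman']
queen = model['queen']
cosine = np.dot(target, queen) / (np.linalg.norm(target) * np.linalg.norm(queen))
print("cosine(king - man + woman, queen) =", round(float(cosine), 4))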
LAB 2
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from gensim.downloader import load
def reduce_dimensions(embeddings):
    pca = PCA(n_components=2)
    reduced_embeddings = pca.fit_transform(embeddings)
    return reduced_embeddings

def visualize_embeddings(words, reduced_embeddings):
    plt.figure(figsize=(10, 6))
    for i, word in enumerate(words):
        x, y = reduced_embeddings[i]
        plt.scatter(x, y, color='blue', marker='o')
        plt.text(x + 0.02, y + 0.02, word, fontsize=12)
    plt.title("2D Visualization of Word Embeddings")
    plt.xlabel("PCA Component 1")
    plt.ylabel("PCA Component 2")
    plt.grid()
    plt.show()

def get_similar_words(word, model):
    print(f"Top 5 words similar to '{word}':")
    similar_words = model.most_similar(word, topn=5)
    for similar_word, similarity in similar_words:
        print(f"{similar_word} ({similarity:.4f})")

print("Loading pre-trained GloVe model (50 dimensions)...")
model = load("glove-wiki-gigaword-50")

words = ['football', 'basketball', 'soccer', 'tennis', 'cricket',
         'hockey', 'baseball', 'golf', 'volleyball', 'rugby']
embeddings = [model[word] for word in words]
reduced_embeddings = reduce_dimensions(embeddings)
visualize_embeddings(words, reduced_embeddings)
get_similar_words("programming", model)
LAB 3
from gensim.models import Word2Vec
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
corpus = [
"The patient was diagnosed with diabetes and hypertension.",
"RI scans reveal abnormalities in the brain tissue.",
"The treatment involves antibiotics and regular monitoring.",
"Symptoms include fever, fatigue, and muscle pain.",
"The vaccine is effective against several viral infections.",
"Doctors recommend physical therapy for recovery.",
"The clinical trial results were published in the journal.",
"The surgeon performed a minimally invasive procedure.",
"The prescription includes pain relievers and anti-inflammatory drugs.",
"The diagnosis confirmed a rare genetic disorder."
]
tokenized_corpus = [sentence.lower().split() for sentence in corpus]
model = Word2Vec(sentences=tokenized_corpus, vector_size=5, window=2, min_count=1, epochs=5)
word = input("Enter a word: ").lower()
if word in model.wv:
    similar = model.wv.most_similar(word, topn=5)
    print(f"Words similar to '{word}':")
    for i, (w, score) in enumerate(similar, 1):
        print(f"{i}. {w} (Similarity: {score:.4f})")
else:
    print("Word not found in vocabulary.")
words = list(model.wv.index_to_key)
word_vectors = model.wv[words]
pca = PCA(n_components=2)
result = pca.fit_transform(word_vectors)
plt.figure(figsize=(10, 8))
plt.scatter(result[:, 0], result[:, 1])
for i, word in enumerate(words):
    plt.annotate(word, xy=(result[i, 0], result[i, 1]))
plt.title("Word Embeddings (PCA Projection)")
plt.xlabel("PCA 1")
plt.ylabel("PCA 2")
plt.grid(True)
plt.show()
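A small follow-up, not in the original lab: the trained Word2Vec model also exposes raw vectors and pairwise similarities directly. A minimal sketch, assuming the model trained on the medical corpus above (note that the naive split() keeps punctuation attached, so only clean tokens such as 'patient' or 'treatment' are safe to look up):

# Hedged sketch: inspect the tiny 5-dimensional vectors learned above.
vector = model.wv['patient']                          # raw 5-d embedding for one token
score = model.wv.similarity('patient', 'treatment')   # cosine similarity between two tokens
print("Vector for 'patient':", vector)
print("similarity(patient, treatment):", round(float(score), 4))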
LAB 4
!pip install cohere gensim
import cohere
import gensim.downloader as api
co = cohere.Client("YOUR_COHERE_API_KEY")  # replace with your own Cohere API key
print("Loading word embeddings...")
model = api.load("glove-wiki-gigaword-100")
print("Model loaded successfully.")
prompt = "write an essay on natural disaster"
def get_first_enriched_prompt(prompt, topn=3):
    # Replace the first word that has GloVe neighbours with its most similar word.
    for word in prompt.split():
        try:
            similar_words = model.most_similar(word.strip('.,!?').lower(), topn=topn)
            for similar_word, _ in similar_words:
                enriched = prompt.replace(word, similar_word)
                return enriched
        except KeyError:
            continue
    return None

def get_response(text):
    try:
        return co.chat(model="command-r", message=text).text.strip()
    except Exception as e:
        return f"Error: {e}"
print(f"\nOriginal Prompt:\n{prompt}\nResponse:\n{get_response(prompt)}")
enriched_prompt = get_first_enriched_prompt(prompt)
if enriched_prompt:
    print(f"\nEnriched Prompt:\n{enriched_prompt}\nResponse:\n{get_response(enriched_prompt)}")
else:
    print("\nNo enriched prompt could be generated.")
LAB 5
from gensim.downloader import load
import random
print("Loading pre-trained Glove model (50 dimensions)...")
model =load("glove-wiki-gigaword-50")
print("Model loaded successfully!")
def create_paragraph (iw, sws):
paragraph ="The topic of (iw) is fascinating, often linked to terms like"
random.shuffle (sws)
for word in sws:
paragraph += str(word) + ","
paragraph = paragraph.rstrip(", ") + "."
return paragraph
iw = "hacking"
sws =model.most_similar(iw, topn=5)
words=[word for word, s in sws]
paragraph =create_paragraph (iw, words)
print (paragraph)
LAB 6
#6
!pip install transformers torch
from transformers import pipeline
sentiment_analyzer = pipeline("sentiment-analysis")
while True:
    user_input = input("\nPlease enter a sentence to analyze its sentiment (or type 'exit' to quit): ")
    if user_input.lower() == "exit":
        print("Exiting the program. Goodbye!")
        break
    if not user_input.strip():
        print("Please enter a non-empty sentence.")
        continue
    result = sentiment_analyzer(user_input)
    print("\nSentiment Analysis Result:")
    print(f"Label: {result[0]['label']}")
    print(f"Confidence: {result[0]['score']:.4f}")
LAB 7
#7
from transformers import pipeline
# Load the summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def summarize_text(text, max_length=100, min_length=30):
    if len(text.split()) < min_length:  # Avoid issues with very short text
        return "Text is too short to summarize."
    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
    return summary[0]['summary_text']
# Get input from the user
print("Enter the text you want to summarize:")
user_input = input()
# Get the summary
summary_result = summarize_text(user_input)
# Print the summarized text
print("\nSummary:", summary_result)
LAB 8
#8
!pip install langchain cohere langchain-community
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import Cohere
from langchain_community.document_loaders import TextLoader
cohere_api_key = "YOUR_COHERE_API_KEY"  # replace with your own Cohere API key
file_path = "/content/sample text.txt"
loader = TextLoader(file_path)
documents = loader.load()
text_content = documents[0].page_content
prompt_template = PromptTemplate(
    input_variables=["text"],
    template="Analyze the following text and summarize its key points:\n\nText: {text}\n\nSummary:",
)
cohere_llm = Cohere(cohere_api_key=cohere_api_key, temperature=0.7)
chain = LLMChain(llm=cohere_llm, prompt=prompt_template)
output = chain.run(text=text_content)
print("Generated Summary:")
print(output)
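Recent LangChain releases deprecate LLMChain and chain.run in favour of composing the prompt and the LLM directly. A minimal sketch of the equivalent call, assuming a LangChain version where PromptTemplate and the Cohere LLM are composable runnables:

# Hedged sketch: the same summary using prompt | llm composition instead of LLMChain.
chain = prompt_template | cohere_llm
output = chain.invoke({"text": text_content})
print("Generated Summary:")
print(output)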
LAB 9
#9
# Install required packages
!pip install wikipedia
import wikipedia
import requests
from bs4 import BeautifulSoup
import re
# Define InstitutionDetails class
class InstitutionDetails:
    def __init__(self, name, founded, headquarters, branches, summary):
        self.name = name
        self.founded = founded
        self.headquarters = headquarters
        self.branches = branches
        self.summary = summary

    def __str__(self):
        return f"""
Institution Details
-------------------
Name: {self.name}
Founded: {self.founded}
Headquarters: {self.headquarters}
Branches: {", ".join(self.branches) if self.branches else "Not Found"}
Summary:
{self.summary}
"""
# Function to fetch institution info
def fetch_institution_info(name):
    try:
        page = wikipedia.page(name)
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Disambiguation Error: {e.options}"
    except wikipedia.exceptions.PageError:
        return f"No Wikipedia page found for {name}"

    # Extract HTML and parse the infobox
    url = page.url
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'html.parser')
    infobox = soup.find("table", {"class": "infobox"})

    founded = "Not Found"
    headquarters = "Not Found"
    if infobox:
        for row in infobox.find_all("tr"):
            header = row.find("th")
            data = row.find("td")
            if header and data:
                key = header.text.strip().lower()
                value = data.text.strip()
                if "founded" in key or "established" in key:
                    founded = value
                elif "headquarters" in key or "location" in key:
                    headquarters = value

    # Branches detection (still heuristic)
    branch_keywords = ["New York", "San Francisco", "London", "Tokyo", "Bangalore",
                       "Cambridge", "Seattle", "Mountain View", "Davangere"]
    branches_found = {match for match in branch_keywords
                      if re.search(rf"\b{re.escape(match)}\b", page.content, re.IGNORECASE)}
    branches = list(branches_found) if branches_found else ["Not Found"]

    summary = wikipedia.summary(name, sentences=3)

    return InstitutionDetails(
        name=name,
        founded=founded,
        headquarters=headquarters,
        branches=branches,
        summary=summary
    )
# Example usage
institution_name = input("Enter institution name: ")
result = fetch_institution_info(institution_name)
print(result)
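Because fetch_institution_info returns a plain string on error and an InstitutionDetails object on success, callers can branch on the type. A minimal sketch (the institution name is only an example):

# Hedged sketch: distinguish error strings from successful lookups.
details = fetch_institution_info("Massachusetts Institute of Technology")
if isinstance(details, InstitutionDetails):
    print(details.name, "-", details.founded)
else:
    print("Lookup failed:", details)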
LAB 10
#10
# Install required packages
!pip install PyMuPDF faiss-cpu sentence-transformers langchain
import fitz # PyMuPDF
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sklearn.metrics.pairwise import cosine_similarity
# Step 1: Load and extract text from ipc.pdf
pdf_path = "ipc.pdf" # Adjust path if needed
pdf_document = fitz.open(pdf_path)
ipc_text = ""
for page_num in range(pdf_document.page_count):
    page = pdf_document.load_page(page_num)
    ipc_text += page.get_text()
# Step 2: Split the text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_text(ipc_text)
# Step 3: Create embeddings
model = SentenceTransformer("all-MiniLM-L6-v2")
document_embeddings = model.encode(texts, convert_to_tensor=True)
doc_embeddings_np = document_embeddings.cpu().numpy()
# Step 4: Index with FAISS
index = faiss.IndexFlatL2(doc_embeddings_np.shape[1])
index.add(doc_embeddings_np)
# Step 5: Chat function with similarity threshold
def get_response(user_query, threshold=0.6):
    query_embedding = model.encode([user_query], convert_to_tensor=True)
    query_embedding_np = query_embedding.cpu().numpy()

    # Get top match
    _, I = index.search(query_embedding_np, k=1)
    best_match_index = I[0][0]
    best_match_text = texts[best_match_index]

    # Calculate cosine similarity
    similarity_score = cosine_similarity(query_embedding_np, [doc_embeddings_np[best_match_index]])[0][0]

    if similarity_score >= threshold:
        return best_match_text.strip()
    else:
        return "I'm sorry, I couldn't find relevant information in the IPC for your query."
# Step 6: Chat loop
print("IPC Chatbot is ready! (type 'bye' to exit)")
while True:
    user_input = input("You: ")
    if user_input.lower() == "bye":
        print("Bot: Goodbye!")
        break
    response = get_response(user_input)
    print(f"Bot: {response}")