Sentiment Analyzer: Build One Without Labeled Data (2026)

Project Overview

This project builds a sentiment analysis tool that classifies text as positive, negative, or neutral, detects specific emotions (joy, anger, sadness, etc.), and processes CSV batches of reviews, tweets, or feedback at scale.

Learning Goals

Use JSON mode for structured classification outputs
Process batches of data with rate limiting
Map emotions to structured categories
Build a simple CSV pipeline for real-world data

Architecture

plaintext

Input (text / CSV file)
        ↓
LLM classification (JSON output)
        ↓
Structured sentiment + emotion + confidence
        ↓
Results table / CSV export

Implementation

Step 1: Setup

Bash

pip install openai pandas

Step 2: Sentiment Classifier

Python

# analyzer.py
import json
from openai import OpenAI

# Shared API client used by analyze_sentiment; constructed with no explicit
# arguments.
# NOTE(review): presumably the credentials come from the environment
# (e.g. OPENAI_API_KEY) — confirm against the openai client documentation.
client = OpenAI()

# Prompt template for one classification request. It is filled in with
# str.format(text=...), which is why the literal braces of the JSON example
# are doubled ({{ and }}) and {text} is the only substitution field.
SENTIMENT_PROMPT = """Analyze the sentiment of the following text.

Return JSON with this exact structure:
{{
  "sentiment": "positive" | "negative" | "neutral",
  "confidence": <0.0-1.0>,
  "emotions": ["joy", "anger", "sadness", "fear", "surprise", "disgust"],
  "intensity": "low" | "medium" | "high",
  "key_phrases": ["phrase1", "phrase2"],
  "summary": "One sentence explanation of the sentiment"
}}

Only include emotions that are present. Return an empty list if no strong emotion.

Text: {text}"""


def analyze_sentiment(text: str) -> dict:
    """Classify one piece of text and return the model's JSON reply as a dict.

    Only the first 1000 characters of ``text`` are placed into the prompt.
    The request asks for a JSON-object response, and the reply body is
    decoded with ``json.loads`` before being returned.
    """
    prompt = SENTIMENT_PROMPT.format(text=text[:1000])
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[user_message],
        response_format={"type": "json_object"},
        temperature=0.1,
    )
    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)


def analyze_batch(texts: list[str], delay: float = 0.5) -> list[dict]:
    """Run analyze_sentiment over every text, pausing between requests.

    Each result dict gets an extra ``"text"`` key holding the first 100
    characters of the input it was produced from. The function sleeps
    ``delay`` seconds after every item except the last one, and prints a
    progress counter that overwrites itself on a single line.
    """
    import time

    total = len(texts)
    analyzed = []
    for position, text in enumerate(texts, start=1):
        # "\r" returns the cursor to the line start so the counter updates in place.
        print(f"  Analyzing {position}/{total}...", end="\r")
        entry = analyze_sentiment(text)
        entry["text"] = text[:100]  # short excerpt kept for reference
        analyzed.append(entry)
        if position < total:
            # No pause is needed once the final item has been processed.
            time.sleep(delay)
    # Terminate the progress line.
    print()
    return analyzed

Step 3: CSV Pipeline

Python

# pipeline.py
import pandas as pd
from analyzer import analyze_sentiment, analyze_batch


def _default_output_path(input_path: str) -> str:
    """Derive ``<input without .csv>_sentiment.csv`` from the input path."""
    suffix = ".csv"
    # Only a trailing extension is stripped (case-insensitively). A plain
    # str.replace would also rewrite ".csv" inside directory names and, when
    # the name has no lowercase ".csv" at all, would return the input path
    # unchanged so the results would overwrite the source file.
    if input_path.lower().endswith(suffix):
        return input_path[: -len(suffix)] + "_sentiment.csv"
    return input_path + "_sentiment.csv"


def process_csv(input_path: str, text_column: str, output_path: str = None):
    """Process a CSV file and add sentiment columns.

    Args:
        input_path: Path of the CSV file to read.
        text_column: Name of the column whose values are analyzed.
        output_path: Where to write the augmented CSV. When omitted (or
            empty), a path next to the input is used: the input name with a
            trailing ``.csv`` replaced by ``_sentiment.csv``.

    Returns:
        The DataFrame with ``sentiment``, ``confidence``, ``emotions`` and
        ``intensity`` columns added.

    Raises:
        ValueError: If ``text_column`` is not a column of the input file.
    """
    df = pd.read_csv(input_path)

    if text_column not in df.columns:
        raise ValueError(f"Column '{text_column}' not found. Available: {list(df.columns)}")

    # Missing cells become empty strings; astype(str) additionally turns
    # numeric cells into text so the downstream string slicing cannot fail.
    texts = df[text_column].fillna("").astype(str).tolist()
    print(f"Analyzing {len(texts)} texts...")
    results = analyze_batch(texts)

    # The result dicts are model-generated JSON, so every field is read with a
    # default; "emotions" may also be null, hence the `or []`.
    df["sentiment"] = [r.get("sentiment", "") for r in results]
    df["confidence"] = [r.get("confidence", 0) for r in results]
    df["emotions"] = [", ".join(map(str, r.get("emotions") or [])) for r in results]
    df["intensity"] = [r.get("intensity", "") for r in results]

    output = output_path or _default_output_path(input_path)
    df.to_csv(output, index=False)
    print(f"Saved to {output}")

    # Print summary
    print("\n--- Summary ---")
    counts = df["sentiment"].value_counts()
    for sentiment, count in counts.items():
        pct = count / len(df) * 100
        print(f"  {sentiment}: {count} ({pct:.1f}%)")

    return df

Step 4: CLI

Python

# main.py
import argparse
from analyzer import analyze_sentiment
from pipeline import process_csv


def main():
    """Command-line entry point.

    Subcommands:
        text <text>   Analyze a single text and print a readable report.
        csv <file>    Run the CSV pipeline (``--column`` / ``--output``).

    With no subcommand the usage help is printed.
    """
    parser = argparse.ArgumentParser(description="AI Sentiment Analyzer")
    subparsers = parser.add_subparsers(dest="command")

    # Single text analysis
    text_parser = subparsers.add_parser("text", help="Analyze a single text")
    text_parser.add_argument("text", help="Text to analyze")

    # CSV batch processing
    csv_parser = subparsers.add_parser("csv", help="Process a CSV file")
    csv_parser.add_argument("file", help="Path to CSV file")
    csv_parser.add_argument("--column", "-c", default="text", help="Column name containing text")
    csv_parser.add_argument("--output", "-o", help="Output CSV path")

    args = parser.parse_args()

    if args.command == "text":
        result = analyze_sentiment(args.text)

        # The result is model-generated JSON: any field may be missing, null
        # or of an unexpected type, so read everything defensively instead of
        # indexing directly.
        sentiment = str(result.get("sentiment") or "unknown").upper()
        try:
            confidence = float(result.get("confidence") or 0)
        except (TypeError, ValueError):
            # e.g. the model returned "high" instead of a number
            confidence = 0.0
        emotions = ", ".join(map(str, result.get("emotions") or [])) or "none"

        emoji = {"POSITIVE": "😊", "NEGATIVE": "😞", "NEUTRAL": "😐"}.get(sentiment, "")
        print(f"\n{emoji} Sentiment: {sentiment} (confidence: {confidence:.0%})")
        print(f"Intensity: {result.get('intensity', 'N/A')}")
        print(f"Emotions: {emotions}")
        print(f"Summary: {result.get('summary', '')}")
        key_phrases = result.get("key_phrases")
        if key_phrases:
            print(f"Key phrases: {', '.join(map(str, key_phrases))}")

    elif args.command == "csv":
        process_csv(args.file, args.column, args.output)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()

Step 5: Run

Bash

# Single text
python main.py text "This product completely exceeded my expectations! Amazing quality."

# CSV batch
python main.py csv reviews.csv --column review_text --output results.csv

Sample Output

plaintext

😊 Sentiment: POSITIVE (confidence: 96%)
Intensity: high
Emotions: joy, surprise
Summary: The reviewer expresses strong satisfaction and delight with the product quality.
Key phrases: exceeded expectations, amazing quality

Extension Ideas

Aspect-based sentiment — detect sentiment per aspect (price, quality, service)
Real-time Twitter/Reddit analysis — connect to social media APIs
Trend dashboard — track sentiment over time with Plotly charts
Comparison mode — compare sentiment across two CSV datasets
Multilingual support — add language detection and translate before analysis

What to Learn Next

AI data analyst → AI Data Analyst
Structured outputs → OpenAI API Complete Guide