Neural Networks

Neural Networks Basics

Understand how artificial neural networks work, from perceptrons to deep learning.

Biological Inspiration

Artificial neural networks are loosely inspired by the biological brain. The human brain contains roughly 86 billion neurons connected by on the order of 100 trillion synapses; ANNs model this with artificial neurons connected by weighted edges.

The Perceptron

The perceptron is the simplest neural unit. It:

  1. Takes multiple inputs
  2. Multiplies each by a weight
  3. Sums them up
  4. Adds a bias
  5. Applies an activation function
  6. Outputs a result
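The six steps above can be sketched in a few lines of NumPy. The weights and bias below are hand-picked to implement an AND gate, not learned — they are illustrative values only:

```python
import numpy as np

def perceptron(x, w, b):
    # Steps 1-4: multiply inputs by weights, sum, add bias
    z = np.dot(w, x) + b
    # Steps 5-6: apply a step activation and output the result
    return 1 if z > 0 else 0

# Hand-picked weights that make the perceptron act as an AND gate
w = np.array([1.0, 1.0])
b = -1.5

for x in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    print(x, "->", perceptron(np.array(x), w, b))
```

Only the input (1, 1) pushes the weighted sum above zero, so only it outputs 1. Because a single perceptron draws one linear boundary, it cannot represent XOR — which is why the multi-layer example later in this page is needed.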

Activation Functions

  • Sigmoid: Squashes output into (0, 1) — used for binary classification outputs
  • ReLU: max(0, x) — the most common choice for hidden layers
  • Softmax: Turns a vector of scores into a multi-class probability distribution
  • Tanh: Squashes output into (-1, 1) — a zero-centered alternative to sigmoid
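The four functions above are each a one-liner in NumPy. A minimal sketch, evaluated on a few sample inputs so the output ranges are visible:

```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def tanh(x):
    return np.tanh(x)

def softmax(x):
    e = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e / e.sum()

z = np.array([-2.0, 0.0, 2.0])
print("sigmoid:", sigmoid(z))   # each value in (0, 1)
print("relu:   ", relu(z))      # negatives clipped to 0
print("tanh:   ", tanh(z))      # each value in (-1, 1)
print("softmax:", softmax(z))   # non-negative, sums to 1
```

Note the max-subtraction trick in softmax: it changes nothing mathematically but prevents `np.exp` from overflowing on large scores.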

Backpropagation

Backpropagation is the algorithm used to train neural networks. It:

  1. Makes predictions (forward pass)
  2. Computes the error (loss)
  3. Calculates how each weight contributed to the error (backward pass)
  4. Updates weights to reduce error

Example

import numpy as np

# Implement a simple neural network from scratch

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    return np.maximum(0, x)

class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights with small random values; a scale of 0.5
        # keeps early gradients large enough for this toy problem
        self.W1 = np.random.randn(input_size, hidden_size) * 0.5
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.5
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        # Hidden layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = relu(self.z1)

        # Output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = sigmoid(self.z2)

        return self.a2

    def backward(self, X, y, learning_rate=0.01):
        m = X.shape[0]

        # Output layer gradient: for a sigmoid output with binary
        # cross-entropy loss, dL/dz2 simplifies to (a2 - y)
        dz2 = self.a2 - y
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Hidden layer gradient
        dz1 = np.dot(dz2, self.W2.T) * (self.z1 > 0)  # ReLU derivative
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

# XOR problem (non-linearly separable)
X = np.array([[0,0], [0,1], [1,0], [1,1]])
y = np.array([[0], [1], [1], [0]])

np.random.seed(42)  # reproducible weight initialization
nn = SimpleNeuralNetwork(2, 4, 1)

# Train with full-batch gradient descent
for epoch in range(10000):
    output = nn.forward(X)
    nn.backward(X, y, learning_rate=0.5)

    if epoch % 2000 == 0:
        loss = np.mean((output - y) ** 2)  # MSE reported for monitoring
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Predict
print("Predictions:", nn.forward(X).round(2).flatten())
print("Actual:     ", y.flatten())