diff --git a/src/components/Detection/Detection.tsx b/src/components/Detection/Detection.tsx index 8b093d8..88b47a1 100644 --- a/src/components/Detection/Detection.tsx +++ b/src/components/Detection/Detection.tsx @@ -1,8 +1,6 @@ import React, { useRef, useEffect } from 'react'; import { Card } from '../../types'; -import { cardModelService } from '../../services/CardModelService'; -import { CentroidTracker, BoundingBox } from '../../utils/Tracker'; -import { ImageProcessing, Rect } from '../../utils/ImageProcessing'; +import { DetectionPipeline } from './DetectionLogic'; interface DetectionProps { videoRef: React.RefObject; @@ -12,16 +10,12 @@ interface DetectionProps { onLiveCardsDetected?: (cards: Card[]) => void; } - const Detection: React.FC = ({ videoRef, canvasRef, onCardsDetected, live, onLiveCardsDetected }) => { const isDetectingRef = useRef(false); const requestRef = useRef(); - const trackerRef = useRef(new CentroidTracker()); - const classificationHistoryRef = useRef>(new Map()); + const pipelineRef = useRef(new DetectionPipeline()); - // Expose detection method for external calls const detectCards = async () => { - if (!videoRef.current || !canvasRef.current || isDetectingRef.current) return; isDetectingRef.current = true; @@ -38,7 +32,7 @@ const Detection: React.FC = ({ videoRef, canvasRef, onCardsDetec ctx.drawImage(video, 0, 0, canvas.width, canvas.height); - const detectedCards = await processImageForCards(canvas, ctx); + const detectedCards = await pipelineRef.current.processImageForCards(canvas, ctx); onCardsDetected(detectedCards); } catch (error) { @@ -66,7 +60,7 @@ const Detection: React.FC = ({ videoRef, canvasRef, onCardsDetec ctx.drawImage(video, 0, 0, canvas.width, canvas.height); - const detectedCards = await processImageForCards(canvas, ctx); + const detectedCards = await pipelineRef.current.processImageForCards(canvas, ctx); if (onLiveCardsDetected) { onLiveCardsDetected(detectedCards); @@ -93,349 +87,11 @@ const Detection: React.FC = ({ videoRef, canvasRef, onCardsDetec } }, [live]); - // Enhanced card detection using image processing specialized for Jass cards - const processImageForCards = async (canvas: HTMLCanvasElement, ctx: CanvasRenderingContext2D): Promise => { - const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); - - // Replace brightness thresholding with robust edge-based localization - const edges = ImageProcessing.detectEdges(imageData); - const cardPolygons = ImageProcessing.findRectangularRegions(edges, canvas.width, canvas.height); - - const detectedCards: Card[] = []; - - for (let i = 0; i < cardPolygons.length; i++) { - const polygon = cardPolygons[i]; - - // Perspective Warping: Get 4 corners and flatten image - const corners = ImageProcessing.findCorners(polygon.points); - const cardCrop = ImageProcessing.warpPerspective(canvas, corners, 128, 192); - - let suit: 'Schellen' | 'Schilten' | 'Eicheln' | 'Rosen'; - let value: number; - let confidence = 0.85; - - if (cardModelService.isReady()) { - const suitRes = await cardModelService.classifySuit(cardCrop); - const valRes = await cardModelService.classifyValue(cardCrop); - suit = suitRes.label as any; - value = parseInt(valRes.label); - confidence = (suitRes.confidence + valRes.confidence) / 2; - } else { - suit = detectCardSuit(ctx, canvas, polygon.bbox); - value = detectCardValue(ctx, canvas, polygon.bbox); - } - - detectedCards.push({ - id: `card-${i}`, - suit, - value, - x: polygon.bbox.x, - y: polygon.bbox.y, - width: polygon.bbox.width, - height: polygon.bbox.height, - confidence - }); - } - - // Apply Tracking and Temporal Smoothing - const trackedObjects = trackerRef.current.update(cardPolygons.map(p => p.bbox)); - const finalCards: Card[] = []; - - - - - - - - - - for (const obj of trackedObjects) { - // Match raw detections to tracked objects to get current frame's identity - const detection = detectedCards.find(c => - Math.abs(c.x - obj.bbox.x) < 20 && Math.abs(c.y - obj.bbox.y) < 20 - ); - - if (detection) { - // Update history for temporal voting - if (!classificationHistoryRef.current.has(obj.id)) { - classificationHistoryRef.current.set(obj.id, { suits: [], values: [] }); - } - const history = classificationHistoryRef.current.get(obj.id)!; - history.suits.push(detection.suit); - history.values.push(detection.value); - if (history.suits.length > 10) { - history.suits.shift(); - history.values.shift(); - } - - // Vote for most common identity - const bestSuit = getMostCommon(history.suits) as any; - const bestValue = getMostCommon(history.values); - - finalCards.push({ - id: `card-${obj.id}`, - suit: bestSuit, - value: bestValue, - x: obj.bbox.x, - y: obj.bbox.y, - width: obj.bbox.width, - height: obj.bbox.height, - confidence: detection.confidence - }); - } - } - - return finalCards; - }; - - const getMostCommon = (arr: any[]) => { - if (arr.length === 0) return null; - const counts: Record = {}; - arr.forEach(item => counts[item] = (counts[item] || 0) + 1); - return Object.entries(counts).sort((a, b) => b[1] - a[1])[0][0]; - }; - - const createCrop = (ctx: CanvasRenderingContext2D, canvas: HTMLCanvasElement, region: {x: number, y: number, width: number, height: number}): HTMLCanvasElement => { - const cropCanvas = document.createElement('canvas'); - cropCanvas.width = region.width; - cropCanvas.height = region.height; - const cropCtx = cropCanvas.getContext('2d'); - if (cropCtx) { - cropCtx.drawImage(canvas, region.x, region.y, region.width, region.height, 0, 0, region.width, region.height); - } - return cropCanvas; - }; - - - // Enhanced card region detection specialized for Jass cards - const findCardRegions = (imageData: ImageData, width: number, height: number): {x: number, y: number, width: number, height: number}[] => { - const regions = []; - const step = 24; - - for (let y = 0; y < height; y += step) { - for (let x = 0; x < width; x += step) { - const i = (y * width + x) * 4; - const brightness = (imageData.data[i] + imageData.data[i + 1] + imageData.data[i + 2]) / 3; - - if (brightness > 120 && brightness < 255) { - const region = getCardRegionWithShapeAnalysis(imageData, width, height, x, y); - if (region && region.width > 50 && region.height > 80) { - const aspectRatio = region.width / region.height; - if (aspectRatio > 0.3 && aspectRatio < 1.8) { - regions.push(region); - } - } - } - } - } - - const uniqueRegions = []; - regions.sort((a, b) => (b.width * b.height) - (a.width * a.height)); - - for (const region of regions) { - const isOverlapping = uniqueRegions.some(u => { - const overlapX = Math.max(0, Math.min(region.x + region.width, u.x + u.width) - Math.max(region.x, u.x)); - const overlapY = Math.max(0, Math.min(region.y + region.height, u.y + u.height) - Math.max(region.y, u.y)); - const overlapArea = overlapX * overlapY; - const regionArea = region.width * region.height; - const uArea = u.width * u.height; - return overlapArea > Math.min(regionArea, uArea) * 0.5; - }); - if (!isOverlapping) { - uniqueRegions.push(region); - } - } - - return uniqueRegions; - }; - - const getCardRegionWithShapeAnalysis = (imageData: ImageData, width: number, height: number, x: number, y: number): {x: number, y: number, width: number, height: number} | null => { - let minX = x, maxX = x; - let minY = y, maxY = y; - let pixelCount = 0; - - const stack = [[x, y]]; - const visited = new Int32Array(width * height).fill(-1); - const searchLimit = 10000; - - let visitedCount = 0; - while (stack.length > 0 && visitedCount < searchLimit) { - const [cx, cy] = stack.pop()!; - const idx = cy * width + cx; - if (visited[idx] !== -1) continue; - visited[idx] = 1; - visitedCount++; - - if (cx >= 0 && cx < width && cy >= 0 && cy < height) { - const i = idx * 4; - const brightness = (imageData.data[i] + imageData.data[i + 1] + imageData.data[i + 2]) / 3; - - if (brightness > 120 && brightness < 250) { - pixelCount++; - minX = Math.min(minX, cx); - maxX = Math.max(maxX, cx); - minY = Math.min(minY, cy); - maxY = Math.max(maxY, cy); - - if (cx + 1 < width) stack.push([cx + 1, cy]); - if (cx - 1 >= 0) stack.push([cx - 1, cy]); - if (cy + 1 < height) stack.push([cy + 1, cy]); - if (cy - 1 >= 0) stack.push([cy, cy - 1]); - } - } - } - - const widthDiff = maxX - minX; - const heightDiff = maxY - minY; - - if (pixelCount > 200 && widthDiff > 50 && heightDiff > 80) { - return { - x: minX, - y: minY, - width: widthDiff, - height: heightDiff - }; - } - - return null; - }; - - - - // Enhanced suit detection optimized for Jass card suit symbols - const detectCardSuit = (ctx: CanvasRenderingContext2D, canvas: HTMLCanvasElement, region: {x: number, y: number, width: number, height: number}): 'Schellen' | 'Schilten' | 'Eicheln' | 'Rosen' => { - // Extract the region of interest (focus mainly on the suit area) - const regionCanvas = document.createElement('canvas'); - const regionCtx = regionCanvas.getContext('2d'); - - if (!regionCtx) return 'Schellen'; - - // Make the region canvas slightly larger to account for any symbol edges - const padding = 5; - regionCanvas.width = region.width + padding * 2; - regionCanvas.height = region.height + padding * 2; - - // Copy the region from main canvas with padding - regionCtx.drawImage( - canvas, - region.x - padding, region.y - padding, region.width + padding * 2, region.height + padding * 2, - 0, 0, regionCanvas.width, regionCanvas.height - ); - - // Analyze the colors in the suit symbol area - const regionData = regionCtx.getImageData(0, 0, regionCanvas.width, regionCanvas.height); - const data = regionData.data; - - // Count dominant colors in different areas of the card symbol area - // According to our research, the correct colors for Jass suits are: - // - Schellen (bells) - typically gold/yellow - // - Schilte (shields) - typically green - // - Eicheln (acorns) - typically brown/black - // - Rosen (roses) - typically red - - // Analyze central region of the card symbol (where suit symbol is likely located) - const centerRegionX = Math.floor(regionCanvas.width / 2) - 10; - const centerRegionY = Math.floor(regionCanvas.height / 3); - const centerRegionWidth = 20; - const centerRegionHeight = 20; - - let redPixels = 0; - let greenPixels = 0; - let blackPixels = 0; - let yellowPixels = 0; - let whitePixels = 0; - let otherPixels = 0; - - // Sample pixels in the center region where the suit symbol would be - const centerX = Math.floor(regionCanvas.width / 2); - const centerY = Math.floor(regionCanvas.height / 2); - - // Sample area around center for better color analysis - const sampleSize = 8; - for (let dy = -sampleSize; dy < sampleSize; dy += 2) { - for (let dx = -sampleSize; dx < sampleSize; dx += 2) { - const px = Math.max(0, Math.min(regionCanvas.width - 1, centerX + dx)); - const py = Math.max(0, Math.min(regionCanvas.height - 1, centerY + dy)); - - const i = (py * regionCanvas.width + px) * 4; - const r = data[i]; - const g = data[i + 1]; - const b = data[i + 2]; - - // Compute dominant color based on thresholds - const brightness = (r + g + b) / 3; - const saturation = Math.max(r, g, b) - Math.min(r, g, b); - - // Filter out background white pixels (thresholds adjusted for better accuracy) - if (brightness > 240 && saturation < 20) { - whitePixels++; - } else if (r > 200 && g < 100 && b < 100) { - redPixels++; // Rosen (red) - in our research, roses are red - } else if (g > 200 && r < 100 && b < 100) { - greenPixels++; // Schilte (shields) - in our research, shields are green - } else if (r < 100 && g < 100 && b < 100) { - blackPixels++; // Eicheln (acorns) - in our research, acorns are black/brown - } else if (r > 200 && g > 200 && b < 100) { - yellowPixels++; // Schellen (bells) - in our research, bells are gold/yellow - } else { - otherPixels++; - } - } - } - - // Return the dominant suit based on pixel counts - const colors = { redPixels, greenPixels, blackPixels, yellowPixels, whitePixels }; - const maxCount = Math.max(...Object.values(colors)); - - // Ensure we have enough pixels to make a determination - if (maxCount < 5) { - // If all colors have very few pixels, fall back to a default - return 'Schellen'; - } - - if (colors.redPixels === maxCount) return 'Rosen'; // Rosen (red) - if (colors.greenPixels === maxCount) return 'Schilten'; // Schilte (green) - if (colors.blackPixels === maxCount) return 'Eicheln'; // Eicheln (black/brown) - if (colors.yellowPixels === maxCount) return 'Schellen'; // Schellen (yellow/gold) - - // If not clear, try to determine from dominant colors - const sortedColors = Object.entries(colors) - .sort((a, b) => b[1] - a[1]) - .map(entry => entry[0]); - - // If we have a clear second-best, return based on that - if (sortedColors.length >= 2 && sortedColors[0] !== 'whitePixels') { - if (sortedColors[0] === 'redPixels') return 'Rosen'; - if (sortedColors[0] === 'greenPixels') return 'Schilten'; - if (sortedColors[0] === 'blackPixels') return 'Eicheln'; - if (sortedColors[0] === 'yellowPixels') return 'Schellen'; - } - - return 'Schellen'; // default fallback - }; - - // Enhanced card value detection with pattern recognition - const detectCardValue = (ctx: CanvasRenderingContext2D, canvas: HTMLCanvasElement, region: {x: number, y: number, width: number, height: number}): number => { - // Jass cards typically have values A, K, O, U, B, 9, 8, 7, 6 in Swiss-German suits - // where: A=11, K=10, O=12, U=13, B=10, 9=9, 8=8, 7=7, 6=6 - // In the German system: A=11, K=10, O=12, U=13, B=10, 9=9, 8=8, 7=7, 6=6 - - // Since we're working with a simplified visual recognition, - // let's return a reasonable card value based on typical game values - const values = [6, 7, 8, 9, 10, 11, 12, 13]; // typical German/Jass card values - const randomIndex = Math.floor(Math.random() * values.length); - - // Return a value from the Jass card system - return values[randomIndex]; - }; - - // Create a reference for detection that can be called externally useEffect(() => { - // Set up detection to be triggerable externally by storing a reference (window as any).detectCards = detectCards; }, []); return null; }; -export default Detection; \ No newline at end of file +export default Detection; diff --git a/src/components/Detection/DetectionLogic.ts b/src/components/Detection/DetectionLogic.ts new file mode 100644 index 0000000..fa3aafd --- /dev/null +++ b/src/components/Detection/DetectionLogic.ts @@ -0,0 +1,90 @@ +import { Card } from '../../types'; +import { cardModelService } from '../../services/CardModelService'; +import { CentroidTracker } from '../../utils/Tracker'; +import { ImageProcessing } from '../../utils/ImageProcessing'; +import { getMostCommon, detectCardSuit, detectCardValue } from './DetectionUtils'; + +export class DetectionPipeline { + private tracker = new CentroidTracker(); + private classificationHistory = new Map(); + + async processImageForCards(canvas: HTMLCanvasElement, ctx: CanvasRenderingContext2D): Promise { + const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); + + const edges = ImageProcessing.detectEdges(imageData); + const cardPolygons = ImageProcessing.findRectangularRegions(edges, canvas.width, canvas.height); + + const detectedCards: Card[] = []; + + for (let i = 0; i < cardPolygons.length; i++) { + const polygon = cardPolygons[i]; + + const corners = ImageProcessing.findCorners(polygon.points); + const cardCrop = ImageProcessing.warpPerspective(canvas, corners, 128, 192); + + let suit: 'Schellen' | 'Schilten' | 'Eicheln' | 'Rosen'; + let value: number; + let confidence = 0.85; + + if (cardModelService.isReady()) { + const suitRes = await cardModelService.classifySuit(cardCrop); + const valRes = await cardModelService.classifyValue(cardCrop); + suit = suitRes.label as any; + value = parseInt(valRes.label); + confidence = (suitRes.confidence + valRes.confidence) / 2; + } else { + suit = detectCardSuit(ctx, canvas, polygon.bbox); + value = detectCardValue(ctx, canvas, polygon.bbox); + } + + detectedCards.push({ + id: `card-${i}`, + suit, + value, + x: polygon.bbox.x, + y: polygon.bbox.y, + width: polygon.bbox.width, + height: polygon.bbox.height, + confidence + }); + } + + const trackedObjects = this.tracker.update(cardPolygons.map(p => p.bbox)); + const finalCards: Card[] = []; + + for (const obj of trackedObjects) { + const detection = detectedCards.find(c => + Math.abs(c.x - obj.bbox.x) < 20 && Math.abs(c.y - obj.bbox.y) < 20 + ); + + if (detection) { + if (!this.classificationHistory.has(obj.id)) { + this.classificationHistory.set(obj.id, { suits: [], values: [] }); + } + const history = this.classificationHistory.get(obj.id)!; + history.suits.push(detection.suit); + history.values.push(detection.value); + if (history.suits.length > 10) { + history.suits.shift(); + history.values.shift(); + } + + const bestSuit = getMostCommon(history.suits) as any; + const bestValue = Number(getMostCommon(history.values)); + + finalCards.push({ + id: `card-${obj.id}`, + suit: bestSuit, + value: bestValue, + x: obj.bbox.x, + y: obj.bbox.y, + width: obj.bbox.width, + height: obj.bbox.height, + confidence: detection.confidence + }); + } + } + + return finalCards; + } +} diff --git a/src/components/Detection/DetectionUtils.ts b/src/components/Detection/DetectionUtils.ts new file mode 100644 index 0000000..1f5d353 --- /dev/null +++ b/src/components/Detection/DetectionUtils.ts @@ -0,0 +1,161 @@ +import { Card } from '../../types'; + +export const getMostCommon = (arr: any[]) => { + if (arr.length === 0) return null; + const counts: Record = {}; + arr.forEach(item => counts[item] = (counts[item] || 0) + 1); + return Object.entries(counts).sort((a, b) => b[1] - a[1])[0][0]; +}; + +export const createCrop = (ctx: CanvasRenderingContext2D, canvas: HTMLCanvasElement, region: {x: number, y: number, width: number, height: number}): HTMLCanvasElement => { + const cropCanvas = document.createElement('canvas'); + cropCanvas.width = region.width; + cropCanvas.height = region.height; + const cropCtx = cropCanvas.getContext('2d'); + if (cropCtx) { + cropCtx.drawImage(canvas, region.x, region.y, region.width, region.height, 0, 0, region.width, region.height); + } + return cropCanvas; +}; + +export const detectCardSuit = (ctx: CanvasRenderingContext2D, canvas: HTMLCanvasElement, region: {x: number, y: number, width: number, height: number}): 'Schellen' | 'Schilten' | 'Eicheln' | 'Rosen' => { + const regionCanvas = document.createElement('canvas'); + const regionCtx = regionCanvas.getContext('2d'); + + if (!regionCtx) return 'Schellen'; + + const padding = 5; + regionCanvas.width = region.width + padding * 2; + regionCanvas.height = region.height + padding * 2; + + regionCtx.drawImage( + canvas, + region.x - padding, region.y - padding, region.width + padding * 2, region.height + padding * 2, + 0, 0, regionCanvas.width, regionCanvas.height + ); + + const regionData = regionCtx.getImageData(0, 0, regionCanvas.width, regionCanvas.height); + const data = regionData.data; + + let redPixels = 0; + let greenPixels = 0; + let blackPixels = 0; + let yellowPixels = 0; + let whitePixels = 0; + let otherPixels = 0; + + const centerX = Math.floor(regionCanvas.width / 2); + const centerY = Math.floor(regionCanvas.height / 2); + + const sampleSize = 8; + for (let dy = -sampleSize; dy < sampleSize; dy += 2) { + for (let dx = -sampleSize; dx < sampleSize; dx += 2) { + const px = Math.max(0, Math.min(regionCanvas.width - 1, centerX + dx)); + const py = Math.max(0, Math.min(regionCanvas.height - 1, centerY + dy)); + + const i = (py * regionCanvas.width + px) * 4; + const r = data[i]; + const g = data[i + 1]; + const b = data[i + 2]; + + const brightness = (r + g + b) / 3; + const saturation = Math.max(r, g, b) - Math.min(r, g, b); + + if (brightness > 240 && saturation < 20) { + whitePixels++; + } else if (r > 200 && g < 100 && b < 100) { + redPixels++; + } else if (g > 200 && r < 100 && b < 100) { + greenPixels++; + } else if (r < 100 && g < 100 && b < 100) { + blackPixels++; + } else if (r > 200 && g > 200 && b < 100) { + yellowPixels++; + } else { + otherPixels++; + } + } + } + + const colors = { redPixels, greenPixels, blackPixels, yellowPixels, whitePixels }; + const maxCount = Math.max(...Object.values(colors)); + + if (maxCount < 5) { + return 'Schellen'; + } + + if (colors.redPixels === maxCount) return 'Rosen'; + if (colors.greenPixels === maxCount) return 'Schilten'; + if (colors.blackPixels === maxCount) return 'Eicheln'; + if (colors.yellowPixels === maxCount) return 'Schellen'; + + const sortedColors = Object.entries(colors) + .sort((a, b) => b[1] - a[1]) + .map(entry => entry[0]); + + if (sortedColors.length >= 2 && sortedColors[0] !== 'whitePixels') { + if (sortedColors[0] === 'redPixels') return 'Rosen'; + if (sortedColors[0] === 'greenPixels') return 'Schilten'; + if (sortedColors[0] === 'blackPixels') return 'Eicheln'; + if (sortedColors[0] === 'yellowPixels') return 'Schellen'; + } + + return 'Schellen'; +}; + +export const detectCardValue = (ctx: CanvasRenderingContext2D, canvas: HTMLCanvasElement, region: {x: number, y: number, width: number, height: number}): number => { + const values = [6, 7, 8, 9, 10, 11, 12, 13]; + const randomIndex = Math.floor(Math.random() * values.length); + return values[randomIndex]; +}; + +export const getCardRegionWithShapeAnalysis = (imageData: ImageData, width: number, height: number, x: number, y: number): {x: number, y: number, width: number, height: number} | null => { + let minX = x, maxX = x; + let minY = y, maxY = y; + let pixelCount = 0; + + const stack = [[x, y]]; + const visited = new Int32Array(width * height).fill(-1); + const searchLimit = 10000; + + let visitedCount = 0; + while (stack.length > 0 && visitedCount < searchLimit) { + const [cx, cy] = stack.pop()!; + const idx = cy * width + cx; + if (visited[idx] !== -1) continue; + visited[idx] = 1; + visitedCount++; + + if (cx >= 0 && cx < width && cy >= 0 && cy < height) { + const i = idx * 4; + const brightness = (imageData.data[i] + imageData.data[i + 1] + imageData.data[i + 2]) / 3; + + if (brightness > 120 && brightness < 250) { + pixelCount++; + minX = Math.min(minX, cx); + maxX = Math.max(maxX, cx); + minY = Math.min(minY, cy); + maxY = Math.max(maxY, cy); + + if (cx + 1 < width) stack.push([cx + 1, cy]); + if (cx - 1 >= 0) stack.push([cx - 1, cy]); + if (cy + 1 < height) stack.push([cy + 1, cy]); + if (cy - 1 >= 0) stack.push([cy, cy - 1]); + } + } + } + + const widthDiff = maxX - minX; + const heightDiff = maxY - minY; + + if (pixelCount > 200 && widthDiff > 50 && heightDiff > 80) { + return { + x: minX, + y: minY, + width: widthDiff, + height: heightDiff + }; + } + + return null; +}; diff --git a/src/utils/ImageProcessing.ts b/src/utils/ImageProcessing.ts index fc2c320..5e382df 100644 --- a/src/utils/ImageProcessing.ts +++ b/src/utils/ImageProcessing.ts @@ -19,18 +19,11 @@ export class ImageProcessing { /** * Simple Sobel filter to detect edges in a grayscale image */ - static Sobel(data: Uint8ClampedArray, width: number, height: number): Float32Array { - const output = new Float32Array(width * height); - const gx = [ - -1, 0, 1, - -2, 0, 2, - -1, 0, 1 - ]; - const gy = [ - -1, -2, -1, - 0, 0, 0, - 1, 2, 1 - ]; + static computeGradients(data: Uint8ClampedArray, width: number, height: number): { magnitude: Float32Array, direction: Float32Array } { + const magnitude = new Float32Array(width * height); + const direction = new Float32Array(width * height); + const gx_kernel = [-1, 0, 1, -2, 0, 2, -1, 0, 1]; + const gy_kernel = [-1, -2, -1, 0, 0, 0, 1, 2, 1]; for (let y = 1; y < height - 1; y++) { for (let x = 1; x < width - 1; x++) { @@ -39,15 +32,53 @@ export class ImageProcessing { for (let ky = -1; ky <= 1; ky++) { for (let kx = -1; kx <= 1; kx++) { - const pixel = data[((y + ky) * width + (x + kx)) * 4]; // use red channel for grayscale - sumX += pixel * gx[(ky + 1) * 3 + (kx + 1)]; - sumY += pixel * gy[(ky + 1) * 3 + (kx + 1)]; + const pixel = data[((y + ky) * width + (x + kx)) * 4]; + sumX += pixel * gx_kernel[(ky + 1) * 3 + (kx + 1)]; + sumY += pixel * gy_kernel[(ky + 1) * 3 + (kx + 1)]; } } - output[y * width + x] = Math.sqrt(sumX * sumX + sumY * sumY); + magnitude[y * width + x] = Math.sqrt(sumX * sumX + sumY * sumY); + direction[y * width + x] = Math.atan2(sumY, sumX); } } - return output; + return { magnitude, direction }; + } + + static nonMaximumSuppression(magnitude: Float32Array, direction: Float32Array, width: number, height: number): Float32Array { + const suppressed = new Float32Array(width * height); + + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + const idx = y * width + x; + const mag = magnitude[idx]; + const angle = direction[idx] * (180 / Math.PI); + let normalizedAngle = angle < 0 ? angle + 180 : angle; + + let neighbor1 = 0; + let neighbor2 = 0; + + if ((normalizedAngle >= 0 && normalizedAngle < 22.5) || (normalizedAngle >= 157.5 && normalizedAngle <= 180)) { + neighbor1 = magnitude[y * width + (x + 1)]; + neighbor2 = magnitude[y * width + (x - 1)]; + } else if (normalizedAngle >= 22.5 && normalizedAngle < 67.5) { + neighbor1 = magnitude[(y - 1) * width + (x + 1)]; + neighbor2 = magnitude[(y + 1) * width + (x - 1)]; + } else if (normalizedAngle >= 67.5 && normalizedAngle < 112.5) { + neighbor1 = magnitude[(y - 1) * width + x]; + neighbor2 = magnitude[(y + 1) * width + x]; + } else if (normalizedAngle >= 112.5 && normalizedAngle < 157.5) { + neighbor1 = magnitude[(y - 1) * width + (x - 1)]; + neighbor2 = magnitude[(y + 1) * width + (x + 1)]; + } + + if (mag >= neighbor1 && mag >= neighbor2) { + suppressed[idx] = mag; + } else { + suppressed[idx] = 0; + } + } + } + return suppressed; } /** @@ -60,7 +91,6 @@ export class ImageProcessing { for (let y = 0; y < height; y++) { for (let x = 0; x < width; x++) { if (edges[y * width + x] > threshold && !visited[y * width + x]) { - // Start a region search const region = this.expandRegion(edges, visited, x, y, width, height, threshold); if (region && region.bbox.width > 50 && region.bbox.height > 80) { regions.push(region); @@ -72,7 +102,6 @@ export class ImageProcessing { } private static expandRegion(edges: Float32Array, visited: Uint8Array, startX: number, startY: number, width: number, height: number, threshold: number): Polygon | null { - let minX = startX, maxX = startX; let minY = startY, maxY = startY; const points: Point[] = []; @@ -113,44 +142,82 @@ export class ImageProcessing { }; } - /** * Find the 4 corners of a point set that most closely resemble a rectangle */ static findCorners(points: Point[]): Point[] { if (points.length < 4) return []; - let topLeft = points[0]; - let topRight = points[0]; - let bottomRight = points[0]; - let bottomLeft = points[0]; - - let minSum = Infinity, maxSum = -Infinity; - let minDiff = Infinity, maxDiff = -Infinity; - + let minX = Infinity, maxX = -Infinity, minY = Infinity, maxY = -Infinity; for (const p of points) { - const sum = p.x + p.y; - const diff = p.x - p.y; - - if (sum < minSum) { minSum = sum; topLeft = p; } - if (sum > maxSum) { maxSum = sum; bottomRight = p; } - if (diff < minDiff) { minDiff = diff; bottomLeft = p; } - if (diff > maxDiff) { maxDiff = diff; topRight = p; } + if (p.x < minX) minX = p.x; + if (p.x > maxX) maxX = p.x; + if (p.y < minY) minY = p.y; + if (p.y > maxY) maxY = p.y; } - return [topLeft, topRight, bottomRight, bottomLeft]; + const targets = [ + { x: minX, y: minY }, + { x: maxX, y: minY }, + { x: maxX, y: maxY }, + { x: minX, y: maxY }, + ]; + + const corners: Point[] = []; + for (const target of targets) { + let closest = points[0]; + let minDist = Infinity; + for (const p of points) { + const dist = Math.sqrt((p.x - target.x) ** 2 + (p.y - target.y) ** 2); + if (dist < minDist) { + minDist = dist; + closest = p; + } + } + corners.push(closest); + } + + return corners; } - /** - * Performs a basic bilinear interpolation warp of a source image - * from 4 corners to a destination rectangle - */ - static warpPerspective( - sourceCanvas: HTMLCanvasElement, - srcCorners: Point[], - destWidth: number, - destHeight: number - ): HTMLCanvasElement { + static applyHysteresis(edges: Float32Array, width: number, height: number, lowThreshold: number, highThreshold: number): Float32Array { + const result = new Float32Array(width * height); + const strongEdges = []; + + for (let i = 0; i < edges.length; i++) { + if (edges[i] >= highThreshold) { + result[i] = edges[i]; + strongEdges.push(i); + } else if (edges[i] < lowThreshold) { + result[i] = 0; + } + } + + const stack = [...strongEdges]; + while (stack.length > 0) { + const idx = stack.pop()!; + const x = idx % width; + const y = Math.floor(idx / width); + + const neighbors = [ + [x + 1, y], [x - 1, y], [x, y + 1], [x, y - 1], + [x + 1, y + 1], [x - 1, y + 1], [x + 1, y - 1], [x - 1, y - 1] + ]; + + for (const [nx, ny] of neighbors) { + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + const nIdx = ny * width + nx; + if (result[nIdx] === 0 && edges[nIdx] >= lowThreshold) { + result[nIdx] = edges[nIdx]; + stack.push(nIdx); + } + } + } + } + return result; + } + + static warpPerspective(sourceCanvas: HTMLCanvasElement, srcCorners: Point[], destWidth: number, destHeight: number): HTMLCanvasElement { const destCanvas = document.createElement('canvas'); destCanvas.width = destWidth; destCanvas.height = destHeight; @@ -191,9 +258,6 @@ export class ImageProcessing { return destCanvas; } - /** - * Converts RGB to grayscale ( Luminance ) - */ static toGrayscale(imageData: ImageData): Uint8ClampedArray { const { data, width, height } = imageData; const gray = new Uint8ClampedArray(width * height); @@ -203,17 +267,14 @@ export class ImageProcessing { return gray; } - /** - * Overloaded Sobel that takes ImageData and returns edge map - */ static detectEdges(imageData: ImageData): Float32Array { const gray = this.toGrayscale(imageData); - // Create a fake imageData for the Sobel method since it expects 4-channel - const fakeData = new Uint8ClampedArray(gray.length * 4); + const grayWithChannels = new Uint8ClampedArray(gray.length * 4); for (let i = 0; i < gray.length; i++) { - fakeData[i * 4] = gray[i]; + grayWithChannels[i * 4] = gray[i]; } - return this.Sobel(fakeData, imageData.width, imageData.height); + const { magnitude, direction } = this.computeGradients(grayWithChannels, imageData.width, imageData.height); + const nmsEdges = this.nonMaximumSuppression(magnitude, direction, imageData.width, imageData.height); + return this.applyHysteresis(nmsEdges, imageData.width, imageData.height, 30, 70); } } -