http_handle/language.rs
1// SPDX-License-Identifier: AGPL-3.0-only
2// Copyright (c) 2026 Sebastien Rousseau
3
4//! Lightweight language detection with runtime-customizable patterns.
5
6use regex::Regex;
7
/// The set of languages this module is able to report.
///
/// [`Language::Unknown`] is the catch-all variant for text that is not
/// attributed to any of the other languages.
///
/// # Examples
///
/// ```rust
/// use http_handle::language::Language;
/// assert_eq!(Language::Rust.as_str(), "rust");
/// ```
///
/// # Panics
///
/// This type does not panic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// Source text identified as Rust.
    Rust,
    /// Source text identified as Python.
    Python,
    /// Source text identified as JavaScript.
    JavaScript,
    /// Source text identified as Go.
    Go,
    /// Text that could not be attributed to a supported language.
    Unknown,
}
33
34impl Language {
35 /// Returns a static string identifier for the language.
36 ///
37 /// # Examples
38 ///
39 /// ```rust
40 /// use http_handle::language::Language;
41 /// assert_eq!(Language::Go.as_str(), "go");
42 /// ```
43 ///
44 /// # Panics
45 ///
46 /// This function does not panic.
47 pub const fn as_str(self) -> &'static str {
48 match self {
49 Self::Rust => "rust",
50 Self::Python => "python",
51 Self::JavaScript => "javascript",
52 Self::Go => "go",
53 Self::Unknown => "unknown",
54 }
55 }
56}
57
58#[derive(Clone, Debug)]
59struct PatternRule {
60 language: Language,
61 pattern: Regex,
62}
63
64/// Runtime language detector with optional custom pattern rules.
65///
66/// # Examples
67///
68/// ```rust
69/// use http_handle::language::{Language, LanguageDetector};
70/// let detector = LanguageDetector::new();
71/// assert_eq!(detector.detect("fn main() {}"), Language::Rust);
72/// ```
73///
74/// # Panics
75///
76/// This type does not panic.
77#[derive(Clone, Debug)]
78pub struct LanguageDetector {
79 rules: Vec<PatternRule>,
80}
81
82impl Default for LanguageDetector {
83 fn default() -> Self {
84 Self::new()
85 }
86}
87
88impl LanguageDetector {
89 /// Creates a detector with built-in default patterns.
90 ///
91 /// # Examples
92 ///
93 /// ```rust
94 /// use http_handle::language::{Language, LanguageDetector};
95 /// let detector = LanguageDetector::new();
96 /// assert_eq!(detector.detect("def f(): pass"), Language::Python);
97 /// ```
98 ///
99 /// # Panics
100 ///
101 /// This function panics only if a built-in regex literal is invalid.
102 pub fn new() -> Self {
103 let defaults = [
104 (Language::Rust, r"\b(fn|let|impl|pub|crate)\b"),
105 (Language::Python, r"\b(def|import|lambda|async\s+def)\b"),
106 (
107 Language::JavaScript,
108 r"\b(function|const|let|=>|console\.log)\b",
109 ),
110 (Language::Go, r"\b(func|package|go\s+|defer)\b"),
111 ];
112
113 let rules = defaults
114 .iter()
115 .map(|(language, pattern)| PatternRule {
116 language: *language,
117 pattern: Regex::new(pattern)
118 .expect("default language regex must compile"),
119 })
120 .collect();
121
122 Self { rules }
123 }
124
125 /// Adds a runtime custom pattern to detect a specific language.
126 ///
127 /// # Examples
128 ///
129 /// ```rust
130 /// use http_handle::language::{Language, LanguageDetector};
131 /// let detector = LanguageDetector::new()
132 /// .with_custom_pattern(Language::Go, r"\\bpackage\\b")
133 /// .expect("valid regex");
134 /// assert_eq!(detector.detect("package main"), Language::Go);
135 /// ```
136 ///
137 /// # Errors
138 ///
139 /// Returns an error when `pattern` is not a valid regular expression.
140 ///
141 /// # Panics
142 ///
143 /// This function does not panic.
144 pub fn with_custom_pattern(
145 mut self,
146 language: Language,
147 pattern: &str,
148 ) -> Result<Self, regex::Error> {
149 self.rules.push(PatternRule {
150 language,
151 pattern: Regex::new(pattern)?,
152 });
153 Ok(self)
154 }
155
156 /// Detects the first matching language for a text.
157 ///
158 /// # Examples
159 ///
160 /// ```rust
161 /// use http_handle::language::{Language, LanguageDetector};
162 /// let detector = LanguageDetector::new();
163 /// assert_eq!(detector.detect("const x = 1;"), Language::JavaScript);
164 /// ```
165 ///
166 /// # Panics
167 ///
168 /// This function does not panic.
169 pub fn detect(&self, input: &str) -> Language {
170 self.rules
171 .iter()
172 .find(|rule| rule.pattern.is_match(input))
173 .map_or(Language::Unknown, |rule| rule.language)
174 }
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Snippet used wherever a test needs text the Rust rule matches.
    const RUST_SNIPPET: &str = "fn main() { let x = 1; }";

    /// The built-in rules recognise Rust and Python snippets.
    #[test]
    fn detects_default_languages() {
        let detector = LanguageDetector::new();
        let cases = [
            (RUST_SNIPPET, Language::Rust),
            ("def f(x): return x", Language::Python),
        ];
        for (snippet, expected) in cases {
            assert_eq!(detector.detect(snippet), expected);
        }
    }

    /// A rule added at runtime is consulted for input no default rule matches.
    #[test]
    fn supports_runtime_custom_pattern() {
        let built = LanguageDetector::new()
            .with_custom_pattern(Language::Rust, r"\bcargo\s+build\b");
        let detector = built.expect("pattern should compile");

        let detected = detector.detect("cargo build --release");
        assert_eq!(detected, Language::Rust);
    }

    /// Every variant keeps its published string identifier.
    #[test]
    fn language_as_str_is_stable() {
        let expected_names = [
            (Language::Rust, "rust"),
            (Language::Python, "python"),
            (Language::JavaScript, "javascript"),
            (Language::Go, "go"),
            (Language::Unknown, "unknown"),
        ];
        for (language, name) in expected_names {
            assert_eq!(language.as_str(), name);
        }
    }

    /// An invalid regular expression is reported as an error.
    #[test]
    fn custom_pattern_validation_errors() {
        assert!(LanguageDetector::new()
            .with_custom_pattern(Language::Go, r"[")
            .is_err());
    }

    /// Text without any recognised keyword maps to `Unknown`.
    #[test]
    fn unknown_language_falls_back() {
        let detected =
            LanguageDetector::new().detect("just prose without code");
        assert_eq!(detected, Language::Unknown);
    }

    /// `Default::default` and `new` agree on the same input.
    #[test]
    fn default_matches_new() {
        let from_default = LanguageDetector::default().detect(RUST_SNIPPET);
        let from_new = LanguageDetector::new().detect(RUST_SNIPPET);
        assert_eq!(from_default, from_new);
    }
}
240}