Skip to main content

http_handle/
language.rs

1// SPDX-License-Identifier: AGPL-3.0-only
2// Copyright (c) 2026 Sebastien Rousseau
3
4//! Lightweight language detection with runtime-customizable patterns.
5
6use regex::Regex;
7
/// The set of programming languages this module can report.
///
/// [`Language::Unknown`] is the fallback variant for input that is not
/// attributed to any of the other languages.
///
/// # Examples
///
/// ```rust
/// use http_handle::language::Language;
/// assert_eq!(Language::Rust.as_str(), "rust");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// The Rust programming language.
    Rust,
    /// The Python programming language.
    Python,
    /// The JavaScript programming language.
    JavaScript,
    /// The Go programming language.
    Go,
    /// Fallback for unrecognized or unsupported input.
    Unknown,
}
33
34impl Language {
35    /// Returns a static string identifier for the language.
36    ///
37    /// # Examples
38    ///
39    /// ```rust
40    /// use http_handle::language::Language;
41    /// assert_eq!(Language::Go.as_str(), "go");
42    /// ```
43    ///
44    /// # Panics
45    ///
46    /// This function does not panic.
47    pub const fn as_str(self) -> &'static str {
48        match self {
49            Self::Rust => "rust",
50            Self::Python => "python",
51            Self::JavaScript => "javascript",
52            Self::Go => "go",
53            Self::Unknown => "unknown",
54        }
55    }
56}
57
58#[derive(Clone, Debug)]
59struct PatternRule {
60    language: Language,
61    pattern: Regex,
62}
63
64/// Runtime language detector with optional custom pattern rules.
65///
66/// # Examples
67///
68/// ```rust
69/// use http_handle::language::{Language, LanguageDetector};
70/// let detector = LanguageDetector::new();
71/// assert_eq!(detector.detect("fn main() {}"), Language::Rust);
72/// ```
73///
74/// # Panics
75///
76/// This type does not panic.
77#[derive(Clone, Debug)]
78pub struct LanguageDetector {
79    rules: Vec<PatternRule>,
80}
81
82impl Default for LanguageDetector {
83    fn default() -> Self {
84        Self::new()
85    }
86}
87
88impl LanguageDetector {
89    /// Creates a detector with built-in default patterns.
90    ///
91    /// # Examples
92    ///
93    /// ```rust
94    /// use http_handle::language::{Language, LanguageDetector};
95    /// let detector = LanguageDetector::new();
96    /// assert_eq!(detector.detect("def f(): pass"), Language::Python);
97    /// ```
98    ///
99    /// # Panics
100    ///
101    /// This function panics only if a built-in regex literal is invalid.
102    pub fn new() -> Self {
103        let defaults = [
104            (Language::Rust, r"\b(fn|let|impl|pub|crate)\b"),
105            (Language::Python, r"\b(def|import|lambda|async\s+def)\b"),
106            (
107                Language::JavaScript,
108                r"\b(function|const|let|=>|console\.log)\b",
109            ),
110            (Language::Go, r"\b(func|package|go\s+|defer)\b"),
111        ];
112
113        let rules = defaults
114            .iter()
115            .map(|(language, pattern)| PatternRule {
116                language: *language,
117                pattern: Regex::new(pattern)
118                    .expect("default language regex must compile"),
119            })
120            .collect();
121
122        Self { rules }
123    }
124
125    /// Adds a runtime custom pattern to detect a specific language.
126    ///
127    /// # Examples
128    ///
129    /// ```rust
130    /// use http_handle::language::{Language, LanguageDetector};
131    /// let detector = LanguageDetector::new()
132    ///     .with_custom_pattern(Language::Go, r"\\bpackage\\b")
133    ///     .expect("valid regex");
134    /// assert_eq!(detector.detect("package main"), Language::Go);
135    /// ```
136    ///
137    /// # Errors
138    ///
139    /// Returns an error when `pattern` is not a valid regular expression.
140    ///
141    /// # Panics
142    ///
143    /// This function does not panic.
144    pub fn with_custom_pattern(
145        mut self,
146        language: Language,
147        pattern: &str,
148    ) -> Result<Self, regex::Error> {
149        self.rules.push(PatternRule {
150            language,
151            pattern: Regex::new(pattern)?,
152        });
153        Ok(self)
154    }
155
156    /// Detects the first matching language for a text.
157    ///
158    /// # Examples
159    ///
160    /// ```rust
161    /// use http_handle::language::{Language, LanguageDetector};
162    /// let detector = LanguageDetector::new();
163    /// assert_eq!(detector.detect("const x = 1;"), Language::JavaScript);
164    /// ```
165    ///
166    /// # Panics
167    ///
168    /// This function does not panic.
169    pub fn detect(&self, input: &str) -> Language {
170        self.rules
171            .iter()
172            .find(|rule| rule.pattern.is_match(input))
173            .map_or(Language::Unknown, |rule| rule.language)
174    }
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Snippet shared by the tests that exercise the built-in Rust rule.
    const RUST_SNIPPET: &str = "fn main() { let x = 1; }";

    /// Built-in rules classify representative Rust and Python snippets.
    #[test]
    fn detects_default_languages() {
        let detector = LanguageDetector::new();
        let cases = [
            (RUST_SNIPPET, Language::Rust),
            ("def f(x): return x", Language::Python),
        ];

        for &(input, expected) in &cases {
            assert_eq!(detector.detect(input), expected);
        }
    }

    /// A custom rule decides the result when no built-in rule matches.
    #[test]
    fn supports_runtime_custom_pattern() {
        let extended = LanguageDetector::new()
            .with_custom_pattern(Language::Rust, r"\bcargo\s+build\b");
        let detector = extended.expect("pattern should compile");

        let detected = detector.detect("cargo build --release");
        assert_eq!(detected, Language::Rust);
    }

    /// Every variant maps to its documented identifier.
    #[test]
    fn language_as_str_is_stable() {
        let expected_names = [
            (Language::Rust, "rust"),
            (Language::Python, "python"),
            (Language::JavaScript, "javascript"),
            (Language::Go, "go"),
            (Language::Unknown, "unknown"),
        ];

        for &(language, name) in &expected_names {
            assert_eq!(language.as_str(), name);
        }
    }

    /// An invalid regular expression is reported as an error.
    #[test]
    fn custom_pattern_validation_errors() {
        assert!(LanguageDetector::new()
            .with_custom_pattern(Language::Go, r"[")
            .is_err());
    }

    /// Input matching no rule yields `Language::Unknown`.
    #[test]
    fn unknown_language_falls_back() {
        let detected =
            LanguageDetector::new().detect("just prose without code");
        assert_eq!(detected, Language::Unknown);
    }

    /// `Default` and `new` produce detectors that agree on the same input.
    #[test]
    fn default_matches_new() {
        let from_default = LanguageDetector::default().detect(RUST_SNIPPET);
        let from_new = LanguageDetector::new().detect(RUST_SNIPPET);
        assert_eq!(from_default, from_new);
    }
}