From 3592e5c7812bfddad932f176e257e8797d71a51b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 4 May 2026 12:51:49 +0200 Subject: [PATCH] ucm: add DefineRegex 'all' scheme for multiple pattern matches (Syntax 9) Extend DefineRegex to support two matching schemes: - "first" (default): matches pattern once, extracts capture groups - "all": matches pattern multiple times, extracts all matches The "all" scheme creates variables with naming pattern nameN for Nth match and nameN_M for Nth match's Mth capture group, enabling extraction of repeating patterns from strings. Updated documentation with detailed examples and comprehensive description of regex flags, schemes, and variable naming conventions. Signed-off-by: Jaroslav Kysela --- src/ucm/ucm_confdoc.h | 130 +++++++++++++++++++++++++++++++++++++++--- src/ucm/ucm_regex.c | 103 ++++++++++++++++++++++++--------- 2 files changed, 198 insertions(+), 35 deletions(-) diff --git a/src/ucm/ucm_confdoc.h b/src/ucm/ucm_confdoc.h index e5d8b82d..7f507644 100644 --- a/src/ucm/ucm_confdoc.h +++ b/src/ucm/ucm_confdoc.h @@ -738,19 +738,135 @@ Define { } ~~~ -The *DefineRegex* allows substring extraction like: +The *DefineRegex* allows substring extraction using regular expressions (POSIX extended regex). +It can match patterns in strings and extract matched substrings into UCM variables. 
+ +#### DefineRegex Structure + +~~~{.html} +DefineRegex.name { + String "text to match against" + Regex "regex_pattern" + Flags "e" + Scheme "first" +} +~~~ + +Field | Description +---------------------|--------------------- +String | The input string to match the regex pattern against +Regex | POSIX extended regular expression pattern +Flags | Optional regex flags (see below) +Scheme | Matching scheme: "first" (default) or "all" [**Syntax 9**] + +#### Regex Flags + +The Flags field is optional and accepts the following characters: + +Flag | Description +-------|--------------------- +e | Extended POSIX regex (REG_EXTENDED) - recommended; used by default when Flags is omitted +i | Case-insensitive matching (REG_ICASE) +s | Report only success/fail (REG_NOSUB) +n | Newline-sensitive matching (REG_NEWLINE) + +Multiple flags can be combined, e.g., "ei" for extended and case-insensitive. + +#### Matching Schemes + +**Scheme "first"** (default): Matches the pattern once and extracts capture groups + +The variables created are: +- `name` - the full matched string +- `name1` - first capture group (parentheses in regex) +- `name2` - second capture group +- `nameN` - Nth capture group + +Example with "first" scheme: + +~~~{.html} +DefineRegex.hwdev { + String "hw:2,0" + Regex "hw:([0-9]+),([0-9]+)" + Flags "e" + Scheme "first" +} +~~~ + +This creates variables: +- `hwdev` = "hw:2,0" (full match) +- `hwdev1` = "2" (first capture group - card number) +- `hwdev2` = "0" (second capture group - device number) + +**Scheme "all"** [**Syntax 9**]: Matches the pattern multiple times and extracts all matches + +The variables created are: +- `nameN` - Nth full match (N starts at 1) +- `nameN_1` - Nth match, first capture group +- `nameN_2` - Nth match, second capture group +- `nameN_M` - Nth match, Mth capture group + +Example with "all" scheme: + +~~~{.html} +DefineRegex.devices { + String "device1 device2 device3" + Regex "device([0-9]+)" + Flags "e" + Scheme "all" +} +~~~ + +This creates variables: +- 
`devices1` = "device1" (first full match) +- `devices1_1` = "1" (first match, capture group 1) +- `devices2` = "device2" (second full match) +- `devices2_1` = "2" (second match, capture group 1) +- `devices3` = "device3" (third full match) +- `devices3_1` = "3" (third match, capture group 1) + +#### Practical Examples + +Extract USB device vendor and product IDs: + +~~~{.html} +DefineRegex.usbids { + String "${sys:bus/usb/devices/1-1/uevent}" + Regex "PRODUCT=([0-9a-f]+)/([0-9a-f]+)" + Flags "e" + Scheme "first" +} +# Creates: usbids (full match), usbids1 (vendor), usbids2 (product) +~~~ + +Parse multiple key=value pairs: ~~~{.html} -DefineRegex.rval { - Regex "(hello)|(regex)" - String "hello, it's my regex" +DefineRegex.params { + String "rate=48000,channels=2,format=S16_LE" + Regex "([a-z]+)=([^,]+)" + Flags "e" + Scheme "all" } +# Creates: params1="rate=48000", params1_1="rate", params1_2="48000" +# params2="channels=2", params2_1="channels", params2_2="2" +# params3="format=S16_LE", params3_1="format", params3_2="S16_LE" ~~~ -The result will be stored to variables *rval1* as *hello* and *rval2* as *regex* (every matched -substrings are stored to a separate variable with the sequence number postfix. +Extract text components: + +~~~{.html} +DefineRegex.model { + String "USB Audio Device Model XYZ123" + Regex "([A-Z]+).*Model ([A-Z0-9]+)" + Flags "e" + Scheme "first" +} +# Creates: model (full match), model1="USB", model2="XYZ123" +~~~ -Variables can be substituted using the `${var:rval1}` reference for example. +Variables can be substituted using the `${var:name}` reference. 
For example, to use the extracted
+card number: `PlaybackPCM "hw:${var:hwdev1},0"`
 
 ### Macros
 
diff --git a/src/ucm/ucm_regex.c b/src/ucm/ucm_regex.c
index 8c257a0f..db5d9463 100644
--- a/src/ucm/ucm_regex.c
+++ b/src/ucm/ucm_regex.c
@@ -54,47 +54,107 @@ static char *extract_substring(const char *data, regmatch_t *match)
 }
 
+/*
+ * Match the compiled regex against data and store the results as variables.
+ *
+ * scheme_all == 0: only the first match is used; the full match is stored
+ * as <name> and the capture groups as <name>1, <name>2, ...
+ * scheme_all != 0: all successive matches are used; the Nth full match is
+ * stored as <name>N and its capture groups as <name>N_1, <name>N_2, ...
+ *
+ * Capture group extraction stops at the first group which did not
+ * participate in the match. Returns zero on success (also when nothing
+ * matched) or a negative error code.
+ */
 static int set_variables(snd_use_case_mgr_t *uc_mgr, const char *data,
-			 unsigned int match_size, regmatch_t *match,
-			 const char *name)
+			 regex_t *re, const char *name, int scheme_all)
 {
-	size_t name2_len = strlen(name) + 16;
-	char *name2 = alloca(name2_len);
+	size_t name_len = strlen(name) + 32;
+	char *var_name = alloca(name_len);
+	regmatch_t match[20];
 	char *s;
+	unsigned int match_idx = 1;
 	unsigned int i;
+	int eflags = 0;
 	int err;
+	const char *pos;
 
-	if (match[0].rm_so < 0 || match[0].rm_eo < 0)
-		return 0;
-	s = extract_substring(data, &match[0]);
-	if (s == NULL)
-		return -ENOMEM;
-	err = uc_mgr_set_variable(uc_mgr, name, s);
-	free(s);
-	if (err < 0)
-		return err;
-	for (i = 1; i < match_size; i++) {
-		if (match[i].rm_so < 0 || match[i].rm_eo < 0)
-			return 0;
-		s = extract_substring(data, &match[i]);
+	pos = data;
+	while (1) {
+		/* regexec() leaves match[] untouched for REG_NOSUB regexes */
+		for (i = 0; i < ARRAY_SIZE(match); i++)
+			match[i].rm_so = match[i].rm_eo = -1;
+
+		err = regexec(re, pos, ARRAY_SIZE(match), match, eflags);
+		if (err == REG_NOMATCH)
+			break;
+		if (err != 0)
+			return -EINVAL;
+
+		if (match[0].rm_so < 0 || match[0].rm_eo < 0)
+			break;
+
+		s = extract_substring(pos, &match[0]);
 		if (s == NULL)
 			return -ENOMEM;
-		snprintf(name2, name2_len, "%s%u", name, i);
-		err = uc_mgr_set_variable(uc_mgr, name2, s);
+		if (scheme_all) {
+			snprintf(var_name, name_len, "%s%u", name, match_idx);
+			err = uc_mgr_set_variable(uc_mgr, var_name, s);
+		} else {
+			err = uc_mgr_set_variable(uc_mgr, name, s);
+		}
 		free(s);
 		if (err < 0)
 			return err;
+
+		for (i = 1; i < ARRAY_SIZE(match); i++) {
+			if (match[i].rm_so < 0 || match[i].rm_eo < 0)
+				break;
+			s = extract_substring(pos, &match[i]);
+			if (s == NULL)
+				return -ENOMEM;
+			if (scheme_all)
+				snprintf(var_name, name_len, "%s%u_%u", name, match_idx, i);
+			else
+				snprintf(var_name, name_len, "%s%u", name, i);
+			err = uc_mgr_set_variable(uc_mgr, var_name, s);
+			free(s);
+			if (err < 0)
+				return err;
+		}
+
+		if (!scheme_all)
+			break;
+
+		pos += match[0].rm_eo;
+		/* an empty match consumes nothing - step over one character */
+		if (match[0].rm_eo == match[0].rm_so) {
+			if (*pos == '\0')
+				break;
+			pos++;
+		}
+		match_idx++;
+
+		if (*pos == '\0')
+			break;
+
+		/*
+		 * pos is no longer the start of the string, so '^' must not
+		 * match there unless the previous character ends a line.
+		 */
+		eflags = pos[-1] == '\n' ? 0 : REG_NOTBOL;
 	}
+
 	return 0;
 }
 
 int uc_mgr_define_regex(snd_use_case_mgr_t *uc_mgr, const char *name,
 			snd_config_t *eval)
 {
-	const char *string, *regex_string, *flags_string;
+	const char *string, *regex_string, *flags_string, *scheme_string;
 	char *s;
 	regex_t re;
 	int options = 0;
-	regmatch_t match[20];
+	int use_scheme_all = 0;
 	int err;
 
 	if (uc_mgr->conf_format < 3) {
@@ -119,6 +179,27 @@ int uc_mgr_define_regex(snd_use_case_mgr_t *uc_mgr, const char *name,
 		return -EINVAL;
 	}
 
+	err = get_string(eval, "Scheme", &scheme_string);
+	if (err == -ENOENT) {
+		use_scheme_all = 0;
+	} else if (err < 0) {
+		snd_error(UCM, "DefineRegex error (Scheme string)");
+		return -EINVAL;
+	} else {
+		if (strcmp(scheme_string, "first") == 0) {
+			use_scheme_all = 0;
+		} else if (strcmp(scheme_string, "all") == 0) {
+			if (uc_mgr->conf_format < 9) {
+				snd_error(UCM, "DefineRegex 'all' scheme is supported in v9+ syntax");
+				return -EINVAL;
+			}
+			use_scheme_all = 1;
+		} else {
+			snd_error(UCM, "DefineRegex error (unknown scheme '%s')", scheme_string);
+			return -EINVAL;
+		}
+	}
+
 	err = get_string(eval, "Flags", &flags_string);
 	if (err == -ENOENT) {
 		options = REG_EXTENDED;
@@ -164,13 +245,8 @@ int uc_mgr_define_regex(snd_use_case_mgr_t *uc_mgr, const char *name,
 		regfree(&re);
 		return err;
 	}
-	err = regexec(&re, s, ARRAY_SIZE(match), match, 0);
-	if (err < 0)
-		err = -errno;
-	else if (err == REG_NOMATCH)
-		err = 0;
-	else
-		err = set_variables(uc_mgr, s, ARRAY_SIZE(match), match, name);
+
+	err = set_variables(uc_mgr, s, &re, name, use_scheme_all);
 	free(s);
 	regfree(&re);
 	return err;
-- 
2.52.0