Lojban
The Logical Language
Log in
Username:
Password:
I forgot my password |
CapsLock is on.
Log in
History: BPFK Section: PEG Morphology Algorithm
View page
Source of version: 137
(current)
{CODE()}
; This is a Parsing Expression Grammar for the morphology of Lojban.%%%
; See [http://www.pdos.lcs.mit.edu/~baford/packrat/] %%%
; %%%
; All rules have the form %%%
; %%%
; name <- peg-expression%%%
; %%%
; which means that the grammatical construct "name" is parsed using%%%
; "peg-expression". %%%
; %%%
; 1) Concatenation is expressed by juxtaposition with no operator symbol.%%%
; 2) / represents *ORDERED* alternation (choice). If the first%%%
; option succeeds, the others will never be checked.%%%
; 3) ? indicates that the element to the left is optional.%%%
; 4) * represents optional repetition of the construct to the left.%%%
; 5) + represents one-or-more repetition of the construct to the left.%%%
; 6) () serves to indicate the grouping of the other operators.%%%
; 7) & indicates that the element to the right must follow (but the%%%
; marked element itself does not absorb anything).%%%
; 8) ! indicates that the element to the right must not follow (the%%%
; marked element itself does not absorb anything).%%%
; 9) . represents any character.%%%
; 10) ' ' or " " represents a literal string.%%%
; 11) [] represents a character class. %%%
;%%%
; Repetitions grab as much as they can.%%%
;%%%
;%%%
; --- GRAMMAR ---%%%
; This grammar classifies words by their morphological class (cmene,%%%
; gismu, lujvo, fuhivla, cmavo, and non-lojban-word). %%%
; %%%
;The final section sorts cmavo into grammatical classes (A, BAI, BAhE, ..., ZOhU).%%%
;%%%
; mi'e ((xorxes))%%%
;-------------------------------------------------------------------%%%
; Top level. The input is an optional pause followed by any number of
; words, each optionally followed by a pause.
; Choice is ordered, so a word is tried as lojban-word before
; non-lojban-word, and within lojban-word as cmene, then cmavo, then brivla.
words <- pause? (word pause?)*
word <- lojban-word / non-lojban-word
lojban-word <- cmene / cmavo / brivla
brivla <- gismu / fuhivla / lujvo
;-------------------------------------------------------------------
; cmene. Both alternatives require a pause to follow; zifcme additionally
; requires a consonant immediately before that pause and may not start with h.
; jbocme must also satisfy zifcme (via the &zifcme lookahead).
cmene <- jbocme / zifcme
zifcme <- !h (nucleus / glide / h / consonant !pause / digit)* consonant &pause
jbocme <- &zifcme (any-syllable / digit)* &pause
; The five rules below are commented out, i.e. not part of the active grammar.
;cmene <- !h &consonant-final coda? (any-syllable / digit)* &pause
;consonant-final <- (non-space &non-space)* consonant &pause
;cmene <- !h cmene-syllable* &consonant coda? consonantal-syllable* onset &pause
;cmene-syllable <- !doi-la-lai-lahi coda? consonantal-syllable* onset nucleus / digit
;doi-la-lai-lahi <- (d o i / l a (h? i)?) !h !nucleus
;-------------------------------------------------------------------
; cmavo. Rejected up front if the text parses as a cmene or as a CVCy-lujvo;
; otherwise a cmavo-form that is followed by post-word.
; cmavo-form also accepts a run of y or a single digit.
cmavo <- !cmene !CVCy-lujvo cmavo-form &post-word
CVCy-lujvo <- CVC-rafsi y h? initial-rafsi* brivla-core / stressed-CVC-rafsi y short-final-rafsi
cmavo-form <- !h !cluster onset (nucleus h)* (!stressed nucleus / nucleus !cluster) / y+ / digit
;-------------------------------------------------------------------
; lujvo. Zero or more initial-rafsi followed by a brivla-core, tried only
; when the text is not a gismu, fuhivla or cmavo.
lujvo <- !gismu !fuhivla !cmavo initial-rafsi* brivla-core
brivla-core <- fuhivla / gismu / CVV-final-rafsi / stressed-initial-rafsi short-final-rafsi
stressed-initial-rafsi <- stressed-extended-rafsi / stressed-y-rafsi / stressed-y-less-rafsi
initial-rafsi <- extended-rafsi / y-rafsi / !any-extended-rafsi y-less-rafsi !any-extended-rafsi
any-extended-rafsi <- fuhivla / extended-rafsi / stressed-extended-rafsi
;-------------------------------------------------------------------
; fuhivla and the extended rafsi built from brivla-head / fuhivla-head.
; brivla-head excludes anything starting with a cmavo, a slinkuhi or h;
; fuhivla-head further excludes anything starting with a rafsi-string.
fuhivla <- fuhivla-head stressed-syllable consonantal-syllable* final-syllable
stressed-extended-rafsi <- stressed-brivla-rafsi / stressed-fuhivla-rafsi
extended-rafsi <- brivla-rafsi / fuhivla-rafsi
stressed-brivla-rafsi <- &unstressed-syllable brivla-head stressed-syllable h y
brivla-rafsi <- &(syllable consonantal-syllable* syllable) brivla-head h y h?
stressed-fuhivla-rafsi <- fuhivla-head stressed-syllable !h onset y
fuhivla-rafsi <- &unstressed-syllable fuhivla-head !h onset y h?
fuhivla-head <- !rafsi-string brivla-head
brivla-head <- !cmavo !slinkuhi !h &onset unstressed-syllable*
slinkuhi <- !rafsi-string consonant rafsi-string
rafsi-string <- y-less-rafsi* (gismu / CVV-final-rafsi / stressed-y-less-rafsi short-final-rafsi / y-rafsi / stressed-y-rafsi / stressed-y-less-rafsi? initial-pair y / hy-rafsi / stressed-hy-rafsi)
;-------------------------------------------------------------------
; gismu and the rafsi shapes. Each stressed-* rule mirrors the unstressed
; rule of the same name with stressed-vowel / stressed-diphthong in place of
; the unstressed ones. Word-final forms (gismu, CVV-final-rafsi,
; short-final-rafsi) all end with &post-word.
gismu <- (initial-pair stressed-vowel / consonant stressed-vowel consonant) &final-syllable consonant vowel &post-word
CVV-final-rafsi <- consonant stressed-vowel h &final-syllable vowel &post-word
short-final-rafsi <- &final-syllable (consonant diphthong / initial-pair vowel) &post-word
stressed-hy-rafsi <- (long-rafsi stressed-vowel / stressed-CCV-rafsi / stressed-CVV-rafsi) h y
stressed-y-rafsi <- (stressed-long-rafsi / stressed-CVC-rafsi) y
stressed-y-less-rafsi <- stressed-CVC-rafsi !y / stressed-CCV-rafsi / stressed-CVV-rafsi
stressed-long-rafsi <- initial-pair stressed-vowel consonant / consonant stressed-vowel consonant consonant
stressed-CVC-rafsi <- consonant stressed-vowel consonant
stressed-CCV-rafsi <- initial-pair stressed-vowel
stressed-CVV-rafsi <- consonant (unstressed-vowel h stressed-vowel / stressed-diphthong) r-hyphen?
hy-rafsi <- (long-rafsi vowel / CCV-rafsi / CVV-rafsi) h y h?
y-rafsi <- (long-rafsi / CVC-rafsi) y h?
y-less-rafsi <- !y-rafsi !stressed-y-rafsi !hy-rafsi !stressed-hy-rafsi (CVC-rafsi / CCV-rafsi / CVV-rafsi) !h
long-rafsi <- initial-pair unstressed-vowel consonant / consonant unstressed-vowel consonant consonant
CVC-rafsi <- consonant unstressed-vowel consonant
CCV-rafsi <- initial-pair unstressed-vowel
CVV-rafsi <- consonant (unstressed-vowel h unstressed-vowel / unstressed-diphthong) r-hyphen?
r-hyphen <- r &consonant / n &r
;-------------------------------------------------------------------
; Syllables and stress. Stress is recognised in two ways:
;   stressed - an upper-case vowel [AEIOU] directly after the onset;
;   stress   - lookahead for optional consonants, h and y, then one syllable
;              and a pause.
; The stressed-* rules accept either; the unstressed-* rules require neither.
final-syllable <- onset !y !stressed nucleus !cmene &post-word
stressed-syllable <- &stressed syllable / syllable &stress
stressed-diphthong <- &stressed diphthong / diphthong &stress
stressed-vowel <- &stressed vowel / vowel &stress
unstressed-syllable <- !stressed syllable !stress / consonantal-syllable
unstressed-diphthong <- !stressed diphthong !stress
unstressed-vowel <- !stressed vowel !stress
stress <- consonant* h? y? syllable pause
stressed <- onset comma* [AEIOU]
any-syllable <- onset nucleus coda? / consonantal-syllable
syllable <- onset !y nucleus coda?
consonantal-syllable <- consonant &syllabic coda
coda <- !any-syllable consonant &any-syllable / syllabic? consonant? &pause
onset <- h / glide / initial
nucleus <- vowel / diphthong / y !nucleus
;-----------------------------------------------------------------
; Vowel letters. Each letter rule accepts either case and may be preceded by
; any number of commas. glide is i or u directly before a nucleus.
glide <- (i / u) &nucleus
diphthong <- (a i !i / a u !u / e i !i / o i !i) !nucleus
vowel <- (a / e / i / o / u) !nucleus
a <- comma* [aA]
e <- comma* [eE]
i <- comma* [iI]
o <- comma* [oO]
u <- comma* [uU]
y <- comma* [yY]
;-------------------------------------------------------------------
; Consonants and clusters. Every consonant letter rule carries negative
; lookaheads: none may be followed by h, a glide, or the same consonant;
; voiced b d g v j z may not be followed by an unvoiced consonant and
; unvoiced s c x k f p t may not be followed by a voiced one; a few specific
; pairs are also excluded (m z, n + affricate, j/z, c/s, c x, x c, x k, k x).
cluster <- consonant consonant+
initial-pair <- &initial consonant consonant !consonant
initial <- (affricate / sibilant? other? liquid?) !consonant !glide
affricate <- t c / t s / d j / d z
liquid <- l / r
other <- p / t !l / k / f / x / b / d !l / g / v / m / n !liquid
sibilant <- c / s !x / (j / z) !n !liquid
consonant <- voiced / unvoiced / syllabic
syllabic <- l / m / n / r
voiced <- b / d / g / j / v / z
unvoiced <- c / f / k / p / s / t / x
l <- comma* [lL] !h !glide !l
m <- comma* [mM] !h !glide !m !z
n <- comma* [nN] !h !glide !n !affricate
r <- comma* [rR] !h !glide !r
b <- comma* [bB] !h !glide !b !unvoiced
d <- comma* [dD] !h !glide !d !unvoiced
g <- comma* [gG] !h !glide !g !unvoiced
v <- comma* [vV] !h !glide !v !unvoiced
j <- comma* [jJ] !h !glide !j !z !unvoiced
z <- comma* [zZ] !h !glide !z !j !unvoiced
s <- comma* [sS] !h !glide !s !c !voiced
c <- comma* [cC] !h !glide !c !s !x !voiced
x <- comma* [xX] !h !glide !x !c !k !voiced
k <- comma* [kK] !h !glide !k !x !voiced
f <- comma* [fF] !h !glide !f !voiced
p <- comma* [pP] !h !glide !p !voiced
t <- comma* [tT] !h !glide !t !voiced
; h matches either an apostrophe or the letter h, and only before a nucleus.
h <- comma* ['h] &nucleus
;-------------------------------------------------------------------
; Digits, word boundaries and whitespace.
; post-word succeeds at a pause, or when another lojban-word starts here
; without a leading nucleus. pause is one or more space-char (after optional
; commas) or end of input. space-char includes . ? ! as well as tab, newline,
; carriage return and U+0020.
digit <- comma* [0123456789] !h !nucleus
post-word <- pause / !nucleus lojban-word
pause <- comma* space-char+ / EOF
EOF <- comma* !.
comma <- [,]
; NOTE(review): NORATS is not defined as a rule anywhere in this grammar, and
; the original line breaks are lost, so it is unclear whether each NORATS
; annotates the rule before it or the rule after it. Presumably a marker for
; an external tool - TODO confirm. Token order is preserved exactly.
non-lojban-word <- !lojban-word non-space+
NORATS
non-space <- !space-char .
NORATS
space-char <- [.\t\n\r?!\u0020]
;-------------------------------------------------------------------
; These rules refer to the cmavo classes Y and BU defined in the section below.
spaces <- !Y initial-spaces
initial-spaces <- (comma* space-char / !ybu Y)+ EOF? / EOF
ybu <- Y space-char* BU
;-------------------------------------------------------------------
; cmavo grammatical classes. Every rule has the shape
;     CLASS <- &cmavo ( spelling / spelling / ... ) &post-word
; (BY additionally accepts ybu first; PA additionally accepts digit).
; In the spellings, h is the h rule above (apostrophe or letter h).
; Order inside each list matters: once an alternative of the group matches,
; the others are never checked, so where one spelling is a prefix of another
; the longer one is listed first (e.g. "f a i" before "f a", "g e h i"
; before "g e"). Keep that ordering when editing these lists.
A <- &cmavo ( a / e / j i / o / u ) &post-word
BAI <- &cmavo ( d u h o / s i h u / z a u / k i h i / d u h i / c u h u / t u h i / t i h u / d i h o / j i h u / r i h a / n i h i / m u h i / k i h u / v a h u / k o i / c a h i / t a h i / p u h e / j a h i / k a i / b a i / f i h e / d e h i / c i h o / m a u / m u h u / r i h i / r a h i / k a h a / p a h u / p a h a / l e h a / k u h u / t a i / b a u / m a h i / c i h e / f a u / p o h i / c a u / m a h e / c i h u / r a h a / p u h a / l i h e / l a h u / b a h i / k a h i / s a u / f a h e / b e h i / t i h i / j a h e / g a h a / v a h o / j i h o / m e h a / d o h e / j i h e / p i h o / g a u / z u h e / m e h e / r a i ) &post-word
BAhE <- &cmavo ( b a h e / z a h e ) &post-word
BE <- &cmavo ( b e ) &post-word
BEI <- &cmavo ( b e i ) &post-word
BEhO <- &cmavo ( b e h o ) &post-word
BIhE <- &cmavo ( b i h e ) &post-word
BIhI <- &cmavo ( m i h i / b i h o / b i h i ) &post-word
BO <- &cmavo ( b o ) &post-word
BOI <- &cmavo ( b o i ) &post-word
BU <- &cmavo ( b u ) &post-word
BY <- ybu / &cmavo ( j o h o / r u h o / g e h o / j e h o / l o h a / n a h a / s e h e / t o h a / g a h e / y h y / b y / c y / d y / f y / g y / j y / k y / l y / m y / n y / p y / r y / s y / t y / v y / x y / z y ) &post-word
CAhA <- &cmavo ( c a h a / p u h i / n u h o / k a h e ) &post-word
CAI <- &cmavo ( p e i / c a i / c u h i / s a i / r u h e ) &post-word
CEI <- &cmavo ( c e i ) &post-word
CEhE <- &cmavo ( c e h e ) &post-word
CO <- &cmavo ( c o ) &post-word
COI <- &cmavo ( j u h i / c o i / f i h i / t a h a / m u h o / f e h o / c o h o / p e h u / k e h o / n u h e / r e h i / b e h e / j e h e / m i h e / k i h e / v i h o ) &post-word
CU <- &cmavo ( c u ) &post-word
CUhE <- &cmavo ( c u h e / n a u ) &post-word
DAhO <- &cmavo ( d a h o ) &post-word
DOI <- &cmavo ( d o i ) &post-word
DOhU <- &cmavo ( d o h u ) &post-word
FA <- &cmavo ( f a i / f a / f e / f o / f u / f i h a / f i ) &post-word
FAhA <- &cmavo ( d u h a / b e h a / n e h u / v u h a / g a h u / t i h a / n i h a / c a h u / z u h a / r i h u / r u h u / r e h o / t e h e / b u h u / n e h a / p a h o / n e h i / t o h o / z o h i / z e h o / z o h a / f a h a ) &post-word
FAhO <- &cmavo ( f a h o ) &post-word
FEhE <- &cmavo ( f e h e ) &post-word
FEhU <- &cmavo ( f e h u ) &post-word
FIhO <- &cmavo ( f i h o ) &post-word
FOI <- &cmavo ( f o i ) &post-word
FUhA <- &cmavo ( f u h a ) &post-word
FUhE <- &cmavo ( f u h e ) &post-word
FUhO <- &cmavo ( f u h o ) &post-word
GA <- &cmavo ( g e h i / g e / g o / g a / g u ) &post-word
GAhO <- &cmavo ( k e h i / g a h o ) &post-word
GEhU <- &cmavo ( g e h u ) &post-word
GI <- &cmavo ( g i ) &post-word
GIhA <- &cmavo ( g i h e / g i h i / g i h o / g i h a / g i h u ) &post-word
GOI <- &cmavo ( n o h u / n e / g o i / p o h u / p e / p o h e / p o ) &post-word
GOhA <- &cmavo ( m o / n e i / g o h u / g o h o / g o h i / n o h a / g o h e / g o h a / d u / b u h a / b u h e / b u h i / c o h e ) &post-word
GUhA <- &cmavo ( g u h e / g u h i / g u h o / g u h a / g u h u ) &post-word
I <- &cmavo ( i ) &post-word
JA <- &cmavo ( j e h i / j e / j o / j a / j u ) &post-word
JAI <- &cmavo ( j a i ) &post-word
JOhI <- &cmavo ( j o h i ) &post-word
JOI <- &cmavo ( f a h u / p i h u / j o i / c e h o / c e / j o h u / k u h a / j o h e / j u h e ) &post-word
KE <- &cmavo ( k e ) &post-word
KEhE <- &cmavo ( k e h e ) &post-word
KEI <- &cmavo ( k e i ) &post-word
KI <- &cmavo ( k i ) &post-word
KOhA <- &cmavo ( d a h u / d a h e / d i h u / d i h e / d e h u / d e h e / d e i / d o h i / m i h o / m a h a / m i h a / d o h o / k o h a / f o h u / k o h e / k o h i / k o h o / k o h u / f o h a / f o h e / f o h i / f o h o / v o h a / v o h e / v o h i / v o h o / v o h u / r u / r i / r a / t a / t u / t i / z i h o / k e h a / m a / z u h i / z o h e / c e h u / d a / d e / d i / k o / m i / d o ) &post-word
KU <- &cmavo ( k u ) &post-word
KUhE <- &cmavo ( k u h e ) &post-word
KUhO <- &cmavo ( k u h o ) &post-word
LA <- &cmavo ( l a i / l a h i / l a ) &post-word
LAU <- &cmavo ( c e h a / l a u / z a i / t a u ) &post-word
LAhE <- &cmavo ( t u h a / l u h a / l u h o / l a h e / v u h i / l u h i / l u h e ) &post-word
LE <- &cmavo ( l e i / l o i / l e h i / l o h i / l e h e / l o h e / l o / l e ) &post-word
LEhU <- &cmavo ( l e h u ) &post-word
LI <- &cmavo ( m e h o / l i ) &post-word
LIhU <- &cmavo ( l i h u ) &post-word
LOhO <- &cmavo ( l o h o ) &post-word
LOhU <- &cmavo ( l o h u ) &post-word
LU <- &cmavo ( l u ) &post-word
LUhU <- &cmavo ( l u h u ) &post-word
MAhO <- &cmavo ( m a h o ) &post-word
MAI <- &cmavo ( m o h o / m a i ) &post-word
ME <- &cmavo ( m e ) &post-word
MEhU <- &cmavo ( m e h u ) &post-word
MOhE <- &cmavo ( m o h e ) &post-word
MOhI <- &cmavo ( m o h i ) &post-word
MOI <- &cmavo ( m e i / m o i / s i h e / c u h o / v a h e ) &post-word
NA <- &cmavo ( j a h a / n a ) &post-word
NAI <- &cmavo ( n a i ) &post-word
NAhE <- &cmavo ( t o h e / j e h a / n a h e / n o h e ) &post-word
NAhU <- &cmavo ( n a h u ) &post-word
NIhE <- &cmavo ( n i h e ) &post-word
NIhO <- &cmavo ( n i h o / n o h i ) &post-word
NOI <- &cmavo ( v o i / n o i / p o i ) &post-word
NU <- &cmavo ( n i / d u h u / s i h o / n u / l i h i / k a / j e i / s u h u / z u h o / m u h e / p u h u / z a h i ) &post-word
NUhA <- &cmavo ( n u h a ) &post-word
NUhI <- &cmavo ( n u h i ) &post-word
NUhU <- &cmavo ( n u h u ) &post-word
PA <- &cmavo ( d a u / f e i / g a i / j a u / r e i / v a i / p i h e / p i / f i h u / z a h u / m e h i / n i h u / k i h o / c e h i / m a h u / r a h e / d a h a / s o h a / j i h i / s u h o / s u h e / r o / r a u / s o h u / s o h i / s o h e / s o h o / m o h a / d u h e / t e h o / k a h o / c i h i / t u h o / x o / p a i / n o h o / n o / p a / r e / c i / v o / m u / x a / z e / b i / s o / digit ) &post-word
PEhE <- &cmavo ( p e h e ) &post-word
PEhO <- &cmavo ( p e h o ) &post-word
PU <- &cmavo ( b a / p u / c a ) &post-word
RAhO <- &cmavo ( r a h o ) &post-word
ROI <- &cmavo ( r e h u / r o i ) &post-word
SA <- &cmavo ( s a ) &post-word
SE <- &cmavo ( s e / t e / v e / x e ) &post-word
SEI <- &cmavo ( s e i / t i h o ) &post-word
SEhU <- &cmavo ( s e h u ) &post-word
SI <- &cmavo ( s i ) &post-word
SOI <- &cmavo ( s o i ) &post-word
SU <- &cmavo ( s u ) &post-word
TAhE <- &cmavo ( r u h i / t a h e / d i h i / n a h o ) &post-word
TEhU <- &cmavo ( t e h u ) &post-word
TEI <- &cmavo ( t e i ) &post-word
TO <- &cmavo ( t o h i / t o ) &post-word
TOI <- &cmavo ( t o i ) &post-word
TUhE <- &cmavo ( t u h e ) &post-word
TUhU <- &cmavo ( t u h u ) &post-word
UI <- &cmavo ( i h a / i e / a h e / u h i / i h o / i h e / a h a / i a / o h i / o h e / e h e / o i / u o / e h i / u h o / a u / u a / a h i / i h u / i i / u h a / u i / a h o / a i / a h u / i u / e i / o h o / e h a / u u / o h a / o h u / u h u / e h o / i o / e h u / u e / i h i / u h e / b a h a / j a h o / c a h e / s u h a / t i h e / k a h u / s e h o / z a h a / p e h i / r u h a / j u h a / t a h o / r a h u / l i h a / b a h u / m u h a / d o h a / t o h u / v a h i / p a h e / z u h u / s a h e / l a h a / k e h u / s a h u / d a h i / j e h u / s a h a / k a u / t a h u / n a h i / j o h a / b i h u / l i h o / p a u / m i h u / k u h i / j i h a / s i h a / p o h o / p e h a / r o h i / r o h e / r o h o / r o h u / r o h a / r e h e / l e h o / j u h o / f u h i / d a i / g a h i / z o h o / b e h u / r i h e / s e h i / s e h a / v u h e / k i h a / x u / g e h e / b u h o ) &post-word
VA <- &cmavo ( v i / v a / v u ) &post-word
VAU <- &cmavo ( v a u ) &post-word
VEI <- &cmavo ( v e i ) &post-word
VEhO <- &cmavo ( v e h o ) &post-word
VUhU <- &cmavo ( g e h a / f u h u / p i h i / f e h i / v u h u / s u h i / j u h u / g e i / p a h i / f a h i / t e h a / c u h a / v a h a / n e h o / d e h o / f e h a / s a h o / r e h a / r i h o / s a h i / p i h a / s i h i ) &post-word
VEhA <- &cmavo ( v e h u / v e h a / v e h i / v e h e ) &post-word
VIhA <- &cmavo ( v i h i / v i h a / v i h u / v i h e ) &post-word
VUhO <- &cmavo ( v u h o ) &post-word
XI <- &cmavo ( x i ) &post-word
Y <- &cmavo ( y+ ) &post-word
ZAhO <- &cmavo ( c o h i / p u h o / c o h u / m o h u / c a h o / c o h a / d e h a / b a h o / d i h a / z a h o ) &post-word
ZEhA <- &cmavo ( z e h u / z e h a / z e h i / z e h e ) &post-word
ZEI <- &cmavo ( z e i ) &post-word
ZI <- &cmavo ( z u / z a / z i ) &post-word
ZIhE <- &cmavo ( z i h e ) &post-word
ZO <- &cmavo ( z o ) &post-word
ZOI <- &cmavo ( z o i / l a h o ) &post-word
ZOhU <- &cmavo ( z o h u ) &post-word
{CODE}
About
Introduction
What Others Say
FAQ
Learning
Books
Vocabulary
Lojbanic Software
Community
Web/Email Forums
IRC Chat
Links
News
Dictionary
Swag
Multimedia
Lojbanic Texts
Audio
Wiki
Recent Changes
Popular Pages
How To Edit
The LLG
Official Projects
Publications
Donate!
Contact Us
Search Lojban Resources